~ubuntu-branches/ubuntu/lucid/ffmpeg/lucid-security

« back to all changes in this revision

Viewing changes to libavcodec/arm/h264dsp_neon.S

  • Committer: Bazaar Package Importer
  • Author(s): Reinhard Tartler
  • Date: 2009-03-13 09:18:28 UTC
  • mfrom: (1.1.8 upstream)
  • Revision ID: james.westby@ubuntu.com-20090313091828-n4ktby5eca487uhv
Tags: 3:0.svn20090303-1ubuntu1+unstripped1
merge from ubuntu.jaunty branch

Show diffs side-by-side

added

removed

Lines of Context:
1539
1539
 
1540
1540
@ Weighted prediction
1541
1541
 
1542
 
        .macro  weight_16 mac
 
1542
        .macro  weight_16 add
1543
1543
        vdup.8          d0,  r3
1544
 
        vmov            q2,  q8
1545
 
        vmov            q3,  q8
1546
1544
1:      subs            ip,  ip,  #2
1547
1545
        vld1.8          {d20-d21},[r0,:128], r1
1548
 
        \mac            q2,  d0,  d20
 
1546
        vmull.u8        q2,  d0,  d20
1549
1547
        pld             [r0]
1550
 
        \mac            q3,  d0,  d21
1551
 
        vmov            q12, q8
 
1548
        vmull.u8        q3,  d0,  d21
1552
1549
        vld1.8          {d28-d29},[r0,:128], r1
1553
 
        vmov            q13, q8
1554
 
        \mac            q12, d0,  d28
 
1550
        vmull.u8        q12, d0,  d28
1555
1551
        pld             [r0]
1556
 
        \mac            q13, d0,  d29
1557
 
        vshl.s16        q2,  q2,  q9
1558
 
        vshl.s16        q3,  q3,  q9
 
1552
        vmull.u8        q13, d0,  d29
 
1553
        \add            q2,  q8,  q2
 
1554
        vrshl.s16       q2,  q2,  q9
 
1555
        \add            q3,  q8,  q3
 
1556
        vrshl.s16       q3,  q3,  q9
1559
1557
        vqmovun.s16     d4,  q2
1560
1558
        vqmovun.s16     d5,  q3
1561
 
        vshl.s16        q12, q12, q9
1562
 
        vshl.s16        q13, q13, q9
 
1559
        \add            q12, q8,  q12
 
1560
        vrshl.s16       q12, q12, q9
 
1561
        \add            q13, q8,  q13
 
1562
        vrshl.s16       q13, q13, q9
1563
1563
        vqmovun.s16     d24, q12
1564
1564
        vqmovun.s16     d25, q13
1565
 
        vmov            q3,  q8
1566
1565
        vst1.8          {d4- d5}, [r4,:128], r1
1567
 
        vmov            q2,  q8
1568
1566
        vst1.8          {d24-d25},[r4,:128], r1
1569
1567
        bne             1b
1570
1568
        pop             {r4, pc}
1571
1569
        .endm
1572
1570
 
1573
 
        .macro  weight_8 mac
 
1571
        .macro  weight_8 add
1574
1572
        vdup.8          d0,  r3
1575
 
        vmov            q1,  q8
1576
 
        vmov            q10, q8
1577
1573
1:      subs            ip,  ip,  #2
1578
1574
        vld1.8          {d4},[r0,:64], r1
1579
 
        \mac            q1,  d0,  d4
 
1575
        vmull.u8        q1,  d0,  d4
1580
1576
        pld             [r0]
1581
1577
        vld1.8          {d6},[r0,:64], r1
1582
 
        \mac            q10, d0,  d6
 
1578
        vmull.u8        q10, d0,  d6
 
1579
        \add            q1,  q8,  q1
1583
1580
        pld             [r0]
1584
 
        vshl.s16        q1,  q1,  q9
 
1581
        vrshl.s16       q1,  q1,  q9
1585
1582
        vqmovun.s16     d2,  q1
1586
 
        vshl.s16        q10, q10, q9
 
1583
        \add            q10, q8,  q10
 
1584
        vrshl.s16       q10, q10, q9
1587
1585
        vqmovun.s16     d4,  q10
1588
 
        vmov            q10, q8
1589
1586
        vst1.8          {d2},[r4,:64], r1
1590
 
        vmov            q1,  q8
1591
1587
        vst1.8          {d4},[r4,:64], r1
1592
1588
        bne             1b
1593
1589
        pop             {r4, pc}
1594
1590
        .endm
1595
1591
 
1596
 
        .macro  weight_4 mac
 
1592
        .macro  weight_4 add
1597
1593
        vdup.8          d0,  r3
1598
1594
        vmov            q1,  q8
1599
1595
        vmov            q10, q8
1600
1596
1:      subs            ip,  ip,  #4
1601
1597
        vld1.32         {d4[0]},[r0,:32], r1
1602
1598
        vld1.32         {d4[1]},[r0,:32], r1
1603
 
        \mac            q1,  d0,  d4
 
1599
        vmull.u8        q1,  d0,  d4
1604
1600
        pld             [r0]
1605
1601
        blt             2f
1606
1602
        vld1.32         {d6[0]},[r0,:32], r1
1607
1603
        vld1.32         {d6[1]},[r0,:32], r1
1608
 
        \mac            q10, d0,  d6
 
1604
        vmull.u8        q10, d0,  d6
1609
1605
        pld             [r0]
1610
 
        vshl.s16        q1,  q1,  q9
 
1606
        \add            q1,  q8,  q1
 
1607
        vrshl.s16       q1,  q1,  q9
1611
1608
        vqmovun.s16     d2,  q1
1612
 
        vshl.s16        q10, q10, q9
 
1609
        \add            q10, q8,  q10
 
1610
        vrshl.s16       q10, q10, q9
1613
1611
        vqmovun.s16     d4,  q10
1614
1612
        vmov            q10, q8
1615
1613
        vst1.32         {d2[0]},[r4,:32], r1
1619
1617
        vst1.32         {d4[1]},[r4,:32], r1
1620
1618
        bne             1b
1621
1619
        pop             {r4, pc}
1622
 
2:      vshl.s16        q1,  q1,  q9
 
1620
2:      \add            q1,  q8,  q1
 
1621
        vrshl.s16       q1,  q1,  q9
1623
1622
        vqmovun.s16     d2,  q1
1624
1623
        vst1.32         {d2[0]},[r4,:32], r1
1625
1624
        vst1.32         {d2[1]},[r4,:32], r1
1630
1629
function weight_h264_pixels_\w\()_neon
1631
1630
        push            {r4, lr}
1632
1631
        ldr             r4,  [sp, #8]
1633
 
        vdup.16         q9,  r2
1634
 
        mov             lr,  #1
 
1632
        cmp             r2,  #1
1635
1633
        lsl             r4,  r4,  r2
1636
 
        subs            r2,  r2,  #1
1637
 
        vneg.s16        q9,  q9
1638
 
        addge           r4,  r4,  lr,  lsl r2
1639
 
        cmp             r3,  #0
1640
1634
        vdup.16         q8,  r4
1641
1635
        mov             r4,  r0
1642
 
        blt             10f
1643
 
        weight_\w       vmlal.u8
1644
 
10:     rsb             r3,  r3,  #0
1645
 
        weight_\w       vmlsl.u8
 
1636
        ble             20f
 
1637
        rsb             lr,  r2,  #1
 
1638
        vdup.16         q9,  lr
 
1639
        cmp             r3,  #0
 
1640
        blt             10f
 
1641
        weight_\w       vhadd.s16
 
1642
10:     rsb             r3,  r3,  #0
 
1643
        weight_\w       vhsub.s16
 
1644
20:     rsb             lr,  r2,  #0
 
1645
        vdup.16         q9,  lr
 
1646
        cmp             r3,  #0
 
1647
        blt             10f
 
1648
        weight_\w       vadd.s16
 
1649
10:     rsb             r3,  r3,  #0
 
1650
        weight_\w       vsub.s16
1646
1651
        .endfunc
1647
1652
        .endm
1648
1653