~ubuntu-branches/ubuntu/lucid/ffmpeg/lucid-updates

« back to all changes in this revision

Viewing changes to libavcodec/arm/dsputil_neon_s.S

  • Committer: Bazaar Package Importer
  • Author(s): Reinhard Tartler
  • Date: 2009-02-05 21:45:05 UTC
  • mfrom: (1.1.7 upstream)
  • Revision ID: james.westby@ubuntu.com-20090205214505-fvn0jkiv3lrkaaq4
Tags: 3:0.svn20090204-2ubuntu1+unstripped1
rebuild using a clean, uncrippled ffmpeg tarball

Show diffs side-by-side

added added

removed removed

Lines of Context:
609
609
        vcvt.s32.f32    q1,  q1,  #16
610
610
        b               6b
611
611
        .endfunc
 
612
 
 
613
@ ff_vector_fmul_neon: element-wise multiply of two float32 arrays, in place.
@
@ Inputs as used by the code below:
@   r0 = first operand; also the destination (results are stored through r3,
@        which starts as a copy of r0)
@   r1 = second operand
@   r2 = element count
@ NOTE(review): presumably the C prototype is (float *dst, const float *src,
@ int len) -- confirm against the C declaration.
@
@ Every vector access carries the :128 alignment hint, so both pointers must
@ be 16-byte aligned.  The count must be a non-zero multiple of 8: the first
@ 8 elements are loaded unconditionally, the main loop handles 16 per pass
@ and the only tail handled is exactly 8 elements.
@
@ The routine is software pipelined: products wait in q8/q9 (and q10/q11
@ inside the loop) and are stored one step late, after the next inputs have
@ been loaded.  The store pointer r3 therefore always trails the load
@ pointer r0, which is what makes the in-place update safe.
function ff_vector_fmul_neon, export=1
 
614
        mov             r3,  r0                 @ r3 = store pointer, trails r0
 
615
        subs            r2,  r2,  #8            @ count the 8 elements loaded next; Z set if that was all
 
616
        vld1.64         {d0-d3},  [r0,:128]!    @ q0,q1 = 8 floats from r0
 
617
        vld1.64         {d4-d7},  [r1,:128]!    @ q2,q3 = 8 floats from r1
 
618
        vmul.f32        q8,  q0,  q2            @ q8 = products 0-3 (store pending)
 
619
        vmul.f32        q9,  q1,  q3            @ q9 = products 4-7 (store pending)
 
620
        beq             3f                      @ count was exactly 8: just flush q8/q9
 
621
        bics            ip,  r2,  #15           @ ip = remaining count rounded down to a multiple of 16
 
622
        beq             2f                      @ fewer than 16 remain: take the 8-element tail
 
623
1:      subs            ip,  ip,  #16           @ main loop, 16 elements per pass; flags used by bne below
 
624
        vld1.64         {d0-d1},  [r0,:128]!    @ next 4 floats from r0
 
625
        vld1.64         {d4-d5},  [r1,:128]!    @ next 4 floats from r1
 
626
        vmul.f32        q10, q0,  q2            @ q10 = first 4 products of this pass
 
627
        vld1.64         {d2-d3},  [r0,:128]!
 
628
        vld1.64         {d6-d7},  [r1,:128]!
 
629
        vmul.f32        q11, q1,  q3            @ q11 = next 4 products
 
630
        vst1.64         {d16-d19},[r3,:128]!    @ flush the 8 products pending in q8/q9
 
631
        vld1.64         {d0-d1},  [r0,:128]!
 
632
        vld1.64         {d4-d5},  [r1,:128]!
 
633
        vmul.f32        q8,  q0,  q2            @ refill q8 (pending for the next flush)
 
634
        vld1.64         {d2-d3},  [r0,:128]!
 
635
        vld1.64         {d6-d7},  [r1,:128]!
 
636
        vmul.f32        q9,  q1,  q3            @ refill q9
 
637
        vst1.64         {d20-d23},[r3,:128]!    @ store the 8 products held in q10/q11
 
638
        bne             1b                      @ loop while ip != 0 (NEON ops leave the flags alone)
 
639
        ands            r2,  r2,  #15           @ elements left over after the 16-element passes
 
640
        beq             3f                      @ none: flush q8/q9 and return
 
641
2:      vld1.64         {d0-d1},  [r0,:128]!    @ tail: 8 more elements
 
642
        vld1.64         {d4-d5},  [r1,:128]!
 
643
        vst1.64         {d16-d17},[r3,:128]!    @ flush old q8 before it is overwritten
 
644
        vmul.f32        q8,  q0,  q2
 
645
        vld1.64         {d2-d3},  [r0,:128]!
 
646
        vld1.64         {d6-d7},  [r1,:128]!
 
647
        vst1.64         {d18-d19},[r3,:128]!    @ flush old q9 before it is overwritten
 
648
        vmul.f32        q9,  q1,  q3
 
649
3:      vst1.64         {d16-d19},[r3,:128]!    @ store the final 8 products
 
650
        bx              lr                      @ return
 
651
        .endfunc
 
652
 
 
653
@ ff_vector_fmul_window_neon: windowed combination of two float32 blocks
@ with a constant added to every result.
@
@ Inputs as used by the code below (n = loop count):
@   r0      = dst, 2*n floats written; front half ascending through r0,
@             back half descending through ip
@   r1      = src0, n floats read front to back
@   r2      = src1, n floats read back to front
@   r3      = win, 2*n floats; front half read ascending through r3,
@             back half read descending through r4
@   [sp]    = 32-bit value replicated into q8 and used as the starting value
@             of every accumulator (called "bias" below)
@   [sp,#4] = n
@ NOTE(review): presumably [sp] is the add_bias float and [sp,#4] the len
@ argument of the C prototype, passed on the stack under a soft-float
@ calling convention -- confirm against the C declaration.
@
@ For i in 0 .. n-1 the stored results are
@   dst[i]        = bias + src0[i]*win[2n-1-i] - src1[n-1-i]*win[i]
@   dst[2n-1-i]   = bias + src0[i]*win[i]      + src1[n-1-i]*win[2n-1-i]
@
@ n must be a non-zero multiple of 4: each pass handles 4 elements and the
@ loop only exits when the counter reaches exactly zero.  All vector
@ accesses carry the :128 hint, so the pointers must be 16-byte aligned.
function ff_vector_fmul_window_neon, export=1
 
654
        vld1.32         {d16[],d17[]}, [sp,:32] @ q8 = first stack word in all 4 lanes (read before the push moves sp)
 
655
        push            {r4,r5,lr}              @ save r4, r5 and the return address; sp drops by 12
 
656
        ldr             lr,  [sp, #16]          @ lr = n, the second stack word (entry sp + 4)
 
657
        sub             r2,  r2,  #8            @ together with the add below: r2 = src1 + 4*n - 16
 
658
        sub             r5,  lr,  #2            @ r5 = n - 2
 
659
        add             r2,  r2,  r5, lsl #2    @ r2 -> last 4 floats of src1
 
660
        add             r4,  r3,  r5, lsl #3    @ r4 = win + 8*n - 16 -> last 4 floats of the window
 
661
        add             ip,  r0,  r5, lsl #3    @ ip = dst + 8*n - 16 -> last 4 floats of the output
 
662
        mov             r5,  #-16               @ post-index step for the pointers that walk backwards
 
663
        vld1.64         {d0,d1},  [r1,:128]!    @ q0 = 4 floats of src0, ascending
 
664
        vld1.64         {d2,d3},  [r2,:128], r5 @ q1 = 4 floats of src1, stepping backwards
 
665
        vld1.64         {d4,d5},  [r3,:128]!    @ q2 = 4 window values from the front
 
666
        vld1.64         {d6,d7},  [r4,:128], r5 @ q3 = 4 window values from the back
 
667
1:      subs            lr,  lr,  #4            @ 4 elements per pass; Z is tested by the beq below
 
668
        vmov            q11, q8                 @ q11 = bias: accumulator for the back-half outputs
 
669
        vmla.f32        d22, d0,  d4            @ back += src0 * front window
 
670
        vmov            q10, q8                 @ q10 = bias: accumulator for the front-half outputs
 
671
        vmla.f32        d23, d1,  d5
 
672
        vrev64.32       q3,  q3                 @ swap each pair of the back window values
 
673
        vmla.f32        d20, d0,  d7            @ front += src0 * back window (d7/d6 crossed to complete the reversal)
 
674
        vrev64.32       q1,  q1                 @ same pair swap for the src1 block
 
675
        vmla.f32        d21, d1,  d6
 
676
        beq             2f                      @ last pass: finish without loading more data
 
677
        vmla.f32        d22, d3,  d7            @ back += reversed src1 * reversed back window
 
678
        vld1.64         {d0,d1},  [r1,:128]!    @ next src0 block; q0 is not read again in this pass
 
679
        vmla.f32        d23, d2,  d6
 
680
        vld1.64         {d18,d19},[r2,:128], r5 @ next src1 block staged in q9: q1 is still needed below
 
681
        vmls.f32        d20, d3,  d4            @ front -= reversed src1 * front window
 
682
        vld1.64         {d24,d25},[r3,:128]!    @ next front window staged in q12: q2 is still needed below
 
683
        vmls.f32        d21, d2,  d5
 
684
        vld1.64         {d6,d7},  [r4,:128], r5 @ next back window; q3 is no longer needed
 
685
        vmov            q1,  q9                 @ move the staged src1 block into place
 
686
        vrev64.32       q11, q11                @ with the vswp below: reverse all 4 back-half results
 
687
        vmov            q2,  q12                @ move the staged front window into place
 
688
        vswp            d22, d23
 
689
        vst1.64         {d20,d21},[r0,:128]!    @ store 4 front-half results, ascending
 
690
        vst1.64         {d22,d23},[ip,:128], r5 @ store 4 back-half results, descending
 
691
        b               1b
 
692
2:      vmla.f32        d22, d3,  d7            @ final pass: same arithmetic as above, without the loads
 
693
        vmla.f32        d23, d2,  d6
 
694
        vmls.f32        d20, d3,  d4
 
695
        vmls.f32        d21, d2,  d5
 
696
        vrev64.32       q11, q11                @ reverse the 4 back-half results (with the vswp)
 
697
        vswp            d22, d23
 
698
        vst1.64         {d20,d21},[r0,:128]!    @ last 4 front-half results
 
699
        vst1.64         {d22,d23},[ip,:128], r5 @ last 4 back-half results
 
700
        pop             {r4,r5,pc}              @ restore r4/r5 and return
 
701
        .endfunc