1
1
;*****************************************************************************
3
3
;*****************************************************************************
4
;* Copyright (C) 2005-2008 x264 project
4
;* Copyright (C) 2005-2011 x264 project
6
6
;* Authors: Loren Merritt <lorenm@u.washington.edu>
7
7
;* Anton Mitrofanov <BugMaster@narod.ru>
8
;* Jason Garrett-Glaser <darkshikari@gmail.com>
9
10
;* Permission to use, copy, modify, and/or distribute this software for any
10
11
;* purpose with or without fee is hereby granted, provided that the above
677
;=============================================================================
678
; AVX abstraction layer
679
;=============================================================================
684
CAT_XDEFINE sizeofmm, i, 8
686
CAT_XDEFINE sizeofxmm, i, 16
687
CAT_XDEFINE sizeofymm, i, 32
693
;%2 == 1 if float, 0 if int
694
;%3 == 0 if 3-operand (xmm, xmm, xmm), 1 if 4-operand (xmm, xmm, xmm, imm)
695
;%4 == number of operands given
697
%macro RUN_AVX_INSTR 6-7+
702
%define %%regmov movq
704
%define %%regmov movaps
706
%define %%regmov movdqa
711
%if avx_enabled && sizeof%5==16
729
;%2 == 1 if float, 0 if int
730
;%3 == 0 if 3-operand (xmm, xmm, xmm), 1 if 4-operand (xmm, xmm, xmm, imm)
732
%macro %1 2-8 fnord, fnord, fnord, %1, %2, %3
734
RUN_AVX_INSTR %6, %7, %8, 2, %1, %2
736
RUN_AVX_INSTR %6, %7, %8, 3, %1, %2, %3
738
RUN_AVX_INSTR %6, %7, %8, 4, %1, %2, %3, %4
740
RUN_AVX_INSTR %6, %7, %8, 5, %1, %2, %3, %4, %5
745
; Table format: AVX_INSTR <mnemonic>, <float flag>, <4-operand flag>
;   float flag      == 1 if the instruction works on float data, 0 if int
;   4-operand flag  == 1 if the non-destructive form is (xmm, xmm, xmm, imm),
;                      0 if it is (xmm, xmm, xmm)
;
; Float-flagged entries: add, addsub, and/andn, blend, cmp, div.
AVX_INSTR addpd, 1, 0
746
AVX_INSTR addps, 1, 0
747
AVX_INSTR addsd, 1, 0
748
AVX_INSTR addss, 1, 0
749
AVX_INSTR addsubpd, 1, 0
750
AVX_INSTR addsubps, 1, 0
751
AVX_INSTR andpd, 1, 0
752
AVX_INSTR andps, 1, 0
753
AVX_INSTR andnpd, 1, 0
754
AVX_INSTR andnps, 1, 0
755
; blendpd/blendps end in an imm8 selector, so their non-destructive form is
; (xmm, xmm, xmm, imm): the 4-operand flag has to be 1, as for pblendw.
AVX_INSTR blendpd, 1, 1
756
AVX_INSTR blendps, 1, 1
757
; NOTE(review): the legacy blendvpd/blendvps encodings use an implicit xmm0
; mask rather than an immediate; their flags are left as they were - confirm.
AVX_INSTR blendvpd, 1, 0
758
AVX_INSTR blendvps, 1, 0
759
; The cmp* family ends in an imm8 predicate, so it is 4-operand as well.
AVX_INSTR cmppd, 1, 1
760
AVX_INSTR cmpps, 1, 1
761
AVX_INSTR cmpsd, 1, 1
762
AVX_INSTR cmpss, 1, 1
763
AVX_INSTR divpd, 1, 0
764
AVX_INSTR divps, 1, 0
765
AVX_INSTR divsd, 1, 0
766
AVX_INSTR divss, 1, 0
769
; Table format: AVX_INSTR <mnemonic>, <float flag>, <4-operand flag>
; Float-flagged entries: horizontal add/sub, max, min, mul.
AVX_INSTR haddpd, 1, 0
770
AVX_INSTR haddps, 1, 0
771
AVX_INSTR hsubpd, 1, 0
772
AVX_INSTR hsubps, 1, 0
773
AVX_INSTR maxpd, 1, 0
774
AVX_INSTR maxps, 1, 0
775
AVX_INSTR maxsd, 1, 0
776
AVX_INSTR maxss, 1, 0
777
AVX_INSTR minpd, 1, 0
778
AVX_INSTR minps, 1, 0
779
AVX_INSTR minsd, 1, 0
780
AVX_INSTR minss, 1, 0
781
; mpsadbw is the only int-flagged entry in this run; it is registered as
; 4-operand (xmm, xmm, xmm, imm).
AVX_INSTR mpsadbw, 0, 1
782
AVX_INSTR mulpd, 1, 0
783
AVX_INSTR mulps, 1, 0
784
AVX_INSTR mulsd, 1, 0
785
AVX_INSTR mulss, 1, 0
788
; Table format: AVX_INSTR <mnemonic>, <float flag>, <4-operand flag>
; Int-flagged entries: pack, padd (wrapping and saturating), palignr, pandn,
; pavg, pblend.
AVX_INSTR packsswb, 0, 0
789
AVX_INSTR packssdw, 0, 0
790
AVX_INSTR packuswb, 0, 0
791
AVX_INSTR packusdw, 0, 0
792
AVX_INSTR paddb, 0, 0
793
AVX_INSTR paddw, 0, 0
794
AVX_INSTR paddd, 0, 0
795
AVX_INSTR paddq, 0, 0
796
AVX_INSTR paddsb, 0, 0
797
AVX_INSTR paddsw, 0, 0
798
AVX_INSTR paddusb, 0, 0
799
AVX_INSTR paddusw, 0, 0
800
; palignr is registered as 4-operand (xmm, xmm, xmm, imm).
AVX_INSTR palignr, 0, 1
802
AVX_INSTR pandn, 0, 0
803
AVX_INSTR pavgb, 0, 0
804
AVX_INSTR pavgw, 0, 0
805
AVX_INSTR pblendvb, 0, 0
806
; pblendw is registered as 4-operand (xmm, xmm, xmm, imm).
AVX_INSTR pblendw, 0, 1
807
; String-compare instructions, int-flagged and registered as 3-operand.
; NOTE(review): these take (xmm, xmm/m128, imm8) and have no three-register
; form - confirm that routing them through the 3-operand path is intended.
AVX_INSTR pcmpestri, 0, 0
808
AVX_INSTR pcmpestrm, 0, 0
809
AVX_INSTR pcmpistri, 0, 0
810
AVX_INSTR pcmpistrm, 0, 0
811
; Table format: AVX_INSTR <mnemonic>, <float flag>, <4-operand flag>
; Int-flagged, 3-operand entries: compares (eq/gt).
AVX_INSTR pcmpeqb, 0, 0
812
AVX_INSTR pcmpeqw, 0, 0
813
AVX_INSTR pcmpeqd, 0, 0
814
AVX_INSTR pcmpeqq, 0, 0
815
AVX_INSTR pcmpgtb, 0, 0
816
AVX_INSTR pcmpgtw, 0, 0
817
AVX_INSTR pcmpgtd, 0, 0
818
AVX_INSTR pcmpgtq, 0, 0
819
; Horizontal add/sub.
AVX_INSTR phaddw, 0, 0
820
AVX_INSTR phaddd, 0, 0
821
AVX_INSTR phaddsw, 0, 0
822
AVX_INSTR phsubw, 0, 0
823
AVX_INSTR phsubd, 0, 0
824
AVX_INSTR phsubsw, 0, 0
825
; Multiply-add.
AVX_INSTR pmaddwd, 0, 0
826
AVX_INSTR pmaddubsw, 0, 0
827
; Max / min (signed and unsigned).
AVX_INSTR pmaxsb, 0, 0
828
AVX_INSTR pmaxsw, 0, 0
829
AVX_INSTR pmaxsd, 0, 0
830
AVX_INSTR pmaxub, 0, 0
831
AVX_INSTR pmaxuw, 0, 0
832
AVX_INSTR pmaxud, 0, 0
833
AVX_INSTR pminsb, 0, 0
834
AVX_INSTR pminsw, 0, 0
835
AVX_INSTR pminsd, 0, 0
836
AVX_INSTR pminub, 0, 0
837
AVX_INSTR pminuw, 0, 0
838
AVX_INSTR pminud, 0, 0
839
; Multiplies.
AVX_INSTR pmulhuw, 0, 0
840
AVX_INSTR pmulhrsw, 0, 0
841
AVX_INSTR pmulhw, 0, 0
842
AVX_INSTR pmullw, 0, 0
843
AVX_INSTR pmulld, 0, 0
844
AVX_INSTR pmuludq, 0, 0
845
AVX_INSTR pmuldq, 0, 0
847
; Table format: AVX_INSTR <mnemonic>, <float flag>, <4-operand flag>
; Int-flagged, 3-operand entries: psadbw, pshufb, psign.
AVX_INSTR psadbw, 0, 0
848
AVX_INSTR pshufb, 0, 0
849
AVX_INSTR psignb, 0, 0
850
AVX_INSTR psignw, 0, 0
851
AVX_INSTR psignd, 0, 0
852
; Shifts (left, arithmetic right, logical right).
AVX_INSTR psllw, 0, 0
853
AVX_INSTR pslld, 0, 0
854
AVX_INSTR psllq, 0, 0
855
AVX_INSTR pslldq, 0, 0
856
AVX_INSTR psraw, 0, 0
857
AVX_INSTR psrad, 0, 0
858
AVX_INSTR psrlw, 0, 0
859
AVX_INSTR psrld, 0, 0
860
AVX_INSTR psrlq, 0, 0
861
AVX_INSTR psrldq, 0, 0
862
; Subtracts (wrapping and saturating).
AVX_INSTR psubb, 0, 0
863
AVX_INSTR psubw, 0, 0
864
AVX_INSTR psubd, 0, 0
865
AVX_INSTR psubq, 0, 0
866
AVX_INSTR psubsb, 0, 0
867
AVX_INSTR psubsw, 0, 0
868
AVX_INSTR psubusb, 0, 0
869
AVX_INSTR psubusw, 0, 0
870
; Interleaves (high and low halves).
AVX_INSTR punpckhbw, 0, 0
871
AVX_INSTR punpckhwd, 0, 0
872
AVX_INSTR punpckhdq, 0, 0
873
AVX_INSTR punpckhqdq, 0, 0
874
AVX_INSTR punpcklbw, 0, 0
875
AVX_INSTR punpcklwd, 0, 0
876
AVX_INSTR punpckldq, 0, 0
877
AVX_INSTR punpcklqdq, 0, 0
879
; Table format: AVX_INSTR <mnemonic>, <float flag>, <4-operand flag>
; Float-flagged entries: shuffle, sub, unpack, xor.
;
; shufps works on packed single-precision data, so it carries the float flag
; like every other *ps entry; it also ends in an imm8, hence 4-operand.
; NOTE(review): the float flag presumably only picks the register-copy
; instruction used when emulating the non-destructive form - confirm.
AVX_INSTR shufps, 1, 1
880
AVX_INSTR subpd, 1, 0
881
AVX_INSTR subps, 1, 0
882
AVX_INSTR subsd, 1, 0
883
AVX_INSTR subss, 1, 0
884
AVX_INSTR unpckhpd, 1, 0
885
AVX_INSTR unpckhps, 1, 0
886
AVX_INSTR unpcklpd, 1, 0
887
AVX_INSTR unpcklps, 1, 0
888
AVX_INSTR xorpd, 1, 0
889
AVX_INSTR xorps, 1, 0
891
; 3DNow! instructions, registered here so that the same source can be shared
; between AVX, SSE and 3DNow! code paths. All three are float-flagged and
; 3-operand.
892
AVX_INSTR pfadd, 1, 0
893
AVX_INSTR pfsub, 1, 0
894
AVX_INSTR pfmul, 1, 0