~x3lectric/xbmc/svn-trunk

« back to all changes in this revision

Viewing changes to lib/liblame/libmp3lame/i386/choose_table.nas

  • Committer: wiso
  • Date: 2010-05-07 16:57:13 UTC
  • Revision ID: svn-v4:568bbfeb-2a22-0410-94d2-cc84cf5bfa90:trunk:29897
copy lame-3.98.4 to trunk

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
; new count bit routine
 
2
;       part of this code originated from
 
3
;       new GOGO-no-coda (1999, 2000)
 
4
;       Copyright (C) 1999 shigeo
 
5
;       modified by Keiichi SAKAI
 
6
 
 
7
%include "nasm.h"
 
8
 
 
9
        globaldef       choose_table_MMX
 
10
        globaldef       MMX_masking
 
11
 
 
12
        externdef       largetbl
 
13
        externdef       t1l
 
14
        externdef       table23
 
15
        externdef       table56
 
16
 
 
17
        segment_data
 
18
        align   16
 
19
D14_14_14_14    dd      0x000E000E, 0x000E000E
 
20
D15_15_15_15    dd      0xfff0fff0, 0xfff0fff0
 
21
mul_add         dd      0x00010010, 0x00010010
 
22
mul_add23       dd      0x00010003, 0x00010003
 
23
mul_add56       dd      0x00010004, 0x00010004
 
24
; tableDEF -- 256 entries (index = x*16 + y, produced by pmaddwd with
; mul_add), 8 bytes per entry, read through [ecx+ebx*8] in from3 when
; entered from table_MMX.L_case_8_15 (base table number 13).
;
; Entry layout as consumed by from3:
;   dword 0, low  16 bits : cost accumulated for candidate base+2
;   dword 0, high 16 bits : cost accumulated for candidate base+1
;   dword 1               : cost accumulated for candidate base+0
; (the costs are presumably Huffman bit counts, per the file header)
tableDEF:
        dd      0x00010003,0x01,0x00050005,0x05,0x00070006,0x07,0x00090008,0x08,0x000a0008, 0x09
        dd      0x000a0009,0x0a,0x000b000a,0x0a,0x000b000a,0x0b,0x000c000a,0x0a,0x000c000b, 0x0b
        dd      0x000c000b,0x0c,0x000d000c,0x0c,0x000d000c,0x0d,0x000d000c,0x0d,0x000e000d, 0x0e
        dd      0x000b000e,0x0e,0x00040005,0x04,0x00060005,0x06,0x00080007,0x08,0x00090008, 0x09
        dd      0x000a0009,0x0a,0x000b0009,0x0a,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0b
        dd      0x000c000b,0x0b,0x000c000b,0x0c,0x000d000c,0x0c,0x000e000c,0x0d,0x000d000c, 0x0e
        dd      0x000e000d,0x0e,0x000b000d,0x0e,0x00070006,0x07,0x00080007,0x08,0x00090007, 0x09
        dd      0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000c,0x0d,0x000d000c, 0x0d
        dd      0x000e000d,0x0e,0x000e000d,0x0f,0x000c000d,0x0f,0x00090007,0x08,0x00090008, 0x09
        dd      0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
        dd      0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000c,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0d,0x000f000d,0x0e,0x000f000d,0x0f,0x000d000d,0x0f,0x000a0008, 0x09
        dd      0x000a0008,0x09,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0c,0x000e000b,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f,0x000c000d, 0x10
        dd      0x000a0009,0x0a,0x000a0009,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000a,0x0c,0x000d000b,0x0d,0x000e000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0d,0x000f000d,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
        dd      0x000d000e,0x10,0x000b000a,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
        dd      0x000d000a,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
        dd      0x0010000e,0x10,0x000d000e,0x10,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0c
        dd      0x000c000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0e,0x000e000c, 0x0e
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0f,0x000f000c,0x0f,0x000f000d, 0x0f
        dd      0x0011000d,0x10,0x0011000d,0x12,0x000d000e,0x12,0x000b000a,0x0a,0x000c000a, 0x0a
        dd      0x000c000a,0x0b,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000d, 0x0f
        dd      0x0010000d,0x0f,0x0010000e,0x10,0x0010000e,0x11,0x000d000e,0x11,0x000c000a, 0x0b
        dd      0x000c000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0d,0x000f000c,0x0f,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
        dd      0x0010000d,0x10,0x000f000d,0x10,0x0010000e,0x10,0x000f000e,0x12,0x000e000e, 0x11
        dd      0x000c000b,0x0b,0x000d000b,0x0c,0x000c000b,0x0c,0x000d000b,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0e,0x0010000d, 0x0f
        dd      0x0010000d,0x10,0x0010000d,0x0f,0x0011000d,0x10,0x0011000e,0x11,0x0010000f, 0x12
        dd      0x000d000e,0x13,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0e,0x0010000c,0x0e,0x0010000d, 0x0f
        dd      0x0010000d,0x0f,0x0010000d,0x0f,0x0010000d,0x10,0x0010000e,0x11,0x000f000e, 0x11
        dd      0x0010000e,0x11,0x000e000f,0x12,0x000d000c,0x0c,0x000e000c,0x0d,0x000e000b, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0f,0x000f000d,0x0e,0x000f000d, 0x0f
        dd      0x000f000d,0x10,0x0011000d,0x10,0x0010000d,0x11,0x0010000d,0x11,0x0010000e, 0x11
        dd      0x0010000e,0x12,0x0012000f,0x12,0x000e000f,0x12,0x000f000c,0x0d,0x000e000c, 0x0d
        dd      0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
        dd      0x0010000d,0x10,0x0010000d,0x10,0x0012000e,0x10,0x0011000e,0x10,0x0011000e, 0x11
        dd      0x0011000e,0x12,0x0013000e,0x11,0x0011000f,0x12,0x000e000f,0x12,0x000e000d, 0x0e
        dd      0x000f000d,0x0e,0x000d000d,0x0e,0x000e000d,0x0f,0x0010000d,0x0f,0x0010000d, 0x0f
        dd      0x000f000d,0x11,0x0010000d,0x10,0x0010000e,0x10,0x0011000e,0x13,0x0012000e, 0x11
        dd      0x0011000e,0x11,0x0013000f,0x11,0x0011000f,0x13,0x0010000e,0x12,0x000e000f, 0x12
        dd      0x000b000d,0x0d,0x000b000d,0x0e,0x000b000d,0x0f,0x000c000d,0x10,0x000c000d, 0x10
        dd      0x000d000d,0x10,0x000d000d,0x11,0x000d000e,0x10,0x000e000e,0x11,0x000e000e, 0x11
        dd      0x000e000e,0x12,0x000e000e,0x12,0x000e000f,0x15,0x000e000f,0x14,0x000e000f, 0x15
        dd      0x000c000f,0x12
 
77
 
 
78
; tableABC -- 128 entries of 8 bytes, same entry layout as tableDEF:
;   dword 0, low  16 bits : cost accumulated for candidate base+2
;   dword 0, high 16 bits : cost accumulated for candidate base+1
;   dword 1               : cost accumulated for candidate base+0
;
; The array is shared by two lookups, both indexed with x*16 + y:
;   table_MMX.L_case_67 reads from tableABC       (base table number 10)
;   table_MMX.L_case_45 reads from tableABC + 9*8 (base table number 7)
; i.e. the second lookup lives in the slots the first one never touches.
tableABC:
        dd      0x00020004,0x1,0x00040004,0x4,0x00060006,0x7,0x00080008,0x9,0x00090009,0xa,0x000a000a,0xa
        dd      0x0009000a,0xa,0x000a000a,0xb,0x00000000,0x0,0x00020003,0x1,0x00040004,0x4,0x00070006,0x7
        dd      0x00090007,0x9,0x00090009,0x9,0x000a000a,0xa,0x00000000,0x0,0x00040004,0x4,0x00050005,0x6
        dd      0x00060006,0x8,0x00080007,0x9,0x000a0009,0xa,0x000a0009,0xb,0x0009000a,0xa,0x000a000a,0xa
        dd      0x00000000,0x0,0x00040004,0x4,0x00040005,0x6,0x00060006,0x8,0x000a0007,0x9,0x000a0008,0x9
        dd      0x000a000a,0xa,0x00000000,0x0,0x00060006,0x7,0x00070006,0x8,0x00080007,0x9,0x00090008,0xa
        dd      0x000a0009,0xb,0x000b000a,0xc,0x000a0009,0xb,0x000a000a,0xb,0x00000000,0x0,0x00070005,0x7
        dd      0x00060006,0x7,0x00080007,0x9,0x000a0008,0xa,0x000a0009,0xa,0x000b000a,0xb,0x00000000,0x0
        dd      0x00080007,0x8,0x00080007,0x9,0x00090008,0xa,0x000b0008,0xb,0x000a0009,0xc,0x000c000a,0xc
        dd      0x000a000a,0xb,0x000b000a,0xc,0x00000000,0x0,0x00090007,0x8,0x000a0007,0x9,0x000a0008,0xa
        dd      0x000b0009,0xb,0x000b0009,0xb,0x000c000a,0xb,0x00000000,0x0,0x00090008,0x9,0x000a0008,0xa
        dd      0x000a0009,0xb,0x000b0009,0xc,0x000b000a,0xc,0x000c000a,0xc,0x000b000a,0xc,0x000c000b,0xc
        dd      0x00000000,0x0,0x00090008,0x8,0x00090008,0x9,0x000a0009,0xa,0x000b0009,0xb,0x000c000a,0xb
        dd      0x000c000b,0xc,0x00000000,0x0,0x00090009,0xa,0x000a0009,0xb,0x000b000a,0xc,0x000c000a,0xc
        dd      0x000c000a,0xd,0x000d000b,0xd,0x000c000a,0xc,0x000d000b,0xd,0x00000000,0x0,0x000a0009,0x9
        dd      0x000a0009,0xa,0x000b000a,0xb,0x000b000a,0xc,0x000d000b,0xc,0x000d000b,0xc,0x00000000,0x0
        dd      0x00090009,0x9,0x00090009,0xa,0x00090009,0xb,0x000a000a,0xc,0x000b000a,0xc,0x000c000b,0xc
        dd      0x000c000b,0xd,0x000c000c,0xd,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
        dd      0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x0009000a,0xa,0x0009000a,0xa
        dd      0x000a000a,0xb,0x000b000b,0xc,0x000c000b,0xc,0x000c000b,0xd,0x000c000b,0xd,0x000c000c,0xd
        dd      0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
        dd      0x0,0x00000000, 0x0,0x00000000
 
101
 
 
102
; linbits32 -- 13 entries of 8 bytes (four 16-bit multipliers each),
; indexed by eax = bsr(max - 15) in the ESC path of choose_table_MMX.
; The selected entry is applied with pmaddwd to the per-lane counts of
; values greater than 14; the low and high dword results are then added
; into the low and high 16-bit halves of the packed cost respectively.
linbits32:
        dd      0x00040004,0x10001,0x00040004,0x20002,0x00040004,0x30003,0x00040004,0x40004
        dd      0x00050005,0x60006,0x00060006,0x60006,0x00070007,0x80008,0x00080008,0x80008
        dd      0x00090009,0xa000a,0x000b000b,0xa000a,0x000b000b,0xd000d,0x000d000d,0xd000d
        dd      0x000d000d,0xd000d
 
107
 
 
108
 
 
109
; choose_table_H -- 13 words, indexed by eax = bsr(max - 15) in the ESC
; path of choose_table_MMX.  Each word packs two candidate table numbers:
;   low  byte : returned when the high-half cost is <= the low-half cost
;   high byte : returned otherwise
choose_table_H:
        dw      0x1810, 0x1811, 0x1812, 0x1813, 0x1914, 0x1a14, 0x1b15, 0x1c15
        dw      0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17
 
112
 
 
113
; Dispatch table for the no-ESC case: 16 dword offsets, one per maximum
; value 0..15, each relative to choose_table_MMX so the table needs no
; absolute addresses.  choose_table_MMX adds the selected offset to its
; own run-time address and jumps there.
choose_jump_table_L:
        dd      table_MMX.L_case_0 - choose_table_MMX                   ; max = 0
        dd      table_MMX.L_case_1 - choose_table_MMX                   ; max = 1
        dd      table_MMX.L_case_2 - choose_table_MMX                   ; max = 2
        dd      table_MMX.L_case_3 - choose_table_MMX                   ; max = 3
        times 2 dd table_MMX.L_case_45 - choose_table_MMX               ; max = 4, 5
        times 2 dd table_MMX.L_case_67 - choose_table_MMX               ; max = 6, 7
        times 8 dd table_MMX.L_case_8_15 - choose_table_MMX             ; max = 8..15
 
130
 
 
131
        segment_code
 
132
;
 
133
; use MMX
 
134
;
 
135
 
 
136
PIC_OFFSETTABLE
 
137
 
 
138
        align   16
 
139
;-----------------------------------------------------------------------
; int choose_table(int *ix, int *end, int *s)
;
; Stack arguments (offsets valid after the initial push ebp):
;   [esp+8]  = ix   start of the int array
;   [esp+12] = end  one past the last element
;   [esp+16] = s    running cost, updated in place
;
; Step 1 finds the largest value in [ix, end).  Values 0..15 are handed
; to one of the table_MMX.L_case_* handlers through choose_jump_table_L;
; larger values are handled here (ESC path).
;
; ESC path result: eax = a table number taken from choose_table_H and
; *s += the smaller of the two accumulated costs.  If the maximum is
; above 8191+15, *s is set to that maximum and eax = -1.
;
; ebp holds the PIC base for PIC_EBP_REL() and is restored on exit;
; ebx and esi are saved around their use.  edx keeps 'end' throughout
; the scan and is relied upon by the L_case_* handlers.
;-----------------------------------------------------------------------
choose_table_MMX:
        push    ebp
        call    get_pc.bp
        add     ebp, PIC_BASE()

        mov     ecx, [esp+8]            ; ecx = ix
        mov     edx, [esp+12]           ; edx = end
        sub     ecx, edx                ; ecx = ix - end, a negative byte offset
        test    ecx, 8                  ; odd number of 8-byte pairs?
        pxor    mm0, mm0                ; mm0 = running maximum #1 (starts at 0)
        movq    mm1, [edx+ecx]          ; mm1 = running maximum #2 (first pair)
        jz      .max_loop               ; flags still come from the test above

        add     ecx, 8                  ; first pair is already held in mm1
        jz      .max_done               ; ... and it was the only pair

        align   4
.max_loop:
        ; Per 16-bit lane: a + saturate(b - a) == max(a, b).
        movq    mm4, [edx+ecx]
        movq    mm5, [edx+ecx+8]
        add     ecx, 16                 ; sets ZF for the jnz below
        psubusw mm4, mm0                ; strictly this ought to be a dword
        psubusw mm5, mm1                ; operation, but no such instruction
        paddw   mm0, mm4                ; exists; values handled here are at
        paddw   mm1, mm5                ; most 8191+15, so words are enough
        jnz     .max_loop
.max_done:
        psubusw mm1, mm0                ; merge both maxima (word-wise, same caveat)
        paddw   mm0, mm1

        movq    mm4, mm0
        punpckhdq       mm4, mm4        ; bring the upper dword down
        psubusw mm4, mm0                ; max of upper and lower dword (same caveat)
        paddw   mm0, mm4
        movd    eax, mm0                ; eax = largest value found

        cmp     eax, 15
        ja      .esc_tables
        ; max <= 15: position-independent jump through the offset table
        lea     ecx, [PIC_EBP_REL(choose_table_MMX)]
        add     ecx, [PIC_EBP_REL(choose_jump_table_L+eax*4)]
        jmp     ecx

.too_large:
        ; max > 8191+15: store the maximum in *s and return -1
        emms
        mov     ecx, [esp+16]           ; ecx = s
        mov     [ecx], eax
        or      eax, -1
        pop     ebp
        ret

.esc_tables:
        cmp     eax, 8191+15
        ja      .too_large

        sub     eax, 15
        push    ebx
        push    esi
        bsr     eax, eax                ; eax = index into linbits32 / choose_table_H
%assign _P 4*2                          ; bytes pushed since the prologue (ebx, esi)
        movq    mm5, [PIC_EBP_REL(D15_15_15_15)]
        movq    mm6, [PIC_EBP_REL(D14_14_14_14)]
        movq    mm3, [PIC_EBP_REL(mul_add)]

        mov     ecx, [esp+_P+8]         ; ecx = ix
;       mov     edx, [esp+_P+12]        ; edx still holds end
        sub     ecx, edx                ; negative byte offset again

        xor     esi, esi                ; esi = packed cost sum = 0
        test    ecx, 8
        pxor    mm7, mm7                ; mm7 = linbits_sum: count of values > 14
        jz      .esc_loop

        ; odd number of pairs: handle the first pair on its own
        movq    mm0, [edx+ecx]
        add     ecx, 8                  ; ZF consumed by the jz further down
        packssdw        mm0, mm7        ; mm7 is still zero -> lanes {x, y, 0, 0}
        movq    mm2, mm0
        paddusw mm0, mm5                ; mm0 = min(ix, 15) + 0xfff0
        pcmpgtw mm2, mm6                ; lane = -1 where value > 14
        psubw   mm7, mm2                ; value > 14 -> linbits_sum++
        pmaddwd mm0, mm3                ; {0, 0, y, x} * {1, 16, 1, 16}
        movd    ebx, mm0
        mov     esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]

        jz      .esc_done

        align   4
.esc_loop:
        movq    mm0, [edx+ecx]
        movq    mm1, [edx+ecx+8]
        packssdw        mm0, mm1        ; four values as 16-bit lanes
        movq    mm2, mm0
        paddusw mm0, mm5                ; mm0 = min(ix, 15) + 0xfff0
        pcmpgtw mm2, mm6                ; lane = -1 where value > 14
        pmaddwd mm0, mm3                ; {y, x, y, x} * {1, 16, 1, 16}
        movd    ebx, mm0
        punpckhdq       mm0, mm0
        add     esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
        movd    ebx, mm0
        add     esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
        add     ecx, 16                 ; sets ZF for the jnz below
        psubw   mm7, mm2                ; value > 14 -> linbits_sum++
        jnz     .esc_loop

.esc_done:
        ; fold the four lane counters and replicate them into both dwords
        pmov    mm1, mm7
        punpckhdq       mm7, mm7
        paddd   mm7, mm1
        punpckldq       mm7, mm7

        pmaddwd mm7, [PIC_EBP_REL(linbits32+eax*8)]     ; scale counts by linbits
        mov     ax, [PIC_EBP_REL(choose_table_H+eax*2)] ; al / ah = candidate tables

        movd    ecx, mm7
        punpckhdq       mm7, mm7
        movd    edx, mm7
        emms
        shl     edx, 16
        add     ecx, edx                ; linbits cost for both candidates, packed

        add     ecx, esi                ; plus the packed cost from largetbl

        pop     esi
        pop     ebx

        mov     edx, ecx
        and     ecx, 0xffff             ; ecx = sum2 (low half)
        shr     edx, 16                 ; edx = sum  (high half)

        cmp     edx, ecx
        jle     .esc_store
        mov     edx, ecx                ; low-half cost is smaller ...
        shr     eax, 8                  ; ... so take the table from the high byte
.esc_store:
        mov     ecx, [esp+16]           ; ecx = s
        and     eax, 0xff               ; return value = chosen table number
        add     [ecx], edx              ; *s += chosen cost
        pop     ebp
        ret
 
278
 
 
279
; Reached from choose_table_MMX through choose_jump_table_L when the
; largest value is 0.  eax still holds that maximum, so the function
; returns 0; *s is not touched.
table_MMX.L_case_0:
        emms                            ; leave MMX state before returning
        pop     ebp                     ; saved by choose_table_MMX
        ret
 
283
 
 
284
; Reached from choose_table_MMX through choose_jump_table_L when the
; largest value is 1.
; On entry: edx = end, ebp = PIC base, stack as in choose_table_MMX
;           ([esp+8] = ix, [esp+16] = s).
; For every pair (x, y) the byte t1l[2*x + y] is added to *s.
; Returns 1.
table_MMX.L_case_1:
        emms
        mov     ecx, [esp+8]            ; ecx = ix
        mov     eax, [esp+16]           ; eax = s
        sub     ecx, edx                ; ecx = ix - end, a negative byte offset
        push    ebx
.next_pair:
        mov     ebx, [edx+ecx]          ; ebx = x
        add     ebx, ebx                ; ebx = 2*x
        add     ebx, [edx+ecx+4]        ; ebx = 2*x + y
        movzx   ebx, byte [PIC_EBP_REL(ebx+t1l)]
        add     [eax], ebx              ; *s += t1l[2*x + y]
        add     ecx, 8                  ; next pair; zero once 'end' is reached
        jnz     .next_pair
        pop     ebx
        mov     eax, 1                  ; return value
        pop     ebp
        ret
 
302
 
 
303
;-----------------------------------------------------------------------
; Three-candidate table selection.  Entered only through
; choose_jump_table_L:
;   L_case_45   (max 4..5)  : base table  7, lookup at tableABC + 9*8
;   L_case_67   (max 6..7)  : base table 10, lookup at tableABC
;   L_case_8_15 (max 8..15) : base table 13, lookup at tableDEF
;
; On entry: edx = end, ebp = PIC base, stack as in choose_table_MMX.
; Each entry point pushes its base table number and loads ecx with the
; lookup table; from3 then sums, for every pair (x, y), the 8-byte
; entry at index x*16 + y.  The three accumulated costs are compared
; and the function returns base + 0, 1 or 2 and adds the smallest cost
; to *s.
;-----------------------------------------------------------------------
table_MMX.L_case_45:
        push    dword 7
        lea     ecx, [PIC_EBP_REL(tableABC+9*8)]
        jmp     from3

table_MMX.L_case_67:
        push    dword 10
        lea     ecx, [PIC_EBP_REL(tableABC)]
        jmp     from3

table_MMX.L_case_8_15:
        push    dword 13
        lea     ecx, [PIC_EBP_REL(tableDEF)]
from3:
        ; stack here: [esp] = base table number, [esp+4] = saved ebp,
        ;             [esp+8] = return address, [esp+12] = ix
        mov     eax,[esp+12]    ;eax = ix (begin pointer)
;       mov     edx,[esp+16]    ;edx already holds end

        push    ebx
        sub     eax, edx        ;eax = ix - end, a negative byte offset

        movq    mm5,[PIC_EBP_REL(mul_add)]
        pxor    mm2,mm2 ;mm2 = sum

        test    eax, 8
        jz      .choose3_lp1
; odd length
        movq    mm0,[edx+eax]   ;mm0 = ix[0] | ix[1]
        add     eax,8           ;ZF is consumed by the jz below
        packssdw        mm0,mm2 ;mm2 is still zero here

        pmaddwd mm0,mm5         ;low dword = x*16 + y
        movd    ebx,mm0

        movq    mm2,  [ecx+ebx*8]

        jz      .choose3_exit

        align   4
.choose3_lp1:
        movq    mm0,[edx+eax]
        movq    mm1,[edx+eax+8]
        add     eax,16          ;ZF is consumed by the jnz below
        packssdw        mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
        pmaddwd mm0,mm5         ;two table indices, one per dword
        movd    ebx,mm0
        punpckhdq       mm0,mm0
        paddd   mm2, [ecx+ebx*8]
        movd    ebx,mm0
        paddd   mm2, [ecx+ebx*8]
        jnz     .choose3_lp1
.choose3_exit:
;       xor     eax,eax         ;not needed: eax is already 0 here
        movd    ebx, mm2
        punpckhdq       mm2,mm2
        mov     ecx, ebx
        and     ecx, 0xffff     ; ecx = sum2 (candidate base+2)
        shr     ebx, 16 ; ebx = sum1 (candidate base+1)
        movd    edx, mm2        ; edx = sum (candidate base+0)

        cmp     edx, ebx
        jle     .choose3_s1
        mov     edx, ebx
        inc     eax             ; eax = 1
.choose3_s1:
        emms
        pop     ebx
        cmp     edx, ecx
        jle     .choose3_s2
        mov     edx, ecx
        mov     eax, 2
.choose3_s2:
        pop     ecx             ; base table number pushed at entry
        add     eax, ecx        ; return value = base + winner
        mov     ecx, [esp+16] ; ecx = s
        add     [ecx], edx      ; *s += smallest cost
        pop     ebp
        ret
 
380
 
 
381
;-----------------------------------------------------------------------
; Two-candidate table selection.  Entered only through
; choose_jump_table_L:
;   L_case_2 (max = 2) : base table 2, lookup table23, index x*3 + y
;   L_case_3 (max = 3) : base table 5, lookup table56, index x*4 + y
;
; On entry: edx = end, ebp = PIC base, stack as in choose_table_MMX.
; Each lookup entry is one dword holding two 16-bit costs: high half for
; the base table, low half for base+1.  The entries of all pairs are
; summed in edi; the function returns base or base+1 and adds the
; smaller cost to *s.
;-----------------------------------------------------------------------
table_MMX.L_case_2:
        push    dword 2
        lea     ecx,[PIC_EBP_REL(table23)]
        pmov    mm5,[PIC_EBP_REL(mul_add23)]
        jmp     from2
table_MMX.L_case_3:
        push    dword 5
        lea     ecx,[PIC_EBP_REL(table56)]
        pmov    mm5,[PIC_EBP_REL(mul_add56)]
from2:
        ; stack here: [esp] = base table number, [esp+4] = saved ebp,
        ;             [esp+8] = return address, [esp+12] = ix
        mov     eax,[esp+12]    ;eax = ix (begin pointer)
;       mov     edx,[esp+16]    ;edx already holds end
        push    ebx
        push    edi

        sub     eax, edx        ;eax = ix - end, a negative byte offset
        xor     edi, edi        ;edi = packed cost sum
        test    eax, 8
        jz      .choose2_lp1
; odd length
        movq    mm0,[edx+eax]   ;mm0 = ix[0] | ix[1]
        pxor    mm2,mm2         ;zero upper lanes for the pack below
        packssdw        mm0,mm2

        pmaddwd mm0,mm5
        movd    ebx,mm0

        mov     edi,  [ecx+ebx*4]

        add     eax,8
        jz      .choose2_exit

        align   4
.choose2_lp1:
        movq    mm0,[edx+eax]
        movq    mm1,[edx+eax+8]
        packssdw        mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
        pmaddwd mm0,mm5         ;two table indices, one per dword
        movd    ebx,mm0
        punpckhdq       mm0,mm0
        add     edi, [ecx+ebx*4]
        movd    ebx, mm0
        add     edi, [ecx+ebx*4]
        add     eax,16
        jnc     .choose2_lp1    ;carry is set when the negative offset wraps to 0
.choose2_exit:
        mov     ecx, edi
        pop     edi
        pop     ebx
        pop     eax ; table num.
        emms

        mov     edx, ecx
        and     ecx, 0xffff     ; ecx = sum2 (candidate base+1)
        shr     edx, 16 ; edx = sum1 (candidate base)

        cmp     edx, ecx
        jle     .choose2_s1
        mov     edx, ecx
        inc     eax
.choose2_s1:
        mov     ecx, [esp+16] ; ecx = s
        add     [ecx], edx      ; *s += smaller cost
        pop     ebp
        ret
 
446
 
 
447
        end