1
; new count bit routine
2
; part of this code originates from
3
; new GOGO-no-coda (1999, 2000)
4
; Copyright (C) 1999 shigeo
5
; modified by Keiichi SAKAI
9
globaldef choose_table_MMX
19
; Packed 16-bit constants; each is two dwords, i.e. one 64-bit MMX operand.
; Four words of 14 (0x000E): loaded into mm6 as the threshold of the signed
; word compare (pcmpgtw) that detects values greater than 14.
D14_14_14_14 dd 0x000E000E, 0x000E000E
20
; Four words of 0xfff0: added with unsigned saturation (paddusw), so any word
; above 15 pins at 0xffff, giving min(ix, 15) + 0xfff0.
D15_15_15_15 dd 0xfff0fff0, 0xfff0fff0
21
; pmaddwd multipliers: each dword holds the word pair (low = 16, high = 1),
; so a pair of adjacent words is combined as low*16 + high.
mul_add dd 0x00010010, 0x00010010
22
; Same layout with the word pair (low = 3, high = 1); loaded into mm5 next to table23.
mul_add23 dd 0x00010003, 0x00010003
23
; Same layout with the word pair (low = 4, high = 1); loaded into mm5 next to table56.
mul_add56 dd 0x00010004, 0x00010004
25
dd 0x00010003,0x01,0x00050005,0x05,0x00070006,0x07,0x00090008,0x08,0x000a0008, 0x09
26
dd 0x000a0009,0x0a,0x000b000a,0x0a,0x000b000a,0x0b,0x000c000a,0x0a,0x000c000b, 0x0b
27
dd 0x000c000b,0x0c,0x000d000c,0x0c,0x000d000c,0x0d,0x000d000c,0x0d,0x000e000d, 0x0e
28
dd 0x000b000e,0x0e,0x00040005,0x04,0x00060005,0x06,0x00080007,0x08,0x00090008, 0x09
29
dd 0x000a0009,0x0a,0x000b0009,0x0a,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0b
30
dd 0x000c000b,0x0b,0x000c000b,0x0c,0x000d000c,0x0c,0x000e000c,0x0d,0x000d000c, 0x0e
31
dd 0x000e000d,0x0e,0x000b000d,0x0e,0x00070006,0x07,0x00080007,0x08,0x00090007, 0x09
32
dd 0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
33
dd 0x000d000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000c,0x0d,0x000d000c, 0x0d
34
dd 0x000e000d,0x0e,0x000e000d,0x0f,0x000c000d,0x0f,0x00090007,0x08,0x00090008, 0x09
35
dd 0x000a0008,0x0a,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
36
dd 0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000c,0x0d,0x000e000c, 0x0d
37
dd 0x000e000c,0x0d,0x000f000d,0x0e,0x000f000d,0x0f,0x000d000d,0x0f,0x000a0008, 0x09
38
dd 0x000a0008,0x09,0x000b0009,0x0b,0x000b0009,0x0b,0x000c000a,0x0c,0x000c000a, 0x0c
39
dd 0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0c,0x000e000b,0x0d,0x000e000c, 0x0d
40
dd 0x000e000c,0x0e,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d,0x0f,0x000c000d, 0x10
41
dd 0x000a0009,0x0a,0x000a0009,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
42
dd 0x000d000a,0x0c,0x000d000b,0x0d,0x000e000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
43
dd 0x000e000c,0x0e,0x000f000c,0x0d,0x000f000d,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
44
dd 0x000d000e,0x10,0x000b000a,0x0a,0x000b0009,0x0b,0x000b000a,0x0c,0x000c000a, 0x0c
45
dd 0x000d000a,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000e000b, 0x0d
46
dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
47
dd 0x0010000e,0x10,0x000d000e,0x10,0x000b000a,0x0b,0x000b000a,0x0b,0x000c000a, 0x0c
48
dd 0x000c000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0d,0x000d000b,0x0e,0x000e000c, 0x0e
49
dd 0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0f,0x000f000c,0x0f,0x000f000d, 0x0f
50
dd 0x0011000d,0x10,0x0011000d,0x12,0x000d000e,0x12,0x000b000a,0x0a,0x000c000a, 0x0a
51
dd 0x000c000a,0x0b,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
52
dd 0x000e000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000c,0x0e,0x000f000d, 0x0f
53
dd 0x0010000d,0x0f,0x0010000e,0x10,0x0010000e,0x11,0x000d000e,0x11,0x000c000a, 0x0b
54
dd 0x000c000a,0x0b,0x000c000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0d,0x000e000b, 0x0d
55
dd 0x000e000c,0x0d,0x000f000c,0x0f,0x000f000c,0x0e,0x000f000d,0x0f,0x000f000d, 0x0f
56
dd 0x0010000d,0x10,0x000f000d,0x10,0x0010000e,0x10,0x000f000e,0x12,0x000e000e, 0x11
57
dd 0x000c000b,0x0b,0x000d000b,0x0c,0x000c000b,0x0c,0x000d000b,0x0d,0x000e000c, 0x0d
58
dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0e,0x0010000d, 0x0f
59
dd 0x0010000d,0x10,0x0010000d,0x0f,0x0011000d,0x10,0x0011000e,0x11,0x0010000f, 0x12
60
dd 0x000d000e,0x13,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b,0x0c,0x000d000b, 0x0d
61
dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0e,0x0010000c,0x0e,0x0010000d, 0x0f
62
dd 0x0010000d,0x0f,0x0010000d,0x0f,0x0010000d,0x10,0x0010000e,0x11,0x000f000e, 0x11
63
dd 0x0010000e,0x11,0x000e000f,0x12,0x000d000c,0x0c,0x000e000c,0x0d,0x000e000b, 0x0d
64
dd 0x000e000c,0x0e,0x000e000c,0x0e,0x000f000c,0x0f,0x000f000d,0x0e,0x000f000d, 0x0f
65
dd 0x000f000d,0x10,0x0011000d,0x10,0x0010000d,0x11,0x0010000d,0x11,0x0010000e, 0x11
66
dd 0x0010000e,0x12,0x0012000f,0x12,0x000e000f,0x12,0x000f000c,0x0d,0x000e000c, 0x0d
67
dd 0x000e000c,0x0e,0x000e000c,0x0f,0x000f000c,0x0f,0x000f000d,0x0f,0x0010000d, 0x10
68
dd 0x0010000d,0x10,0x0010000d,0x10,0x0012000e,0x10,0x0011000e,0x10,0x0011000e, 0x11
69
dd 0x0011000e,0x12,0x0013000e,0x11,0x0011000f,0x12,0x000e000f,0x12,0x000e000d, 0x0e
70
dd 0x000f000d,0x0e,0x000d000d,0x0e,0x000e000d,0x0f,0x0010000d,0x0f,0x0010000d, 0x0f
71
dd 0x000f000d,0x11,0x0010000d,0x10,0x0010000e,0x10,0x0011000e,0x13,0x0012000e, 0x11
72
dd 0x0011000e,0x11,0x0013000f,0x11,0x0011000f,0x13,0x0010000e,0x12,0x000e000f, 0x12
73
dd 0x000b000d,0x0d,0x000b000d,0x0e,0x000b000d,0x0f,0x000c000d,0x10,0x000c000d, 0x10
74
dd 0x000d000d,0x10,0x000d000d,0x11,0x000d000e,0x10,0x000e000e,0x11,0x000e000e, 0x11
75
dd 0x000e000e,0x12,0x000e000e,0x12,0x000e000f,0x15,0x000e000f,0x14,0x000e000f, 0x15
79
dd 0x00020004,0x1,0x00040004,0x4,0x00060006,0x7,0x00080008,0x9,0x00090009,0xa,0x000a000a,0xa
80
dd 0x0009000a,0xa,0x000a000a,0xb,0x00000000,0x0,0x00020003,0x1,0x00040004,0x4,0x00070006,0x7
81
dd 0x00090007,0x9,0x00090009,0x9,0x000a000a,0xa,0x00000000,0x0,0x00040004,0x4,0x00050005,0x6
82
dd 0x00060006,0x8,0x00080007,0x9,0x000a0009,0xa,0x000a0009,0xb,0x0009000a,0xa,0x000a000a,0xa
83
dd 0x00000000,0x0,0x00040004,0x4,0x00040005,0x6,0x00060006,0x8,0x000a0007,0x9,0x000a0008,0x9
84
dd 0x000a000a,0xa,0x00000000,0x0,0x00060006,0x7,0x00070006,0x8,0x00080007,0x9,0x00090008,0xa
85
dd 0x000a0009,0xb,0x000b000a,0xc,0x000a0009,0xb,0x000a000a,0xb,0x00000000,0x0,0x00070005,0x7
86
dd 0x00060006,0x7,0x00080007,0x9,0x000a0008,0xa,0x000a0009,0xa,0x000b000a,0xb,0x00000000,0x0
87
dd 0x00080007,0x8,0x00080007,0x9,0x00090008,0xa,0x000b0008,0xb,0x000a0009,0xc,0x000c000a,0xc
88
dd 0x000a000a,0xb,0x000b000a,0xc,0x00000000,0x0,0x00090007,0x8,0x000a0007,0x9,0x000a0008,0xa
89
dd 0x000b0009,0xb,0x000b0009,0xb,0x000c000a,0xb,0x00000000,0x0,0x00090008,0x9,0x000a0008,0xa
90
dd 0x000a0009,0xb,0x000b0009,0xc,0x000b000a,0xc,0x000c000a,0xc,0x000b000a,0xc,0x000c000b,0xc
91
dd 0x00000000,0x0,0x00090008,0x8,0x00090008,0x9,0x000a0009,0xa,0x000b0009,0xb,0x000c000a,0xb
92
dd 0x000c000b,0xc,0x00000000,0x0,0x00090009,0xa,0x000a0009,0xb,0x000b000a,0xc,0x000c000a,0xc
93
dd 0x000c000a,0xd,0x000d000b,0xd,0x000c000a,0xc,0x000d000b,0xd,0x00000000,0x0,0x000a0009,0x9
94
dd 0x000a0009,0xa,0x000b000a,0xb,0x000b000a,0xc,0x000d000b,0xc,0x000d000b,0xc,0x00000000,0x0
95
dd 0x00090009,0x9,0x00090009,0xa,0x00090009,0xb,0x000a000a,0xc,0x000b000a,0xc,0x000c000b,0xc
96
dd 0x000c000b,0xd,0x000c000c,0xd,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
97
dd 0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x0009000a,0xa,0x0009000a,0xa
98
dd 0x000a000a,0xb,0x000b000b,0xc,0x000c000b,0xc,0x000c000b,0xd,0x000c000b,0xd,0x000c000c,0xd
99
dd 0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0,0x00000000,0x0
100
dd 0x0,0x00000000, 0x0,0x00000000
103
dd 0x00040004,0x10001,0x00040004,0x20002,0x00040004,0x30003,0x00040004,0x40004
104
dd 0x00050005,0x60006,0x00060006,0x60006,0x00070007,0x80008,0x00080008,0x80008
105
dd 0x00090009,0xa000a,0x000b000b,0xa000a,0x000b000b,0xd000d,0x000d000d,0xd000d
106
dd 0x000d000d,0xd000d
110
dw 0x1810, 0x1811, 0x1812, 0x1813, 0x1914, 0x1a14, 0x1b15, 0x1c15
111
dw 0x1d16, 0x1e16, 0x1e17, 0x1f17, 0x1f17
114
; Dispatch table: 16 dword entries, each the offset of a handler label from
; choose_table_MMX (not an absolute address).
; Entries 0-3 have dedicated handlers, 4-5 share L_case_45, 6-7 share
; L_case_67, and 8-15 share L_case_8_15.
; NOTE(review): the label of this table is not visible in this excerpt; the
; dispatcher indexes choose_jump_table_L with eax*4 and adds the entry to the
; address of choose_table_MMX, so this is presumably that table -- confirm.
dd table_MMX.L_case_0 - choose_table_MMX
115
dd table_MMX.L_case_1 - choose_table_MMX
116
dd table_MMX.L_case_2 - choose_table_MMX
117
dd table_MMX.L_case_3 - choose_table_MMX
118
dd table_MMX.L_case_45 - choose_table_MMX
119
dd table_MMX.L_case_45 - choose_table_MMX
120
dd table_MMX.L_case_67 - choose_table_MMX
121
dd table_MMX.L_case_67 - choose_table_MMX
122
dd table_MMX.L_case_8_15 - choose_table_MMX
123
dd table_MMX.L_case_8_15 - choose_table_MMX
124
dd table_MMX.L_case_8_15 - choose_table_MMX
125
dd table_MMX.L_case_8_15 - choose_table_MMX
126
dd table_MMX.L_case_8_15 - choose_table_MMX
127
dd table_MMX.L_case_8_15 - choose_table_MMX
128
dd table_MMX.L_case_8_15 - choose_table_MMX
129
dd table_MMX.L_case_8_15 - choose_table_MMX
139
; int choose_table(int *ix, int *end, int *s)
; NOTE(review): only part of this routine is visible in this excerpt (the
; entry label, loop labels and several instructions are missing); the
; comments below describe just the instructions shown.
145
mov ecx,[esp+8] ;ecx = begin
146
mov edx,[esp+12] ;edx = end
147
sub ecx,edx ;ecx = begin-end(should be minus)
149
pxor mm0,mm0 ;mm0=[0:0]
161
; psubusw followed by paddw forms a word-wise unsigned maximum:
; dst + saturate(src - dst) == max(dst, src).
psubusw mm4,mm0 ; strictly speaking this should be a dword operation,
162
psubusw mm5,mm1 ; but no such instruction exists :-p
163
paddw mm0,mm4 ; ...the values handled here never exceed 8191+15, so word ops are fine
167
psubusw mm1,mm0 ; this, too, should strictly be a dword operation
172
psubusw mm4,mm0 ; this, too, should strictly be a dword operation
178
; Dispatch: table entries are offsets relative to choose_table_MMX, so take
; the routine's own address and add the entry selected by eax.
lea ecx,[PIC_EBP_REL(choose_table_MMX)]
179
add ecx,[PIC_EBP_REL(choose_jump_table_L+eax*4)]
184
; NOTE(review): the lines below are fragments of several handlers whose labels
; and many instructions are not visible in this excerpt; comments describe
; only what the shown instructions do.
mov ecx, [esp+16] ; *s
199
movq mm5, [PIC_EBP_REL(D15_15_15_15)]
200
movq mm6, [PIC_EBP_REL(D14_14_14_14)]
201
movq mm3, [PIC_EBP_REL(mul_add)]
203
mov ecx, [esp+_P+8] ; = ix
204
; mov edx, [esp+_P+12] ; = end
207
xor esi, esi ; sum = 0
209
pxor mm7, mm7 ; linbits_sum: count of values that exceed 14
216
paddusw mm0, mm5 ; mm0 = min(ix, 15)+0xfff0
217
pcmpgtw mm2, mm6 ; greater than 14? (true words become 0xffff)
218
psubw mm7, mm2 ; when greater than 14: linbits_sum++ (subtracting -1)
219
pmaddwd mm0, mm3 ; {0, 0, y, x}*{1, 16, 1, 16}
221
mov esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
228
movq mm1, [edx+ecx+8]
231
paddusw mm0, mm5 ; mm0 = min(ix, 15)+0xfff0
232
pcmpgtw mm2, mm6 ; greater than 14? (true words become 0xffff)
233
pmaddwd mm0, mm3 ; {y, x, y, x}*{1, 16, 1, 16}
236
add esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
238
add esi, [PIC_EBP_REL(largetbl+ebx*4+(16*16+16)*4)]
240
psubw mm7, mm2 ; when greater than 14: linbits_sum++ (subtracting -1)
249
pmaddwd mm7, [PIC_EBP_REL(linbits32+eax*8)] ; linbits
250
mov ax, [PIC_EBP_REL(choose_table_H+eax*2)]
265
and ecx, 0xffff ; ecx = sum2
266
shr edx, 16 ; edx = sum
273
mov ecx, [esp+16] ; *s
286
mov eax, [esp+16] ; *s
287
mov ecx, [esp+8] ; *ix
294
movzx ebx, byte [PIC_EBP_REL(ebx+t1l)]
305
lea ecx, [PIC_EBP_REL(tableABC+9*8)]
310
lea ecx, [PIC_EBP_REL(tableABC)]
313
; Handler reached through the dispatch table for indices 8..15: selects
; tableDEF as the lookup base in ecx.
; NOTE(review): intermediate instructions and labels are missing from this
; excerpt; the code after the lea may be shared with other handlers -- confirm.
table_MMX.L_case_8_15:
315
lea ecx, [PIC_EBP_REL(tableDEF)]
317
mov eax,[esp+12] ;eax = *begin
318
; mov edx,[esp+16] ;edx = *end
323
movq mm5,[PIC_EBP_REL(mul_add)]
324
pxor mm2,mm2 ;mm2 = sum
329
movq mm0,[edx+eax] ;mm0 = ix[0] | ix[1]
336
movq mm2, [ecx+ebx*8] ; first 8-byte table entry initialises the packed sums
345
packssdw mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
349
paddd mm2, [ecx+ebx*8] ; accumulate further 8-byte table entries as two dwords
351
paddd mm2, [ecx+ebx*8]
358
and ecx, 0xffff ; ecx = sum2
359
shr ebx, 16 ; ebx = sum1
360
movd edx, mm2 ; edx = sum
376
mov ecx, [esp+16] ; *s
383
; NOTE(review): fragments of the handlers that use table23 / table56; their
; labels and many instructions are not visible in this excerpt.
; Each variant selects a table base in ecx and the matching multiplier
; constant in mm5.
; NOTE(review): pmov is not a standard x86 mnemonic; presumably a macro
; defined in an included header -- confirm.
lea ecx,[PIC_EBP_REL(table23)]
384
pmov mm5,[PIC_EBP_REL(mul_add23)]
388
lea ecx,[PIC_EBP_REL(table56)]
389
pmov mm5,[PIC_EBP_REL(mul_add56)]
391
mov eax,[esp+12] ;eax = *begin
392
; mov edx,[esp+16] ;edx = *end
401
movq mm0,[edx+eax] ;mm0 = ix[0] | ix[1]
402
pxor mm2,mm2 ;mm2 = sum
417
packssdw mm0,mm1 ;mm0 = ix[0]|ix[1]|ix[2]|ix[3]
434
and ecx, 0xffff ; ecx = sum2
435
shr edx, 16 ; edx = sum1
442
mov ecx, [esp+16] ; *s