108
* ebp - 36: caller's esi
109
* ebp - 32: caller's edi
117
* ebp + 0: caller's ebp
118
* ebp + 4: return address
119
* ebp + 8: a argument
120
* ebp + 12: a_len argument
121
* ebp + 16: b argument
122
* ebp + 20: c argument
131
__declspec(naked) void
132
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
141
mov ebx,0 ; carry = 0
142
mov ecx,[ebp+12] ; ecx = a_len
145
je L_4 ; jmp if a_len == 0
146
mov esi,[ebp+8] ; esi = a
149
; Per-word step: multiply the word loaded from a by b, fold in the running
; carry and the current word of c, store the low word through edi, and keep
; the high word as the next carry.
lodsd ; eax = [ds:esi]; esi += 4
150
mov edx,[ebp+16] ; edx = b
151
mul edx ; edx:eax = Phi:Plo = a_i * b
153
add eax,ebx ; add carry (ebx) to edx:eax
155
mov ebx,[edi] ; load current word from *c, to be added in (old carry in ebx is already consumed)
158
mov ebx,edx ; high half of product becomes next carry
160
stosd ; [es:edi] = eax; edi += 4 (stosd stores the full 32-bit eax; assumes DF is clear)
162
jnz L_3 ; jmp if a_len != 0
164
mov [edi],ebx ; *c = carry
175
* ebp - 36: caller's esi
176
* ebp - 32: caller's edi
184
* ebp + 0: caller's ebp
185
* ebp + 4: return address
186
* ebp + 8: a argument
187
* ebp + 12: a_len argument
188
* ebp + 16: b argument
189
* ebp + 20: c argument
198
__declspec(naked) void
199
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
208
mov ebx,0 ; carry = 0
209
mov ecx,[ebp+12] ; ecx = a_len
121
psubq mm2, mm2 ; carry = 0 (mm2 - mm2 clears the 64-bit accumulator)
122
mov ecx, [ebp+12] ; ecx = a_len
123
movd mm1, [ebp+16] ; mm1 = b
212
126
je L_6 ; jmp if a_len == 0
127
mov esi, [ebp+8] ; esi = a
214
mov esi,[ebp+8] ; esi = a
216
; Scalar per-word step: eax = next word of a, edx:eax = a_i * b, then the
; carry and the current word of c are folded in and the low word is stored.
lodsd ; eax = [ds:esi]; esi += 4
217
mov edx,[ebp+16] ; edx = b
218
mul edx ; edx:eax = Phi:Plo = a_i * b
220
add eax,ebx ; add carry (ebx) to edx:eax
222
mov ebx,[edi] ; load current word from *c, to be added in (old carry in ebx is already consumed)
225
mov ebx,edx ; high half of product becomes next carry
227
stosd ; [es:edi] = eax; edi += 4 (stosd stores the full 32-bit eax; assumes DF is clear)
130
; MMX/SSE2 per-word step: mm2 is a 64-bit running sum whose low 32 bits are
; the result word and whose high 32 bits are the carry.
movd mm0, [esi] ; mm0 = *a (32-bit load, zero-extended; movd itself does not advance esi)
132
pmuludq mm0, mm1 ; mm0 = b * *a (unsigned 32x32 -> 64-bit product)
133
paddq mm2, mm0 ; mm2 = carry + product (64-bit add)
134
movd [edi], mm2 ; store the low 32 bits of the sum
136
psrlq mm2, 32 ; carry = high 32 bits of the sum
228
137
dec ecx ; --a_len
229
138
jnz L_5 ; jmp if a_len != 0
140
movd [edi], mm2 ; *c = carry
151
* ebp - 36: caller's esi
152
* ebp - 32: caller's edi
160
* ebp + 0: caller's ebp
161
* ebp + 4: return address
162
* ebp + 8: a argument
163
* ebp + 12: a_len argument
164
* ebp + 16: b argument
165
* ebp + 20: c argument
174
__declspec(naked) void
175
s_mpv_mul_d_add(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
180
je s_mpv_mul_d_add_x86 ; 'equal' -> scalar x86 path. NOTE(review): the flag-setting compare is not visible here - confirm what is tested
181
jg s_mpv_mul_d_add_sse2 ; 'greater' -> MMX/SSE2 path
185
jg s_mpv_mul_d_add_sse2 ; second test, same target. NOTE(review): presumably follows a fresh compare - confirm
193
mov ebx,0 ; carry = 0
194
mov ecx,[ebp+12] ; ecx = a_len
197
je L_11 ; jmp if a_len == 0
198
mov esi,[ebp+8] ; esi = a
201
; Scalar per-word step: multiply the word loaded from a by b, fold in the
; running carry and the current word of c, store the low word through edi,
; and keep the high word as the next carry.
lodsd ; eax = [ds:esi]; esi += 4
202
mov edx,[ebp+16] ; edx = b
203
mul edx ; edx:eax = Phi:Plo = a_i * b
205
add eax,ebx ; add carry (ebx) to edx:eax
207
mov ebx,[edi] ; load current word from *c, to be added in (old carry in ebx is already consumed)
210
mov ebx,edx ; high half of product becomes next carry
212
stosd ; [es:edi] = eax; edi += 4 (stosd stores the full 32-bit eax; assumes DF is clear)
214
jnz L_10 ; jmp if a_len != 0
216
mov [edi],ebx ; *c = carry
223
s_mpv_mul_d_add_sse2:
228
; MMX/SSE2 path: mm2 is a 64-bit running sum whose low 32 bits are the
; result word and whose high 32 bits are the carry into the next word.
psubq mm2, mm2 ; carry = 0 (mm2 - mm2 clears the 64-bit accumulator)
229
mov ecx, [ebp+12] ; ecx = a_len
230
movd mm1, [ebp+16] ; mm1 = b
233
je L_16 ; jmp if a_len == 0
234
mov esi, [ebp+8] ; esi = a
237
movd mm0, [esi] ; mm0 = *a (32-bit load, zero-extended; movd itself does not advance esi)
239
pmuludq mm0, mm1 ; mm0 = b * *a (unsigned 32x32 -> 64-bit product)
240
paddq mm2, mm0 ; mm2 = carry + product (64-bit add)
242
paddq mm2, mm0 ; second accumulate into mm2. NOTE(review): comment used to duplicate the line above; presumably mm0 holds the word from *c here - confirm
243
movd [edi], mm2 ; store the low 32 bits of the sum
245
psrlq mm2, 32 ; carry = high 32 bits of the sum
247
jnz L_15 ; jmp if a_len != 0
249
movd [edi], mm2 ; *c = carry
260
* ebp - 36: caller's esi
261
* ebp - 32: caller's edi
269
* ebp + 0: caller's ebp
270
* ebp + 4: return address
271
* ebp + 8: a argument
272
* ebp + 12: a_len argument
273
* ebp + 16: b argument
274
* ebp + 20: c argument
283
__declspec(naked) void
284
s_mpv_mul_d_add_prop(const mp_digit *a, mp_size a_len, mp_digit b, mp_digit *c)
289
je s_mpv_mul_d_add_prop_x86 ; 'equal' -> scalar x86 path. NOTE(review): the flag-setting compare is not visible here - confirm what is tested
290
jg s_mpv_mul_d_add_prop_sse2 ; 'greater' -> MMX/SSE2 path
294
jg s_mpv_mul_d_add_prop_sse2 ; second test, same target. NOTE(review): presumably follows a fresh compare - confirm
295
s_mpv_mul_d_add_prop_x86:
302
mov ebx,0 ; carry = 0
303
mov ecx,[ebp+12] ; ecx = a_len
306
je L_21 ; jmp if a_len == 0
308
mov esi,[ebp+8] ; esi = a
310
; Scalar per-word step: multiply the word loaded from a by b, fold in the
; running carry and the current word of c, store the low word through edi,
; and keep the high word as the next carry.
lodsd ; eax = [ds:esi]; esi += 4
311
mov edx,[ebp+16] ; edx = b
312
mul edx ; edx:eax = Phi:Plo = a_i * b
314
add eax,ebx ; add carry (ebx) to edx:eax
316
mov ebx,[edi] ; load current word from *c, to be added in (old carry in ebx is already consumed)
319
mov ebx,edx ; high half of product becomes next carry
321
stosd ; [es:edi] = eax; edi += 4 (stosd stores the full 32-bit eax; assumes DF is clear)
323
jnz L_20 ; jmp if a_len != 0
231
325
; After the multiply loop: a non-zero carry is added into the following
; words of c.
cmp ebx,0 ; is carry zero?
233
327
mov eax,[edi] ; load current word from *c, to be added in
235
329
stosd ; [es:edi] = eax; edi += 4 (stosd stores the full 32-bit eax; assumes DF is clear)
238
332
mov eax,[edi] ; load current word from *c, to be added in
240
334
stosd ; [es:edi] = eax; edi += 4 (stosd stores the full 32-bit eax; assumes DF is clear)
343
s_mpv_mul_d_add_prop_sse2:
349
; MMX/SSE2 path: mm2 is a 64-bit running sum whose low 32 bits are the
; result word and whose high 32 bits are the carry into the next word.
psubq mm2, mm2 ; carry = 0 (mm2 - mm2 clears the 64-bit accumulator)
350
mov ecx, [ebp+12] ; ecx = a_len
351
movd mm1, [ebp+16] ; mm1 = b
354
je L_26 ; jmp if a_len == 0
355
mov esi, [ebp+8] ; esi = a
358
movd mm0, [esi] ; mm0 = *a (32-bit load, zero-extended; movd itself does not advance esi)
359
movd mm3, [edi] ; mm3 = current word from *c (the sum so far)
361
pmuludq mm0, mm1 ; mm0 = b * *a (unsigned 32x32 -> 64-bit product)
362
paddq mm2, mm0 ; mm2 = carry + product (64-bit add)
363
paddq mm2, mm3 ; mm2 += word loaded from *c (paddq itself does not advance edi)
364
movd [edi], mm2 ; store the low 32 bits of the sum
366
psrlq mm2, 32 ; carry = high 32 bits of the sum
368
jnz L_25 ; jmp if a_len != 0
371
cmp ebx, 0 ; is carry zero? (assumes ebx was loaded from mm2 beforehand - TODO confirm)
378
mov eax, [edi] ; load current word from *c, to be added in
380
stosd ; [es:edi] = eax; edi += 4 (stosd stores the full 32-bit eax; assumes DF is clear)
303
453
stosd ; [es:di] = eax; di += 4;
304
454
dec ecx ; --a_len
305
jnz L_10 ; jmp if a_len != 0
455
jnz L_30 ; jmp if a_len != 0
307
457
cmp ebx,0 ; is carry zero?
309
459
mov eax,[edi] ; add in current word from *c
311
461
stosd ; [es:edi] = ax; edi += 4;
314
464
mov eax,[edi] ; add in current word from *c
316
466
stosd ; [es:edi] = ax; edi += 4;
475
s_mpv_sqr_add_prop_sse2:
481
; MMX/SSE2 path: each word of a is squared into a 64-bit product, which is
; accumulated together with the running carry (mm2) into two consecutive
; words of the output at [edi] and [edi+4].
psubq mm2, mm2 ; carry = 0 (mm2 - mm2 clears the 64-bit accumulator)
482
mov ecx, [ebp+12] ; ecx = a_len
485
je L_36 ; jmp if a_len == 0
486
mov esi, [ebp+8] ; esi = a
489
movd mm0, [esi] ; mm0 = *a (32-bit load, zero-extended)
490
movd mm3, [edi] ; mm3 = current low output word (the sum so far)
492
pmuludq mm0, mm0 ; mm0 = sqr(*a) as an unsigned 64-bit product
493
paddq mm2, mm0 ; mm2 = carry + square (64-bit add)
494
paddq mm2, mm3 ; add the low word
496
movd [edi], mm2 ; store the low 32 bits of the sum
498
paddq mm2, mm3 ; add the high word (assumes mm2/mm3 were updated in between - TODO confirm)
499
movd [edi+4], mm2 ; store the low 32 bits of mm2 into the next output word
500
psrlq mm2, 32 ; carry = high 32 bits of the sum
503
jnz L_35 ; jmp if a_len != 0
506
cmp ebx, 0 ; is carry zero? (assumes ebx was loaded from mm2 beforehand - TODO confirm)
513
mov eax, [edi] ; load current word from *c, to be added in
515
stosd ; [es:edi] = eax; edi += 4 (stosd stores the full 32-bit eax; assumes DF is clear)