303
303
* smart enough, at least in the version we are currently using.
305
305
if (shift < 32) {
306
asm("mov %%eax, %2 \n\t" // Save lo(multiplicand) in tmp2
307
"mov %%edx, %%eax \n\t" // Get hi(multiplicand)
308
"mull %4 \n\t" // p2 = hi(multiplicand) * multiplier
309
"xchg %%eax, %2 \n\t" // Save lo(p2) in tmp2, get lo(multiplicand)
310
"mov %%edx, %1 \n\t" // Save hi(p2) in tmp1
311
"mull %4 \n\t" // p1 = lo(multiplicand) * multiplier
312
"addl %2, %%edx \n\t" // hi(p1) += lo(p2)
313
"adcl $0, %1 \n\t" // hi(p2) += carry from previous step
314
"shrdl %%edx, %%eax \n\t" // result = hi(p2):hi(p1):lo(p1) >> shift
317
"=&r" (tmp1), // use in shrdl requires it to be a register
318
"=&r" (tmp2) // could be "=&rm" but "m" is slower
319
: "0" (multiplicand),
306
__asm__("mov %%eax, %2 \n\t" // Save lo(multiplicand) in tmp2
307
"mov %%edx, %%eax \n\t" // Get hi(multiplicand)
308
"mull %4 \n\t" // p2 = hi(multiplicand) * multiplier
309
"xchg %%eax, %2 \n\t" // Save lo(p2) in tmp2, get lo(multiplicand)
310
"mov %%edx, %1 \n\t" // Save hi(p2) in tmp1
311
"mull %4 \n\t" // p1 = lo(multiplicand) * multiplier
312
"addl %2, %%edx \n\t" // hi(p1) += lo(p2)
313
"adcl $0, %1 \n\t" // hi(p2) += carry from previous step
314
"shrdl %%edx, %%eax \n\t" // result = hi(p2):hi(p1):lo(p1) >> shift
317
"=&r" (tmp1), // use in shrdl requires it to be a register
318
"=&r" (tmp2) // could be "=&rm" but "m" is slower
319
: "0" (multiplicand),
325
asm("mov %%edx, %2 \n\t" // Save hi(multiplicand) in tmp2
326
"mull %4 \n\t" // p1 = lo(multiplicand) * multiplier
327
"mov %%edx, %1 \n\t" // Save hi(p1) in tmp1
328
"mov %2, %%eax \n\t" // Discard lo(p1), get hi(multiplicand)
329
"mull %4 \n\t" // p2 = hi(multiplicand) * multiplier
330
"addl %1, %%eax \n\t" // lo(p2) += hi(p1)
331
"adcl $0, %%edx \n\t" // hi(p2) += carry from previous step
332
"shrdl %%edx, %%eax \n\t" // result = p2 >> (shift & 31)
335
"=&r" (tmp1), // could be "=&rm" but "m" is slower
336
"=&r" (tmp2) // could be "=&rm" but "m" is slower
337
: "0" (multiplicand),
325
__asm__("mov %%edx, %2 \n\t" // Save hi(multiplicand) in tmp2
326
"mull %4 \n\t" // p1 = lo(multiplicand) * multiplier
327
"mov %%edx, %1 \n\t" // Save hi(p1) in tmp1
328
"mov %2, %%eax \n\t" // Discard lo(p1), get hi(multiplicand)
329
"mull %4 \n\t" // p2 = hi(multiplicand) * multiplier
330
"addl %1, %%eax \n\t" // lo(p2) += hi(p1)
331
"adcl $0, %%edx \n\t" // hi(p2) += carry from previous step
332
"shrdl %%edx, %%eax \n\t" // result = p2 >> (shift & 31)
335
"=&r" (tmp1), // could be "=&rm" but "m" is slower
336
"=&r" (tmp2) // could be "=&rm" but "m" is slower
337
: "0" (multiplicand),
416
416
uint32 tmp1, tmp2;
417
417
// ASSERT(shift >= 0 && shift < 64);
419
419
/* Written and tested by mann, checked by dbudko and hpreg */
420
420
/* XXX hpreg suggested some improvements that we haven't converged on yet */
421
asm("mov %%eax, %2\n\t" // Save lo(multiplicand)
422
"mov %%edx, %%eax\n\t" // Get hi(multiplicand)
423
"test %%eax, %%eax\n\t" // Check sign of multiplicand
424
"jl 0f\n\t" // Go if negative
425
"mull %4\n\t" // p2 = hi(multiplicand) * multiplier
428
"mull %4\n\t" // p2 = hi(multiplicand) * multiplier
429
"sub %4, %%edx\n" // hi(p2) += -1 * multiplier
431
"xchg %%eax, %2\n\t" // Save lo(p2), get lo(multiplicand)
432
"mov %%edx, %1\n\t" // Save hi(p2)
433
"mull %4\n\t" // p1 = lo(multiplicand) * multiplier
434
"addl %2, %%edx\n\t" // hi(p1) += lo(p2)
435
"adcl $0, %1\n\t" // hi(p2) += carry from previous step
436
"cmpl $32, %%ecx\n\t" // shift < 32?
437
"jl 2f\n\t" // Go if so
438
"mov %%edx, %%eax\n\t" // result = hi(p2):hi(p1) >> (shift & 31)
440
"shrdl %%edx, %%eax\n\t"
441
"sarl %%cl, %%edx\n\t"
444
"shrdl %%edx, %%eax\n\t" // result = hi(p2):hi(p1):lo(p1) >> shift
447
: "=A" (result), "=&r" (tmp1), "=&r" (tmp2)
448
: "0" (multiplicand), "rm" (multiplier), "c" (shift)
421
__asm__("mov %%eax, %2\n\t" // Save lo(multiplicand)
422
"mov %%edx, %%eax\n\t" // Get hi(multiplicand)
423
"test %%eax, %%eax\n\t" // Check sign of multiplicand
424
"jl 0f\n\t" // Go if negative
425
"mull %4\n\t" // p2 = hi(multiplicand) * multiplier
428
"mull %4\n\t" // p2 = hi(multiplicand) * multiplier
429
"sub %4, %%edx\n" // hi(p2) += -1 * multiplier
431
"xchg %%eax, %2\n\t" // Save lo(p2), get lo(multiplicand)
432
"mov %%edx, %1\n\t" // Save hi(p2)
433
"mull %4\n\t" // p1 = lo(multiplicand) * multiplier
434
"addl %2, %%edx\n\t" // hi(p1) += lo(p2)
435
"adcl $0, %1\n\t" // hi(p2) += carry from previous step
436
"cmpl $32, %%ecx\n\t" // shift < 32?
437
"jl 2f\n\t" // Go if so
438
"mov %%edx, %%eax\n\t" // result = hi(p2):hi(p1) >> (shift & 31)
440
"shrdl %%edx, %%eax\n\t"
441
"sarl %%cl, %%edx\n\t"
444
"shrdl %%edx, %%eax\n\t" // result = hi(p2):hi(p1):lo(p1) >> shift
447
: "=A" (result), "=&r" (tmp1), "=&r" (tmp2)
448
: "0" (multiplicand), "rm" (multiplier), "c" (shift)