2
* Plain Intel IA32 assembly implementations of PortAudio sample converter functions.
3
* Copyright (c) 1999-2002 Ross Bencina, Phil Burk
5
* Permission is hereby granted, free of charge, to any person obtaining
6
* a copy of this software and associated documentation files
7
* (the "Software"), to deal in the Software without restriction,
8
* including without limitation the rights to use, copy, modify, merge,
9
* publish, distribute, sublicense, and/or sell copies of the Software,
10
* and to permit persons to whom the Software is furnished to do so,
11
* subject to the following conditions:
13
* The above copyright notice and this permission notice shall be
14
* included in all copies or substantial portions of the Software.
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
20
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21
* CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
* The text above constitutes the entire PortAudio license; however,
27
* the PortAudio community also makes the following non-binding requests:
29
* Any person wishing to distribute modifications to the Software is
30
* requested to send the modifications to the original developer so that
31
* they can be incorporated into the canonical version. It is also
32
* requested that these non-binding requests be included along with the
40
#include "pa_x86_plain_converters.h"
42
#include "pa_converters.h"
43
#include "pa_dither.h"
46
the main reason these versions are faster than the equivalent C versions
47
is that float -> int casting is expensive in C on x86 because the rounding
48
mode needs to be changed for every cast. these versions only set
49
the rounding mode once outside the loop.
51
small additional speed gains are made by the way that clamping is
56
o- implement Dither only (no-clip) versions
57
o- implement int8 and uint8 versions
60
o- the packed 24 bit functions could benefit from unrolling and avoiding
61
byte and word sized register access.
64
/* -------------------------------------------------------------------------- */
67
#define PA_CLIP_( val, min, max )\
68
{ val = ((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)); }
72
the following notes were used to determine whether a floating point
73
value should be saturated (ie >1 or <-1) by loading it into an integer
74
register. these should be rewritten so that they make sense.
76
an ieee floating point value
78
1.xxxxxxxxxxxxxxxxxxxx?
81
is less than or equal to 1 and greater than or equal to -1 either:
83
if the mantissa is 0 and the unbiased exponent is 0
87
if the unbiased exponent < 0
91
if the mantissa is 0 and the biased exponent is 7F
95
if the biased exponent is less than 7F
98
therefore the value is greater than 1 or less than -1 if
100
the mantissa is not 0 and the biased exponent is 7F
104
if the biased exponent is greater than 7F
107
in other words, if we mask out the sign bit, the value is
108
greater than 1 or less than -1 if its integer representation is greater than:
110
0 01111111 0000 0000 0000 0000 0000 000
112
0011 1111 1000 0000 0000 0000 0000 0000 => 0x3F800000
115
/* -------------------------------------------------------------------------- */
117
/* Constants shared by the converters in this file: the x87 control word that each
   assembly sequence loads with fldcw, the scale factors loaded with fld, and the
   dither scaling parameters. */
static const short fpuControlWord_ = 0x033F; /*round to nearest, 64 bit precision, all exceptions masked*/
118
/* Full-scale multipliers. Each dithered variant is one less than its plain
   counterpart; the C fragments in this file describe that as a smaller scaler
   used to prevent overflow when the dither is added. */
static const double int32Scaler_ = 0x7FFFFFFF;
119
static const double ditheredInt32Scaler_ = 0x7FFFFFFE;
120
static const double int24Scaler_ = 0x7FFFFF;
121
static const double ditheredInt24Scaler_ = 0x7FFFFE;
122
static const double int16Scaler_ = 0x7FFF;
123
static const double ditheredInt16Scaler_ = 0x7FFE;
125
#define PA_DITHER_BITS_ (15)
126
/* Multiply by PA_FLOAT_DITHER_SCALE_ to get a float between -2.0 and +1.99999 */
127
#define PA_FLOAT_DITHER_SCALE_ (1.0F / ((1<<PA_DITHER_BITS_)-1))
128
/* Memory copy of the scale so that it can be used as an fmul operand. */
static const float const_float_dither_scale_ = PA_FLOAT_DITHER_SCALE_;
129
/* Shift count applied with shr to each random seed in the inline dither code;
   evaluates to 18 while PA_DITHER_BITS_ is 15. */
#define PA_DITHER_SHIFT_ ((32 - PA_DITHER_BITS_) + 1)
131
/* -------------------------------------------------------------------------- */
136
-EMT64/AMD64 uses different asm
137
-VC2005 does not allow _WIN64 with inline assembly either!
139
/* NOTE(review): only this signature line is visible in this listing. Given the
   notes just above about configurations without inline assembly, this is
   presumably a fallback definition for such builds; its body and the
   surrounding preprocessor guards are not shown - confirm against the
   complete file. */
void PaUtil_InitializeX86PlainConverters( void )
146
/* Float32_To_Int32: float32 to int32 converter with no clipping and no dither.
   Two renditions are visible below: C statements that scale by 0x7FFFFFFF and
   cast, and an x87 inline-assembly loop that multiplies by int32Scaler_ and
   stores with fistp under the control word in fpuControlWord_.
   ditherGenerator is cast to void in both renditions.
   NOTE(review): the embedded original line numbers skip values, so braces,
   preprocessor guards, the C loop header and some instructions (for example
   the setup of ecx and the stride scaling that would use edx) are not visible
   in this listing; confirm details against the complete file. */
static void Float32_To_Int32(
147
void *destinationBuffer, signed int destinationStride,
148
void *sourceBuffer, signed int sourceStride,
149
unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
152
float *src = (float*)sourceBuffer;
153
signed long *dest = (signed long*)destinationBuffer;
154
(void)ditherGenerator; // unused parameter
159
double scaled = *src * 0x7FFFFFFF;
160
*dest = (signed long) scaled;
163
dest += destinationStride;
167
short savedFpuControlWord;
169
(void) ditherGenerator; /* unused parameter */
174
// eax -> source byte stride
175
// edi -> destination ptr
176
// ebx -> destination byte stride
177
// ecx -> source end ptr
180
mov esi, sourceBuffer
182
mov edx, 4 // sizeof float32 and int32
183
mov eax, sourceStride
190
mov edi, destinationBuffer
192
mov ebx, destinationStride
196
// save the current FPU control word, then switch to the converter setting
fstcw savedFpuControlWord
197
fldcw fpuControlWord_
199
fld int32Scaler_ // stack: (int)0x7FFFFFFF
201
Float32_To_Int32_loop:
203
// load unscaled value into st(0)
204
fld dword ptr [esi] // stack: value, (int)0x7FFFFFFF
205
add esi, eax // increment source ptr
207
fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF
209
note: we could store to a temporary qword here which would cause
210
wraparound distortion instead of int indefinite 0x10. that would
211
be more work, and given that not enabling clipping is only advisable
212
when you know that your signal isn't going to clip it isn't worth it.
214
fistp dword ptr [edi] // pop st(0) into dest, stack: (int)0x7FFFFFFF
216
add edi, ebx // increment destination ptr
219
cmp esi, ecx // has src ptr reached end?
220
jne Float32_To_Int32_loop
227
// restore the FPU control word saved before the loop
fldcw savedFpuControlWord
231
/* -------------------------------------------------------------------------- */
233
/* Float32_To_Int32_Clip: float32 to int32 converter that saturates out-of-range
   input. The C statements clip the scaled value with PA_CLIP_; the assembly
   loop tests the raw float bits with the sign masked off against 0x3F800000
   (1.0f) and, when larger, stores 0x7FFFFFFF plus the sign bit instead of
   converting. ditherGenerator is cast to void in both renditions.
   NOTE(review): the embedded original line numbers skip values, so braces,
   preprocessor guards, the C loop header and some instructions (for example
   the setup of ecx) are not visible in this listing; confirm details against
   the complete file. */
static void Float32_To_Int32_Clip(
234
void *destinationBuffer, signed int destinationStride,
235
void *sourceBuffer, signed int sourceStride,
236
unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
239
float *src = (float*)sourceBuffer;
240
signed long *dest = (signed long*)destinationBuffer;
241
(void) ditherGenerator; // unused parameter
246
double scaled = *src * 0x7FFFFFFF;
247
PA_CLIP_( scaled, -2147483648., 2147483647. );
248
*dest = (signed long) scaled;
251
dest += destinationStride;
255
short savedFpuControlWord;
257
(void) ditherGenerator; /* unused parameter */
261
// eax -> source byte stride
262
// edi -> destination ptr
263
// ebx -> destination byte stride
264
// ecx -> source end ptr
267
mov esi, sourceBuffer
269
mov edx, 4 // sizeof float32 and int32
270
mov eax, sourceStride
277
mov edi, destinationBuffer
279
mov ebx, destinationStride
283
// save the current FPU control word, then switch to the converter setting
fstcw savedFpuControlWord
284
fldcw fpuControlWord_
286
fld int32Scaler_ // stack: (int)0x7FFFFFFF
288
Float32_To_Int32_Clip_loop:
290
mov edx, dword ptr [esi] // load floating point value into integer register
292
and edx, 0x7FFFFFFF // mask off sign
293
cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
295
jg Float32_To_Int32_Clip_clamp
297
// load unscaled value into st(0)
298
fld dword ptr [esi] // stack: value, (int)0x7FFFFFFF
299
add esi, eax // increment source ptr
301
fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF
302
fistp dword ptr [edi] // pop st(0) into dest, stack: (int)0x7FFFFFFF
303
jmp Float32_To_Int32_Clip_stored
305
Float32_To_Int32_Clip_clamp:
306
mov edx, dword ptr [esi] // load floating point value into integer register
307
shr edx, 31 // move sign bit into bit 0
308
add esi, eax // increment source ptr
310
add edx, 0x7FFFFFFF // convert to maximum range integers: 0x7FFFFFFF if positive, 0x80000000 if negative
311
mov dword ptr [edi], edx
313
Float32_To_Int32_Clip_stored:
315
//add edi, ebx // increment destination ptr
// NOTE(review): no live instruction advancing edi is visible here; the original
// numbering skips a line after the commented-out add - confirm against the complete file.
318
cmp esi, ecx // has src ptr reached end?
319
jne Float32_To_Int32_Clip_loop
326
// restore the FPU control word saved before the loop
fldcw savedFpuControlWord
330
/* -------------------------------------------------------------------------- */
332
/* Float32_To_Int32_DitherClip: float32 to int32 converter that adds dither and
   saturates out-of-range input. The C statements add the value returned by
   PaUtil_GenerateFloatTriangularDither to the sample scaled by 2147483646.0
   and clip with PA_CLIP_. The assembly loop scales by ditheredInt32Scaler_
   and shows two ways of obtaining the dither: a call to
   PaUtil_GenerateFloatTriangularDither, and an inline generator working on
   local copies of the generator state, which are written back to
   ditherGenerator after the loop.
   NOTE(review): the embedded original line numbers skip values, so braces,
   preprocessor guards (presumably selecting one of the two dither paths), the
   declaration of sourceEnd, the setup of ecx and some generator instructions
   are not visible in this listing; confirm details against the complete file. */
static void Float32_To_Int32_DitherClip(
333
void *destinationBuffer, signed int destinationStride,
334
void *sourceBuffer, signed int sourceStride,
335
unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
338
float *src = (float*)sourceBuffer;
339
signed long *dest = (signed long*)destinationBuffer;
344
double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
345
// use smaller scaler to prevent overflow when we add the dither
346
double dithered = ((double)*src * (2147483646.0)) + dither;
347
PA_CLIP_( dithered, -2147483648., 2147483647. );
348
*dest = (signed long) dithered;
352
dest += destinationStride;
356
short savedFpuControlWord;
359
signed long sourceByteStride;
360
signed long highpassedDither;
363
// local copies of the dither generator state, written back after the loop
unsigned long ditherPrevious = ditherGenerator->previous;
364
unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
365
unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
369
// eax -> source byte stride
370
// edi -> destination ptr
371
// ebx -> destination byte stride
372
// ecx -> source end ptr
375
mov esi, sourceBuffer
377
mov edx, 4 // sizeof float32 and int32
378
mov eax, sourceStride
385
mov edi, destinationBuffer
387
mov ebx, destinationStride
391
// save the current FPU control word, then switch to the converter setting
fstcw savedFpuControlWord
392
fldcw fpuControlWord_
394
fld ditheredInt32Scaler_ // stack: int scaler
396
Float32_To_Int32_DitherClip_loop:
398
mov edx, dword ptr [esi] // load floating point value into integer register
400
and edx, 0x7FFFFFFF // mask off sign
401
cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
403
jg Float32_To_Int32_DitherClip_clamp
405
// load unscaled value into st(0)
406
fld dword ptr [esi] // stack: value, int scaler
407
add esi, eax // increment source ptr
409
fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
412
// call PaUtil_GenerateFloatTriangularDither with C calling convention
413
mov sourceByteStride, eax // save eax
414
mov sourceEnd, ecx // save ecx
415
push ditherGenerator // pass ditherGenerator parameter on stack
416
call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
417
pop edx // clear parameter off stack
418
mov ecx, sourceEnd // restore ecx
419
mov eax, sourceByteStride // restore eax
423
// inline dither generation: eax is used as scratch, so its value is parked in sourceByteStride
mov sourceByteStride, eax // save eax
425
mov eax, ditherRandSeed1
426
mul edx // edx:eax = eax * edx (original note: multiplier 196314165; the load of edx is not visible in this listing)
428
lea eax, [eax+907633515]
429
mov ditherRandSeed1, eax
431
mov eax, ditherRandSeed2
432
mul edx // edx:eax = eax * edx (original note: multiplier 196314165; the load of edx is not visible in this listing)
434
lea eax, [eax+907633515]
435
mov edx, ditherRandSeed1
436
shr edx, PA_DITHER_SHIFT_
437
mov ditherRandSeed2, eax
438
shr eax, PA_DITHER_SHIFT_
439
//add eax, edx // eax -> current
// NOTE(review): no live instruction summing eax and edx is visible here; the original numbering skips a line - confirm.
441
mov edx, ditherPrevious
443
lea edx, [eax+edx] // highpass = current - previous (as listed this adds; a negation of edx is presumably on a line missing from this listing - confirm)
444
mov highpassedDither, edx
445
mov ditherPrevious, eax // previous = current
446
mov eax, sourceByteStride // restore eax
447
fild highpassedDither
448
fmul const_float_dither_scale_
449
// end generate dither, dither signal in st(0)
451
faddp st(1), st(0) // stack: dither + value*(int scaler), int scaler
452
fistp dword ptr [edi] // pop st(0) into dest, stack: int scaler
453
jmp Float32_To_Int32_DitherClip_stored
455
Float32_To_Int32_DitherClip_clamp:
456
mov edx, dword ptr [esi] // load floating point value into integer register
457
shr edx, 31 // move sign bit into bit 0
458
add esi, eax // increment source ptr
460
add edx, 0x7FFFFFFF // convert to maximum range integers: 0x7FFFFFFF if positive, 0x80000000 if negative
461
mov dword ptr [edi], edx
463
Float32_To_Int32_DitherClip_stored:
465
//add edi, ebx // increment destination ptr
// NOTE(review): no live instruction advancing edi is visible here; the original
// numbering skips a line after the commented-out add - confirm against the complete file.
468
cmp esi, ecx // has src ptr reached end?
469
jne Float32_To_Int32_DitherClip_loop
476
// restore the FPU control word saved before the loop
fldcw savedFpuControlWord
479
// write the local dither state back to the generator
ditherGenerator->previous = ditherPrevious;
480
ditherGenerator->randSeed1 = ditherRandSeed1;
481
ditherGenerator->randSeed2 = ditherRandSeed2;
484
/* -------------------------------------------------------------------------- */
486
/* Float32_To_Int24: float32 to packed 3-byte int24 converter with no clipping
   and no dither. The C statements convert to 32 bits and store bits 8..31 as
   three bytes, lowest byte first. The assembly loop scales by int24Scaler_,
   stores the integer to tempInt32 with fistp and then writes one byte and one
   word to the destination. ditherGenerator is cast to void in both renditions.
   NOTE(review): the embedded original line numbers skip values, so braces,
   preprocessor guards, the declaration of temp, the setup of ecx, the load of
   edx from tempInt32 and any shift between the two stores are not visible in
   this listing; confirm details against the complete file. */
static void Float32_To_Int24(
487
void *destinationBuffer, signed int destinationStride,
488
void *sourceBuffer, signed int sourceStride,
489
unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
492
float *src = (float*)sourceBuffer;
493
unsigned char *dest = (unsigned char*)destinationBuffer;
496
(void) ditherGenerator; // unused parameter
500
// convert to 32 bit and drop the low 8 bits
501
double scaled = *src * 0x7FFFFFFF;
502
temp = (signed long) scaled;
504
dest[0] = (unsigned char)(temp >> 8);
505
dest[1] = (unsigned char)(temp >> 16);
506
dest[2] = (unsigned char)(temp >> 24);
509
dest += destinationStride * 3;
513
short savedFpuControlWord;
515
signed long tempInt32;
517
(void) ditherGenerator; /* unused parameter */
521
// eax -> source byte stride
522
// edi -> destination ptr
523
// ebx -> destination byte stride
524
// ecx -> source end ptr
527
mov esi, sourceBuffer
529
mov edx, 4 // sizeof float32
530
mov eax, sourceStride
537
mov edi, destinationBuffer
539
mov edx, 3 // sizeof int24
540
mov ebx, destinationStride
544
// save the current FPU control word, then switch to the converter setting
fstcw savedFpuControlWord
545
fldcw fpuControlWord_
547
fld int24Scaler_ // stack: (int)0x7FFFFF
549
Float32_To_Int24_loop:
551
// load unscaled value into st(0)
552
fld dword ptr [esi] // stack: value, (int)0x7FFFFF
553
add esi, eax // increment source ptr
555
fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF
556
fistp tempInt32 // pop st(0) into tempInt32, stack: (int)0x7FFFFF
559
// NOTE(review): the load of edx from tempInt32 is not visible in this listing - confirm.
mov byte ptr [edi], DL
561
//mov byte ptr [edi+1], DL
562
//mov byte ptr [edi+2], DH
563
mov word ptr [edi+1], DX
565
//add edi, ebx // increment destination ptr
// NOTE(review): no live instruction advancing edi is visible here; the original
// numbering skips a line after the commented-out add - confirm against the complete file.
568
cmp esi, ecx // has src ptr reached end?
569
jne Float32_To_Int24_loop
576
// restore the FPU control word saved before the loop
fldcw savedFpuControlWord
580
/* -------------------------------------------------------------------------- */
582
/* Float32_To_Int24_Clip: float32 to packed 3-byte int24 converter that
   saturates out-of-range input. The C statements clip the 32 bit scaled value
   with PA_CLIP_ and store bits 8..31 as three bytes. The assembly loop tests
   the raw float bits with the sign masked off against 0x3F800000 (1.0f) and,
   when larger, substitutes 0x7FFFFF plus the sign bit; both paths join at the
   store label. ditherGenerator is cast to void in both renditions.
   NOTE(review): the embedded original line numbers skip values, so braces,
   preprocessor guards, the declaration of temp, the setup of ecx, the load of
   edx from tempInt32 and any shift between the two stores are not visible in
   this listing; confirm details against the complete file. */
static void Float32_To_Int24_Clip(
583
void *destinationBuffer, signed int destinationStride,
584
void *sourceBuffer, signed int sourceStride,
585
unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
588
float *src = (float*)sourceBuffer;
589
unsigned char *dest = (unsigned char*)destinationBuffer;
592
(void) ditherGenerator; // unused parameter
596
// convert to 32 bit and drop the low 8 bits
597
double scaled = *src * 0x7FFFFFFF;
598
PA_CLIP_( scaled, -2147483648., 2147483647. );
599
temp = (signed long) scaled;
601
dest[0] = (unsigned char)(temp >> 8);
602
dest[1] = (unsigned char)(temp >> 16);
603
dest[2] = (unsigned char)(temp >> 24);
606
dest += destinationStride * 3;
610
short savedFpuControlWord;
612
signed long tempInt32;
614
(void) ditherGenerator; /* unused parameter */
618
// eax -> source byte stride
619
// edi -> destination ptr
620
// ebx -> destination byte stride
621
// ecx -> source end ptr
624
mov esi, sourceBuffer
626
mov edx, 4 // sizeof float32
627
mov eax, sourceStride
634
mov edi, destinationBuffer
636
mov edx, 3 // sizeof int24
637
mov ebx, destinationStride
641
// save the current FPU control word, then switch to the converter setting
fstcw savedFpuControlWord
642
fldcw fpuControlWord_
644
fld int24Scaler_ // stack: (int)0x7FFFFF
646
Float32_To_Int24_Clip_loop:
648
mov edx, dword ptr [esi] // load floating point value into integer register
650
and edx, 0x7FFFFFFF // mask off sign
651
cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
653
jg Float32_To_Int24_Clip_clamp
655
// load unscaled value into st(0)
656
fld dword ptr [esi] // stack: value, (int)0x7FFFFF
657
add esi, eax // increment source ptr
659
fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF
660
fistp tempInt32 // pop st(0) into tempInt32, stack: (int)0x7FFFFF
662
// NOTE(review): the load of edx from tempInt32 is not visible in this listing - confirm.
jmp Float32_To_Int24_Clip_store
664
Float32_To_Int24_Clip_clamp:
665
mov edx, dword ptr [esi] // load floating point value into integer register
666
shr edx, 31 // move sign bit into bit 0
667
add esi, eax // increment source ptr
669
add edx, 0x7FFFFF // convert to maximum range integers: 0x7FFFFF if positive, 0x800000 if negative
671
Float32_To_Int24_Clip_store:
673
mov byte ptr [edi], DL
675
//mov byte ptr [edi+1], DL
676
//mov byte ptr [edi+2], DH
677
mov word ptr [edi+1], DX
679
//add edi, ebx // increment destination ptr
// NOTE(review): no live instruction advancing edi is visible here; the original
// numbering skips a line after the commented-out add - confirm against the complete file.
682
cmp esi, ecx // has src ptr reached end?
683
jne Float32_To_Int24_Clip_loop
690
// restore the FPU control word saved before the loop
fldcw savedFpuControlWord
694
/* -------------------------------------------------------------------------- */
696
/* Float32_To_Int24_DitherClip: float32 to packed 3-byte int24 converter that
   adds dither and saturates out-of-range input. The C statements add the value
   returned by PaUtil_GenerateFloatTriangularDither to the sample scaled by
   2147483646.0, clip with PA_CLIP_ and store bits 8..31 as three bytes. The
   assembly loop scales by ditheredInt24Scaler_ and shows two ways of obtaining
   the dither: a call to PaUtil_GenerateFloatTriangularDither, and an inline
   generator working on local copies of the generator state, which are written
   back to ditherGenerator after the loop.
   NOTE(review): the embedded original line numbers skip values, so braces,
   preprocessor guards (presumably selecting one of the two dither paths), the
   declarations of temp and sourceEnd, the setup of ecx, the load of edx from
   tempInt32 and some generator instructions are not visible in this listing;
   confirm details against the complete file. */
static void Float32_To_Int24_DitherClip(
697
void *destinationBuffer, signed int destinationStride,
698
void *sourceBuffer, signed int sourceStride,
699
unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
702
float *src = (float*)sourceBuffer;
703
unsigned char *dest = (unsigned char*)destinationBuffer;
708
// convert to 32 bit and drop the low 8 bits
710
// FIXME: the dither amplitude here appears to be too small by 8 bits
711
double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
712
// use smaller scaler to prevent overflow when we add the dither
713
double dithered = ((double)*src * (2147483646.0)) + dither;
714
PA_CLIP_( dithered, -2147483648., 2147483647. );
716
temp = (signed long) dithered;
718
dest[0] = (unsigned char)(temp >> 8);
719
dest[1] = (unsigned char)(temp >> 16);
720
dest[2] = (unsigned char)(temp >> 24);
723
dest += destinationStride * 3;
727
short savedFpuControlWord;
730
signed long sourceByteStride;
731
signed long highpassedDither;
734
// local copies of the dither generator state, written back after the loop
unsigned long ditherPrevious = ditherGenerator->previous;
735
unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
736
unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
738
signed long tempInt32;
742
// eax -> source byte stride
743
// edi -> destination ptr
744
// ebx -> destination byte stride
745
// ecx -> source end ptr
748
mov esi, sourceBuffer
750
mov edx, 4 // sizeof float32
751
mov eax, sourceStride
758
mov edi, destinationBuffer
760
mov edx, 3 // sizeof int24
761
mov ebx, destinationStride
765
// save the current FPU control word, then switch to the converter setting
fstcw savedFpuControlWord
766
fldcw fpuControlWord_
768
fld ditheredInt24Scaler_ // stack: int scaler
770
Float32_To_Int24_DitherClip_loop:
772
mov edx, dword ptr [esi] // load floating point value into integer register
774
and edx, 0x7FFFFFFF // mask off sign
775
cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
777
jg Float32_To_Int24_DitherClip_clamp
779
// load unscaled value into st(0)
780
fld dword ptr [esi] // stack: value, int scaler
781
add esi, eax // increment source ptr
783
fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
786
// call PaUtil_GenerateFloatTriangularDither with C calling convention
787
mov sourceByteStride, eax // save eax
788
mov sourceEnd, ecx // save ecx
789
push ditherGenerator // pass ditherGenerator parameter on stack
790
call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
791
pop edx // clear parameter off stack
792
mov ecx, sourceEnd // restore ecx
793
mov eax, sourceByteStride // restore eax
797
// inline dither generation: eax is used as scratch, so its value is parked in sourceByteStride
mov sourceByteStride, eax // save eax
799
mov eax, ditherRandSeed1
800
mul edx // edx:eax = eax * edx (original note: multiplier 196314165; the load of edx is not visible in this listing)
802
lea eax, [eax+907633515]
803
mov ditherRandSeed1, eax
805
mov eax, ditherRandSeed2
806
mul edx // edx:eax = eax * edx (original note: multiplier 196314165; the load of edx is not visible in this listing)
808
lea eax, [eax+907633515]
809
mov edx, ditherRandSeed1
810
shr edx, PA_DITHER_SHIFT_
811
mov ditherRandSeed2, eax
812
shr eax, PA_DITHER_SHIFT_
813
//add eax, edx // eax -> current
// NOTE(review): no live instruction summing eax and edx is visible here; the original numbering skips a line - confirm.
815
mov edx, ditherPrevious
817
lea edx, [eax+edx] // highpass = current - previous (as listed this adds; a negation of edx is presumably on a line missing from this listing - confirm)
818
mov highpassedDither, edx
819
mov ditherPrevious, eax // previous = current
820
mov eax, sourceByteStride // restore eax
821
fild highpassedDither
822
fmul const_float_dither_scale_
823
// end generate dither, dither signal in st(0)
825
faddp st(1), st(0) // stack: dither + value*(int scaler), int scaler
826
fistp tempInt32 // pop st(0) into tempInt32, stack: int scaler
828
// NOTE(review): the load of edx from tempInt32 is not visible in this listing - confirm.
jmp Float32_To_Int24_DitherClip_store
830
Float32_To_Int24_DitherClip_clamp:
831
mov edx, dword ptr [esi] // load floating point value into integer register
832
shr edx, 31 // move sign bit into bit 0
833
add esi, eax // increment source ptr
835
add edx, 0x7FFFFF // convert to maximum range integers: 0x7FFFFF if positive, 0x800000 if negative
837
Float32_To_Int24_DitherClip_store:
839
mov byte ptr [edi], DL
841
//mov byte ptr [edi+1], DL
842
//mov byte ptr [edi+2], DH
843
mov word ptr [edi+1], DX
845
//add edi, ebx // increment destination ptr
// NOTE(review): no live instruction advancing edi is visible here; the original
// numbering skips a line after the commented-out add - confirm against the complete file.
848
cmp esi, ecx // has src ptr reached end?
849
jne Float32_To_Int24_DitherClip_loop
856
// restore the FPU control word saved before the loop
fldcw savedFpuControlWord
859
// write the local dither state back to the generator
ditherGenerator->previous = ditherPrevious;
860
ditherGenerator->randSeed1 = ditherRandSeed1;
861
ditherGenerator->randSeed2 = ditherRandSeed2;
864
/* -------------------------------------------------------------------------- */
866
/* Float32_To_Int16: float32 to int16 converter with no clipping and no dither.
   The C statement scales by 32767.0f and casts to short. The assembly loop
   multiplies by int16Scaler_ and stores a 16 bit integer with fistp under the
   control word in fpuControlWord_. Strides are converted from samples to
   bytes with imul (4 bytes per source float, 2 bytes per destination int16).
   ditherGenerator is cast to void in both renditions.
   NOTE(review): the embedded original line numbers skip values, so braces,
   preprocessor guards, the C loop header, the C store to dest and the initial
   load of ecx are not visible in this listing; confirm details against the
   complete file. */
static void Float32_To_Int16(
867
void *destinationBuffer, signed int destinationStride,
868
void *sourceBuffer, signed int sourceStride,
869
unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
872
float *src = (float*)sourceBuffer;
873
signed short *dest = (signed short*)destinationBuffer;
874
(void)ditherGenerator; // unused parameter
879
short samp = (short) (*src * (32767.0f));
883
dest += destinationStride;
887
short savedFpuControlWord;
889
(void) ditherGenerator; /* unused parameter */
893
// eax -> source byte stride
894
// edi -> destination ptr
895
// ebx -> destination byte stride
896
// ecx -> source end ptr
899
mov esi, sourceBuffer
901
mov edx, 4 // sizeof float32
902
mov eax, sourceStride
903
imul eax, edx // source byte stride
907
add ecx, esi // source end ptr = count * source byte stride + source ptr
909
mov edi, destinationBuffer
911
mov edx, 2 // sizeof int16
912
mov ebx, destinationStride
913
imul ebx, edx // destination byte stride
916
// save the current FPU control word, then switch to the converter setting
fstcw savedFpuControlWord
917
fldcw fpuControlWord_
919
fld int16Scaler_ // stack: (int)0x7FFF
921
Float32_To_Int16_loop:
923
// load unscaled value into st(0)
924
fld dword ptr [esi] // stack: value, (int)0x7FFF
925
add esi, eax // increment source ptr
927
fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF
928
fistp word ptr [edi] // store scaled int into dest, stack: (int)0x7FFF
930
add edi, ebx // increment destination ptr
933
cmp esi, ecx // has src ptr reached end?
934
jne Float32_To_Int16_loop
941
// restore the FPU control word saved before the loop
fldcw savedFpuControlWord
945
/* -------------------------------------------------------------------------- */
947
/* Float32_To_Int16_Clip: float32 to int16 converter that saturates out-of-range
   input. The C statements scale by 32767.0f, clip to -0x8000..0x7FFF with
   PA_CLIP_ and store a signed short. The assembly loop tests the raw float
   bits with the sign masked off against 0x3F800000 (1.0f) and, when larger,
   stores 0x7FFF plus the sign bit instead of converting.
   ditherGenerator is cast to void in both renditions.
   NOTE(review): the embedded original line numbers skip values, so braces,
   preprocessor guards, the C loop header and the initial load of ecx are not
   visible in this listing; confirm details against the complete file. */
static void Float32_To_Int16_Clip(
948
void *destinationBuffer, signed int destinationStride,
949
void *sourceBuffer, signed int sourceStride,
950
unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
953
float *src = (float*)sourceBuffer;
954
signed short *dest = (signed short*)destinationBuffer;
955
(void)ditherGenerator; // unused parameter
959
long samp = (signed long) (*src * (32767.0f));
960
PA_CLIP_( samp, -0x8000, 0x7FFF );
961
*dest = (signed short) samp;
964
dest += destinationStride;
968
short savedFpuControlWord;
970
(void) ditherGenerator; /* unused parameter */
974
// eax -> source byte stride
975
// edi -> destination ptr
976
// ebx -> destination byte stride
977
// ecx -> source end ptr
980
mov esi, sourceBuffer
982
mov edx, 4 // sizeof float32
983
mov eax, sourceStride
984
imul eax, edx // source byte stride
988
add ecx, esi // source end ptr = count * source byte stride + source ptr
990
mov edi, destinationBuffer
992
mov edx, 2 // sizeof int16
993
mov ebx, destinationStride
994
imul ebx, edx // destination byte stride
997
// save the current FPU control word, then switch to the converter setting
fstcw savedFpuControlWord
998
fldcw fpuControlWord_
1000
fld int16Scaler_ // stack: (int)0x7FFF
1002
Float32_To_Int16_Clip_loop:
1004
mov edx, dword ptr [esi] // load floating point value into integer register
1006
and edx, 0x7FFFFFFF // mask off sign
1007
cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
1009
jg Float32_To_Int16_Clip_clamp
1011
// load unscaled value into st(0)
1012
fld dword ptr [esi] // stack: value, (int)0x7FFF
1013
add esi, eax // increment source ptr
1014
//lea esi, [esi+eax]
1015
fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF
1016
fistp word ptr [edi] // store scaled int into dest, stack: (int)0x7FFF
1017
jmp Float32_To_Int16_Clip_stored
1019
Float32_To_Int16_Clip_clamp:
1020
mov edx, dword ptr [esi] // load floating point value into integer register
1021
shr edx, 31 // move sign bit into bit 0
1022
add esi, eax // increment source ptr
1023
//lea esi, [esi+eax]
1024
add dx, 0x7FFF // convert to maximum range integers: 0x7FFF if positive, 0x8000 if negative
1025
mov word ptr [edi], dx // store clamped int into dest
1027
Float32_To_Int16_Clip_stored:
1029
add edi, ebx // increment destination ptr
1030
//lea edi, [edi+ebx]
1032
cmp esi, ecx // has src ptr reached end?
1033
jne Float32_To_Int16_Clip_loop
1040
// restore the FPU control word saved before the loop
fldcw savedFpuControlWord
1044
/* -------------------------------------------------------------------------- */
1046
/* Float32_To_Int16_DitherClip: float32 to int16 converter that adds dither and
   saturates out-of-range input. The C statements add the value returned by
   PaUtil_GenerateFloatTriangularDither to the sample scaled by 32766.0f, clip
   to -0x8000..0x7FFF with PA_CLIP_ and store a signed short. The assembly loop
   scales by ditheredInt16Scaler_ and shows two ways of obtaining the dither: a
   call to PaUtil_GenerateFloatTriangularDither, and an inline generator
   working on local copies of the generator state, which are written back to
   ditherGenerator after the loop.
   NOTE(review): the embedded original line numbers skip values, so braces,
   preprocessor guards (presumably selecting one of the two dither paths), the
   declaration of sourceEnd, the initial load of ecx and some generator
   instructions are not visible in this listing; confirm details against the
   complete file. */
static void Float32_To_Int16_DitherClip(
1047
void *destinationBuffer, signed int destinationStride,
1048
void *sourceBuffer, signed int sourceStride,
1049
unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
1052
float *src = (float*)sourceBuffer;
1053
signed short *dest = (signed short*)destinationBuffer;
1054
(void)ditherGenerator; // cast kept from the original; note that ditherGenerator is in fact used by the statements below
1059
float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
1060
// use smaller scaler to prevent overflow when we add the dither
1061
float dithered = (*src * (32766.0f)) + dither;
1062
signed long samp = (signed long) dithered;
1063
PA_CLIP_( samp, -0x8000, 0x7FFF );
1064
*dest = (signed short) samp;
1066
src += sourceStride;
1067
dest += destinationStride;
1071
short savedFpuControlWord;
1074
signed long sourceByteStride;
1075
signed long highpassedDither;
1078
// local copies of the dither generator state, written back after the loop
unsigned long ditherPrevious = ditherGenerator->previous;
1079
unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
1080
unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
1083
// esi -> source ptr
1084
// eax -> source byte stride
1085
// edi -> destination ptr
1086
// ebx -> destination byte stride
1087
// ecx -> source end ptr
1090
mov esi, sourceBuffer
1092
mov edx, 4 // sizeof float32
1093
mov eax, sourceStride
1094
imul eax, edx // source byte stride
1098
add ecx, esi // source end ptr = count * source byte stride + source ptr
1100
mov edi, destinationBuffer
1102
mov edx, 2 // sizeof int16
1103
mov ebx, destinationStride
1104
imul ebx, edx // destination byte stride
1107
// save the current FPU control word, then switch to the converter setting
fstcw savedFpuControlWord
1108
fldcw fpuControlWord_
1110
fld ditheredInt16Scaler_ // stack: int scaler
1112
Float32_To_Int16_DitherClip_loop:
1114
mov edx, dword ptr [esi] // load floating point value into integer register
1116
and edx, 0x7FFFFFFF // mask off sign
1117
cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
1119
jg Float32_To_Int16_DitherClip_clamp
1121
// load unscaled value into st(0)
1122
fld dword ptr [esi] // stack: value, int scaler
1123
add esi, eax // increment source ptr
1124
//lea esi, [esi+eax]
1125
fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
1128
// call PaUtil_GenerateFloatTriangularDither with C calling convention
1129
mov sourceByteStride, eax // save eax
1130
mov sourceEnd, ecx // save ecx
1131
push ditherGenerator // pass ditherGenerator parameter on stack
1132
call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
1133
pop edx // clear parameter off stack
1134
mov ecx, sourceEnd // restore ecx
1135
mov eax, sourceByteStride // restore eax
1139
// inline dither generation: eax is used as scratch, so its value is parked in sourceByteStride
mov sourceByteStride, eax // save eax
1141
mov eax, ditherRandSeed1
1142
mul edx // edx:eax = eax * edx (original note: multiplier 196314165; the load of edx is not visible in this listing)
1143
//add eax, 907633515
1144
lea eax, [eax+907633515]
1145
mov ditherRandSeed1, eax
1147
mov eax, ditherRandSeed2
1148
mul edx // edx:eax = eax * edx (original note: multiplier 196314165; the load of edx is not visible in this listing)
1149
//add eax, 907633515
1150
lea eax, [eax+907633515]
1151
mov edx, ditherRandSeed1
1152
shr edx, PA_DITHER_SHIFT_
1153
mov ditherRandSeed2, eax
1154
shr eax, PA_DITHER_SHIFT_
1155
//add eax, edx // eax -> current
1156
lea eax, [eax+edx] // current = randSeed1>>x + randSeed2>>x
1157
mov edx, ditherPrevious
1159
lea edx, [eax+edx] // highpass = current - previous (as listed this adds; a negation of edx is presumably on a line missing from this listing - confirm)
1160
mov highpassedDither, edx
1161
mov ditherPrevious, eax // previous = current
1162
mov eax, sourceByteStride // restore eax
1163
fild highpassedDither
1164
fmul const_float_dither_scale_
1165
// end generate dither, dither signal in st(0)
1167
faddp st(1), st(0) // stack: dither + value*(int scaler), int scaler
1168
fistp word ptr [edi] // store scaled int into dest, stack: int scaler
1169
jmp Float32_To_Int16_DitherClip_stored
1171
Float32_To_Int16_DitherClip_clamp:
1172
mov edx, dword ptr [esi] // load floating point value into integer register
1173
shr edx, 31 // move sign bit into bit 0
1174
add esi, eax // increment source ptr
1175
//lea esi, [esi+eax]
1176
add dx, 0x7FFF // convert to maximum range integers: 0x7FFF if positive, 0x8000 if negative
1177
mov word ptr [edi], dx // store clamped int into dest
1179
Float32_To_Int16_DitherClip_stored:
1181
add edi, ebx // increment destination ptr
1182
//lea edi, [edi+ebx]
1184
cmp esi, ecx // has src ptr reached end?
1185
jne Float32_To_Int16_DitherClip_loop
1192
// restore the FPU control word saved before the loop
fldcw savedFpuControlWord
1195
// write the local dither state back to the generator
ditherGenerator->previous = ditherPrevious;
1196
ditherGenerator->randSeed1 = ditherRandSeed1;
1197
ditherGenerator->randSeed2 = ditherRandSeed2;
1200
/* -------------------------------------------------------------------------- */
1202
/* PaUtil_InitializeX86PlainConverters: assigns the nine converters defined in
   this file (int32, int24 and int16 targets, each in plain, Clip and
   DitherClip form) to the like-named members of paConverters.
   NOTE(review): paConverters is declared outside this listing (presumably in
   pa_converters.h), and the braces of this function are not visible here. */
void PaUtil_InitializeX86PlainConverters( void )
1204
paConverters.Float32_To_Int32 = Float32_To_Int32;
1205
paConverters.Float32_To_Int32_Clip = Float32_To_Int32_Clip;
1206
paConverters.Float32_To_Int32_DitherClip = Float32_To_Int32_DitherClip;
1208
paConverters.Float32_To_Int24 = Float32_To_Int24;
1209
paConverters.Float32_To_Int24_Clip = Float32_To_Int24_Clip;
1210
paConverters.Float32_To_Int24_DitherClip = Float32_To_Int24_DitherClip;
1212
paConverters.Float32_To_Int16 = Float32_To_Int16;
1213
paConverters.Float32_To_Int16_Clip = Float32_To_Int16_Clip;
1214
paConverters.Float32_To_Int16_DitherClip = Float32_To_Int16_DitherClip;
1219
/* -------------------------------------------------------------------------- */