/[pcsx2_0.9.7]/branch/r3113_0.9.7_beta/3rdparty/portaudio/src/os/win/pa_x86_plain_converters.c
ViewVC logotype

Contents of /branch/r3113_0.9.7_beta/3rdparty/portaudio/src/os/win/pa_x86_plain_converters.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 32 - (show annotations) (download)
Tue Sep 7 03:29:01 2010 UTC (9 years, 10 months ago) by william
File MIME type: text/plain
File size: 39863 byte(s)
branching from upstream revision (http://pcsx2.googlecode.com/svn/trunk
): r3113 to
https://svn.netsolutions.dnsalias.com/websvn/ps2/pcsx2/pcsx2_0.9.7/branch/r3113_0.9.7_beta
1 /*
2 * Plain Intel IA32 assembly implementations of PortAudio sample converter functions.
3 * Copyright (c) 1999-2002 Ross Bencina, Phil Burk
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining
6 * a copy of this software and associated documentation files
7 * (the "Software"), to deal in the Software without restriction,
8 * including without limitation the rights to use, copy, modify, merge,
9 * publish, distribute, sublicense, and/or sell copies of the Software,
10 * and to permit persons to whom the Software is furnished to do so,
11 * subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be
14 * included in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR
20 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
21 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /*
26 * The text above constitutes the entire PortAudio license; however,
27 * the PortAudio community also makes the following non-binding requests:
28 *
29 * Any person wishing to distribute modifications to the Software is
30 * requested to send the modifications to the original developer so that
31 * they can be incorporated into the canonical version. It is also
32 * requested that these non-binding requests be included along with the
33 * license above.
34 */
35
36 /** @file
37 @ingroup win_src
38 */
39
40 #include "pa_x86_plain_converters.h"
41
42 #include "pa_converters.h"
43 #include "pa_dither.h"
44
45 /*
46 the main reason these versions are faster than the equivalent C versions
47 is that float -> int casting is expensive in C on x86 because the rounding
48 mode needs to be changed for every cast. these versions only set
49 the rounding mode once outside the loop.
50
51 small additional speed gains are made by the way that clamping is
52 implemented.
53
54 TODO:
55 o- inline dither code
56 o- implement Dither only (no-clip) versions
57 o- implement int8 and uint8 versions
58 o- test thoroughly
59
60 o- the packed 24 bit functions could benefit from unrolling and avoiding
61 byte and word sized register access.
62 */
63
64 /* -------------------------------------------------------------------------- */
65
66 /*
67 #define PA_CLIP_( val, min, max )\
68 { val = ((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)); }
69 */
70
71 /*
72 the following notes were used to determine whether a floating point
73 value should be saturated (ie >1 or <-1) by loading it into an integer
74 register. these should be rewritten so that they make sense.
75
76 an ieee floating point value
77
78 1.xxxxxxxxxxxxxxxxxxxx?
79
80
81 is less than or equal to 1 and greater than or equal to -1 either:
82
83 if the mantissa is 0 and the unbiased exponent is 0
84
85 OR
86
87 if the unbiased exponent < 0
88
89 this translates to:
90
91 if the mantissa is 0 and the biased exponent is 7F
92
93 or
94
95 if the biased exponent is less than 7F
96
97
98 therefore the value is greater than 1 or less than -1 if
99
100 the mantissa is not 0 and the biased exponent is 7F
101
102 or
103
104 if the biased exponent is greater than 7F
105
106
107 in other words, if we mask out the sign bit, the value is
108 greater than 1 or less than -1 if its integer representation is greater than:
109
110 0 01111111 0000 0000 0000 0000 0000 000
111
112 0011 1111 1000 0000 0000 0000 0000 0000 => 0x3F800000
113 */
114
115 /* -------------------------------------------------------------------------- */
116
117 static const short fpuControlWord_ = 0x033F; /*round to nearest, 64 bit precision, all exceptions masked; loaded with fldcw for the duration of each conversion loop*/
118 static const double int32Scaler_ = 0x7FFFFFFF; /* multiplier used by the non-dithered float -> int32 converters */
119 static const double ditheredInt32Scaler_ = 0x7FFFFFFE; /* one less than int32Scaler_: headroom for the dither added after scaling */
120 static const double int24Scaler_ = 0x7FFFFF; /* multiplier used by the non-dithered float -> int24 converters */
121 static const double ditheredInt24Scaler_ = 0x7FFFFE; /* one less than int24Scaler_: headroom for the dither added after scaling */
122 static const double int16Scaler_ = 0x7FFF; /* multiplier used by the non-dithered float -> int16 converters */
123 static const double ditheredInt16Scaler_ = 0x7FFE; /* one less than int16Scaler_: headroom for the dither added after scaling */
124
125 #define PA_DITHER_BITS_ (15)
126 /* Multiply by PA_FLOAT_DITHER_SCALE_ to get a float between -2.0 and +1.99999 (NOTE(review): range as stated by the original author; confirm against the shifts used in the asm dither generator) */
127 #define PA_FLOAT_DITHER_SCALE_ (1.0F / ((1<<PA_DITHER_BITS_)-1))
128 static const float const_float_dither_scale_ = PA_FLOAT_DITHER_SCALE_; /* in-memory copy of the scale, used as the fmul operand in the asm dither code */
129 #define PA_DITHER_SHIFT_ ((32 - PA_DITHER_BITS_) + 1) /* right-shift count applied to each random seed by the asm dither code (evaluates to 18) */
130
131 /* -------------------------------------------------------------------------- */
132
133 #if defined(_WIN64) || defined(_WIN32_WCE)
134
135 /*
136 -EMT64/AMD64 uses different asm
137 -VC2005 doesn't allow _WIN64 with inline assembly either!
138 */
139 void PaUtil_InitializeX86PlainConverters( void )
140 { /* intentionally empty: this branch is compiled for _WIN64 / _WIN32_WCE, where the 32-bit inline assembly in the #else branch is not used */
141 }
142
143 #else
144
145
146 static void Float32_To_Int32(
147 void *destinationBuffer, signed int destinationStride,
148 void *sourceBuffer, signed int sourceStride,
149 unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
150 {
151 /* Converts count float32 samples to int32: each sample is multiplied by int32Scaler_ and rounded by fistp; no clipping, no dither. Strides are counted in samples. Approximate C equivalent (kept from the original source):
152 float *src = (float*)sourceBuffer;
153 signed long *dest = (signed long*)destinationBuffer;
154 (void)ditherGenerator; // unused parameter
155
156 while( count-- )
157 {
158 // REVIEW
159 double scaled = *src * 0x7FFFFFFF;
160 *dest = (signed long) scaled;
161
162 src += sourceStride;
163 dest += destinationStride;
164 }
165 */
166
167 short savedFpuControlWord;
168
169 (void) ditherGenerator; /* unused parameter */
170 if( count == 0 ) return; /* the loop below tests for the end only after converting a sample, so it must not be entered with nothing to convert */
171
172 __asm{
173 // esi -> source ptr
174 // eax -> source byte stride
175 // edi -> destination ptr
176 // ebx -> destination byte stride
177 // ecx -> source end ptr
178 // edx -> temp
179
180 mov esi, sourceBuffer
181
182 mov edx, 4 // sizeof float32 and int32
183 mov eax, sourceStride
184 imul eax, edx // source byte stride
185
186 mov ecx, count
187 imul ecx, eax
188 add ecx, esi // source end ptr = count * source byte stride + source ptr
189
190 mov edi, destinationBuffer
191
192 mov ebx, destinationStride
193 imul ebx, edx // destination byte stride
194
195 fwait
196 fstcw savedFpuControlWord // remember the caller's FPU control word
197 fldcw fpuControlWord_ // select round-to-nearest once, outside the loop
198
199 fld int32Scaler_ // stack: (int)0x7FFFFFFF
200
201 Float32_To_Int32_loop:
202
203 // load unscaled value into st(0)
204 fld dword ptr [esi] // stack: value, (int)0x7FFFFFFF
205 add esi, eax // increment source ptr
206 //lea esi, [esi+eax]
207 fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF
208 /*
209 note: we could store to a temporary qword here which would cause
210 wraparound distortion instead of int indefinite 0x10. that would
211 be more work, and given that not enabling clipping is only advisable
212 when you know that your signal isn't going to clip it isn't worth it.
213 */
214 fistp dword ptr [edi] // pop st(0) into dest, stack: (int)0x7FFFFFFF
215
216 add edi, ebx // increment destination ptr
217 //lea edi, [edi+ebx]
218
219 cmp esi, ecx // has src ptr reached end?
220 jne Float32_To_Int32_loop
221
222 ffree st(0) // discard the scaler that is still on the FPU stack
223 fincstp
224
225 fwait
226 fnclex
227 fldcw savedFpuControlWord // restore the caller's FPU control word
228 }
229 }
230
231 /* -------------------------------------------------------------------------- */
232
233 static void Float32_To_Int32_Clip(
234 void *destinationBuffer, signed int destinationStride,
235 void *sourceBuffer, signed int sourceStride,
236 unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
237 {
238 /* Converts count float32 samples to int32 with clipping: samples whose magnitude exceeds 1.0 are stored as 0x7FFFFFFF or 0x80000000, all others are scaled by int32Scaler_ and rounded by fistp. Strides are counted in samples. Approximate C equivalent (kept from the original source):
239 float *src = (float*)sourceBuffer;
240 signed long *dest = (signed long*)destinationBuffer;
241 (void) ditherGenerator; // unused parameter
242
243 while( count-- )
244 {
245 // REVIEW
246 double scaled = *src * 0x7FFFFFFF;
247 PA_CLIP_( scaled, -2147483648., 2147483647. );
248 *dest = (signed long) scaled;
249
250 src += sourceStride;
251 dest += destinationStride;
252 }
253 */
254
255 short savedFpuControlWord;
256
257 (void) ditherGenerator; /* unused parameter */
258 if( count == 0 ) return; /* the loop below tests for the end only after converting a sample, so it must not be entered with nothing to convert */
259 __asm{
260 // esi -> source ptr
261 // eax -> source byte stride
262 // edi -> destination ptr
263 // ebx -> destination byte stride
264 // ecx -> source end ptr
265 // edx -> temp
266
267 mov esi, sourceBuffer
268
269 mov edx, 4 // sizeof float32 and int32
270 mov eax, sourceStride
271 imul eax, edx // source byte stride
272
273 mov ecx, count
274 imul ecx, eax
275 add ecx, esi // source end ptr = count * source byte stride + source ptr
276
277 mov edi, destinationBuffer
278
279 mov ebx, destinationStride
280 imul ebx, edx // destination byte stride
281
282 fwait
283 fstcw savedFpuControlWord // remember the caller's FPU control word
284 fldcw fpuControlWord_ // select round-to-nearest once, outside the loop
285
286 fld int32Scaler_ // stack: (int)0x7FFFFFFF
287
288 Float32_To_Int32_Clip_loop:
289
290 mov edx, dword ptr [esi] // load floating point value into integer register
291
292 and edx, 0x7FFFFFFF // mask off sign
293 cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0 (0x3F800000 is the bit pattern of 1.0f)
294
295 jg Float32_To_Int32_Clip_clamp
296
297 // load unscaled value into st(0)
298 fld dword ptr [esi] // stack: value, (int)0x7FFFFFFF
299 add esi, eax // increment source ptr
300 //lea esi, [esi+eax]
301 fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFFFF, (int)0x7FFFFFFF
302 fistp dword ptr [edi] // pop st(0) into dest, stack: (int)0x7FFFFFFF
303 jmp Float32_To_Int32_Clip_stored
304
305 Float32_To_Int32_Clip_clamp:
306 mov edx, dword ptr [esi] // load floating point value into integer register
307 shr edx, 31 // move sign bit into bit 0
308 add esi, eax // increment source ptr
309 //lea esi, [esi+eax]
310 add edx, 0x7FFFFFFF // convert to maximum range integers: 0x7FFFFFFF if positive, 0x80000000 if negative
311 mov dword ptr [edi], edx
312
313 Float32_To_Int32_Clip_stored:
314
315 //add edi, ebx // increment destination ptr
316 lea edi, [edi+ebx]
317
318 cmp esi, ecx // has src ptr reached end?
319 jne Float32_To_Int32_Clip_loop
320
321 ffree st(0) // discard the scaler that is still on the FPU stack
322 fincstp
323
324 fwait
325 fnclex
326 fldcw savedFpuControlWord // restore the caller's FPU control word
327 }
328 }
329
330 /* -------------------------------------------------------------------------- */
331
332 static void Float32_To_Int32_DitherClip(
333 void *destinationBuffer, signed int destinationStride,
334 void *sourceBuffer, signed int sourceStride,
335 unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
336 {
337 /* Converts count float32 samples to int32 with dither and clipping: samples whose magnitude exceeds 1.0 are stored as 0x7FFFFFFF or 0x80000000 (no dither is generated for them); all others are scaled by ditheredInt32Scaler_, summed with an inline-generated dither value and rounded by fistp. The generator state is read from and written back to *ditherGenerator. Approximate C equivalent (kept from the original source):
338 float *src = (float*)sourceBuffer;
339 signed long *dest = (signed long*)destinationBuffer;
340
341 while( count-- )
342 {
343 // REVIEW
344 double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
345 // use smaller scaler to prevent overflow when we add the dither
346 double dithered = ((double)*src * (2147483646.0)) + dither;
347 PA_CLIP_( dithered, -2147483648., 2147483647. );
348 *dest = (signed long) dithered;
349
350
351 src += sourceStride;
352 dest += destinationStride;
353 }
354 */
355
356 short savedFpuControlWord;
357
358 // spill storage:
359 signed long sourceByteStride;
360 signed long highpassedDither;
361
362 // dither state:
363 unsigned long ditherPrevious = ditherGenerator->previous;
364 unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
365 unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
366 if( count == 0 ) return; /* the loop below tests for the end only after converting a sample, so it must not be entered with nothing to convert; the dither state is left untouched */
367 __asm{
368 // esi -> source ptr
369 // eax -> source byte stride
370 // edi -> destination ptr
371 // ebx -> destination byte stride
372 // ecx -> source end ptr
373 // edx -> temp
374
375 mov esi, sourceBuffer
376
377 mov edx, 4 // sizeof float32 and int32
378 mov eax, sourceStride
379 imul eax, edx // source byte stride
380
381 mov ecx, count
382 imul ecx, eax
383 add ecx, esi // source end ptr = count * source byte stride + source ptr
384
385 mov edi, destinationBuffer
386
387 mov ebx, destinationStride
388 imul ebx, edx // destination byte stride
389
390 fwait
391 fstcw savedFpuControlWord // remember the caller's FPU control word
392 fldcw fpuControlWord_ // select round-to-nearest once, outside the loop
393
394 fld ditheredInt32Scaler_ // stack: int scaler
395
396 Float32_To_Int32_DitherClip_loop:
397
398 mov edx, dword ptr [esi] // load floating point value into integer register
399
400 and edx, 0x7FFFFFFF // mask off sign
401 cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0 (0x3F800000 is the bit pattern of 1.0f)
402
403 jg Float32_To_Int32_DitherClip_clamp
404
405 // load unscaled value into st(0)
406 fld dword ptr [esi] // stack: value, int scaler
407 add esi, eax // increment source ptr
408 //lea esi, [esi+eax]
409 fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
410
411 /*
412 // call PaUtil_GenerateFloatTriangularDither with C calling convention
413 mov sourceByteStride, eax // save eax
414 mov sourceEnd, ecx // save ecx
415 push ditherGenerator // pass ditherGenerator parameter on stack
416 call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
417 pop edx // clear parameter off stack
418 mov ecx, sourceEnd // restore ecx
419 mov eax, sourceByteStride // restore eax
420 */
421
422 // generate dither
423 mov sourceByteStride, eax // save eax
424 mov edx, 196314165
425 mov eax, ditherRandSeed1
426 mul edx // edx:eax = eax * 196314165 (only the low half in eax is used)
427 //add eax, 907633515
428 lea eax, [eax+907633515]
429 mov ditherRandSeed1, eax
430 mov edx, 196314165
431 mov eax, ditherRandSeed2
432 mul edx // edx:eax = eax * 196314165 (only the low half in eax is used)
433 //add eax, 907633515
434 lea eax, [eax+907633515]
435 mov edx, ditherRandSeed1
436 shr edx, PA_DITHER_SHIFT_
437 mov ditherRandSeed2, eax
438 shr eax, PA_DITHER_SHIFT_
439 //add eax, edx // eax -> current
440 lea eax, [eax+edx] // current = randSeed1>>x + randSeed2>>x
441 mov edx, ditherPrevious
442 neg edx
443 lea edx, [eax+edx] // highpass = current - previous
444 mov highpassedDither, edx
445 mov ditherPrevious, eax // previous = current
446 mov eax, sourceByteStride // restore eax
447 fild highpassedDither
448 fmul const_float_dither_scale_
449 // end generate dither, dither signal in st(0)
450
451 faddp st(1), st(0) // stack: dither + value*(int scaler), int scaler
452 fistp dword ptr [edi] // pop st(0) into dest, stack: int scaler
453 jmp Float32_To_Int32_DitherClip_stored
454
455 Float32_To_Int32_DitherClip_clamp:
456 mov edx, dword ptr [esi] // load floating point value into integer register
457 shr edx, 31 // move sign bit into bit 0
458 add esi, eax // increment source ptr
459 //lea esi, [esi+eax]
460 add edx, 0x7FFFFFFF // convert to maximum range integers: 0x7FFFFFFF if positive, 0x80000000 if negative
461 mov dword ptr [edi], edx
462
463 Float32_To_Int32_DitherClip_stored:
464
465 //add edi, ebx // increment destination ptr
466 lea edi, [edi+ebx]
467
468 cmp esi, ecx // has src ptr reached end?
469 jne Float32_To_Int32_DitherClip_loop
470
471 ffree st(0) // discard the scaler that is still on the FPU stack
472 fincstp
473
474 fwait
475 fnclex
476 fldcw savedFpuControlWord // restore the caller's FPU control word
477 }
478
479 ditherGenerator->previous = ditherPrevious;
480 ditherGenerator->randSeed1 = ditherRandSeed1;
481 ditherGenerator->randSeed2 = ditherRandSeed2;
482 }
483
484 /* -------------------------------------------------------------------------- */
485
486 static void Float32_To_Int24(
487 void *destinationBuffer, signed int destinationStride,
488 void *sourceBuffer, signed int sourceStride,
489 unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
490 {
491 /* Converts count float32 samples to packed 24-bit samples: each sample is multiplied by int24Scaler_, rounded by fistp into a 32-bit temporary, and the temporary's low three bytes are stored least significant first; no clipping, no dither. Strides are counted in samples (3 bytes each on the destination side). The C sketch kept below from the original source scales to 32 bits and drops the low 8 bits instead:
492 float *src = (float*)sourceBuffer;
493 unsigned char *dest = (unsigned char*)destinationBuffer;
494 signed long temp;
495
496 (void) ditherGenerator; // unused parameter
497
498 while( count-- )
499 {
500 // convert to 32 bit and drop the low 8 bits
501 double scaled = *src * 0x7FFFFFFF;
502 temp = (signed long) scaled;
503
504 dest[0] = (unsigned char)(temp >> 8);
505 dest[1] = (unsigned char)(temp >> 16);
506 dest[2] = (unsigned char)(temp >> 24);
507
508 src += sourceStride;
509 dest += destinationStride * 3;
510 }
511 */
512
513 short savedFpuControlWord;
514
515 signed long tempInt32;
516
517 (void) ditherGenerator; /* unused parameter */
518 if( count == 0 ) return; /* the loop below tests for the end only after converting a sample, so it must not be entered with nothing to convert */
519 __asm{
520 // esi -> source ptr
521 // eax -> source byte stride
522 // edi -> destination ptr
523 // ebx -> destination byte stride
524 // ecx -> source end ptr
525 // edx -> temp
526
527 mov esi, sourceBuffer
528
529 mov edx, 4 // sizeof float32
530 mov eax, sourceStride
531 imul eax, edx // source byte stride
532
533 mov ecx, count
534 imul ecx, eax
535 add ecx, esi // source end ptr = count * source byte stride + source ptr
536
537 mov edi, destinationBuffer
538
539 mov edx, 3 // sizeof int24
540 mov ebx, destinationStride
541 imul ebx, edx // destination byte stride
542
543 fwait
544 fstcw savedFpuControlWord // remember the caller's FPU control word
545 fldcw fpuControlWord_ // select round-to-nearest once, outside the loop
546
547 fld int24Scaler_ // stack: (int)0x7FFFFF
548
549 Float32_To_Int24_loop:
550
551 // load unscaled value into st(0)
552 fld dword ptr [esi] // stack: value, (int)0x7FFFFF
553 add esi, eax // increment source ptr
554 //lea esi, [esi+eax]
555 fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF
556 fistp tempInt32 // pop st(0) into tempInt32, stack: (int)0x7FFFFF
557 mov edx, tempInt32
558
559 mov byte ptr [edi], DL // byte 0 = bits 0..7
560 shr edx, 8
561 //mov byte ptr [edi+1], DL
562 //mov byte ptr [edi+2], DH
563 mov word ptr [edi+1], DX // bytes 1 and 2 = bits 8..23, written as one 16-bit store
564
565 //add edi, ebx // increment destination ptr
566 lea edi, [edi+ebx]
567
568 cmp esi, ecx // has src ptr reached end?
569 jne Float32_To_Int24_loop
570
571 ffree st(0) // discard the scaler that is still on the FPU stack
572 fincstp
573
574 fwait
575 fnclex
576 fldcw savedFpuControlWord // restore the caller's FPU control word
577 }
578 }
579
580 /* -------------------------------------------------------------------------- */
581
582 static void Float32_To_Int24_Clip(
583 void *destinationBuffer, signed int destinationStride,
584 void *sourceBuffer, signed int sourceStride,
585 unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
586 {
587 /* Converts count float32 samples to packed 24-bit samples with clipping: samples whose magnitude exceeds 1.0 become 0x7FFFFF or 0x800000, all others are multiplied by int24Scaler_ and rounded by fistp; the low three bytes of the result are stored least significant first. Strides are counted in samples (3 bytes each on the destination side). The C sketch kept below from the original source scales to 32 bits and drops the low 8 bits instead:
588 float *src = (float*)sourceBuffer;
589 unsigned char *dest = (unsigned char*)destinationBuffer;
590 signed long temp;
591
592 (void) ditherGenerator; // unused parameter
593
594 while( count-- )
595 {
596 // convert to 32 bit and drop the low 8 bits
597 double scaled = *src * 0x7FFFFFFF;
598 PA_CLIP_( scaled, -2147483648., 2147483647. );
599 temp = (signed long) scaled;
600
601 dest[0] = (unsigned char)(temp >> 8);
602 dest[1] = (unsigned char)(temp >> 16);
603 dest[2] = (unsigned char)(temp >> 24);
604
605 src += sourceStride;
606 dest += destinationStride * 3;
607 }
608 */
609
610 short savedFpuControlWord;
611
612 signed long tempInt32;
613
614 (void) ditherGenerator; /* unused parameter */
615 if( count == 0 ) return; /* the loop below tests for the end only after converting a sample, so it must not be entered with nothing to convert */
616 __asm{
617 // esi -> source ptr
618 // eax -> source byte stride
619 // edi -> destination ptr
620 // ebx -> destination byte stride
621 // ecx -> source end ptr
622 // edx -> temp
623
624 mov esi, sourceBuffer
625
626 mov edx, 4 // sizeof float32
627 mov eax, sourceStride
628 imul eax, edx // source byte stride
629
630 mov ecx, count
631 imul ecx, eax
632 add ecx, esi // source end ptr = count * source byte stride + source ptr
633
634 mov edi, destinationBuffer
635
636 mov edx, 3 // sizeof int24
637 mov ebx, destinationStride
638 imul ebx, edx // destination byte stride
639
640 fwait
641 fstcw savedFpuControlWord // remember the caller's FPU control word
642 fldcw fpuControlWord_ // select round-to-nearest once, outside the loop
643
644 fld int24Scaler_ // stack: (int)0x7FFFFF
645
646 Float32_To_Int24_Clip_loop:
647
648 mov edx, dword ptr [esi] // load floating point value into integer register
649
650 and edx, 0x7FFFFFFF // mask off sign
651 cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0 (0x3F800000 is the bit pattern of 1.0f)
652
653 jg Float32_To_Int24_Clip_clamp
654
655 // load unscaled value into st(0)
656 fld dword ptr [esi] // stack: value, (int)0x7FFFFF
657 add esi, eax // increment source ptr
658 //lea esi, [esi+eax]
659 fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFFFF, (int)0x7FFFFF
660 fistp tempInt32 // pop st(0) into tempInt32, stack: (int)0x7FFFFF
661 mov edx, tempInt32
662 jmp Float32_To_Int24_Clip_store
663
664 Float32_To_Int24_Clip_clamp:
665 mov edx, dword ptr [esi] // load floating point value into integer register
666 shr edx, 31 // move sign bit into bit 0
667 add esi, eax // increment source ptr
668 //lea esi, [esi+eax]
669 add edx, 0x7FFFFF // convert to maximum range integers: 0x7FFFFF if positive, 0x800000 if negative
670
671 Float32_To_Int24_Clip_store:
672
673 mov byte ptr [edi], DL // byte 0 = bits 0..7
674 shr edx, 8
675 //mov byte ptr [edi+1], DL
676 //mov byte ptr [edi+2], DH
677 mov word ptr [edi+1], DX // bytes 1 and 2 = bits 8..23, written as one 16-bit store
678
679 //add edi, ebx // increment destination ptr
680 lea edi, [edi+ebx]
681
682 cmp esi, ecx // has src ptr reached end?
683 jne Float32_To_Int24_Clip_loop
684
685 ffree st(0) // discard the scaler that is still on the FPU stack
686 fincstp
687
688 fwait
689 fnclex
690 fldcw savedFpuControlWord // restore the caller's FPU control word
691 }
692 }
693
694 /* -------------------------------------------------------------------------- */
695
696 static void Float32_To_Int24_DitherClip(
697 void *destinationBuffer, signed int destinationStride,
698 void *sourceBuffer, signed int sourceStride,
699 unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
700 {
701 /* Converts count float32 samples to packed 24-bit samples with dither and clipping: samples whose magnitude exceeds 1.0 become 0x7FFFFF or 0x800000 (no dither is generated for them); all others are multiplied by ditheredInt24Scaler_, summed with an inline-generated dither value and rounded by fistp. The low three bytes of the result are stored least significant first. The generator state is read from and written back to *ditherGenerator. The C sketch kept below from the original source scales to 32 bits and drops the low 8 bits instead:
702 float *src = (float*)sourceBuffer;
703 unsigned char *dest = (unsigned char*)destinationBuffer;
704 signed long temp;
705
706 while( count-- )
707 {
708 // convert to 32 bit and drop the low 8 bits
709
710 // FIXME: the dither amplitude here appears to be too small by 8 bits
711 double dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
712 // use smaller scaler to prevent overflow when we add the dither
713 double dithered = ((double)*src * (2147483646.0)) + dither;
714 PA_CLIP_( dithered, -2147483648., 2147483647. );
715
716 temp = (signed long) dithered;
717
718 dest[0] = (unsigned char)(temp >> 8);
719 dest[1] = (unsigned char)(temp >> 16);
720 dest[2] = (unsigned char)(temp >> 24);
721
722 src += sourceStride;
723 dest += destinationStride * 3;
724 }
725 */
726
727 short savedFpuControlWord;
728
729 // spill storage:
730 signed long sourceByteStride;
731 signed long highpassedDither;
732
733 // dither state:
734 unsigned long ditherPrevious = ditherGenerator->previous;
735 unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
736 unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
737
738 signed long tempInt32;
739 if( count == 0 ) return; /* the loop below tests for the end only after converting a sample, so it must not be entered with nothing to convert; the dither state is left untouched */
740 __asm{
741 // esi -> source ptr
742 // eax -> source byte stride
743 // edi -> destination ptr
744 // ebx -> destination byte stride
745 // ecx -> source end ptr
746 // edx -> temp
747
748 mov esi, sourceBuffer
749
750 mov edx, 4 // sizeof float32
751 mov eax, sourceStride
752 imul eax, edx // source byte stride
753
754 mov ecx, count
755 imul ecx, eax
756 add ecx, esi // source end ptr = count * source byte stride + source ptr
757
758 mov edi, destinationBuffer
759
760 mov edx, 3 // sizeof int24
761 mov ebx, destinationStride
762 imul ebx, edx // destination byte stride
763
764 fwait
765 fstcw savedFpuControlWord // remember the caller's FPU control word
766 fldcw fpuControlWord_ // select round-to-nearest once, outside the loop
767
768 fld ditheredInt24Scaler_ // stack: int scaler
769
770 Float32_To_Int24_DitherClip_loop:
771
772 mov edx, dword ptr [esi] // load floating point value into integer register
773
774 and edx, 0x7FFFFFFF // mask off sign
775 cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0 (0x3F800000 is the bit pattern of 1.0f)
776
777 jg Float32_To_Int24_DitherClip_clamp
778
779 // load unscaled value into st(0)
780 fld dword ptr [esi] // stack: value, int scaler
781 add esi, eax // increment source ptr
782 //lea esi, [esi+eax]
783 fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
784
785 /*
786 // call PaUtil_GenerateFloatTriangularDither with C calling convention
787 mov sourceByteStride, eax // save eax
788 mov sourceEnd, ecx // save ecx
789 push ditherGenerator // pass ditherGenerator parameter on stack
790 call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
791 pop edx // clear parameter off stack
792 mov ecx, sourceEnd // restore ecx
793 mov eax, sourceByteStride // restore eax
794 */
795
796 // generate dither
797 mov sourceByteStride, eax // save eax
798 mov edx, 196314165
799 mov eax, ditherRandSeed1
800 mul edx // edx:eax = eax * 196314165 (only the low half in eax is used)
801 //add eax, 907633515
802 lea eax, [eax+907633515]
803 mov ditherRandSeed1, eax
804 mov edx, 196314165
805 mov eax, ditherRandSeed2
806 mul edx // edx:eax = eax * 196314165 (only the low half in eax is used)
807 //add eax, 907633515
808 lea eax, [eax+907633515]
809 mov edx, ditherRandSeed1
810 shr edx, PA_DITHER_SHIFT_
811 mov ditherRandSeed2, eax
812 shr eax, PA_DITHER_SHIFT_
813 //add eax, edx // eax -> current
814 lea eax, [eax+edx] // current = randSeed1>>x + randSeed2>>x
815 mov edx, ditherPrevious
816 neg edx
817 lea edx, [eax+edx] // highpass = current - previous
818 mov highpassedDither, edx
819 mov ditherPrevious, eax // previous = current
820 mov eax, sourceByteStride // restore eax
821 fild highpassedDither
822 fmul const_float_dither_scale_
823 // end generate dither, dither signal in st(0)
824
825 faddp st(1), st(0) // stack: dither + value*(int scaler), int scaler
826 fistp tempInt32 // pop st(0) into tempInt32, stack: int scaler
827 mov edx, tempInt32
828 jmp Float32_To_Int24_DitherClip_store
829
830 Float32_To_Int24_DitherClip_clamp:
831 mov edx, dword ptr [esi] // load floating point value into integer register
832 shr edx, 31 // move sign bit into bit 0
833 add esi, eax // increment source ptr
834 //lea esi, [esi+eax]
835 add edx, 0x7FFFFF // convert to maximum range integers: 0x7FFFFF if positive, 0x800000 if negative
836
837 Float32_To_Int24_DitherClip_store:
838
839 mov byte ptr [edi], DL // byte 0 = bits 0..7
840 shr edx, 8
841 //mov byte ptr [edi+1], DL
842 //mov byte ptr [edi+2], DH
843 mov word ptr [edi+1], DX // bytes 1 and 2 = bits 8..23, written as one 16-bit store
844
845 //add edi, ebx // increment destination ptr
846 lea edi, [edi+ebx]
847
848 cmp esi, ecx // has src ptr reached end?
849 jne Float32_To_Int24_DitherClip_loop
850
851 ffree st(0) // discard the scaler that is still on the FPU stack
852 fincstp
853
854 fwait
855 fnclex
856 fldcw savedFpuControlWord // restore the caller's FPU control word
857 }
858
859 ditherGenerator->previous = ditherPrevious;
860 ditherGenerator->randSeed1 = ditherRandSeed1;
861 ditherGenerator->randSeed2 = ditherRandSeed2;
862 }
863
864 /* -------------------------------------------------------------------------- */
865
866 static void Float32_To_Int16(
867 void *destinationBuffer, signed int destinationStride,
868 void *sourceBuffer, signed int sourceStride,
869 unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
870 {
871 /* Converts count float32 samples to int16: each sample is multiplied by int16Scaler_ and rounded by fistp; no clipping, no dither. Strides are counted in samples. Approximate C equivalent (kept from the original source):
872 float *src = (float*)sourceBuffer;
873 signed short *dest = (signed short*)destinationBuffer;
874 (void)ditherGenerator; // unused parameter
875
876 while( count-- )
877 {
878
879 short samp = (short) (*src * (32767.0f));
880 *dest = samp;
881
882 src += sourceStride;
883 dest += destinationStride;
884 }
885 */
886
887 short savedFpuControlWord;
888
889 (void) ditherGenerator; /* unused parameter */
890 if( count == 0 ) return; /* the loop below tests for the end only after converting a sample, so it must not be entered with nothing to convert */
891 __asm{
892 // esi -> source ptr
893 // eax -> source byte stride
894 // edi -> destination ptr
895 // ebx -> destination byte stride
896 // ecx -> source end ptr
897 // edx -> temp
898
899 mov esi, sourceBuffer
900
901 mov edx, 4 // sizeof float32
902 mov eax, sourceStride
903 imul eax, edx // source byte stride
904
905 mov ecx, count
906 imul ecx, eax
907 add ecx, esi // source end ptr = count * source byte stride + source ptr
908
909 mov edi, destinationBuffer
910
911 mov edx, 2 // sizeof int16
912 mov ebx, destinationStride
913 imul ebx, edx // destination byte stride
914
915 fwait
916 fstcw savedFpuControlWord // remember the caller's FPU control word
917 fldcw fpuControlWord_ // select round-to-nearest once, outside the loop
918
919 fld int16Scaler_ // stack: (int)0x7FFF
920
921 Float32_To_Int16_loop:
922
923 // load unscaled value into st(0)
924 fld dword ptr [esi] // stack: value, (int)0x7FFF
925 add esi, eax // increment source ptr
926 //lea esi, [esi+eax]
927 fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF
928 fistp word ptr [edi] // store scaled int into dest, stack: (int)0x7FFF
929
930 add edi, ebx // increment destination ptr
931 //lea edi, [edi+ebx]
932
933 cmp esi, ecx // has src ptr reached end?
934 jne Float32_To_Int16_loop
935
936 ffree st(0) // discard the scaler that is still on the FPU stack
937 fincstp
938
939 fwait
940 fnclex
941 fldcw savedFpuControlWord // restore the caller's FPU control word
942 }
943 }
944
945 /* -------------------------------------------------------------------------- */
946
947 static void Float32_To_Int16_Clip(
948 void *destinationBuffer, signed int destinationStride,
949 void *sourceBuffer, signed int sourceStride,
950 unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
951 {
952 /* Converts count float32 samples to int16 with clipping: samples whose magnitude exceeds 1.0 are stored as 0x7FFF or 0x8000, all others are multiplied by int16Scaler_ and rounded by fistp. Strides are counted in samples. Approximate C equivalent (kept from the original source):
953 float *src = (float*)sourceBuffer;
954 signed short *dest = (signed short*)destinationBuffer;
955 (void)ditherGenerator; // unused parameter
956
957 while( count-- )
958 {
959 long samp = (signed long) (*src * (32767.0f));
960 PA_CLIP_( samp, -0x8000, 0x7FFF );
961 *dest = (signed short) samp;
962
963 src += sourceStride;
964 dest += destinationStride;
965 }
966 */
967
968 short savedFpuControlWord;
969
970 (void) ditherGenerator; /* unused parameter */
971 if( count == 0 ) return; /* the loop below tests for the end only after converting a sample, so it must not be entered with nothing to convert */
972 __asm{
973 // esi -> source ptr
974 // eax -> source byte stride
975 // edi -> destination ptr
976 // ebx -> destination byte stride
977 // ecx -> source end ptr
978 // edx -> temp
979
980 mov esi, sourceBuffer
981
982 mov edx, 4 // sizeof float32
983 mov eax, sourceStride
984 imul eax, edx // source byte stride
985
986 mov ecx, count
987 imul ecx, eax
988 add ecx, esi // source end ptr = count * source byte stride + source ptr
989
990 mov edi, destinationBuffer
991
992 mov edx, 2 // sizeof int16
993 mov ebx, destinationStride
994 imul ebx, edx // destination byte stride
995
996 fwait
997 fstcw savedFpuControlWord // remember the caller's FPU control word
998 fldcw fpuControlWord_ // select round-to-nearest once, outside the loop
999
1000 fld int16Scaler_ // stack: (int)0x7FFF
1001
1002 Float32_To_Int16_Clip_loop:
1003
1004 mov edx, dword ptr [esi] // load floating point value into integer register
1005
1006 and edx, 0x7FFFFFFF // mask off sign
1007 cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0 (0x3F800000 is the bit pattern of 1.0f)
1008
1009 jg Float32_To_Int16_Clip_clamp
1010
1011 // load unscaled value into st(0)
1012 fld dword ptr [esi] // stack: value, (int)0x7FFF
1013 add esi, eax // increment source ptr
1014 //lea esi, [esi+eax]
1015 fmul st(0), st(1) // st(0) *= st(1), stack: value*0x7FFF, (int)0x7FFF
1016 fistp word ptr [edi] // store scaled int into dest, stack: (int)0x7FFF
1017 jmp Float32_To_Int16_Clip_stored
1018
1019 Float32_To_Int16_Clip_clamp:
1020 mov edx, dword ptr [esi] // load floating point value into integer register
1021 shr edx, 31 // move sign bit into bit 0
1022 add esi, eax // increment source ptr
1023 //lea esi, [esi+eax]
1024 add dx, 0x7FFF // convert to maximum range integers: 0x7FFF if positive, 0x8000 if negative
1025 mov word ptr [edi], dx // store clamped int into dest
1026
1027 Float32_To_Int16_Clip_stored:
1028
1029 add edi, ebx // increment destination ptr
1030 //lea edi, [edi+ebx]
1031
1032 cmp esi, ecx // has src ptr reached end?
1033 jne Float32_To_Int16_Clip_loop
1034
1035 ffree st(0) // discard the scaler that is still on the FPU stack
1036 fincstp
1037
1038 fwait
1039 fnclex
1040 fldcw savedFpuControlWord // restore the caller's FPU control word
1041 }
1042 }
1043
1044 /* -------------------------------------------------------------------------- */
1045
1046 static void Float32_To_Int16_DitherClip(
1047 void *destinationBuffer, signed int destinationStride,
1048 void *sourceBuffer, signed int sourceStride,
1049 unsigned int count, PaUtilTriangularDitherGenerator *ditherGenerator )
1050 {
1051 /*
1052 float *src = (float*)sourceBuffer;
1053 signed short *dest = (signed short*)destinationBuffer;
1054 (void)ditherGenerator; // unused parameter
1055
1056 while( count-- )
1057 {
1058
1059 float dither = PaUtil_GenerateFloatTriangularDither( ditherGenerator );
1060 // use smaller scaler to prevent overflow when we add the dither
1061 float dithered = (*src * (32766.0f)) + dither;
1062 signed long samp = (signed long) dithered;
1063 PA_CLIP_( samp, -0x8000, 0x7FFF );
1064 *dest = (signed short) samp;
1065
1066 src += sourceStride;
1067 dest += destinationStride;
1068 }
1069 */
1070
1071 short savedFpuControlWord;
1072
1073 // spill storage:
1074 signed long sourceByteStride;
1075 signed long highpassedDither;
1076
1077 // dither state:
1078 unsigned long ditherPrevious = ditherGenerator->previous;
1079 unsigned long ditherRandSeed1 = ditherGenerator->randSeed1;
1080 unsigned long ditherRandSeed2 = ditherGenerator->randSeed2;
1081
1082 __asm{
1083 // esi -> source ptr
1084 // eax -> source byte stride
1085 // edi -> destination ptr
1086 // ebx -> destination byte stride
1087 // ecx -> source end ptr
1088 // edx -> temp
1089
1090 mov esi, sourceBuffer
1091
1092 mov edx, 4 // sizeof float32
1093 mov eax, sourceStride
1094 imul eax, edx // source byte stride
1095
1096 mov ecx, count
1097 imul ecx, eax
1098 add ecx, esi // source end ptr = count * source byte stride + source ptr
1099
1100 mov edi, destinationBuffer
1101
1102 mov edx, 2 // sizeof int16
1103 mov ebx, destinationStride
1104 imul ebx, edx // destination byte stride
1105
1106 fwait
1107 fstcw savedFpuControlWord
1108 fldcw fpuControlWord_
1109
1110 fld ditheredInt16Scaler_ // stack: int scaler
1111
1112 Float32_To_Int16_DitherClip_loop:
1113
1114 mov edx, dword ptr [esi] // load floating point value into integer register
1115
1116 and edx, 0x7FFFFFFF // mask off sign
1117 cmp edx, 0x3F800000 // greater than 1.0 or less than -1.0
1118
1119 jg Float32_To_Int16_DitherClip_clamp
1120
1121 // load unscaled value into st(0)
1122 fld dword ptr [esi] // stack: value, int scaler
1123 add esi, eax // increment source ptr
1124 //lea esi, [esi+eax]
1125 fmul st(0), st(1) // st(0) *= st(1), stack: value*(int scaler), int scaler
1126
1127 /*
1128 // call PaUtil_GenerateFloatTriangularDither with C calling convention
1129 mov sourceByteStride, eax // save eax
1130 mov sourceEnd, ecx // save ecx
1131 push ditherGenerator // pass ditherGenerator parameter on stack
1132 call PaUtil_GenerateFloatTriangularDither // stack: dither, value*(int scaler), int scaler
1133 pop edx // clear parameter off stack
1134 mov ecx, sourceEnd // restore ecx
1135 mov eax, sourceByteStride // restore eax
1136 */
1137
1138 // generate dither
1139 mov sourceByteStride, eax // save eax
1140 mov edx, 196314165
1141 mov eax, ditherRandSeed1
1142 mul edx // eax:edx = eax * 196314165
1143 //add eax, 907633515
1144 lea eax, [eax+907633515]
1145 mov ditherRandSeed1, eax
1146 mov edx, 196314165
1147 mov eax, ditherRandSeed2
1148 mul edx // eax:edx = eax * 196314165
1149 //add eax, 907633515
1150 lea eax, [eax+907633515]
1151 mov edx, ditherRandSeed1
1152 shr edx, PA_DITHER_SHIFT_
1153 mov ditherRandSeed2, eax
1154 shr eax, PA_DITHER_SHIFT_
1155 //add eax, edx // eax -> current
1156 lea eax, [eax+edx] // current = randSeed1>>x + randSeed2>>x
1157 mov edx, ditherPrevious
1158 neg edx
1159 lea edx, [eax+edx] // highpass = current - previous
1160 mov highpassedDither, edx
1161 mov ditherPrevious, eax // previous = current
1162 mov eax, sourceByteStride // restore eax
1163 fild highpassedDither
1164 fmul const_float_dither_scale_
1165 // end generate dither, dither signal in st(0)
1166
1167 faddp st(1), st(0) // stack: dither * value*(int scaler), int scaler
1168 fistp word ptr [edi] // store scaled int into dest, stack: int scaler
1169 jmp Float32_To_Int16_DitherClip_stored
1170
1171 Float32_To_Int16_DitherClip_clamp:
1172 mov edx, dword ptr [esi] // load floating point value into integer register
1173 shr edx, 31 // move sign bit into bit 0
1174 add esi, eax // increment source ptr
1175 //lea esi, [esi+eax]
1176 add dx, 0x7FFF // convert to maximum range integers
1177 mov word ptr [edi], dx // store clamped into into dest
1178
1179 Float32_To_Int16_DitherClip_stored:
1180
1181 add edi, ebx // increment destination ptr
1182 //lea edi, [edi+ebx]
1183
1184 cmp esi, ecx // has src ptr reached end?
1185 jne Float32_To_Int16_DitherClip_loop
1186
1187 ffree st(0)
1188 fincstp
1189
1190 fwait
1191 fnclex
1192 fldcw savedFpuControlWord
1193 }
1194
1195 ditherGenerator->previous = ditherPrevious;
1196 ditherGenerator->randSeed1 = ditherRandSeed1;
1197 ditherGenerator->randSeed2 = ditherRandSeed2;
1198 }
1199
1200 /* -------------------------------------------------------------------------- */
1201
1202 void PaUtil_InitializeX86PlainConverters( void )
1203 {
1204 paConverters.Float32_To_Int32 = Float32_To_Int32;
1205 paConverters.Float32_To_Int32_Clip = Float32_To_Int32_Clip;
1206 paConverters.Float32_To_Int32_DitherClip = Float32_To_Int32_DitherClip;
1207
1208 paConverters.Float32_To_Int24 = Float32_To_Int24;
1209 paConverters.Float32_To_Int24_Clip = Float32_To_Int24_Clip;
1210 paConverters.Float32_To_Int24_DitherClip = Float32_To_Int24_DitherClip;
1211
1212 paConverters.Float32_To_Int16 = Float32_To_Int16;
1213 paConverters.Float32_To_Int16_Clip = Float32_To_Int16_Clip;
1214 paConverters.Float32_To_Int16_DitherClip = Float32_To_Int16_DitherClip;
1215 }
1216
1217 #endif
1218
1219 /* -------------------------------------------------------------------------- */

  ViewVC Help
Powered by ViewVC 1.1.22