/[pcsx2_0.9.7]/trunk/plugins/spu2-x/src/Mixer.cpp
ViewVC logotype

Contents of /trunk/plugins/spu2-x/src/Mixer.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 280 - (show annotations) (download)
Thu Dec 23 12:02:12 2010 UTC (9 years, 2 months ago) by william
File size: 33232 byte(s)
re-commit (had local access denied errors when committing)
1 /* SPU2-X, A plugin for Emulating the Sound Processing Unit of the Playstation 2
2 * Developed and maintained by the Pcsx2 Development Team.
3 *
4 * Original portions from SPU2ghz are (c) 2008 by David Quintana [gigaherz]
5 *
6 * SPU2-X is free software: you can redistribute it and/or modify it under the terms
7 * of the GNU Lesser General Public License as published by the Free Software Found-
8 * ation, either version 3 of the License, or (at your option) any later version.
9 *
10 * SPU2-X is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
11 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
12 * PURPOSE. See the GNU Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public License
15 * along with SPU2-X. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18 #include "Global.h"
19
20 void ADMAOutLogWrite(void *lpData, u32 ulSize);
21
22 static const s32 tbl_XA_Factor[5][2] =
23 {
24 { 0, 0 },
25 { 60, 0 },
26 { 115, -52 },
27 { 98, -55 },
28 { 122, -60 }
29 };
30
31
32 // Performs a 64-bit multiplication between two values and returns the
33 // high 32 bits as a result (discarding the fractional 32 bits).
34 // The combined fractional bits of both inputs must be 32 bits for this
35 // to work properly.
36 //
37 // This is meant to be a drop-in replacement for times when the 'div' part
38 // of a MulDiv is a constant. (example: 1<<8, or 4096, etc)
39 //
40 // [Air] Performance breakdown: This is over 10 times faster than MulDiv in
41 // a *worst case* scenario. It's also more accurate since it forces the
42 // caller to extend the inputs so that they make use of all 32 bits of
43 // precision.
44 //
45 static __forceinline s32 MulShr32( s32 srcval, s32 mulval )
46 {
47 return (s64)srcval * mulval >> 32;
48 }
49
50 __forceinline s32 clamp_mix( s32 x, u8 bitshift )
51 {
52 return GetClamped( x, -0x8000<<bitshift, 0x7fff<<bitshift );
53 }
54
55 #if _MSC_VER
56 __forceinline
57 // Without the keyword static, gcc compilation fails on the inlining...
58 // Unfortunately the function is also used in Reverb.cpp. In order to keep the code
59 // clean we just disable it.
60 // We will need link-time code generation / Whole Program optimization to do a clean
61 // inline. Gcc 4.5 has the experimental options -flto, -fwhopr and -fwhole-program to
62 // do it but it still experimental...
63 #endif
64 StereoOut32 clamp_mix( const StereoOut32& sample, u8 bitshift )
65 {
66 // We should clampify between -0x8000 and 0x7fff, however some audio output
67 // modules or sound drivers could (will :p) overshoot with that. So giving it a small safety.
68
69 return StereoOut32(
70 GetClamped( sample.Left, -0x7f00<<bitshift, 0x7f00<<bitshift ),
71 GetClamped( sample.Right, -0x7f00<<bitshift, 0x7f00<<bitshift )
72 );
73 }
74
75 static void __forceinline XA_decode_block(s16* buffer, const s16* block, s32& prev1, s32& prev2)
76 {
77 const s32 header = *block;
78 const s32 shift = (header&0xF)+16;
79 const s32 pred1 = tbl_XA_Factor[(header>> 4)&0xF][0];
80 const s32 pred2 = tbl_XA_Factor[(header>> 4)&0xF][1];
81
82 const s8* blockbytes = (s8*)&block[1];
83 const s8* blockend = &blockbytes[13];
84
85 for(; blockbytes<=blockend; ++blockbytes)
86 {
87 s32 data = ((*blockbytes)<<28) & 0xF0000000;
88 s32 pcm = (data >> shift) + (((pred1*prev1)+(pred2*prev2)) >> 6);
89
90 Clampify( pcm, -0x8000, 0x7fff );
91 *(buffer++) = pcm;
92
93 data = ((*blockbytes)<<24) & 0xF0000000;
94 s32 pcm2 = (data >> shift) + (((pred1*pcm)+(pred2*prev1)) >> 6);
95
96 Clampify( pcm2, -0x8000, 0x7fff );
97 *(buffer++) = pcm2;
98
99 prev2 = pcm;
100 prev1 = pcm2;
101 }
102 }
103
104 static void __forceinline IncrementNextA(V_Core& thiscore, uint voiceidx)
105 {
106 V_Voice &vc(thiscore.Voices[voiceidx]);
107
108 // Important! Both cores signal IRQ when an address is read, regardless of
109 // which core actually reads the address.
110
111 for( uint i=0; i<2; i++ )
112 {
113 if( Cores[i].IRQEnable && (vc.NextA==Cores[i].IRQA ) )
114 {
115 if( IsDevBuild )
116 ConLog(" * SPU2 Core %d: IRQ Called (IRQA (%05X) passed; voice %d).\n", i, Cores[i].IRQA, thiscore.Index * 24 + voiceidx);
117
118 SetIrqCall(i);
119 }
120 }
121
122 vc.NextA++;
123 vc.NextA&=0xFFFFF;
124 }
125
126 // decoded pcm data, used to cache the decoded data so that it needn't be decoded
127 // multiple times. Cache chunks are decoded when the mixer requests the blocks, and
128 // invalided when DMA transfers and memory writes are performed.
129 PcmCacheEntry *pcm_cache_data = NULL;
130
131 int g_counter_cache_hits = 0;
132 int g_counter_cache_misses = 0;
133 int g_counter_cache_ignores = 0;
134
135 #define XAFLAG_LOOP_END (1ul<<0)
136 #define XAFLAG_LOOP (1ul<<1)
137 #define XAFLAG_LOOP_START (1ul<<2)
138
139 static __forceinline s32 GetNextDataBuffered( V_Core& thiscore, uint voiceidx )
140 {
141 V_Voice& vc( thiscore.Voices[voiceidx] );
142
143 if( vc.SCurrent == 28 )
144 {
145 if(vc.LoopFlags & XAFLAG_LOOP_END)
146 {
147 thiscore.Regs.ENDX |= (1 << voiceidx);
148
149 if( vc.LoopFlags & XAFLAG_LOOP )
150 {
151 vc.NextA = vc.LoopStartA;
152 }
153 else
154 {
155 vc.Stop();
156 if( IsDevBuild )
157 {
158 if(MsgVoiceOff()) ConLog("* SPU2-X: Voice Off by EndPoint: %d \n", voiceidx);
159 }
160 }
161 }
162
163 // We'll need the loop flags and buffer pointers regardless of cache status:
164 // Note to Self : NextA addresses WORDS (not bytes).
165
166 s16* memptr = GetMemPtr(vc.NextA&0xFFFFF);
167 vc.LoopFlags = *memptr >> 8; // grab loop flags from the upper byte.
168
169 const int cacheIdx = vc.NextA / pcm_WordsPerBlock;
170 PcmCacheEntry& cacheLine = pcm_cache_data[cacheIdx];
171 vc.SBuffer = cacheLine.Sampledata;
172
173 if( cacheLine.Validated )
174 {
175 // Cached block! Read from the cache directly.
176 // Make sure to propagate the prev1/prev2 ADPCM:
177
178 vc.Prev1 = vc.SBuffer[27];
179 vc.Prev2 = vc.SBuffer[26];
180
181 //ConLog( "* SPU2-X: Cache Hit! NextA=0x%x, cacheIdx=0x%x\n", vc.NextA, cacheIdx );
182
183 if( IsDevBuild )
184 g_counter_cache_hits++;
185 }
186 else
187 {
188 // Only flag the cache if it's a non-dynamic memory range.
189 if( vc.NextA >= SPU2_DYN_MEMLINE )
190 cacheLine.Validated = true;
191
192 if( IsDevBuild )
193 {
194 if( vc.NextA < SPU2_DYN_MEMLINE )
195 g_counter_cache_ignores++;
196 else
197 g_counter_cache_misses++;
198 }
199
200 XA_decode_block( vc.SBuffer, memptr, vc.Prev1, vc.Prev2 );
201 }
202
203 vc.SCurrent = 0;
204 if( (vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode )
205 vc.LoopStartA = vc.NextA;
206
207 goto _Increment;
208 }
209
210 if( (vc.SCurrent&3) == 3 )
211 {
212 _Increment:
213 IncrementNextA( thiscore, voiceidx );
214 }
215
216 return vc.SBuffer[vc.SCurrent++];
217 }
218
219 static __forceinline void GetNextDataDummy(V_Core& thiscore, uint voiceidx)
220 {
221 V_Voice& vc( thiscore.Voices[voiceidx] );
222
223 if (vc.SCurrent == 28)
224 {
225 if(vc.LoopFlags & XAFLAG_LOOP_END)
226 {
227 thiscore.Regs.ENDX |= (1 << voiceidx);
228
229 if( vc.LoopFlags & XAFLAG_LOOP )
230 vc.NextA = vc.LoopStartA;
231 // no else, already stopped
232 }
233
234 vc.LoopFlags = *GetMemPtr(vc.NextA&0xFFFFF) >> 8; // grab loop flags from the upper byte.
235
236 if ((vc.LoopFlags & XAFLAG_LOOP_START) && !vc.LoopMode)
237 vc.LoopStartA = vc.NextA;
238
239 IncrementNextA(thiscore, voiceidx);
240
241 vc.SCurrent = 0;
242 }
243
244 vc.SP -= 4096 * (4 - (vc.SCurrent & 3));
245 vc.SCurrent += 4 - (vc.SCurrent & 3);
246 IncrementNextA(thiscore, voiceidx);
247 }
248
249 /////////////////////////////////////////////////////////////////////////////////////////
250 /////////////////////////////////////////////////////////////////////////////////////////
251 // //
252
253 static s32 __forceinline GetNoiseValues()
254 {
255 static s32 Seed = 0x41595321;
256 s32 retval = 0x8000;
257
258 if( Seed&0x100 )
259 retval = (Seed&0xff) << 8;
260 else if( Seed&0xffff )
261 retval = 0x7fff;
262 #ifdef _WIN32
263 __asm {
264 MOV eax,Seed
265 ROR eax,5
266 XOR eax,0x9a
267 MOV ebx,eax
268 ROL eax,2
269 ADD eax,ebx
270 XOR eax,ebx
271 ROR eax,3
272 MOV Seed,eax
273 }
274 #else
275 __asm__ (
276 ".intel_syntax\n"
277 "MOV %%eax,%1\n"
278 "ROR %%eax,5\n"
279 "XOR %%eax,0x9a\n"
280 "MOV %%esi,%%eax\n"
281 "ROL %%eax,2\n"
282 "ADD %%eax,%%esi\n"
283 "XOR %%eax,%%esi\n"
284 "ROR %%eax,3\n"
285 "MOV %0,%%eax\n"
286 ".att_syntax\n" : "=r"(Seed) :"r"(Seed)
287 : "%eax", "%esi"
288 );
289 #endif
290 return retval;
291 }
292 /////////////////////////////////////////////////////////////////////////////////////////
293 /////////////////////////////////////////////////////////////////////////////////////////
294 // //
295
296 // Data is expected to be 16 bit signed (typical stuff!).
297 // volume is expected to be 32 bit signed (31 bits with reverse phase)
298 // Data is shifted up by 1 bit to give the output an effective 16 bit range.
299 static __forceinline s32 ApplyVolume(s32 data, s32 volume)
300 {
301 //return (volume * data) >> 15;
302 return MulShr32( data<<1, volume );
303 }
304
305 static __forceinline StereoOut32 ApplyVolume( const StereoOut32& data, const V_VolumeLR& volume )
306 {
307 return StereoOut32(
308 ApplyVolume( data.Left, volume.Left ),
309 ApplyVolume( data.Right, volume.Right )
310 );
311 }
312
313 static __forceinline StereoOut32 ApplyVolume( const StereoOut32& data, const V_VolumeSlideLR& volume )
314 {
315 return StereoOut32(
316 ApplyVolume( data.Left, volume.Left.Value ),
317 ApplyVolume( data.Right, volume.Right.Value )
318 );
319 }
320
321 static void __forceinline UpdatePitch( uint coreidx, uint voiceidx )
322 {
323 V_Voice& vc( Cores[coreidx].Voices[voiceidx] );
324 s32 pitch;
325
326 // [Air] : re-ordered comparisons: Modulated is much more likely to be zero than voice,
327 // and so the way it was before it's have to check both voice and modulated values
328 // most of the time. Now it'll just check Modulated and short-circuit past the voice
329 // check (not that it amounts to much, but eh every little bit helps).
330 if( (vc.Modulated==0) || (voiceidx==0) )
331 pitch = vc.Pitch;
332 else
333 pitch = (vc.Pitch*(32768 + Cores[coreidx].Voices[voiceidx-1].OutX))>>15;
334
335 vc.SP+=pitch;
336 }
337
338
339 static __forceinline void CalculateADSR( V_Core& thiscore, uint voiceidx )
340 {
341 V_Voice& vc( thiscore.Voices[voiceidx] );
342
343 if( vc.ADSR.Phase==0 )
344 {
345 vc.ADSR.Value = 0;
346 return;
347 }
348
349 if( !vc.ADSR.Calculate() )
350 {
351 if( IsDevBuild )
352 {
353 if(MsgVoiceOff()) ConLog("* SPU2-X: Voice Off by ADSR: %d \n", voiceidx);
354 }
355 vc.Stop();
356 thiscore.Regs.ENDX |= (1 << voiceidx);
357 }
358
359 jASSUME( vc.ADSR.Value >= 0 ); // ADSR should never be negative...
360 }
361
362 /*
363 Tension: 65535 is high, 32768 is normal, 0 is low
364 */
365 template<s32 i_tension>
366 __forceinline
367 static s32 HermiteInterpolate(
368 s32 y0, // 16.0
369 s32 y1, // 16.0
370 s32 y2, // 16.0
371 s32 y3, // 16.0
372 s32 mu // 0.12
373 )
374 {
375 s32 m00 = ((y1-y0)*i_tension) >> 16; // 16.0
376 s32 m01 = ((y2-y1)*i_tension) >> 16; // 16.0
377 s32 m0 = m00 + m01;
378
379 s32 m10 = ((y2-y1)*i_tension) >> 16; // 16.0
380 s32 m11 = ((y3-y2)*i_tension) >> 16; // 16.0
381 s32 m1 = m10 + m11;
382
383 s32 val = (( 2*y1 + m0 + m1 - 2*y2) * mu) >> 12; // 16.0
384 val = ((val - 3*y1 - 2*m0 - m1 + 3*y2) * mu) >> 12; // 16.0
385 val = ((val + m0 ) * mu) >> 11; // 16.0
386
387 return(val + (y1<<1));
388 }
389
390 __forceinline
391 static s32 CatmullRomInterpolate(
392 s32 y0, // 16.0
393 s32 y1, // 16.0
394 s32 y2, // 16.0
395 s32 y3, // 16.0
396 s32 mu // 0.12
397 )
398 {
399 //q(t) = 0.5 *( (2 * P1) +
400 // (-P0 + P2) * t +
401 // (2*P0 - 5*P1 + 4*P2 - P3) * t2 +
402 // (-P0 + 3*P1- 3*P2 + P3) * t3)
403
404 s32 a3 = (- y0 + 3*y1 - 3*y2 + y3);
405 s32 a2 = ( 2*y0 - 5*y1 + 4*y2 - y3);
406 s32 a1 = (- y0 + y2 );
407 s32 a0 = ( 2*y1 );
408
409 s32 val = ((a3 ) * mu) >> 12;
410 val = ((a2 + val) * mu) >> 12;
411 val = ((a1 + val) * mu) >> 12;
412
413 return (a0 + val);
414 }
415
416 __forceinline
417 static s32 CubicInterpolate(
418 s32 y0, // 16.0
419 s32 y1, // 16.0
420 s32 y2, // 16.0
421 s32 y3, // 16.0
422 s32 mu // 0.12
423 )
424 {
425 const s32 a0 = y3 - y2 - y0 + y1;
426 const s32 a1 = y0 - y1 - a0;
427 const s32 a2 = y2 - y0;
428
429 s32 val = (( a0) * mu) >> 12;
430 val = ((val + a1) * mu) >> 12;
431 val = ((val + a2) * mu) >> 11;
432
433 return(val + (y1<<1));
434 }
435
436 // Returns a 16 bit result in Value.
437 // Uses standard template-style optimization techniques to statically generate five different
438 // versions of this function (one for each type of interpolation).
439 template< int InterpType >
440 static __forceinline s32 GetVoiceValues( V_Core& thiscore, uint voiceidx )
441 {
442 V_Voice& vc( thiscore.Voices[voiceidx] );
443
444 while( vc.SP > 0 )
445 {
446 if( InterpType >= 2 )
447 {
448 vc.PV4 = vc.PV3;
449 vc.PV3 = vc.PV2;
450 }
451 vc.PV2 = vc.PV1;
452 vc.PV1 = GetNextDataBuffered( thiscore, voiceidx );
453 vc.SP -= 4096;
454 }
455
456 const s32 mu = vc.SP + 4096;
457
458 switch( InterpType )
459 {
460 case 0: return vc.PV1<<1;
461 case 1: return (vc.PV1<<1) - (( (vc.PV2 - vc.PV1) * vc.SP)>>11);
462
463 case 2: return CubicInterpolate (vc.PV4, vc.PV3, vc.PV2, vc.PV1, mu);
464 case 3: return HermiteInterpolate<16384> (vc.PV4, vc.PV3, vc.PV2, vc.PV1, mu);
465 case 4: return CatmullRomInterpolate (vc.PV4, vc.PV3, vc.PV2, vc.PV1, mu);
466
467 jNO_DEFAULT;
468 }
469
470 return 0; // technically unreachable!
471 }
472
473 // Noise values need to be mixed without going through interpolation, since it
474 // can wreak havoc on the noise (causing muffling or popping). Not that this noise
475 // generator is accurate in its own right.. but eh, ah well :)
476 static __forceinline s32 GetNoiseValues( V_Core& thiscore, uint voiceidx )
477 {
478 V_Voice& vc( thiscore.Voices[voiceidx] );
479
480 s32 retval = GetNoiseValues();
481
482 /*while(vc.SP>=4096)
483 {
484 retval = GetNoiseValues();
485 vc.SP-=4096;
486 }*/
487
488 // GetNoiseValues can't set the phase zero on us unexpectedly
489 // like GetVoiceValues can. Better assert just in case though..
490 jASSUME( vc.ADSR.Phase != 0 );
491
492 return retval;
493 }
494
495 /////////////////////////////////////////////////////////////////////////////////////////
496 /////////////////////////////////////////////////////////////////////////////////////////
497 // //
498
499 // writes a signed value to the SPU2 ram
500 // Performs no cache invalidation -- use only for dynamic memory ranges
501 // of the SPU2 (between 0x0000 and SPU2_DYN_MEMLINE)
502 static __forceinline void spu2M_WriteFast( u32 addr, s16 value )
503 {
504 // Fixes some of the oldest hangs in pcsx2's history! :p
505 for( uint i=0; i<2; i++ )
506 {
507 if( Cores[i].IRQEnable && Cores[i].IRQA == addr )
508 {
509 //printf("Core %d special write IRQ Called (IRQ passed). IRQA = %x\n",i,addr);
510 SetIrqCall(i);
511 }
512 }
513 // throw an assertion if the memory range is invalid:
514 #ifndef DEBUG_FAST
515 jASSUME( addr < SPU2_DYN_MEMLINE );
516 #endif
517 *GetMemPtr( addr ) = value;
518 }
519
520
521 static __forceinline StereoOut32 MixVoice( uint coreidx, uint voiceidx )
522 {
523 V_Core& thiscore( Cores[coreidx] );
524 V_Voice& vc( thiscore.Voices[voiceidx] );
525
526 // If this assertion fails, it mans SCurrent is being corrupted somewhere, or is not initialized
527 // properly. Invalid values in SCurrent will cause errant IRQs and corrupted audio.
528 pxAssumeMsg( (vc.SCurrent <= 28) && (vc.SCurrent != 0), "Current sample should always range from 1->28" );
529
530 // Most games don't use much volume slide effects. So only call the UpdateVolume
531 // methods when needed by checking the flag outside the method here...
532 // (Note: Ys 6 : Ark of Nephistm uses these effects)
533
534 vc.Volume.Update();
535
536 // SPU2 Note: The spu2 continues to process voices for eternity, always, so we
537 // have to run through all the motions of updating the voice regardless of it's
538 // audible status. Otherwise IRQs might not trigger and emulation might fail.
539
540 if( vc.ADSR.Phase > 0 )
541 {
542 UpdatePitch( coreidx, voiceidx );
543
544 s32 Value;
545
546 if( vc.Noise )
547 Value = GetNoiseValues( thiscore, voiceidx );
548 else
549 {
550 // Optimization : Forceinline'd Templated Dispatch Table. Any halfwit compiler will
551 // turn this into a clever jump dispatch table (no call/rets, no compares, uber-efficient!)
552
553 switch( Interpolation )
554 {
555 case 0: Value = GetVoiceValues<0>( thiscore, voiceidx ); break;
556 case 1: Value = GetVoiceValues<1>( thiscore, voiceidx ); break;
557 case 2: Value = GetVoiceValues<2>( thiscore, voiceidx ); break;
558 case 3: Value = GetVoiceValues<3>( thiscore, voiceidx ); break;
559 case 4: Value = GetVoiceValues<4>( thiscore, voiceidx ); break;
560
561 jNO_DEFAULT;
562 }
563 }
564
565 // Update and Apply ADSR (applies to normal and noise sources)
566 //
567 // Note! It's very important that ADSR stay as accurate as possible. By the way
568 // it is used, various sound effects can end prematurely if we truncate more than
569 // one or two bits. Best result comes from no truncation at all, which is why we
570 // use a full 64-bit multiply/result here.
571
572 CalculateADSR( thiscore, voiceidx );
573 Value = MulShr32( Value, vc.ADSR.Value );
574
575 // Store Value for eventual modulation later
576 // Pseudonym's Crest calculation idea. Actually calculates a crest, unlike the old code which was just peak.
577 u32 Amplitude = std::abs(Value);
578 if(Amplitude < vc.NextCrest)
579 {
580 vc.OutX = vc.NextCrest;
581 vc.NextCrest = 0;
582 }
583 if(Amplitude > vc.PrevAmp)
584 {
585 vc.NextCrest = Amplitude;
586 }
587 vc.PrevAmp = Amplitude;
588
589 if( IsDevBuild )
590 DebugCores[coreidx].Voices[voiceidx].displayPeak = std::max(DebugCores[coreidx].Voices[voiceidx].displayPeak,(s32)vc.OutX);
591
592 // Write-back of raw voice data (post ADSR applied)
593
594 if (voiceidx==1) spu2M_WriteFast( ( (0==coreidx) ? 0x400 : 0xc00 ) + OutPos, vc.OutX );
595 else if (voiceidx==3) spu2M_WriteFast( ( (0==coreidx) ? 0x600 : 0xe00 ) + OutPos, vc.OutX );
596
597 return ApplyVolume( StereoOut32( Value, Value ), vc.Volume );
598 }
599 else
600 {
601 // Continue processing voice, even if it's "off". Or else we miss interrupts! (Fatal Frame engine died because of this.)
602 if ((vc.LoopFlags & 3) != 3 || vc.LoopStartA != (vc.NextA & ~7)) {
603 UpdatePitch(coreidx, voiceidx);
604
605 while (vc.SP > 0)
606 GetNextDataDummy(thiscore, voiceidx); // Dummy is enough
607 }
608
609 // Write-back of raw voice data (some zeros since the voice is "dead")
610 if (voiceidx==1) spu2M_WriteFast( ( (0==coreidx) ? 0x400 : 0xc00 ) + OutPos, 0 );
611 else if (voiceidx==3) spu2M_WriteFast( ( (0==coreidx) ? 0x600 : 0xe00 ) + OutPos, 0 );
612
613 return StereoOut32( 0, 0 );
614 }
615 }
616
617 const VoiceMixSet VoiceMixSet::Empty( (StereoOut32()), (StereoOut32()) ); // Don't use SteroOut32::Empty because C++ doesn't make any dep/order checks on global initializers.
618
619 static __forceinline void MixCoreVoices( VoiceMixSet& dest, const uint coreidx )
620 {
621 V_Core& thiscore( Cores[coreidx] );
622
623 for( uint voiceidx=0; voiceidx<V_Core::NumVoices; ++voiceidx )
624 {
625 StereoOut32 VVal( MixVoice( coreidx, voiceidx ) );
626
627 // Note: Results from MixVoice are ranged at 16 bits.
628
629 dest.Dry.Left += VVal.Left & thiscore.VoiceGates[voiceidx].DryL;
630 dest.Dry.Right += VVal.Right & thiscore.VoiceGates[voiceidx].DryR;
631 dest.Wet.Left += VVal.Left & thiscore.VoiceGates[voiceidx].WetL;
632 dest.Wet.Right += VVal.Right & thiscore.VoiceGates[voiceidx].WetR;
633 }
634 }
635
636 StereoOut32 V_Core::Mix( const VoiceMixSet& inVoices, const StereoOut32& Input, const StereoOut32& Ext )
637 {
638 MasterVol.Update();
639
640 // Saturate final result to standard 16 bit range.
641 const VoiceMixSet Voices( clamp_mix( inVoices.Dry ), clamp_mix( inVoices.Wet ) );
642
643 // Write Mixed results To Output Area
644 spu2M_WriteFast( ( (0==Index) ? 0x1000 : 0x1800 ) + OutPos, Voices.Dry.Left );
645 spu2M_WriteFast( ( (0==Index) ? 0x1200 : 0x1A00 ) + OutPos, Voices.Dry.Right );
646 spu2M_WriteFast( ( (0==Index) ? 0x1400 : 0x1C00 ) + OutPos, Voices.Wet.Left );
647 spu2M_WriteFast( ( (0==Index) ? 0x1600 : 0x1E00 ) + OutPos, Voices.Wet.Right );
648
649 // Write mixed results to logfile (if enabled)
650
651 WaveDump::WriteCore( Index, CoreSrc_DryVoiceMix, Voices.Dry );
652 WaveDump::WriteCore( Index, CoreSrc_WetVoiceMix, Voices.Wet );
653
654 // Mix in the Input data
655
656 StereoOut32 TD(
657 Input.Left & DryGate.InpL,
658 Input.Right & DryGate.InpR
659 );
660
661 // Mix in the Voice data
662 TD.Left += Voices.Dry.Left & DryGate.SndL;
663 TD.Right += Voices.Dry.Right & DryGate.SndR;
664
665 // Mix in the External (nothing/core0) data
666 TD.Left += Ext.Left & DryGate.ExtL;
667 TD.Right += Ext.Right & DryGate.ExtR;
668
669 // User-level Effects disabling. Nice speedup but breaks games that depend on
670 // reverb IRQs (very few -- if you find one name it here!).
671 if( EffectsDisabled ) return TD;
672
673 // ----------------------------------------------------------------------------
674 // Reverberation Effects Processing
675 // ----------------------------------------------------------------------------
676 // SPU2 has an FxEnable bit which seems to disable all reverb processing *and*
677 // output, but does *not* disable the advancing buffers. IRQs are not triggered
678 // and reverb is rendered silent.
679 //
680 // Technically we should advance the buffers even when fx are disabled. However
681 // there are two things that make this very unlikely to matter:
682 //
683 // 1. Any SPU2 app wanting to avoid noise or pops needs to clear the reverb buffers
684 // when adjusting settings anyway; so the read/write positions in the reverb
685 // buffer after FxEnabled is set back to 1 doesn't really matter.
686 //
687 // 2. Writes to ESA (and possibly EEA) reset the buffer pointers to 0.
688 //
689 // On the other hand, updating the buffer is cheap and easy, so might as well. ;)
690
691 Reverb_AdvanceBuffer(); // Updates the reverb work area as well, if needed.
692 if (!FxEnable) return TD;
693
694 StereoOut32 TW;
695
696 // Mix Input, Voice, and External data:
697
698 TW.Left = Input.Left & WetGate.InpL;
699 TW.Right = Input.Right & WetGate.InpR;
700
701 TW.Left += Voices.Wet.Left & WetGate.SndL;
702 TW.Right += Voices.Wet.Right & WetGate.SndR;
703 TW.Left += Ext.Left & WetGate.ExtL;
704 TW.Right += Ext.Right & WetGate.ExtR;
705
706 WaveDump::WriteCore( Index, CoreSrc_PreReverb, TW );
707
708 StereoOut32 RV( DoReverb( TW ) );
709
710 WaveDump::WriteCore( Index, CoreSrc_PostReverb, RV );
711
712 // Boost reverb volume
713 int temp = 1;
714 switch (ReverbBoost)
715 {
716 case 0: break;
717 case 1: temp = 2; break;
718 case 2: temp = 4; break;
719 case 3: temp = 8; break;
720 }
721 // Mix Dry + Wet
722 // (master volume is applied later to the result of both outputs added together).
723 return TD + ApplyVolume( RV*temp, FxVol );
724 }
725
726 // Filters that work on the final output to de-alias and equlize it.
727 // Taken from http://nenolod.net/projects/upse/
728 #define OVERALL_SCALE (0.87f)
729
730 StereoOut32 Apply_Frequency_Response_Filter(StereoOut32 &SoundStream)
731 {
732 static FrequencyResponseFilter FRF = FrequencyResponseFilter();
733
734 s32 in, out;
735 s32 l, r;
736 s32 mid, side;
737
738 l = SoundStream.Left;
739 r = SoundStream.Right;
740
741 mid = l + r;
742 side = l - r;
743
744 in = mid;
745 out = FRF.la0 * in + FRF.la1 * FRF.lx1 + FRF.la2 * FRF.lx2 - FRF.lb1 * FRF.ly1 - FRF.lb2 * FRF.ly2;
746
747 FRF.lx2 = FRF.lx1;
748 FRF.lx1 = in;
749
750 FRF.ly2 = FRF.ly1;
751 FRF.ly1 = out;
752
753 mid = out;
754
755 l = ((0.5) * (OVERALL_SCALE)) * (mid + side);
756 r = ((0.5) * (OVERALL_SCALE)) * (mid - side);
757
758 in = l;
759 out = FRF.ha0 * in + FRF.ha1 * FRF.History_One_In.Left + FRF.ha2 * FRF.History_Two_In.Left - FRF.hb1 * FRF.History_One_Out.Left - FRF.hb2 * FRF.History_Two_Out.Left;
760 FRF.History_Two_In.Left = FRF.History_One_In.Left; FRF.History_One_In.Left = in;
761 FRF.History_Two_Out.Left = FRF.History_One_Out.Left; FRF.History_One_Out.Left = out;
762 l = out;
763
764 in = r;
765 out = FRF.ha0 * in + FRF.ha1 * FRF.History_One_In.Right + FRF.ha2 * FRF.History_Two_In.Right - FRF.hb1 * FRF.History_One_Out.Right - FRF.hb2 * FRF.History_Two_Out.Right;
766 FRF.History_Two_In.Right = FRF.History_One_In.Right; FRF.History_One_In.Right = in;
767 FRF.History_Two_Out.Right = FRF.History_One_Out.Right; FRF.History_One_Out.Right = out;
768 r = out;
769
770 //clamp_mix(l);
771 //clamp_mix(r);
772
773 SoundStream.Left = l;
774 SoundStream.Right = r;
775
776 return SoundStream;
777 }
778
779 StereoOut32 Apply_Dealias_Filter(StereoOut32 &SoundStream)
780 {
781 static StereoOut32 Old = StereoOut32::Empty;
782
783 s32 l, r;
784
785 l = SoundStream.Left;
786 r = SoundStream.Right;
787
788 l += (l - Old.Left);
789 r += (r - Old.Right);
790
791 Old.Left = SoundStream.Left;
792 Old.Right = SoundStream.Right;
793
794 SoundStream.Left = l;
795 SoundStream.Right = r;
796
797 return SoundStream;
798 }
799
800 // used to throttle the output rate of cache stat reports
801 static int p_cachestat_counter=0;
802
803 __forceinline void Mix()
804 {
805 // Note: Playmode 4 is SPDIF, which overrides other inputs.
806 StereoOut32 InputData[2] =
807 {
808 // SPDIF is on Core 0:
809 // Fixme:
810 // 1. We do not have an AC3 decoder for the bitstream.
811 // 2. Games usually provide a normal ADMA stream as well and want to see it getting read!
812 /*(PlayMode&4) ? StereoOut32::Empty : */ApplyVolume( Cores[0].ReadInput(), Cores[0].InpVol ),
813
814 // CDDA is on Core 1:
815 (PlayMode&8) ? StereoOut32::Empty : ApplyVolume( Cores[1].ReadInput(), Cores[1].InpVol )
816 };
817
818 WaveDump::WriteCore( 0, CoreSrc_Input, InputData[0] );
819 WaveDump::WriteCore( 1, CoreSrc_Input, InputData[1] );
820
821 // Todo: Replace me with memzero initializer!
822 VoiceMixSet VoiceData[2] = { VoiceMixSet::Empty, VoiceMixSet::Empty }; // mixed voice data for each core.
823 MixCoreVoices( VoiceData[0], 0 );
824 MixCoreVoices( VoiceData[1], 1 );
825
826 StereoOut32 Ext( Cores[0].Mix( VoiceData[0], InputData[0], StereoOut32::Empty ) );
827
828 if( (PlayMode & 4) || (Cores[0].Mute!=0) )
829 Ext = StereoOut32::Empty;
830 else
831 {
832 Ext = clamp_mix( ApplyVolume( Ext, Cores[0].MasterVol ) );
833 }
834
835 // Commit Core 0 output to ram before mixing Core 1:
836
837 spu2M_WriteFast( 0x800 + OutPos, Ext.Left );
838 spu2M_WriteFast( 0xA00 + OutPos, Ext.Right );
839 WaveDump::WriteCore( 0, CoreSrc_External, Ext );
840
841 ApplyVolume( Ext, Cores[1].ExtVol );
842 StereoOut32 Out( Cores[1].Mix( VoiceData[1], InputData[1], Ext ) );
843
844 if( PlayMode & 8 )
845 {
846 // Experimental CDDA support
847 // The CDDA overrides all other mixer output. It's a direct feed!
848
849 Out = Cores[1].ReadInput_HiFi();
850 //WaveLog::WriteCore( 1, "CDDA-32", OutL, OutR );
851 }
852 else
853 {
854 Out.Left = MulShr32( Out.Left<<(SndOutVolumeShift+1), Cores[1].MasterVol.Left.Value );
855 Out.Right = MulShr32( Out.Right<<(SndOutVolumeShift+1), Cores[1].MasterVol.Right.Value );
856
857 #ifdef DEBUG_KEYS
858 if(postprocess_filter_enabled)
859 #endif
860 {
861 Out = Apply_Dealias_Filter ( Out );
862 Out = Apply_Frequency_Response_Filter ( Out );
863 }
864
865 // Final Clamp!
866 // Like any good audio system, the PS2 pumps the volume and incurs some distortion in its
867 // output, giving us a nice thumpy sound at times. So we add 1 above (2x volume pump) and
868 // then clamp it all here.
869 Out = clamp_mix( Out, SndOutVolumeShift );
870 }
871
872 SndBuffer::Write( Out );
873
874 // Update AutoDMA output positioning
875 OutPos++;
876 if (OutPos>=0x200) OutPos=0;
877
878 if( IsDevBuild )
879 {
880 p_cachestat_counter++;
881 if(p_cachestat_counter > (48000*10) )
882 {
883 p_cachestat_counter = 0;
884 if( MsgCache() ) ConLog( " * SPU2 > CacheStats > Hits: %d Misses: %d Ignores: %d\n",
885 g_counter_cache_hits,
886 g_counter_cache_misses,
887 g_counter_cache_ignores );
888
889 g_counter_cache_hits =
890 g_counter_cache_misses =
891 g_counter_cache_ignores = 0;
892 }
893 }
894 }
895
896 /////////////////////////////////////////////////////////////////////////////////////////
897 /////////////////////////////////////////////////////////////////////////////////////////
898 // //
899
900 /*
901 -----------------------------------------------------------------------------
902 PSX reverb hardware notes
903 by Neill Corlett
904 -----------------------------------------------------------------------------
905
906 Yadda yadda disclaimer yadda probably not perfect yadda well it's okay anyway
907 yadda yadda.
908
909 -----------------------------------------------------------------------------
910
911 Basics
912 ------
913
914 - The reverb buffer is 22khz 16-bit mono PCM.
915 - It starts at the reverb address given by 1DA2, extends to
916 the end of sound RAM, and wraps back to the 1DA2 address.
917
918 Setting the address at 1DA2 resets the current reverb work address.
919
920 This work address ALWAYS increments every 1/22050 sec., regardless of
921 whether reverb is enabled (bit 7 of 1DAA set).
922
923 And the contents of the reverb buffer ALWAYS play, scaled by the
924 "reverberation depth left/right" volumes (1D84/1D86).
925 (which, by the way, appear to be scaled so 3FFF=approx. 1.0, 4000=-1.0)
926
927 -----------------------------------------------------------------------------
928
929 Register names
930 --------------
931
932 These are probably not their real names.
933 These are probably not even correct names.
934 We will use them anyway, because we can.
935
936 1DC0: FB_SRC_A (offset)
937 1DC2: FB_SRC_B (offset)
938 1DC4: IIR_ALPHA (coef.)
939 1DC6: ACC_COEF_A (coef.)
940 1DC8: ACC_COEF_B (coef.)
941 1DCA: ACC_COEF_C (coef.)
942 1DCC: ACC_COEF_D (coef.)
943 1DCE: IIR_COEF (coef.)
944 1DD0: FB_ALPHA (coef.)
945 1DD2: FB_X (coef.)
946 1DD4: IIR_DEST_A0 (offset)
947 1DD6: IIR_DEST_A1 (offset)
948 1DD8: ACC_SRC_A0 (offset)
949 1DDA: ACC_SRC_A1 (offset)
950 1DDC: ACC_SRC_B0 (offset)
951 1DDE: ACC_SRC_B1 (offset)
952 1DE0: IIR_SRC_A0 (offset)
953 1DE2: IIR_SRC_A1 (offset)
954 1DE4: IIR_DEST_B0 (offset)
955 1DE6: IIR_DEST_B1 (offset)
956 1DE8: ACC_SRC_C0 (offset)
957 1DEA: ACC_SRC_C1 (offset)
958 1DEC: ACC_SRC_D0 (offset)
959 1DEE: ACC_SRC_D1 (offset)
960 1DF0: IIR_SRC_B1 (offset)
961 1DF2: IIR_SRC_B0 (offset)
962 1DF4: MIX_DEST_A0 (offset)
963 1DF6: MIX_DEST_A1 (offset)
964 1DF8: MIX_DEST_B0 (offset)
965 1DFA: MIX_DEST_B1 (offset)
966 1DFC: IN_COEF_L (coef.)
967 1DFE: IN_COEF_R (coef.)
968
969 The coefficients are signed fractional values.
970 -32768 would be -1.0
971 32768 would be 1.0 (if it were possible... the highest is of course 32767)
972
973 The offsets are (byte/8) offsets into the reverb buffer.
974 i.e. you multiply them by 8, you get byte offsets.
975 You can also think of them as (samples/4) offsets.
976 They appear to be signed. They can be negative.
977 None of the documented presets make them negative, though.
978
979 Yes, 1DF0 and 1DF2 appear to be backwards. Not a typo.
980
981 -----------------------------------------------------------------------------
982
983 What it does
984 ------------
985
986 We take all reverb sources:
987 - regular channels that have the reverb bit on
988 - cd and external sources, if their reverb bits are on
989 and mix them into one stereo 44100hz signal.
990
991 Lowpass/downsample that to 22050hz. The PSX uses a proper bandlimiting
992 algorithm here, but I haven't figured out the hysterically exact specifics.
993 I use an 8-tap filter with these coefficients, which are nice but probably
994 not the real ones:
995
996 0.037828187894
997 0.157538631280
998 0.321159685278
999 0.449322115345
1000 0.449322115345
1001 0.321159685278
1002 0.157538631280
1003 0.037828187894
1004
1005 So we have two input samples (INPUT_SAMPLE_L, INPUT_SAMPLE_R) every 22050hz.
1006
1007 * IN MY EMULATION, I divide these by 2 to make it clip less.
1008 (and of course the L/R output coefficients are adjusted to compensate)
1009 The real thing appears to not do this.
1010
1011 At every 22050hz tick:
1012 - If the reverb bit is enabled (bit 7 of 1DAA), execute the reverb
1013 steady-state algorithm described below
1014 - AFTERWARDS, retrieve the "wet out" L and R samples from the reverb buffer
1015 (This part may not be exactly right and I guessed at the coefs. TODO: check later.)
1016 L is: 0.333 * (buffer[MIX_DEST_A0] + buffer[MIX_DEST_B0])
1017 R is: 0.333 * (buffer[MIX_DEST_A1] + buffer[MIX_DEST_B1])
1018 - Advance the current buffer position by 1 sample
1019
1020 The wet out L and R are then upsampled to 44100hz and played at the
1021 "reverberation depth left/right" (1D84/1D86) volume, independent of the main
1022 volume.
1023
1024 -----------------------------------------------------------------------------
1025
1026 Reverb steady-state
1027 -------------------
1028
1029 The reverb steady-state algorithm is fairly clever, and of course by
1030 "clever" I mean "batshit insane".
1031
1032 buffer[x] is relative to the current buffer position, not the beginning of
1033 the buffer. Note that all buffer offsets must wrap around so they're
1034 contained within the reverb work area.
1035
1036 Clipping is performed at the end... maybe also sooner, but definitely at
1037 the end.
1038
1039 IIR_INPUT_A0 = buffer[IIR_SRC_A0] * IIR_COEF + INPUT_SAMPLE_L * IN_COEF_L;
1040 IIR_INPUT_A1 = buffer[IIR_SRC_A1] * IIR_COEF + INPUT_SAMPLE_R * IN_COEF_R;
1041 IIR_INPUT_B0 = buffer[IIR_SRC_B0] * IIR_COEF + INPUT_SAMPLE_L * IN_COEF_L;
1042 IIR_INPUT_B1 = buffer[IIR_SRC_B1] * IIR_COEF + INPUT_SAMPLE_R * IN_COEF_R;
1043
1044 IIR_A0 = IIR_INPUT_A0 * IIR_ALPHA + buffer[IIR_DEST_A0] * (1.0 - IIR_ALPHA);
1045 IIR_A1 = IIR_INPUT_A1 * IIR_ALPHA + buffer[IIR_DEST_A1] * (1.0 - IIR_ALPHA);
1046 IIR_B0 = IIR_INPUT_B0 * IIR_ALPHA + buffer[IIR_DEST_B0] * (1.0 - IIR_ALPHA);
1047 IIR_B1 = IIR_INPUT_B1 * IIR_ALPHA + buffer[IIR_DEST_B1] * (1.0 - IIR_ALPHA);
1048
1049 buffer[IIR_DEST_A0 + 1sample] = IIR_A0;
1050 buffer[IIR_DEST_A1 + 1sample] = IIR_A1;
1051 buffer[IIR_DEST_B0 + 1sample] = IIR_B0;
1052 buffer[IIR_DEST_B1 + 1sample] = IIR_B1;
1053
1054 ACC0 = buffer[ACC_SRC_A0] * ACC_COEF_A +
1055 buffer[ACC_SRC_B0] * ACC_COEF_B +
1056 buffer[ACC_SRC_C0] * ACC_COEF_C +
1057 buffer[ACC_SRC_D0] * ACC_COEF_D;
1058 ACC1 = buffer[ACC_SRC_A1] * ACC_COEF_A +
1059 buffer[ACC_SRC_B1] * ACC_COEF_B +
1060 buffer[ACC_SRC_C1] * ACC_COEF_C +
1061 buffer[ACC_SRC_D1] * ACC_COEF_D;
1062
1063 FB_A0 = buffer[MIX_DEST_A0 - FB_SRC_A];
1064 FB_A1 = buffer[MIX_DEST_A1 - FB_SRC_A];
1065 FB_B0 = buffer[MIX_DEST_B0 - FB_SRC_B];
1066 FB_B1 = buffer[MIX_DEST_B1 - FB_SRC_B];
1067
1068 buffer[MIX_DEST_A0] = ACC0 - FB_A0 * FB_ALPHA;
1069 buffer[MIX_DEST_A1] = ACC1 - FB_A1 * FB_ALPHA;
1070 buffer[MIX_DEST_B0] = (FB_ALPHA * ACC0) - FB_A0 * (FB_ALPHA^0x8000) - FB_B0 * FB_X;
1071 buffer[MIX_DEST_B1] = (FB_ALPHA * ACC1) - FB_A1 * (FB_ALPHA^0x8000) - FB_B1 * FB_X;
1072
1073 Air notes:
1074 The above is effectively the same as:
1075 buffer[MIX_DEST_B0] = (ACC0 * FB_ALPHA) + (FB_A0 * (1.0-FB_ALPHA)) - FB_B0 * FB_X;
1076 buffer[MIX_DEST_B1] = (ACC1 * FB_ALPHA) + (FB_A1 * (1.0-FB_ALPHA)) - FB_B1 * FB_X;
1077
1078 Which reduces to:
1079 buffer[MIX_DEST_B0] = FB_A0 + ((ACC0-FB_A0) * FB_ALPHA) - FB_B0 * FB_X;
1080 buffer[MIX_DEST_B1] = FB_A1 + ((ACC1-FB_A1) * FB_ALPHA) - FB_B1 * FB_X;
1081
1082
1083 -----------------------------------------------------------------------------
1084 */

  ViewVC Help
Powered by ViewVC 1.1.22