/[pcsx2_0.9.7]/trunk/pcsx2/IPU/IPU.cpp
ViewVC logotype

Contents of /trunk/pcsx2/IPU/IPU.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 62 - (show annotations) (download)
Tue Sep 7 11:08:22 2010 UTC (9 years, 10 months ago) by william
File size: 27513 byte(s)
Auto Commited Import of: pcsx2-0.9.7-r3738-debug in ./trunk
1 /* PCSX2 - PS2 Emulator for PCs
2 * Copyright (C) 2002-2010 PCSX2 Dev Team
3 *
4 * PCSX2 is free software: you can redistribute it and/or modify it under the terms
5 * of the GNU Lesser General Public License as published by the Free Software Found-
6 * ation, either version 3 of the License, or (at your option) any later version.
7 *
8 * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10 * PURPOSE. See the GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License along with PCSX2.
13 * If not, see <http://www.gnu.org/licenses/>.
14 */
15
16 #include "PrecompiledHeader.h"
17 #include "Common.h"
18
19 #include "IPU.h"
20 #include "IPUdma.h"
21 #include "yuv2rgb.h"
22 #include "mpeg2lib/Mpeg.h"
23
24 #include "Vif.h"
25 #include "Gif.h"
26 #include "Vif_Dma.h"
27 #include <limits.h>
28
29 static __fi void IPU_INT0_FROM()
30 {
31 if (ipu0dma.qwc > 0 && ipu0dma.chcr.STR) ipu0Interrupt();
32 }
33
34 tIPU_cmd ipu_cmd;
35
36 void ReorderBitstream();
37
38 // the BP doesn't advance and returns -1 if there is no data to be read
39 __aligned16 tIPU_BP g_BP;
40
41 void IPUWorker();
42
43 // Color conversion stuff, the memory layout is a total hack
44 // convert_data_buffer is a pointer to the internal rgb struct (the first param in convert_init_t)
45 //char convert_data_buffer[sizeof(convert_rgb_t)];
46 //char convert_data_buffer[0x1C]; // unused?
47 //u8 PCT[] = {'r', 'I', 'P', 'B', 'D', '-', '-', '-'}; // unused?
48
49 // Quantization matrix
50 static u16 vqclut[16]; //clut conversion table
51 static u8 s_thresh[2]; //thresholds for color conversions
52 int coded_block_pattern = 0;
53
54
55 u8 indx4[16*16/2];
56 __aligned16 decoder_t decoder;
57
58 __aligned16 u8 _readbits[80]; //local buffer (ring buffer)
59 u8* readbits = _readbits; // always can decrement by one 1qw
60
61 __fi void IPUProcessInterrupt()
62 {
63 if (ipuRegs.ctrl.BUSY && g_BP.IFC) IPUWorker();
64 }
65
66 /////////////////////////////////////////////////////////
67 // Register accesses (run on EE thread)
68 int ipuInit()
69 {
70 memzero(ipuRegs);
71 memzero(g_BP);
72 memzero(decoder);
73
74 decoder.picture_structure = FRAME_PICTURE; //default: progressive...my guess:P
75
76 ipu_fifo.init();
77 ipu_cmd.clear();
78
79 return 0;
80 }
81
82 void ipuReset()
83 {
84 ipuInit();
85 }
86
87 void ReportIPU()
88 {
89 //Console.WriteLn(g_nDMATransfer.desc());
90 Console.WriteLn(ipu_fifo.in.desc());
91 Console.WriteLn(ipu_fifo.out.desc());
92 Console.WriteLn(g_BP.desc());
93 Console.WriteLn("vqclut = 0x%x.", vqclut);
94 Console.WriteLn("s_thresh = 0x%x.", s_thresh);
95 Console.WriteLn("coded_block_pattern = 0x%x.", coded_block_pattern);
96 Console.WriteLn("g_decoder = 0x%x.", &decoder);
97 Console.WriteLn("mpeg2_scan = 0x%x.", &mpeg2_scan);
98 Console.WriteLn(ipu_cmd.desc());
99 Console.WriteLn("_readbits = 0x%x. readbits - _readbits, which is also frozen, is 0x%x.",
100 _readbits, readbits - _readbits);
101 Console.Newline();
102 }
103
104 void SaveStateBase::ipuFreeze()
105 {
106 // Get a report of the status of the ipu variables when saving and loading savestates.
107 //ReportIPU();
108 FreezeTag("IPU");
109 Freeze(ipu_fifo);
110
111 Freeze(g_BP);
112 Freeze(vqclut);
113 Freeze(s_thresh);
114 Freeze(coded_block_pattern);
115 Freeze(decoder);
116 Freeze(ipu_cmd);
117 Freeze(_readbits);
118
119 int temp = readbits - _readbits;
120 Freeze(temp);
121
122 if (IsLoading())
123 {
124 readbits = _readbits;
125 }
126 }
127
128 void tIPU_CMD_IDEC::log() const
129 {
130 IPU_LOG("IDEC command.");
131
132 if (FB) IPU_LOG(" Skip %d bits.", FB);
133 IPU_LOG(" Quantizer step code=0x%X.", QSC);
134
135 if (DTD == 0)
136 IPU_LOG(" Does not decode DT.");
137 else
138 IPU_LOG(" Decodes DT.");
139
140 if (SGN == 0)
141 IPU_LOG(" No bias.");
142 else
143 IPU_LOG(" Bias=128.");
144
145 if (DTE == 1) IPU_LOG(" Dither Enabled.");
146 if (OFM == 0)
147 IPU_LOG(" Output format is RGB32.");
148 else
149 IPU_LOG(" Output format is RGB16.");
150
151 IPU_LOG("");
152 }
153
154 void tIPU_CMD_BDEC::log(int s_bdec) const
155 {
156 IPU_LOG("BDEC(macroblock decode) command %x, num: 0x%x", cpuRegs.pc, s_bdec);
157 if (FB) IPU_LOG(" Skip 0x%X bits.", FB);
158
159 if (MBI)
160 IPU_LOG(" Intra MB.");
161 else
162 IPU_LOG(" Non-intra MB.");
163
164 if (DCR)
165 IPU_LOG(" Resets DC prediction value.");
166 else
167 IPU_LOG(" Doesn't reset DC prediction value.");
168
169 if (DT)
170 IPU_LOG(" Use field DCT.");
171 else
172 IPU_LOG(" Use frame DCT.");
173
174 IPU_LOG(" Quantizer step=0x%X", QSC);
175 }
176
177 void tIPU_CMD_CSC::log_from_YCbCr() const
178 {
179 IPU_LOG("CSC(Colorspace conversion from YCbCr) command (%d).", MBC);
180 if (OFM)
181 IPU_LOG("Output format is RGB16. ");
182 else
183 IPU_LOG("Output format is RGB32. ");
184
185 if (DTE) IPU_LOG("Dithering enabled.");
186 }
187
188 void tIPU_CMD_CSC::log_from_RGB32() const
189 {
190 IPU_LOG("PACK (Colorspace conversion from RGB32) command.");
191
192 if (OFM)
193 IPU_LOG("Output format is RGB16. ");
194 else
195 IPU_LOG("Output format is INDX4. ");
196
197 if (DTE) IPU_LOG("Dithering enabled.");
198
199 IPU_LOG("Number of macroblocks to be converted: %d", MBC);
200 }
201
202
203 __fi u32 ipuRead32(u32 mem)
204 {
205 // Note: It's assumed that mem's input value is always in the 0x10002000 page
206 // of memory (if not, it's probably bad code).
207
208 pxAssert((mem & ~0xff) == 0x10002000);
209 mem &= 0xff; // ipu repeats every 0x100
210
211 //IPUProcessInterrupt();
212
213 switch (mem)
214 {
215 ipucase(IPU_CTRL): // IPU_CTRL
216 ipuRegs.ctrl.IFC = g_BP.IFC;
217 ipuRegs.ctrl.CBP = coded_block_pattern;
218
219 if (!ipuRegs.ctrl.BUSY)
220 IPU_LOG("read32: IPU_CTRL=0x%08X", ipuRegs.ctrl._u32);
221
222 return ipuRegs.ctrl._u32;
223
224 ipucase(IPU_BP): // IPU_BP
225 ipuRegs.ipubp = g_BP.BP & 0x7f;
226 ipuRegs.ipubp |= g_BP.IFC << 8;
227 ipuRegs.ipubp |= (g_BP.FP /*+ g_BP.bufferhasnew*/) << 16;
228
229 IPU_LOG("read32: IPU_BP=0x%08X", ipuRegs.ipubp);
230 return ipuRegs.ipubp;
231
232 default:
233 IPU_LOG("read32: Addr=0x%08X Value = 0x%08X", mem, psHu32(IPU_CMD + mem));
234 }
235
236 return psHu32(IPU_CMD + mem);
237 }
238
239 __fi u64 ipuRead64(u32 mem)
240 {
241 // Note: It's assumed that mem's input value is always in the 0x10002000 page
242 // of memory (if not, it's probably bad code).
243
244 pxAssert((mem & ~0xff) == 0x10002000);
245 mem &= 0xff; // ipu repeats every 0x100
246
247 //IPUProcessInterrupt();
248
249 switch (mem)
250 {
251 ipucase(IPU_CMD): // IPU_CMD
252 if (ipuRegs.cmd.DATA & 0xffffff)
253 IPU_LOG("read64: IPU_CMD=BUSY=%x, DATA=%08X", ipuRegs.cmd.BUSY ? 1 : 0, ipuRegs.cmd.DATA);
254 break;
255
256 ipucase(IPU_CTRL):
257 DevCon.Warning("reading 64bit IPU ctrl");
258 break;
259
260 ipucase(IPU_BP):
261 DevCon.Warning("reading 64bit IPU top");
262 break;
263
264 ipucase(IPU_TOP): // IPU_TOP
265 IPU_LOG("read64: IPU_TOP=%x, bp = %d", ipuRegs.top, g_BP.BP);
266 break;
267
268 default:
269 IPU_LOG("read64: Unknown=%x", mem);
270 break;
271 }
272 return psHu64(IPU_CMD + mem);
273 }
274
275 void ipuSoftReset()
276 {
277 ipu_fifo.clear();
278
279 coded_block_pattern = 0;
280
281 ipuRegs.ctrl.reset();
282 ipuRegs.top = 0;
283 ipu_cmd.clear();
284 ipuRegs.cmd.BUSY = 0;
285
286 g_BP.BP = 0;
287 g_BP.FP = 0;
288 //g_BP.bufferhasnew = 0;
289 }
290
291 __fi bool ipuWrite32(u32 mem, u32 value)
292 {
293 // Note: It's assumed that mem's input value is always in the 0x10002000 page
294 // of memory (if not, it's probably bad code).
295
296 pxAssert((mem & ~0xfff) == 0x10002000);
297 mem &= 0xfff;
298
299 IPUProcessInterrupt();
300
301 switch (mem)
302 {
303 ipucase(IPU_CMD): // IPU_CMD
304 IPU_LOG("write32: IPU_CMD=0x%08X", value);
305 IPUCMD_WRITE(value);
306 return false;
307
308 ipucase(IPU_CTRL): // IPU_CTRL
309 // CTRL = the first 16 bits of ctrl [0x8000ffff], + value for the next 16 bits,
310 // minus the reserved bits. (18-19; 27-29) [0x47f30000]
311 ipuRegs.ctrl.write(value);
312 if (ipuRegs.ctrl.IDP == 3)
313 {
314 Console.WriteLn("IPU Invalid Intra DC Precision, switching to 9 bits");
315 ipuRegs.ctrl.IDP = 1;
316 }
317
318 if (ipuRegs.ctrl.RST) ipuSoftReset(); // RESET
319
320 IPU_LOG("write32: IPU_CTRL=0x%08X", value);
321 return false;
322 }
323 return true;
324 }
325
326 // returns FALSE when the writeback is handled, TRUE if the caller should do the
327 // writeback itself.
328 __fi bool ipuWrite64(u32 mem, u64 value)
329 {
330 // Note: It's assumed that mem's input value is always in the 0x10002000 page
331 // of memory (if not, it's probably bad code).
332
333 pxAssert((mem & ~0xfff) == 0x10002000);
334 mem &= 0xfff;
335
336 IPUProcessInterrupt();
337
338 switch (mem)
339 {
340 ipucase(IPU_CMD):
341 IPU_LOG("write64: IPU_CMD=0x%08X", value);
342 IPUCMD_WRITE((u32)value);
343 return false;
344 }
345
346 return true;
347 }
348
349
350 //////////////////////////////////////////////////////
351 // IPU Commands (exec on worker thread only)
352
353 static void ipuBCLR(u32 val)
354 {
355 ipu_fifo.in.clear();
356
357 g_BP.BP = val & 0x7F;
358 g_BP.FP = 0;
359 //g_BP.bufferhasnew = 0;
360 ipuRegs.ctrl.BUSY = 0;
361 ipuRegs.cmd.BUSY = 0;
362 memzero(_readbits);
363 IPU_LOG("Clear IPU input FIFO. Set Bit offset=0x%X", g_BP.BP);
364 }
365
366 static bool ipuIDEC(u32 val, bool resume)
367 {
368 tIPU_CMD_IDEC idec(val);
369
370 if (!resume)
371 {
372 idec.log();
373 g_BP.BP += idec.FB;//skip FB bits
374
375 //from IPU_CTRL
376 ipuRegs.ctrl.PCT = I_TYPE; //Intra DECoding;)
377
378 decoder.coding_type = ipuRegs.ctrl.PCT;
379 decoder.mpeg1 = ipuRegs.ctrl.MP1;
380 decoder.q_scale_type = ipuRegs.ctrl.QST;
381 decoder.intra_vlc_format = ipuRegs.ctrl.IVF;
382 decoder.scantype = ipuRegs.ctrl.AS;
383 decoder.intra_dc_precision = ipuRegs.ctrl.IDP;
384
385 //from IDEC value
386 decoder.quantizer_scale = idec.QSC;
387 decoder.frame_pred_frame_dct= !idec.DTD;
388 decoder.sgn = idec.SGN;
389 decoder.dte = idec.DTE;
390 decoder.ofm = idec.OFM;
391
392 //other stuff
393 decoder.dcr = 1; // resets DC prediction value
394 }
395
396 return mpeg2sliceIDEC();
397 }
398
399 static int s_bdec = 0;
400
401 static __fi bool ipuBDEC(u32 val, bool resume)
402 {
403 tIPU_CMD_BDEC bdec(val);
404
405 if (!resume)
406 {
407 bdec.log(s_bdec);
408 if (IsDebugBuild) s_bdec++;
409
410 g_BP.BP += bdec.FB;//skip FB bits
411 decoder.coding_type = I_TYPE;
412 decoder.mpeg1 = ipuRegs.ctrl.MP1;
413 decoder.q_scale_type = ipuRegs.ctrl.QST;
414 decoder.intra_vlc_format = ipuRegs.ctrl.IVF;
415 decoder.scantype = ipuRegs.ctrl.AS;
416 decoder.intra_dc_precision = ipuRegs.ctrl.IDP;
417
418 //from BDEC value
419 decoder.quantizer_scale = decoder.q_scale_type ? non_linear_quantizer_scale [bdec.QSC] : bdec.QSC << 1;
420 decoder.macroblock_modes = bdec.DT ? DCT_TYPE_INTERLACED : 0;
421 decoder.dcr = bdec.DCR;
422 decoder.macroblock_modes |= bdec.MBI ? MACROBLOCK_INTRA : MACROBLOCK_PATTERN;
423
424 memzero_sse_a(decoder.mb8);
425 memzero_sse_a(decoder.mb16);
426 }
427
428 return mpeg2_slice();
429 }
430
431 static bool __fastcall ipuVDEC(u32 val)
432 {
433 switch (ipu_cmd.pos[0])
434 {
435 case 0:
436 ipuRegs.cmd.DATA = 0;
437 if (!getBits32((u8*)&decoder.bitstream_buf, 0)) return false;
438
439 decoder.bitstream_bits = -16;
440 BigEndian(decoder.bitstream_buf, decoder.bitstream_buf);
441
442 switch ((val >> 26) & 3)
443 {
444 case 0://Macroblock Address Increment
445 decoder.mpeg1 = ipuRegs.ctrl.MP1;
446 ipuRegs.cmd.DATA = get_macroblock_address_increment();
447 break;
448
449 case 1://Macroblock Type
450 decoder.frame_pred_frame_dct = 1;
451 decoder.coding_type = ipuRegs.ctrl.PCT;
452 ipuRegs.cmd.DATA = get_macroblock_modes();
453 break;
454
455 case 2://Motion Code
456 ipuRegs.cmd.DATA = get_motion_delta(0);
457 break;
458
459 case 3://DMVector
460 ipuRegs.cmd.DATA = get_dmv();
461 break;
462 }
463
464 g_BP.BP += (int)decoder.bitstream_bits + 16;
465
466 if ((int)g_BP.BP < 0)
467 {
468 g_BP.BP += 128;
469 ReorderBitstream();
470 }
471
472 ipuRegs.cmd.DATA = (ipuRegs.cmd.DATA & 0xFFFF) | ((decoder.bitstream_bits + 16) << 16);
473 ipuRegs.ctrl.ECD = (ipuRegs.cmd.DATA == 0);
474
475 case 1:
476 if (!getBits32((u8*)&ipuRegs.top, 0))
477 {
478 ipu_cmd.pos[0] = 1;
479 return false;
480 }
481
482 BigEndian(ipuRegs.top, ipuRegs.top);
483
484 IPU_LOG("VDEC command data 0x%x(0x%x). Skip 0x%X bits/Table=%d (%s), pct %d",
485 ipuRegs.cmd.DATA, ipuRegs.cmd.DATA >> 16, val & 0x3f, (val >> 26) & 3, (val >> 26) & 1 ?
486 ((val >> 26) & 2 ? "DMV" : "MBT") : (((val >> 26) & 2 ? "MC" : "MBAI")), ipuRegs.ctrl.PCT);
487 return true;
488
489 jNO_DEFAULT
490 }
491
492 return false;
493 }
494
495 static __fi bool ipuFDEC(u32 val)
496 {
497 if (!getBits32((u8*)&ipuRegs.cmd.DATA, 0)) return false;
498
499 BigEndian(ipuRegs.cmd.DATA, ipuRegs.cmd.DATA);
500 ipuRegs.top = ipuRegs.cmd.DATA;
501
502 IPU_LOG("FDEC read: 0x%08x", ipuRegs.top);
503
504 return true;
505 }
506
507 static bool ipuSETIQ(u32 val)
508 {
509 int i;
510
511 if ((val >> 27) & 1)
512 {
513 u8 (&niq)[64] = decoder.niq;
514
515 for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++)
516 {
517 if (!getBits64((u8*)niq + 8 * ipu_cmd.pos[0], 1)) return false;
518 }
519
520 IPU_LOG("Read non-intra quantization matrix from FIFO.");
521 for (i = 0; i < 8; i++)
522 {
523 IPU_LOG("%02X %02X %02X %02X %02X %02X %02X %02X",
524 niq[i * 8 + 0], niq[i * 8 + 1], niq[i * 8 + 2], niq[i * 8 + 3],
525 niq[i * 8 + 4], niq[i * 8 + 5], niq[i * 8 + 6], niq[i * 8 + 7]);
526 }
527 }
528 else
529 {
530 u8 (&iq)[64] = decoder.iq;
531
532 for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++)
533 {
534 if (!getBits64((u8*)iq + 8 * ipu_cmd.pos[0], 1)) return false;
535 }
536
537 IPU_LOG("Read intra quantization matrix from FIFO.");
538 for (i = 0; i < 8; i++)
539 {
540 IPU_LOG("%02X %02X %02X %02X %02X %02X %02X %02X",
541 iq[i * 8 + 0], iq[i * 8 + 1], iq[i * 8 + 2], iq[i *8 + 3],
542 iq[i * 8 + 4], iq[i * 8 + 5], iq[i * 8 + 6], iq[i *8 + 7]);
543 }
544 }
545
546 return true;
547 }
548
549 static bool ipuSETVQ(u32 val)
550 {
551 for(;ipu_cmd.pos[0] < 4; ipu_cmd.pos[0]++)
552 {
553 if (!getBits64(((u8*)vqclut) + 8 * ipu_cmd.pos[0], 1)) return false;
554 }
555
556 IPU_LOG("SETVQ command.\nRead VQCLUT table from FIFO.");
557 IPU_LOG(
558 "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
559 "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d"
560 "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d "
561 "%02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d %02d:%02d:%02d",
562 vqclut[0] >> 10, (vqclut[0] >> 5) & 0x1F, vqclut[0] & 0x1F,
563 vqclut[1] >> 10, (vqclut[1] >> 5) & 0x1F, vqclut[1] & 0x1F,
564 vqclut[2] >> 10, (vqclut[2] >> 5) & 0x1F, vqclut[2] & 0x1F,
565 vqclut[3] >> 10, (vqclut[3] >> 5) & 0x1F, vqclut[3] & 0x1F,
566 vqclut[4] >> 10, (vqclut[4] >> 5) & 0x1F, vqclut[4] & 0x1F,
567 vqclut[5] >> 10, (vqclut[5] >> 5) & 0x1F, vqclut[5] & 0x1F,
568 vqclut[6] >> 10, (vqclut[6] >> 5) & 0x1F, vqclut[6] & 0x1F,
569 vqclut[7] >> 10, (vqclut[7] >> 5) & 0x1F, vqclut[7] & 0x1F,
570 vqclut[8] >> 10, (vqclut[8] >> 5) & 0x1F, vqclut[8] & 0x1F,
571 vqclut[9] >> 10, (vqclut[9] >> 5) & 0x1F, vqclut[9] & 0x1F,
572 vqclut[10] >> 10, (vqclut[10] >> 5) & 0x1F, vqclut[10] & 0x1F,
573 vqclut[11] >> 10, (vqclut[11] >> 5) & 0x1F, vqclut[11] & 0x1F,
574 vqclut[12] >> 10, (vqclut[12] >> 5) & 0x1F, vqclut[12] & 0x1F,
575 vqclut[13] >> 10, (vqclut[13] >> 5) & 0x1F, vqclut[13] & 0x1F,
576 vqclut[14] >> 10, (vqclut[14] >> 5) & 0x1F, vqclut[14] & 0x1F,
577 vqclut[15] >> 10, (vqclut[15] >> 5) & 0x1F, vqclut[15] & 0x1F);
578
579 return true;
580 }
581
582 // IPU Transfers are split into 8Qwords so we need to send ALL the data
583 static bool __fastcall ipuCSC(u32 val)
584 {
585 tIPU_CMD_CSC csc(val);
586 csc.log_from_YCbCr();
587
588 for (;ipu_cmd.index < (int)csc.MBC; ipu_cmd.index++)
589 {
590 for(;ipu_cmd.pos[0] < 48; ipu_cmd.pos[0]++)
591 {
592 if (!getBits64((u8*)&decoder.mb8 + 8 * ipu_cmd.pos[0], 1)) return false;
593 }
594
595 ipu_csc(decoder.mb8, decoder.rgb32, 0);
596 if (csc.OFM) ipu_dither(decoder.rgb32, decoder.rgb16, csc.DTE);
597
598 if (csc.OFM)
599 {
600 while (ipu_cmd.pos[1] < 32)
601 {
602 ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & decoder.rgb16) + 4 * ipu_cmd.pos[1], 32 - ipu_cmd.pos[1]);
603
604 if (ipu_cmd.pos[1] <= 0) return false;
605 }
606 }
607 else
608 {
609 while (ipu_cmd.pos[1] < 64)
610 {
611 ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & decoder.rgb32) + 4 * ipu_cmd.pos[1], 64 - ipu_cmd.pos[1]);
612
613 if (ipu_cmd.pos[1] <= 0) return false;
614 }
615 }
616
617 ipu_cmd.pos[0] = 0;
618 ipu_cmd.pos[1] = 0;
619 }
620
621 return true;
622 }
623
624 // Todo - Need to add the same stop and start code as CSC
625 static bool ipuPACK(u32 val)
626 {
627 tIPU_CMD_CSC csc(val);
628 csc.log_from_RGB32();
629
630 for (;ipu_cmd.index < (int)csc.MBC; ipu_cmd.index++)
631 {
632 for(;ipu_cmd.pos[0] < 8; ipu_cmd.pos[0]++)
633 {
634 if (!getBits64((u8*)&decoder.mb8 + 8 * ipu_cmd.pos[0], 1)) return false;
635 }
636
637 ipu_csc(decoder.mb8, decoder.rgb32, 0);
638 ipu_dither(decoder.rgb32, decoder.rgb16, csc.DTE);
639
640 if (csc.OFM) ipu_vq(decoder.rgb16, indx4);
641
642 if (csc.OFM)
643 {
644 ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*) & decoder.rgb16) + 4 * ipu_cmd.pos[1], 32 - ipu_cmd.pos[1]);
645
646 if (ipu_cmd.pos[1] < 32) return false;
647 }
648 else
649 {
650 ipu_cmd.pos[1] += ipu_fifo.out.write(((u32*)indx4) + 4 * ipu_cmd.pos[1], 8 - ipu_cmd.pos[1]);
651
652 if (ipu_cmd.pos[1] < 8) return false;
653 }
654
655 ipu_cmd.pos[0] = 0;
656 ipu_cmd.pos[1] = 0;
657 }
658
659 return TRUE;
660 }
661
662 static void ipuSETTH(u32 val)
663 {
664 s_thresh[0] = (val & 0xff);
665 s_thresh[1] = ((val >> 16) & 0xff);
666 IPU_LOG("SETTH (Set threshold value)command %x.", val&0xff00ff);
667 }
668
669 // --------------------------------------------------------------------------------------
670 // CORE Functions (referenced from MPEG library)
671 // --------------------------------------------------------------------------------------
672 __fi void ipu_csc(macroblock_8& mb8, macroblock_rgb32& rgb32, int sgn)
673 {
674 int i;
675 u8* p = (u8*)&rgb32;
676
677 yuv2rgb();
678
679 if (s_thresh[0] > 0)
680 {
681 for (i = 0; i < 16*16; i++, p += 4)
682 {
683 if ((p[0] < s_thresh[0]) && (p[1] < s_thresh[0]) && (p[2] < s_thresh[0]))
684 *(u32*)p = 0;
685 else if ((p[0] < s_thresh[1]) && (p[1] < s_thresh[1]) && (p[2] < s_thresh[1]))
686 p[3] = 0x40;
687 }
688 }
689 else if (s_thresh[1] > 0)
690 {
691 for (i = 0; i < 16*16; i++, p += 4)
692 {
693 if ((p[0] < s_thresh[1]) && (p[1] < s_thresh[1]) && (p[2] < s_thresh[1]))
694 p[3] = 0x40;
695 }
696 }
697 if (sgn)
698 {
699 for (i = 0; i < 16*16; i++, p += 4)
700 {
701 *(u32*)p ^= 0x808080;
702 }
703 }
704 }
705
706 __fi void ipu_dither(const macroblock_rgb32& rgb32, macroblock_rgb16& rgb16, int dte)
707 {
708 int i, j;
709 for (i = 0; i < 16; ++i)
710 {
711 for (j = 0; j < 16; ++j)
712 {
713 rgb16.c[i][j].r = rgb32.c[i][j].r >> 3;
714 rgb16.c[i][j].g = rgb32.c[i][j].g >> 3;
715 rgb16.c[i][j].b = rgb32.c[i][j].b >> 3;
716 rgb16.c[i][j].a = rgb32.c[i][j].a == 0x40;
717 }
718 }
719 }
720
721 __fi void ipu_vq(macroblock_rgb16& rgb16, u8* indx4)
722 {
723 Console.Error("IPU: VQ not implemented");
724 }
725
726 __fi void ipu_copy(const macroblock_8& mb8, macroblock_16& mb16)
727 {
728 const u8 *s = (const u8*)&mb8;
729 s16 *d = (s16*)&mb16;
730 int i;
731 for (i = 0; i < 256; i++) *d++ = *s++; //Y bias - 16
732 for (i = 0; i < 64; i++) *d++ = *s++; //Cr bias - 128
733 for (i = 0; i < 64; i++) *d++ = *s++; //Cb bias - 128
734 }
735
736
737 // --------------------------------------------------------------------------------------
738 // Buffer reader
739 // --------------------------------------------------------------------------------------
740
741 // move the readbits queue
742 __fi void inc_readbits()
743 {
744 readbits += 16;
745 if (readbits >= _readbits + 64)
746 {
747 // move back
748 *(u64*)(_readbits) = *(u64*)(_readbits + 64);
749 *(u64*)(_readbits + 8) = *(u64*)(_readbits + 72);
750 readbits = _readbits;
751 }
752 }
753
754 // returns the pointer of readbits moved by 1 qword
755 __fi u8* next_readbits()
756 {
757 return readbits + 16;
758 }
759
760 // returns the pointer of readbits moved by 1 qword
761 u8* prev_readbits()
762 {
763 if (readbits < _readbits + 16) return _readbits + 48 - (readbits - _readbits);
764
765 return readbits - 16;
766 }
767
768 void ReorderBitstream()
769 {
770 readbits = prev_readbits();
771 g_BP.FP = 2;
772 }
773
774 // IPU has a 2qword internal buffer whose status is pointed by FP.
775 // If FP is 1, there's 1 qword in buffer. Second qword is only loaded
776 // incase there are less than 32bits available in the first qword.
777 // \return Number of bits available (clamps at 16 bits)
778 u16 __fastcall FillInternalBuffer(u32 * pointer, u32 advance, u32 size)
779 {
780 if (g_BP.FP == 0)
781 {
782 if (ipu_fifo.in.read(next_readbits()) == 0) return 0;
783
784 inc_readbits();
785 g_BP.FP = 1;
786 }
787
788 if ((g_BP.FP < 2) && ((*(int*)pointer + size) >= 128))
789 {
790 if (ipu_fifo.in.read(next_readbits())) g_BP.FP += 1;
791 }
792
793 if (*(int*)pointer >= 128)
794 {
795 pxAssert(g_BP.FP >= 1);
796
797 if (g_BP.FP > 1) inc_readbits();
798
799 if (advance)
800 {
801 g_BP.FP--;
802 *pointer &= 127;
803 }
804 }
805
806 return (g_BP.FP >= 1) ? g_BP.FP * 128 - (*(int*)pointer) : 0;
807 }
808
809 // whenever reading fractions of bytes. The low bits always come from the next byte
810 // while the high bits come from the current byte
811 u8 __fastcall getBits128(u8 *address, u32 advance)
812 {
813 u64 mask2;
814 u128 mask;
815 u8* readpos;
816
817 // Check if the current BP has exceeded or reached the limit of 128
818 if (FillInternalBuffer(&g_BP.BP, 1, 128) < 128) return 0;
819
820 readpos = readbits + (int)g_BP.BP / 8;
821
822 if (uint shift = (g_BP.BP & 7))
823 {
824 mask2 = 0xff >> shift;
825 mask.lo = mask2 | (mask2 << 8) | (mask2 << 16) | (mask2 << 24) | (mask2 << 32) | (mask2 << 40) | (mask2 << 48) | (mask2 << 56);
826 mask.hi = mask2 | (mask2 << 8) | (mask2 << 16) | (mask2 << 24) | (mask2 << 32) | (mask2 << 40) | (mask2 << 48) | (mask2 << 56);
827
828 u128 notMask;
829 u128 data = *(u128*)(readpos + 1);
830 notMask.lo = ~mask.lo & data.lo;
831 notMask.hi = ~mask.hi & data.hi;
832 notMask.lo >>= 8 - shift;
833 notMask.lo |= (notMask.hi & (ULLONG_MAX >> (64 - shift))) << (64 - shift);
834 notMask.hi >>= 8 - shift;
835
836 mask.hi = (((*(u128*)readpos).hi & mask.hi) << shift) | (((*(u128*)readpos).lo & mask.lo) >> (64 - shift));
837 mask.lo = ((*(u128*)readpos).lo & mask.lo) << shift;
838
839 notMask.lo |= mask.lo;
840 notMask.hi |= mask.hi;
841 *(u128*)address = notMask;
842 }
843 else
844 {
845 *(u128*)address = *(u128*)readpos;
846 }
847
848 if (advance) g_BP.BP += 128;
849
850 return 1;
851 }
852
853 // whenever reading fractions of bytes. The low bits always come from the next byte
854 // while the high bits come from the current byte
855 u8 __fastcall getBits64(u8 *address, u32 advance)
856 {
857 register u64 mask = 0;
858 u8* readpos;
859
860 // Check if the current BP has exceeded or reached the limit of 128
861 if (FillInternalBuffer(&g_BP.BP, 1, 64) < 64) return 0;
862
863 readpos = readbits + (int)g_BP.BP / 8;
864
865 if (uint shift = (g_BP.BP & 7))
866 {
867 mask = (0xff >> shift);
868 mask = mask | (mask << 8) | (mask << 16) | (mask << 24) | (mask << 32) | (mask << 40) | (mask << 48) | (mask << 56);
869
870 *(u64*)address = ((~mask & *(u64*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u64*)readpos) << shift);
871 }
872 else
873 {
874 *(u64*)address = *(u64*)readpos;
875 }
876
877 if (advance) g_BP.BP += 64;
878
879 return 1;
880 }
881
882 // whenever reading fractions of bytes. The low bits always come from the next byte
883 // while the high bits come from the current byte
884 u8 __fastcall getBits32(u8 *address, u32 advance)
885 {
886 u32 mask;
887 u8* readpos;
888
889 // Check if the current BP has exceeded or reached the limit of 128
890 if (FillInternalBuffer(&g_BP.BP, 1, 32) < 32) return 0;
891
892 readpos = readbits + (int)g_BP.BP / 8;
893
894 if (uint shift = (g_BP.BP & 7))
895 {
896 mask = (0xff >> shift);
897 mask = mask | (mask << 8) | (mask << 16) | (mask << 24);
898
899 *(u32*)address = ((~mask & *(u32*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u32*)readpos) << shift);
900 }
901 else
902 {
903 *(u32*)address = *(u32*)readpos;
904 }
905
906 if (advance) g_BP.BP += 32;
907
908 return 1;
909 }
910
911 __fi u8 __fastcall getBits16(u8 *address, u32 advance)
912 {
913 u32 mask;
914 u8* readpos;
915
916 // Check if the current BP has exceeded or reached the limit of 128
917 if (FillInternalBuffer(&g_BP.BP, 1, 16) < 16) return 0;
918
919 readpos = readbits + (int)g_BP.BP / 8;
920
921 if (uint shift = (g_BP.BP & 7))
922 {
923 mask = (0xff >> shift);
924 mask = mask | (mask << 8);
925
926 *(u16*)address = ((~mask & *(u16*)(readpos + 1)) >> (8 - shift)) | (((mask) & *(u16*)readpos) << shift);
927 }
928 else
929 {
930 *(u16*)address = *(u16*)readpos;
931 }
932
933 if (advance) g_BP.BP += 16;
934
935 return 1;
936 }
937
938 u8 __fastcall getBits8(u8 *address, u32 advance)
939 {
940 u32 mask;
941 u8* readpos;
942
943 // Check if the current BP has exceeded or reached the limit of 128
944 if (FillInternalBuffer(&g_BP.BP, 1, 8) < 8)
945 return 0;
946
947 readpos = readbits + (int)g_BP.BP / 8;
948
949 if (uint shift = (g_BP.BP & 7))
950 {
951 mask = (0xff >> shift);
952 *(u8*)address = (((~mask) & readpos[1]) >> (8 - shift)) | (((mask) & *readpos) << shift);
953 }
954 else
955 {
956 *(u8*)address = *(u8*)readpos;
957 }
958
959 if (advance) g_BP.BP += 8;
960
961 return 1;
962 }
963
964 // --------------------------------------------------------------------------------------
965 // IPU Worker / Dispatcher
966 // --------------------------------------------------------------------------------------
967 void IPUCMD_WRITE(u32 val)
968 {
969 // don't process anything if currently busy
970 if (ipuRegs.ctrl.BUSY) Console.WriteLn("IPU BUSY!"); // wait for thread
971
972 ipuRegs.ctrl.ECD = 0;
973 ipuRegs.ctrl.SCD = 0; //clear ECD/SCD
974 ipu_cmd.clear();
975 ipu_cmd.current = val;
976
977 switch (val >> 28)
978 {
979 case SCE_IPU_BCLR:
980 ipuBCLR(val);
981 hwIntcIrq(INTC_IPU); //DMAC_TO_IPU
982 return;
983
984 case SCE_IPU_VDEC:
985
986 g_BP.BP += val & 0x3F;
987
988 // check if enough data in queue
989 if (ipuVDEC(val)) return;
990
991 ipuRegs.cmd.BUSY = 0x80000000;
992 ipuRegs.topbusy = 0x80000000;
993 break;
994
995 case SCE_IPU_FDEC:
996 IPU_LOG("FDEC command. Skip 0x%X bits, FIFO 0x%X qwords, BP 0x%X, FP %d, CHCR 0x%x",
997 val & 0x3f, g_BP.IFC, (int)g_BP.BP, g_BP.FP, ipu1dma.chcr._u32);
998 g_BP.BP += val & 0x3F;
999 if (ipuFDEC(val)) return;
1000 ipuRegs.cmd.BUSY = 0x80000000;
1001 ipuRegs.topbusy = 0x80000000;
1002 break;
1003
1004 case SCE_IPU_SETTH:
1005 ipuSETTH(val);
1006 hwIntcIrq(INTC_IPU);
1007 return;
1008
1009 case SCE_IPU_SETIQ:
1010 IPU_LOG("SETIQ command.");
1011 if (val & 0x3f) IPU_LOG("Skip %d bits.", val & 0x3f);
1012 g_BP.BP += val & 0x3F;
1013 if (ipuSETIQ(val)) return;
1014 break;
1015
1016 case SCE_IPU_SETVQ:
1017 if (ipuSETVQ(val)) return;
1018 break;
1019
1020 case SCE_IPU_CSC:
1021 ipu_cmd.pos[1] = 0;
1022 ipu_cmd.index = 0;
1023
1024 if (ipuCSC(val))
1025 {
1026 IPU_INT0_FROM();
1027 return;
1028 }
1029 break;
1030
1031 case SCE_IPU_PACK:
1032 ipu_cmd.pos[1] = 0;
1033 ipu_cmd.index = 0;
1034 if (ipuPACK(val)) return;
1035 break;
1036
1037 case SCE_IPU_IDEC:
1038 if (ipuIDEC(val, false))
1039 {
1040 // idec done, ipu0 done too
1041 IPU_INT0_FROM();
1042 return;
1043 }
1044
1045 ipuRegs.topbusy = 0x80000000;
1046 break;
1047
1048 case SCE_IPU_BDEC:
1049 if (ipuBDEC(val, false))
1050 {
1051 IPU_INT0_FROM();
1052 if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU);
1053 return;
1054 }
1055 else
1056 {
1057 ipuRegs.topbusy = 0x80000000;
1058 }
1059 break;
1060 }
1061
1062 // have to resort to the thread
1063 ipuRegs.ctrl.BUSY = 1;
1064 if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
1065 }
1066
1067 void IPUWorker()
1068 {
1069 pxAssert(ipuRegs.ctrl.BUSY);
1070
1071 switch (ipu_cmd.CMD)
1072 {
1073 case SCE_IPU_VDEC:
1074 if (!ipuVDEC(ipu_cmd.current))
1075 {
1076 if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
1077 return;
1078 }
1079 ipuRegs.cmd.BUSY = 0;
1080 ipuRegs.topbusy = 0;
1081 break;
1082
1083 case SCE_IPU_FDEC:
1084 if (!ipuFDEC(ipu_cmd.current))
1085 {
1086 if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
1087 return;
1088 }
1089 ipuRegs.cmd.BUSY = 0;
1090 ipuRegs.topbusy = 0;
1091 break;
1092
1093 case SCE_IPU_SETIQ:
1094 if (!ipuSETIQ(ipu_cmd.current))
1095 {
1096 if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
1097 return;
1098 }
1099 break;
1100
1101 case SCE_IPU_SETVQ:
1102 if (!ipuSETVQ(ipu_cmd.current))
1103 {
1104 if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
1105 return;
1106 }
1107 break;
1108
1109 case SCE_IPU_CSC:
1110 if (!ipuCSC(ipu_cmd.current))
1111 {
1112 if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
1113 return;
1114 }
1115 IPU_INT0_FROM();
1116 break;
1117
1118 case SCE_IPU_PACK:
1119 if (!ipuPACK(ipu_cmd.current))
1120 {
1121 if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
1122 return;
1123 }
1124 break;
1125
1126 case SCE_IPU_IDEC:
1127 if (!ipuIDEC(ipu_cmd.current, true))
1128 {
1129 if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
1130 return;
1131 }
1132
1133 ipuRegs.ctrl.OFC = 0;
1134 ipuRegs.ctrl.BUSY = 0;
1135 ipuRegs.topbusy = 0;
1136 ipuRegs.cmd.BUSY = 0;
1137 ipu_cmd.current = 0xffffffff;
1138
1139 // CHECK!: IPU0dma remains when IDEC is done, so we need to clear it
1140 IPU_INT0_FROM();
1141 break;
1142
1143 case SCE_IPU_BDEC:
1144 if (!ipuBDEC(ipu_cmd.current, true))
1145 {
1146 if(ipu1dma.chcr.STR == false) hwIntcIrq(INTC_IPU);
1147 return;
1148 }
1149
1150 ipuRegs.ctrl.BUSY = 0;
1151 ipuRegs.topbusy = 0;
1152 ipuRegs.cmd.BUSY = 0;
1153 ipu_cmd.current = 0xffffffff;
1154
1155 IPU_INT0_FROM();
1156 if (ipuRegs.ctrl.SCD || ipuRegs.ctrl.ECD) hwIntcIrq(INTC_IPU);
1157 return;
1158
1159 default:
1160 Console.WriteLn("Unknown IPU command: %08x", ipu_cmd.current);
1161 break;
1162 }
1163
1164 // success
1165 ipuRegs.ctrl.BUSY = 0;
1166 ipu_cmd.current = 0xffffffff;
1167 }

  ViewVC Help
Powered by ViewVC 1.1.22