/[pcsx2_0.9.7]/trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp
ViewVC logotype

Contents of /trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 280 - (show annotations) (download)
Thu Dec 23 12:02:12 2010 UTC (9 years, 10 months ago) by william
File size: 23135 byte(s)
re-commit (had local access denied errors when committing)
1 /*
2 * Mpeg.c
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
5 * Modified by Florin for PCSX2 emu
6 *
7 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
8 * See http://libmpeg2.sourceforge.net/ for updates.
9 *
10 * mpeg2dec is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * mpeg2dec is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 */
24
25 // [Air] Note: many functions in this module are large and only used once, so they
26 // have been forced to inline since it won't bloat the program and gets rid of
27 // some call overhead.
28
29 #include "PrecompiledHeader.h"
30
31 #include "Common.h"
32 #include "IPU/IPU.h"
33 #include "Mpeg.h"
34 #include "Vlc.h"
35
36 #include "Utilities/MemsetFast.inl"
37
// Quantizer scale lookup, indexed by the 5-bit quantizer_scale_code.
// get_quantizer_scale() reads it when decoder.q_scale_type is set; otherwise
// the code is simply doubled.  The step between entries doubles every
// eight codes (1, 2, 4, 8).
const int non_linear_quantizer_scale [] =
{
	// codes 0..7, step 1
	0, 1, 2, 3, 4, 5, 6, 7,
	// codes 8..15, step 2
	8, 10, 12, 14, 16, 18, 20, 22,
	// codes 16..23, step 4
	24, 28, 32, 36, 40, 44, 48, 52,
	// codes 24..31, step 8
	56, 64, 72, 80, 88, 96, 104, 112
};
45
46 /* The bitstream and buffer need to be reallocated in order to successfully
47 read the old data. Here the old data stored in the 2nd slot
48 of the internal buffer is copied to the 1st slot, and the new data read
49 into the 1st slot is copied to the 2nd slot, which will later be copied
50 back to the 1st slot when 128 bits have been read.
51 */
// DCT VLC table entry most recently selected by get_intra_block() /
// get_non_intra_block().  It lives at file scope (not as a local) because
// those functions can return false between picking the entry and consuming
// the level/sign bits; on the next call they resume in "case 1" and still
// read tab->run / tab->level.
52 const DCTtab * tab;
// Running macroblock-address-increment count.  mpeg2sliceIDEC() clears it after
// each macroblock has been written out, then adds 33 per macroblock_escape plus
// the decoded increment; mpeg2_slice() only clears it.
53 int mbaCount = 0;
54
55 int bitstream_init ()
56 {
57 return g_BP.FillBuffer(32);
58 }
59
60 int get_macroblock_modes()
61 {
62 int macroblock_modes;
63 const MBtab * tab;
64
65 switch (decoder.coding_type)
66 {
67 case I_TYPE:
68 macroblock_modes = UBITS(2);
69
70 if (macroblock_modes == 0) return 0; // error
71
72 tab = MB_I + (macroblock_modes >> 1);
73 DUMPBITS(tab->len);
74 macroblock_modes = tab->modes;
75
76 if ((!(decoder.frame_pred_frame_dct)) &&
77 (decoder.picture_structure == FRAME_PICTURE))
78 {
79 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
80 }
81 return macroblock_modes;
82
83 case P_TYPE:
84 macroblock_modes = UBITS(6);
85
86 if (macroblock_modes == 0) return 0; // error
87
88 tab = MB_P + (macroblock_modes >> 1);
89 DUMPBITS(tab->len);
90 macroblock_modes = tab->modes;
91
92 if (decoder.picture_structure != FRAME_PICTURE)
93 {
94 if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
95 {
96 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
97 }
98
99 return macroblock_modes;
100 }
101 else if (decoder.frame_pred_frame_dct)
102 {
103 if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
104 macroblock_modes |= MC_FRAME;
105
106 return macroblock_modes;
107 }
108 else
109 {
110 if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
111 {
112 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
113 }
114
115 if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
116 {
117 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
118 }
119
120 return macroblock_modes;
121 }
122
123 case B_TYPE:
124 macroblock_modes = UBITS(6);
125
126 if (macroblock_modes == 0) return 0; // error
127
128 tab = MB_B + macroblock_modes;
129 DUMPBITS(tab->len);
130 macroblock_modes = tab->modes;
131
132 if (decoder.picture_structure != FRAME_PICTURE)
133 {
134 if (!(macroblock_modes & MACROBLOCK_INTRA))
135 {
136 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
137 }
138 return (macroblock_modes | (tab->len << 16));
139 }
140 else if (decoder.frame_pred_frame_dct)
141 {
142 /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
143 macroblock_modes |= MC_FRAME;
144 return (macroblock_modes | (tab->len << 16));
145 }
146 else
147 {
148 if (macroblock_modes & MACROBLOCK_INTRA) goto intra;
149
150 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
151
152 if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
153 {
154 intra:
155 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
156 }
157 return (macroblock_modes | (tab->len << 16));
158 }
159
160 case D_TYPE:
161 macroblock_modes = GETBITS(1);
162 //I suspect (as this is actually a 2 bit command) that this should be getbits(2)
163 //additionally, we arent dumping any bits here when i think we should be, need a game to test. (Refraction)
164 DevCon.Warning(" Rare MPEG command! ");
165 if (macroblock_modes == 0) return 0; // error
166 return (MACROBLOCK_INTRA | (1 << 16));
167
168 default:
169 return 0;
170 }
171 }
172
173 static __fi int get_quantizer_scale()
174 {
175 int quantizer_scale_code;
176
177 quantizer_scale_code = GETBITS(5);
178
179 if (decoder.q_scale_type)
180 return non_linear_quantizer_scale [quantizer_scale_code];
181 else
182 return quantizer_scale_code << 1;
183 }
184
185 static __fi int get_coded_block_pattern()
186 {
187 const CBPtab * tab;
188 u16 code = UBITS(16);
189
190 if (code >= 0x2000)
191 tab = CBP_7 + (UBITS(7) - 16);
192 else
193 tab = CBP_9 + UBITS(9);
194
195 DUMPBITS(tab->len);
196 return tab->cbp;
197 }
198
199 int __fi get_motion_delta(const int f_code)
200 {
201 int delta;
202 int sign;
203 const MVtab * tab;
204 u16 code = UBITS(16);
205
206 if ((code & 0x8000))
207 {
208 DUMPBITS(1);
209 return 0x00010000;
210 }
211 else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00))
212 {
213 tab = MV_4 + UBITS(4);
214 }
215 else
216 {
217 tab = MV_10 + UBITS(10);
218 }
219
220 delta = tab->delta + 1;
221 DUMPBITS(tab->len);
222
223 sign = SBITS(1);
224 DUMPBITS(1);
225
226 return (((delta ^ sign) - sign) | (tab->len << 16));
227 }
228
229 int __fi get_dmv()
230 {
231 const DMVtab* tab = DMV_2 + UBITS(2);
232 DUMPBITS(tab->len);
233 return (tab->dmv | (tab->len << 16));
234 }
235
236 int get_macroblock_address_increment()
237 {
238 const MBAtab *mba;
239
240 u16 code = UBITS(16);
241
242 if (code >= 4096)
243 mba = MBA.mba5 + (UBITS(5) - 2);
244 else if (code >= 768)
245 mba = MBA.mba11 + (UBITS(11) - 24);
246 else switch (UBITS(11))
247 {
248 case 8: /* macroblock_escape */
249 DUMPBITS(11);
250 return 0xb0023;
251
252 case 15: /* macroblock_stuffing (MPEG1 only) */
253 if (decoder.mpeg1)
254 {
255 DUMPBITS(11);
256 return 0xb0022;
257 }
258
259 default:
260 return 0;//error
261 }
262
263 DUMPBITS(mba->len);
264
265 return ((mba->mba + 1) | (mba->len << 16));
266 }
267
268 static __fi int get_luma_dc_dct_diff()
269 {
270 int size;
271 int dc_diff;
272 u16 code = UBITS(5);
273
274 if (code < 31)
275 {
276 size = DCtable.lum0[code].size;
277 DUMPBITS(DCtable.lum0[code].len);
278
279 // 5 bits max
280 }
281 else
282 {
283 code = UBITS(9) - 0x1f0;
284 size = DCtable.lum1[code].size;
285 DUMPBITS(DCtable.lum1[code].len);
286
287 // 9 bits max
288 }
289
290 if (size==0)
291 dc_diff = 0;
292 else
293 {
294 dc_diff = GETBITS(size);
295
296 // 6 for tab0 and 11 for tab1
297 if ((dc_diff & (1<<(size-1)))==0)
298 dc_diff-= (1<<size) - 1;
299 }
300
301 return dc_diff;
302 }
303
304 static __fi int get_chroma_dc_dct_diff()
305 {
306 int size;
307 int dc_diff;
308 u16 code = UBITS(5);
309
310 if (code<31)
311 {
312 size = DCtable.chrom0[code].size;
313 DUMPBITS(DCtable.chrom0[code].len);
314 }
315 else
316 {
317 code = UBITS(10) - 0x3e0;
318 size = DCtable.chrom1[code].size;
319 DUMPBITS(DCtable.chrom1[code].len);
320 }
321
322 if (size==0)
323 dc_diff = 0;
324 else
325 {
326 dc_diff = GETBITS(size);
327
328 if ((dc_diff & (1<<(size-1)))==0)
329 {
330 dc_diff-= (1<<size) - 1;
331 }
332 }
333
334 return dc_diff;
335 }
336
// Clamps `val` in place to the signed 12-bit range [-2048, 2047].
//
// `val` must be a modifiable integer lvalue.  It is read exactly once (into a
// local) and written at most once, and every use of the argument is
// parenthesized, so expressions such as *ptr or arr[i] are safe to pass.
//
// How it works: (u32)(x + 2048) > 4095 is true exactly when x lies outside
// [-2048, 2047].  (x >> 31) is then 0 for positive and -1 for negative x
// (arithmetic shift), so XOR with 2047 yields 2047 or -2048 respectively.
#define SATURATE(val) \
	do { \
		const s32 saturate_in_ = (s32)(val); \
		if ((u32)(saturate_in_ + 2048) > 4095) \
			(val) = (saturate_in_ >> 31) ^ 2047; \
	} while (0)
342
// Decodes the AC coefficients of one intra block into decoder.DCTblock.
//
// The function is resumable.  It returns false whenever GETWORD() fails,
// after saving its position in ipu_cmd:
//   pos[4] - coefficient index to resume from (stored as i - 1 because the
//            loop initialiser adds 1 again),
//   pos[5] - 0: the next thing to read is a VLC code,
//            1: a VLC code has been decoded (file-scope `tab` still points at
//               its entry) and the level/sign bits are still outstanding.
// It returns true, with pos[4] reset to 0, once the block is finished:
// on end_of_block, when the index reaches 64, or on an invalid code (< 16).
//
// Index 0 (the DC coefficient) is not handled here; slice_intra_DCT() writes
// decoder.DCTblock[0] before calling this function.
343 static bool get_intra_block()
344 {
345 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
346 const u8 (&quant_matrix)[64] = decoder.iq;
347 int quantizer_scale = decoder.quantizer_scale;
348 s16 * dest = decoder.DCTblock;
349 u16 code;
350
351 /* decode AC coefficients */
352 for (int i=1 + ipu_cmd.pos[4]; ; i++)
353 {
// The switch doubles as the resume point: case 0 falls through into case 1.
354 switch (ipu_cmd.pos[5])
355 {
356 case 0:
357 if (!GETWORD())
358 {
359 ipu_cmd.pos[4] = i - 1;
360 return false;
361 }
362
// Peek 16 bits and pick the VLC table from the magnitude of the prefix.
// The "a" tables are used only when intra_vlc_format is set and the stream
// is not MPEG-1.
363 code = UBITS(16);
364
365 if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
366 {
367 tab = &DCT.next[(code >> 12) - 4];
368 }
369 else if (code >= 1024)
370 {
371 if (decoder.intra_vlc_format && !decoder.mpeg1)
372 {
373 tab = &DCT.tab0a[(code >> 8) - 4];
374 }
375 else
376 {
377 tab = &DCT.tab0[(code >> 8) - 4];
378 }
379 }
380 else if (code >= 512)
381 {
382 if (decoder.intra_vlc_format && !decoder.mpeg1)
383 {
384 tab = &DCT.tab1a[(code >> 6) - 8];
385 }
386 else
387 {
388 tab = &DCT.tab1[(code >> 6) - 8];
389 }
390 }
391
392 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
393 // that should use a single unrolled DCT table instead of five separate tables used
394 // here. Multiple conditional statements are very slow, while modern CPU data caches
395 // have lots of room to spare.
396
397 else if (code >= 256)
398 {
399 tab = &DCT.tab2[(code >> 4) - 16];
400 }
401 else if (code >= 128)
402 {
403 tab = &DCT.tab3[(code >> 3) - 16];
404 }
405 else if (code >= 64)
406 {
407 tab = &DCT.tab4[(code >> 2) - 16];
408 }
409 else if (code >= 32)
410 {
411 tab = &DCT.tab5[(code >> 1) - 16];
412 }
413 else if (code >= 16)
414 {
415 tab = &DCT.tab6[code - 16];
416 }
417 else
418 {
// No table matches a prefix below 16: the block is abandoned here without
// consuming any bits, and the caller is told it is complete.
419 ipu_cmd.pos[4] = 0;
420 return true;
421 }
422
423 DUMPBITS(tab->len);
424
425 if (tab->run==64) /* end_of_block */
426 {
427 ipu_cmd.pos[4] = 0;
428 return true;
429 }
430
// run == 65 marks an escape: the actual run length follows as 6 raw bits.
431 i += (tab->run == 65) ? GETBITS(6) : tab->run;
432 if (i >= 64)
433 {
434 ipu_cmd.pos[4] = 0;
435 return true;
436 }
437
// Intentional fallthrough from case 0; also the entry point when resuming
// with pos[5] == 1.
438 case 1:
439 {
440 if (!GETWORD())
441 {
442 ipu_cmd.pos[4] = i - 1;
443 ipu_cmd.pos[5] = 1;
444 return false;
445 }
446
// The destination is indexed through the scan table (j); the quantiser
// matrix is indexed by the scan position i itself.
// NOTE(review): presumably decoder.iq is stored in scan order -- confirm.
447 uint j = scan[i];
448 int val;
449
450 if (tab->run==65) /* escape */
451 {
452 if(!decoder.mpeg1)
453 {
454 val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
455 DUMPBITS(12);
456 }
457 else
458 {
459 val = SBITS(8);
460 DUMPBITS(8);
461
// Low 7 bits zero means val is 0 or -128: a second byte carries the level.
462 if (!(val & 0x7f))
463 {
464 val = GETBITS(8) + 2 * val;
465 }
466
467 val = (val * quantizer_scale * quant_matrix[i]) >> 4;
468 val = (val + ~ (((s32)val) >> 31)) | 1;
469 }
470 }
471 else
472 {
473 val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
474 if(decoder.mpeg1)
475 {
476 /* oddification */
477 val = (val - 1) | 1;
478 }
479
480 /* if (bitstream_get (1)) val = -val; */
// bit1 is 0 or -1; (val ^ bit1) - bit1 negates val when the sign bit is set.
481 int bit1 = SBITS(1);
482 val = (val ^ bit1) - bit1;
483 DUMPBITS(1);
484 }
485
486 SATURATE(val);
487 dest[j] = val;
// Coefficient stored; the next loop iteration starts with a fresh VLC code.
488 ipu_cmd.pos[5] = 0;
489 }
490 }
491 }
492
// Not reached: the loop above has no exit other than its return statements.
493 ipu_cmd.pos[4] = 0;
494 return true;
495 }
496
// Decodes the coefficients of one non-intra block into decoder.DCTblock.
//
// Resumable in the same way as get_intra_block(): returns false whenever
// GETWORD() fails, after saving
//   pos[4] - coefficient index to resume from (the loop starts at pos[4],
//            i.e. at index 0 for a fresh block),
//   pos[5] - 0: next item is a VLC code, 1: VLC decoded (file-scope `tab`
//            still points at its entry), level/sign bits outstanding.
// Returns true with pos[4] reset to 0 when the block is finished.
//
// last: receives the coefficient index at which decoding stopped, on
//       end_of_block and when the index reaches 64.
//       NOTE(review): *last is NOT written on the invalid-code (< 16) path,
//       which also returns true.
497 static bool get_non_intra_block(int * last)
498 {
499 int i;
500 int j;
501 int val;
502 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
503 const u8 (&quant_matrix)[64] = decoder.niq;
504 int quantizer_scale = decoder.quantizer_scale;
505 s16 * dest = decoder.DCTblock;
506 u16 code;
507
508 /* decode AC coefficients */
509 for (i= ipu_cmd.pos[4] ; ; i++)
510 {
// The switch doubles as the resume point: case 0 falls through into case 1.
511 switch (ipu_cmd.pos[5])
512 {
513 case 0:
514 if (!GETWORD())
515 {
516 ipu_cmd.pos[4] = i;
517 return false;
518 }
519
520 code = UBITS(16);
521
522 if (code >= 16384)
523 {
// The very first coefficient of the block uses its own table.
524 if (i==0)
525 {
526 tab = &DCT.first[(code >> 12) - 4];
527 }
528 else
529 {
530 tab = &DCT.next[(code >> 12)- 4];
531 }
532 }
533 else if (code >= 1024)
534 {
535 tab = &DCT.tab0[(code >> 8) - 4];
536 }
537 else if (code >= 512)
538 {
539 tab = &DCT.tab1[(code >> 6) - 8];
540 }
541
542 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
543 // that should use a single unrolled DCT table instead of five separate tables used
544 // here. Multiple conditional statements are very slow, while modern CPU data caches
545 // have lots of room to spare.
546
547 else if (code >= 256)
548 {
549 tab = &DCT.tab2[(code >> 4) - 16];
550 }
551 else if (code >= 128)
552 {
553 tab = &DCT.tab3[(code >> 3) - 16];
554 }
555 else if (code >= 64)
556 {
557 tab = &DCT.tab4[(code >> 2) - 16];
558 }
559 else if (code >= 32)
560 {
561 tab = &DCT.tab5[(code >> 1) - 16];
562 }
563 else if (code >= 16)
564 {
565 tab = &DCT.tab6[code - 16];
566 }
567 else
568 {
// No table matches a prefix below 16: give up on the block without
// consuming bits.  *last is left untouched on this path.
569 ipu_cmd.pos[4] = 0;
570 return true;
571 }
572
573 DUMPBITS(tab->len);
574
575 if (tab->run==64) /* end_of_block */
576 {
577 *last = i;
578 ipu_cmd.pos[4] = 0;
579 return true;
580 }
581
// run == 65 marks an escape: the actual run length follows as 6 raw bits.
582 i += (tab->run == 65) ? GETBITS(6) : tab->run;
583 if (i >= 64)
584 {
585 *last = i;
586 ipu_cmd.pos[4] = 0;
587 return true;
588 }
589
// Intentional fallthrough from case 0; also the entry point when resuming
// with pos[5] == 1.
590 case 1:
591 if (!GETWORD())
592 {
593 ipu_cmd.pos[4] = i;
594 ipu_cmd.pos[5] = 1;
595 return false;
596 }
597
// The destination is indexed through the scan table (j); the quantiser
// matrix is indexed by the scan position i itself.
598 j = scan[i];
599
600 if (tab->run==65) /* escape */
601 {
602 if (!decoder.mpeg1)
603 {
604 val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5;
605 DUMPBITS(12);
606 }
607 else
608 {
609 val = SBITS(8);
610 DUMPBITS(8);
611
// Low 7 bits zero means val is 0 or -128: a second byte carries the level.
612 if (!(val & 0x7f))
613 {
614 val = GETBITS(8) + 2 * val;
615 }
616
617 val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32;
618 val = (val + ~ (((s32)val) >> 31)) | 1;
619 }
620 }
621 else
622 {
// bit1 is 0 or -1; (val ^ bit1) - bit1 negates val when the sign bit is set.
623 int bit1 = SBITS(1);
624 val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
625 val = (val ^ bit1) - bit1;
626 DUMPBITS(1);
627 }
628
629 SATURATE(val);
630 dest[j] = val;
// Coefficient stored; the next loop iteration starts with a fresh VLC code.
631 ipu_cmd.pos[5] = 0;
632 }
633 }
634
// Not reached: the loop above has no exit other than its return statements.
635 ipu_cmd.pos[4] = 0;
636 return true;
637 }
638
639 static __fi bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
640 {
641 if (!skip || ipu_cmd.pos[3])
642 {
643 ipu_cmd.pos[3] = 0;
644 if (!GETWORD())
645 {
646 ipu_cmd.pos[3] = 1;
647 return false;
648 }
649
650 /* Get the intra DC coefficient and inverse quantize it */
651 if (cc == 0)
652 decoder.dc_dct_pred[0] += get_luma_dc_dct_diff();
653 else
654 decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff();
655
656 decoder.DCTblock[0] = decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision);
657 }
658
659 if (!get_intra_block())
660 {
661 return false;
662 }
663
664 mpeg2_idct_copy(decoder.DCTblock, dest, stride);
665
666 return true;
667 }
668
669 static __fi bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
670 {
671 int last;
672
673 if (!skip)
674 {
675 memzero_sse_a(decoder.DCTblock);
676 }
677
678 if (!get_non_intra_block(&last))
679 {
680 return false;
681 }
682
683 mpeg2_idct_add(last, decoder.DCTblock, dest, stride);
684
685 return true;
686 }
687
688 void __fi finishmpeg2sliceIDEC()
689 {
690 ipuRegs.ctrl.SCD = 0;
691 coded_block_pattern = decoder.coded_block_pattern;
692 }
693
// Resumable IDEC slice decoder: decodes intra macroblocks one after another,
// colour-converts each and pushes it to the IPU output FIFO.
//
// Returns false when it has to suspend (bitstream ran dry or the output FIFO
// is full) and true when the slice is finished.  All progress is kept in
// ipu_cmd.pos[] so that the next call continues where this one stopped; every
// `case` label below is therefore also a resume point, and cases fall through
// into one another on purpose.
//   pos[0] - top-level stage: 0 setup, 1 bitstream init, 2 macroblock loop,
//            3 start-code check, 4 read of the next 32 bits into ipuRegs.top.
//   pos[1] - stage within the macroblock loop: 0 header, 1 block decode,
//            2 FIFO output, 3 address increment, 4 refill.
//   pos[2] - which of the six blocks (1..6) was in progress when suspended.
694 __fi bool mpeg2sliceIDEC()
695 {
696 u16 code;
697
698 switch (ipu_cmd.pos[0])
699 {
700 case 0:
// Fresh command: reset all three DC predictors and clear top / ECD.
701 decoder.dc_dct_pred[0] =
702 decoder.dc_dct_pred[1] =
703 decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
704
705 ipuRegs.top = 0;
706 ipuRegs.ctrl.ECD = 0;
707
708 case 1:
709 ipu_cmd.pos[0] = 1;
710 if (!bitstream_init())
711 {
712 return false;
713 }
714
715 case 2:
716 ipu_cmd.pos[0] = 2;
// One iteration per macroblock; left only via `return false` or the
// `goto finish_idec` in stage 3.
717 while (1)
718 {
719 macroblock_8& mb8 = decoder.mb8;
720 macroblock_rgb16& rgb16 = decoder.rgb16;
721 macroblock_rgb32& rgb32 = decoder.rgb32;
722
723 int DCT_offset, DCT_stride;
724 const MBAtab * mba;
725
726 switch (ipu_cmd.pos[1])
727 {
728 case 0:
// Macroblock header: modes, optional quantizer scale, then clear the
// working buffers.  All six blocks are always coded in this path.
729 decoder.macroblock_modes = get_macroblock_modes();
730
731 if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC
732 {
733 decoder.quantizer_scale = get_quantizer_scale();
734 }
735
736 decoder.coded_block_pattern = 0x3F;//all 6 blocks
737 memzero_sse_a(mb8);
738 memzero_sse_a(rgb32);
739
740 case 1:
741 ipu_cmd.pos[1] = 1;
742
// Recomputed on every entry (also when resuming) because these are locals.
// Interlaced DCT: the lower luma blocks start one line down and every block
// skips alternate lines; otherwise they start eight lines down.
743 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
744 {
745 DCT_offset = decoder_stride;
746 DCT_stride = decoder_stride * 2;
747 }
748 else
749 {
750 DCT_offset = decoder_stride * 8;
751 DCT_stride = decoder_stride;
752 }
753
// Four luma blocks, then Cb, then Cr.  The last argument tells
// slice_intra_DCT() whether this particular block is being resumed.
754 switch (ipu_cmd.pos[2])
755 {
756 case 0:
757 case 1:
758 if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1))
759 {
760 ipu_cmd.pos[2] = 1;
761 return false;
762 }
763 case 2:
764 if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
765 {
766 ipu_cmd.pos[2] = 2;
767 return false;
768 }
769 case 3:
770 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
771 {
772 ipu_cmd.pos[2] = 3;
773 return false;
774 }
775 case 4:
776 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
777 {
778 ipu_cmd.pos[2] = 4;
779 return false;
780 }
781 case 5:
782 if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5))
783 {
784 ipu_cmd.pos[2] = 5;
785 return false;
786 }
787 case 6:
788 if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6))
789 {
790 ipu_cmd.pos[2] = 6;
791 return false;
792 }
793 break;
794
795 jNO_DEFAULT;
796 }
797
798 // Send The MacroBlock via DmaIpuFrom
// Colour-convert to RGB32; when decoder.ofm is non-zero the result is
// additionally dithered down to RGB16 and that buffer is sent instead.
799 ipu_csc(mb8, rgb32, decoder.sgn);
800
801 if (decoder.ofm == 0)
802 decoder.SetOutputTo(rgb32);
803 else
804 {
805 ipu_dither(rgb32, rgb16, decoder.dte);
806 decoder.SetOutputTo(rgb16);
807 }
808
809 case 2:
810 {
811 pxAssume(decoder.ipu0_data > 0);
812
// Push as much of the pending output as the FIFO accepts.
813 uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
814 decoder.AdvanceIpuDataBy(read);
815
816 if (decoder.ipu0_data != 0)
817 {
818 // IPU FIFO filled up -- Will have to finish transferring later.
819 ipu_cmd.pos[1] = 2;
820 return false;
821 }
822
823 mbaCount = 0;
824 }
825
826 case 3:
// Decode the macroblock address increment.  Escape codes add 33 each and
// stuffing is skipped; any other unmatched code ends the slice.
827 while (1)
828 {
829 if (!GETWORD())
830 {
831 ipu_cmd.pos[1] = 3;
832 return false;
833 }
834
835 code = UBITS(16);
836 if (code >= 0x1000)
837 {
838 mba = MBA.mba5 + (UBITS(5) - 2);
839 break;
840 }
841 else if (code >= 0x0300)
842 {
843 mba = MBA.mba11 + (UBITS(11) - 24);
844 break;
845 }
846 else switch (UBITS(11))
847 {
848 case 8: /* macroblock_escape */
849 mbaCount += 33;
850 /* pass through */
851
852 case 15: /* macroblock_stuffing (MPEG1 only) */
853 DUMPBITS(11);
854 continue;
855
856 default: /* end of slice/frame, or error? */
857 {
// Jumps out of both the inner loop and the macroblock loop.
858 goto finish_idec;
859 }
860 }
861 }
862
863 DUMPBITS(mba->len);
864 mbaCount += mba->mba;
865
// A non-zero count resets the DC predictors before the next macroblock.
866 if (mbaCount)
867 {
868 decoder.dc_dct_pred[0] =
869 decoder.dc_dct_pred[1] =
870 decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
871 }
872
873 case 4:
// Make sure input is available before starting the next macroblock header.
874 if (!GETWORD())
875 {
876 ipu_cmd.pos[1] = 4;
877 return false;
878 }
879
880 break;
881
882 jNO_DEFAULT;
883 }
884
// Macroblock complete: rewind the per-macroblock state for the next one.
885 ipu_cmd.pos[1] = 0;
886 ipu_cmd.pos[2] = 0;
887 }
888
// Reached only through the goto above.
889 finish_idec:
890 finishmpeg2sliceIDEC();
891
892 case 3:
893 {
// Peek at the next 8 bits; a zero byte causes the bit pointer to be aligned
// and the SCD flag to be set.
894 u8 bit8;
895 if (!getBits8((u8*)&bit8, 0))
896 {
897 ipu_cmd.pos[0] = 3;
898 return false;
899 }
900
901 if (bit8 == 0)
902 {
903 g_BP.Align();
904 ipuRegs.ctrl.SCD = 1;
905 }
906 }
907
908 case 4:
// Latch the next 32 bits of the stream into ipuRegs.top (byte-swapped).
909 if (!getBits32((u8*)&ipuRegs.top, 0))
910 {
911 ipu_cmd.pos[0] = 4;
912 return false;
913 }
914
915 ipuRegs.top = BigEndian(ipuRegs.top);
916 break;
917
918 jNO_DEFAULT;
919 }
920
921 return true;
922 }
923
// Resumable decoder for a single macroblock whose result is sent to the IPU
// output FIFO as 16-bit samples (decoder.mb16).
//
// decoder.macroblock_modes is only read here, never assigned, so it must have
// been set up before this function runs.  Intra macroblocks decode all six
// blocks into mb8 and are then widened into mb16; non-intra macroblocks with
// MACROBLOCK_PATTERN decode just the blocks selected by the coded block
// pattern, straight into mb16.
//
// Returns false when it has to suspend (bitstream ran dry or the output FIFO
// is full) and true when finished.  Progress lives in ipu_cmd.pos[]; every
// `case` label is also a resume point and cases fall through on purpose.
//   pos[0] - stage: 0 setup, 1 bitstream init, 2 block decode, 3 FIFO output,
//            4 start-code check, 5 read of the next 32 bits into ipuRegs.top.
//   pos[1] - which block (1..6) was in progress when suspended.
924 __fi bool mpeg2_slice()
925 {
926 int DCT_offset, DCT_stride;
927
928 macroblock_8& mb8 = decoder.mb8;
929 macroblock_16& mb16 = decoder.mb16;
930
931 switch (ipu_cmd.pos[0])
932 {
933 case 0:
// DC predictors are reset only when decoder.dcr asks for it.
934 if (decoder.dcr)
935 {
936 decoder.dc_dct_pred[0] =
937 decoder.dc_dct_pred[1] =
938 decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
939 }
940
941 ipuRegs.ctrl.ECD = 0;
942 ipuRegs.top = 0;
943 memzero_sse_a(mb8);
944 memzero_sse_a(mb16);
945 case 1:
946 if (!bitstream_init())
947 {
948 ipu_cmd.pos[0] = 1;
949 return false;
950 }
951
952 case 2:
953 ipu_cmd.pos[0] = 2;
954
// Recomputed on every entry (also when resuming) because these are locals.
// Interlaced DCT: the lower luma blocks start one line down and every block
// skips alternate lines; otherwise they start eight lines down.
955 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
956 {
957 DCT_offset = decoder_stride;
958 DCT_stride = decoder_stride * 2;
959 }
960 else
961 {
962 DCT_offset = decoder_stride * 8;
963 DCT_stride = decoder_stride;
964 }
965
966 if (decoder.macroblock_modes & MACROBLOCK_INTRA)
967 {
// Intra: all six blocks are present.  The last argument tells
// slice_intra_DCT() whether this particular block is being resumed.
968 switch(ipu_cmd.pos[1])
969 {
970 case 0:
971 decoder.coded_block_pattern = 0x3F;
972 case 1:
973 if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1))
974 {
975 ipu_cmd.pos[1] = 1;
976 return false;
977 }
978 case 2:
979 if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
980 {
981 ipu_cmd.pos[1] = 2;
982 return false;
983 }
984 case 3:
985 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
986 {
987 ipu_cmd.pos[1] = 3;
988 return false;
989 }
990 case 4:
991 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
992 {
993 ipu_cmd.pos[1] = 4;
994 return false;
995 }
996 case 5:
997 if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
998 {
999 ipu_cmd.pos[1] = 5;
1000 return false;
1001 }
1002 case 6:
1003 if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1004 {
1005 ipu_cmd.pos[1] = 6;
1006 return false;
1007 }
1008 break;
1009
1010 jNO_DEFAULT;
1011 }
1012
1013 // Copy macroblock8 to macroblock16 - without sign extension.
1014 // Manually inlined due to MSVC refusing to inline the SSE-optimized version.
1015 {
1016 const u8 *s = (const u8*)&mb8;
1017 u16 *d = (u16*)&mb16;
1018
1019 //Y bias - 16 * 16
1020 //Cr bias - 8 * 8
1021 //Cb bias - 8 * 8
1022
// Interleaving each byte with a zero byte zero-extends it to 16 bits.
1023 __m128i zeroreg = _mm_setzero_si128();
1024
// 384 source bytes in total; each iteration widens 32 bytes into 32 u16.
1025 for (uint i = 0; i < (256+64+64) / 32; ++i)
1026 {
1027 //*d++ = *s++;
1028 __m128i woot1 = _mm_load_si128((__m128i*)s);
1029 __m128i woot2 = _mm_load_si128((__m128i*)s+1);
1030 _mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg));
1031 _mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg));
1032 _mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg));
1033 _mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg));
1034 s += 32;
1035 d += 32;
1036 }
1037 }
1038 }
1039 else
1040 {
// Non-intra: nothing is decoded unless MACROBLOCK_PATTERN is set.
1041 if (decoder.macroblock_modes & MACROBLOCK_PATTERN)
1042 {
// Pattern bits, most significant first: 0x20/0x10/0x08/0x04 select the four
// luma blocks, 0x2 selects Cb and 0x1 selects Cr.
1043 switch(ipu_cmd.pos[1])
1044 {
1045 case 0:
1046 decoder.coded_block_pattern = get_coded_block_pattern(); // max 9bits
1047 case 1:
1048 if (decoder.coded_block_pattern & 0x20)
1049 {
1050 if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1))
1051 {
1052 ipu_cmd.pos[1] = 1;
1053 return false;
1054 }
1055 }
1056 case 2:
1057 if (decoder.coded_block_pattern & 0x10)
1058 {
1059 if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
1060 {
1061 ipu_cmd.pos[1] = 2;
1062 return false;
1063 }
1064 }
1065 case 3:
1066 if (decoder.coded_block_pattern & 0x08)
1067 {
1068 if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
1069 {
1070 ipu_cmd.pos[1] = 3;
1071 return false;
1072 }
1073 }
1074 case 4:
1075 if (decoder.coded_block_pattern & 0x04)
1076 {
1077 if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
1078 {
1079 ipu_cmd.pos[1] = 4;
1080 return false;
1081 }
1082 }
1083 case 5:
1084 if (decoder.coded_block_pattern & 0x2)
1085 {
1086 if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
1087 {
1088 ipu_cmd.pos[1] = 5;
1089 return false;
1090 }
1091 }
1092 case 6:
1093 if (decoder.coded_block_pattern & 0x1)
1094 {
1095 if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1096 {
1097 ipu_cmd.pos[1] = 6;
1098 return false;
1099 }
1100 }
1101 break;
1102
1103 jNO_DEFAULT;
1104 }
1105 }
1106 }
1107
1108 // Send The MacroBlock via DmaIpuFrom
// Clear SCD, publish the pattern, and queue mb16 as the pending output.
1109 ipuRegs.ctrl.SCD = 0;
1110 coded_block_pattern = decoder.coded_block_pattern;
1111
1112 decoder.SetOutputTo(mb16);
1113
1114 case 3:
1115 {
1116 pxAssume(decoder.ipu0_data > 0);
1117
// Push as much of the pending output as the FIFO accepts.
1118 uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
1119 decoder.AdvanceIpuDataBy(read);
1120
1121 if (decoder.ipu0_data != 0)
1122 {
1123 // IPU FIFO filled up -- Will have to finish transferring later.
1124 ipu_cmd.pos[0] = 3;
1125 return false;
1126 }
1127
1128 mbaCount = 0;
1129 }
1130
1131 case 4:
1132 {
// Peek at the next 8 bits; a zero byte causes the bit pointer to be aligned
// and the SCD flag to be set.
1133 u8 bit8;
1134 if (!getBits8((u8*)&bit8, 0))
1135 {
1136 ipu_cmd.pos[0] = 4;
1137 return false;
1138 }
1139
1140 if (bit8 == 0)
1141 {
1142 g_BP.Align();
1143 ipuRegs.ctrl.SCD = 1;
1144 }
1145 }
1146
1147 case 5:
// Latch the next 32 bits of the stream into ipuRegs.top (byte-swapped).
1148 if (!getBits32((u8*)&ipuRegs.top, 0))
1149 {
1150 ipu_cmd.pos[0] = 5;
1151 return false;
1152 }
1153
1154 ipuRegs.top = BigEndian(ipuRegs.top);
1155 break;
1156 }
1157
1158 return true;
1159 }

  ViewVC Help
Powered by ViewVC 1.1.22