/[pcsx2_0.9.7]/trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp
ViewVC logotype

Contents of /trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 273 - (show annotations) (download)
Fri Nov 12 01:10:22 2010 UTC (9 years, 10 months ago) by william
File size: 22712 byte(s)
Auto Commited Import of: pcsx2-0.9.7-DEBUG (upstream: v0.9.7.4013 local: v0.9.7.197-latest) in ./trunk
1 /*
2 * Mpeg.c
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
5 * Modified by Florin for PCSX2 emu
6 *
7 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
8 * See http://libmpeg2.sourceforge.net/ for updates.
9 *
10 * mpeg2dec is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * mpeg2dec is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 */
24
25 // [Air] Note: many functions in this module are large and only used once, so they
26 // have been forced to inline since it won't bloat the program and gets rid of
27 // some call overhead.
28
29 #include "PrecompiledHeader.h"
30
31 #include "Common.h"
32 #include "IPU/IPU.h"
33 #include "Mpeg.h"
34 #include "Vlc.h"
35
// Quantizer scale lookup used by get_quantizer_scale() when
// decoder.q_scale_type is set.  Indexed by the 5-bit quantizer_scale_code.
const int non_linear_quantizer_scale [] =
{
	// codes 0..7: step 1
	0, 1, 2, 3,
	4, 5, 6, 7,
	// codes 8..15: step 2
	8, 10, 12, 14,
	16, 18, 20, 22,
	// codes 16..23: step 4
	24, 28, 32, 36,
	40, 44, 48, 52,
	// codes 24..31: step 8
	56, 64, 72, 80,
	88, 96, 104, 112
};
43
44 /* Bitstream and buffer needs to be reallocated in order for successful
45 reading of the old data. Here the old data stored in the 2nd slot
46 of the internal buffer is copied to 1st slot, and the new data read
47 into 1st slot is copied to the 2nd slot. Which will later be copied
48 back to the 1st slot when 128bits have been read.
49 */
50 const DCTtab * tab;
51 int mbaCount = 0;
52
53 int bitstream_init ()
54 {
55 return g_BP.FillBuffer(32);
56 }
57
58 int get_macroblock_modes()
59 {
60 int macroblock_modes;
61 const MBtab * tab;
62
63 switch (decoder.coding_type)
64 {
65 case I_TYPE:
66 macroblock_modes = UBITS(2);
67
68 if (macroblock_modes == 0) return 0; // error
69
70 tab = MB_I + (macroblock_modes >> 1);
71 DUMPBITS(tab->len);
72 macroblock_modes = tab->modes;
73
74 if ((!(decoder.frame_pred_frame_dct)) &&
75 (decoder.picture_structure == FRAME_PICTURE))
76 {
77 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
78 }
79 return macroblock_modes;
80
81 case P_TYPE:
82 macroblock_modes = UBITS(6);
83
84 if (macroblock_modes == 0) return 0; // error
85
86 tab = MB_P + (macroblock_modes >> 1);
87 DUMPBITS(tab->len);
88 macroblock_modes = tab->modes;
89
90 if (decoder.picture_structure != FRAME_PICTURE)
91 {
92 if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
93 {
94 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
95 }
96
97 return macroblock_modes;
98 }
99 else if (decoder.frame_pred_frame_dct)
100 {
101 if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
102 macroblock_modes |= MC_FRAME;
103
104 return macroblock_modes;
105 }
106 else
107 {
108 if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
109 {
110 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
111 }
112
113 if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
114 {
115 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
116 }
117
118 return macroblock_modes;
119 }
120
121 case B_TYPE:
122 macroblock_modes = UBITS(6);
123
124 if (macroblock_modes == 0) return 0; // error
125
126 tab = MB_B + macroblock_modes;
127 DUMPBITS(tab->len);
128 macroblock_modes = tab->modes;
129
130 if (decoder.picture_structure != FRAME_PICTURE)
131 {
132 if (!(macroblock_modes & MACROBLOCK_INTRA))
133 {
134 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
135 }
136
137 return macroblock_modes;
138 }
139 else if (decoder.frame_pred_frame_dct)
140 {
141 /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
142 macroblock_modes |= MC_FRAME;
143 return macroblock_modes;
144 }
145 else
146 {
147 if (macroblock_modes & MACROBLOCK_INTRA) goto intra;
148
149 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
150
151 if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
152 {
153 intra:
154 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
155 }
156
157 return macroblock_modes;
158 }
159
160 case D_TYPE:
161 macroblock_modes = GETBITS(1);
162
163 if (macroblock_modes == 0) return 0; // error
164 return MACROBLOCK_INTRA;
165
166 default:
167 return 0;
168 }
169 }
170
171 static __fi int get_quantizer_scale()
172 {
173 int quantizer_scale_code;
174
175 quantizer_scale_code = GETBITS(5);
176
177 if (decoder.q_scale_type)
178 return non_linear_quantizer_scale [quantizer_scale_code];
179 else
180 return quantizer_scale_code << 1;
181 }
182
183 static __fi int get_coded_block_pattern()
184 {
185 const CBPtab * tab;
186 u16 code = UBITS(16);
187
188 if (code >= 0x2000)
189 tab = CBP_7 + (UBITS(7) - 16);
190 else
191 tab = CBP_9 + UBITS(9);
192
193 DUMPBITS(tab->len);
194 return tab->cbp;
195 }
196
197 int __fi get_motion_delta(const int f_code)
198 {
199 int delta;
200 int sign;
201 const MVtab * tab;
202 u16 code = UBITS(16);
203
204 if ((code & 0x8000))
205 {
206 DUMPBITS(1);
207 return 0x00010000;
208 }
209 else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00))
210 {
211 tab = MV_4 + UBITS(4);
212 }
213 else
214 {
215 tab = MV_10 + UBITS(10);
216 }
217
218 delta = tab->delta + 1;
219 DUMPBITS(tab->len);
220
221 sign = SBITS(1);
222 DUMPBITS(1);
223 return (delta ^ sign) - sign;
224 }
225
226 int __fi get_dmv()
227 {
228 const DMVtab* tab = DMV_2 + UBITS(2);
229 DUMPBITS(tab->len);
230 return tab->dmv;
231 }
232
233 int get_macroblock_address_increment()
234 {
235 const MBAtab *mba;
236
237 u16 code = UBITS(16);
238
239 if (code >= 4096)
240 mba = MBA.mba5 + (UBITS(5) - 2);
241 else if (code >= 768)
242 mba = MBA.mba11 + (UBITS(11) - 24);
243 else switch (UBITS(11))
244 {
245 case 8: /* macroblock_escape */
246 DUMPBITS(11);
247 return 0x23;
248
249 case 15: /* macroblock_stuffing (MPEG1 only) */
250 if (decoder.mpeg1)
251 {
252 DUMPBITS(11);
253 return 0x22;
254 }
255
256 default:
257 return 0;//error
258 }
259
260 DUMPBITS(mba->len);
261
262 return mba->mba + 1;
263 }
264
265 static __fi int get_luma_dc_dct_diff()
266 {
267 int size;
268 int dc_diff;
269 u16 code = UBITS(5);
270
271 if (code < 31)
272 {
273 size = DCtable.lum0[code].size;
274 DUMPBITS(DCtable.lum0[code].len);
275
276 // 5 bits max
277 }
278 else
279 {
280 code = UBITS(9) - 0x1f0;
281 size = DCtable.lum1[code].size;
282 DUMPBITS(DCtable.lum1[code].len);
283
284 // 9 bits max
285 }
286
287 if (size==0)
288 dc_diff = 0;
289 else
290 {
291 dc_diff = GETBITS(size);
292
293 // 6 for tab0 and 11 for tab1
294 if ((dc_diff & (1<<(size-1)))==0)
295 dc_diff-= (1<<size) - 1;
296 }
297
298 return dc_diff;
299 }
300
301 static __fi int get_chroma_dc_dct_diff()
302 {
303 int size;
304 int dc_diff;
305 u16 code = UBITS(5);
306
307 if (code<31)
308 {
309 size = DCtable.chrom0[code].size;
310 DUMPBITS(DCtable.chrom0[code].len);
311 }
312 else
313 {
314 code = UBITS(10) - 0x3e0;
315 size = DCtable.chrom1[code].size;
316 DUMPBITS(DCtable.chrom1[code].len);
317 }
318
319 if (size==0)
320 dc_diff = 0;
321 else
322 {
323 dc_diff = GETBITS(size);
324
325 if ((dc_diff & (1<<(size-1)))==0)
326 {
327 dc_diff-= (1<<size) - 1;
328 }
329 }
330
331 return dc_diff;
332 }
333
// Clamps the lvalue `val` to the range [-2048, 2047] in place.
// The unsigned compare catches both overflow directions at once; the
// replacement value is 2047 for positive overflow and -2048 for negative
// overflow (sign mask XOR 2047).
// The argument is parenthesized at every use so that any lvalue expression
// expands correctly.  It is still evaluated more than once, so it must not
// have side effects.
#define SATURATE(val) \
	do { \
		if (((u32)((val) + 2048) > 4095)) \
			(val) = (((s32)(val)) >> 31) ^ 2047; \
	} while (0)
339
340 static bool get_intra_block()
341 {
342 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
343 const u8 (&quant_matrix)[64] = decoder.iq;
344 int quantizer_scale = decoder.quantizer_scale;
345 s16 * dest = decoder.DCTblock;
346 u16 code;
347
348 /* decode AC coefficients */
349 for (int i=1 + ipu_cmd.pos[4]; ; i++)
350 {
351 switch (ipu_cmd.pos[5])
352 {
353 case 0:
354 if (!GETWORD())
355 {
356 ipu_cmd.pos[4] = i - 1;
357 return false;
358 }
359
360 code = UBITS(16);
361
362 if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
363 {
364 tab = &DCT.next[(code >> 12) - 4];
365 }
366 else if (code >= 1024)
367 {
368 if (decoder.intra_vlc_format && !decoder.mpeg1)
369 {
370 tab = &DCT.tab0a[(code >> 8) - 4];
371 }
372 else
373 {
374 tab = &DCT.tab0[(code >> 8) - 4];
375 }
376 }
377 else if (code >= 512)
378 {
379 if (decoder.intra_vlc_format && !decoder.mpeg1)
380 {
381 tab = &DCT.tab1a[(code >> 6) - 8];
382 }
383 else
384 {
385 tab = &DCT.tab1[(code >> 6) - 8];
386 }
387 }
388
389 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
390 // that should use a single unrolled DCT table instead of five separate tables used
391 // here. Multiple conditional statements are very slow, while modern CPU data caches
392 // have lots of room to spare.
393
394 else if (code >= 256)
395 {
396 tab = &DCT.tab2[(code >> 4) - 16];
397 }
398 else if (code >= 128)
399 {
400 tab = &DCT.tab3[(code >> 3) - 16];
401 }
402 else if (code >= 64)
403 {
404 tab = &DCT.tab4[(code >> 2) - 16];
405 }
406 else if (code >= 32)
407 {
408 tab = &DCT.tab5[(code >> 1) - 16];
409 }
410 else if (code >= 16)
411 {
412 tab = &DCT.tab6[code - 16];
413 }
414 else
415 {
416 ipu_cmd.pos[4] = 0;
417 return true;
418 }
419
420 DUMPBITS(tab->len);
421
422 if (tab->run==64) /* end_of_block */
423 {
424 ipu_cmd.pos[4] = 0;
425 return true;
426 }
427
428 i += (tab->run == 65) ? GETBITS(6) : tab->run;
429 if (i >= 64)
430 {
431 ipu_cmd.pos[4] = 0;
432 return true;
433 }
434
435 case 1:
436 {
437 if (!GETWORD())
438 {
439 ipu_cmd.pos[4] = i - 1;
440 ipu_cmd.pos[5] = 1;
441 return false;
442 }
443
444 uint j = scan[i];
445 int val;
446
447 if (tab->run==65) /* escape */
448 {
449 if(!decoder.mpeg1)
450 {
451 val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
452 DUMPBITS(12);
453 }
454 else
455 {
456 val = SBITS(8);
457 DUMPBITS(8);
458
459 if (!(val & 0x7f))
460 {
461 val = GETBITS(8) + 2 * val;
462 }
463
464 val = (val * quantizer_scale * quant_matrix[i]) >> 4;
465 val = (val + ~ (((s32)val) >> 31)) | 1;
466 }
467 }
468 else
469 {
470 val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
471 if(decoder.mpeg1)
472 {
473 /* oddification */
474 val = (val - 1) | 1;
475 }
476
477 /* if (bitstream_get (1)) val = -val; */
478 int bit1 = SBITS(1);
479 val = (val ^ bit1) - bit1;
480 DUMPBITS(1);
481 }
482
483 SATURATE(val);
484 dest[j] = val;
485 ipu_cmd.pos[5] = 0;
486 }
487 }
488 }
489
490 ipu_cmd.pos[4] = 0;
491 return true;
492 }
493
494 static bool get_non_intra_block(int * last)
495 {
496 int i;
497 int j;
498 int val;
499 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
500 const u8 (&quant_matrix)[64] = decoder.niq;
501 int quantizer_scale = decoder.quantizer_scale;
502 s16 * dest = decoder.DCTblock;
503 u16 code;
504
505 /* decode AC coefficients */
506 for (i= ipu_cmd.pos[4] ; ; i++)
507 {
508 switch (ipu_cmd.pos[5])
509 {
510 case 0:
511 if (!GETWORD())
512 {
513 ipu_cmd.pos[4] = i;
514 return false;
515 }
516
517 code = UBITS(16);
518
519 if (code >= 16384)
520 {
521 if (i==0)
522 {
523 tab = &DCT.first[(code >> 12) - 4];
524 }
525 else
526 {
527 tab = &DCT.next[(code >> 12)- 4];
528 }
529 }
530 else if (code >= 1024)
531 {
532 tab = &DCT.tab0[(code >> 8) - 4];
533 }
534 else if (code >= 512)
535 {
536 tab = &DCT.tab1[(code >> 6) - 8];
537 }
538
539 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
540 // that should use a single unrolled DCT table instead of five separate tables used
541 // here. Multiple conditional statements are very slow, while modern CPU data caches
542 // have lots of room to spare.
543
544 else if (code >= 256)
545 {
546 tab = &DCT.tab2[(code >> 4) - 16];
547 }
548 else if (code >= 128)
549 {
550 tab = &DCT.tab3[(code >> 3) - 16];
551 }
552 else if (code >= 64)
553 {
554 tab = &DCT.tab4[(code >> 2) - 16];
555 }
556 else if (code >= 32)
557 {
558 tab = &DCT.tab5[(code >> 1) - 16];
559 }
560 else if (code >= 16)
561 {
562 tab = &DCT.tab6[code - 16];
563 }
564 else
565 {
566 ipu_cmd.pos[4] = 0;
567 return true;
568 }
569
570 DUMPBITS(tab->len);
571
572 if (tab->run==64) /* end_of_block */
573 {
574 *last = i;
575 ipu_cmd.pos[4] = 0;
576 return true;
577 }
578
579 i += (tab->run == 65) ? GETBITS(6) : tab->run;
580 if (i >= 64)
581 {
582 *last = i;
583 ipu_cmd.pos[4] = 0;
584 return true;
585 }
586
587 case 1:
588 if (!GETWORD())
589 {
590 ipu_cmd.pos[4] = i;
591 ipu_cmd.pos[5] = 1;
592 return false;
593 }
594
595 j = scan[i];
596
597 if (tab->run==65) /* escape */
598 {
599 if (!decoder.mpeg1)
600 {
601 val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5;
602 DUMPBITS(12);
603 }
604 else
605 {
606 val = SBITS(8);
607 DUMPBITS(8);
608
609 if (!(val & 0x7f))
610 {
611 val = GETBITS(8) + 2 * val;
612 }
613
614 val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32;
615 val = (val + ~ (((s32)val) >> 31)) | 1;
616 }
617 }
618 else
619 {
620 int bit1 = SBITS(1);
621 val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
622 val = (val ^ bit1) - bit1;
623 DUMPBITS(1);
624 }
625
626 SATURATE(val);
627 dest[j] = val;
628 ipu_cmd.pos[5] = 0;
629 }
630 }
631
632 ipu_cmd.pos[4] = 0;
633 return true;
634 }
635
636 static __fi bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
637 {
638 if (!skip || ipu_cmd.pos[3])
639 {
640 ipu_cmd.pos[3] = 0;
641 if (!GETWORD())
642 {
643 ipu_cmd.pos[3] = 1;
644 return false;
645 }
646
647 /* Get the intra DC coefficient and inverse quantize it */
648 if (cc == 0)
649 decoder.dc_dct_pred[0] += get_luma_dc_dct_diff();
650 else
651 decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff();
652
653 decoder.DCTblock[0] = decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision);
654 }
655
656 if (!get_intra_block())
657 {
658 return false;
659 }
660
661 mpeg2_idct_copy(decoder.DCTblock, dest, stride);
662
663 return true;
664 }
665
666 static __fi bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
667 {
668 int last;
669
670 if (!skip)
671 {
672 memzero_sse_a(decoder.DCTblock);
673 }
674
675 if (!get_non_intra_block(&last))
676 {
677 return false;
678 }
679
680 mpeg2_idct_add(last, decoder.DCTblock, dest, stride);
681
682 return true;
683 }
684
685 void __fi finishmpeg2sliceIDEC()
686 {
687 ipuRegs.ctrl.SCD = 0;
688 coded_block_pattern = decoder.coded_block_pattern;
689 }
690
691 __fi bool mpeg2sliceIDEC()
692 {
693 u16 code;
694
695 switch (ipu_cmd.pos[0])
696 {
697 case 0:
698 decoder.dc_dct_pred[0] =
699 decoder.dc_dct_pred[1] =
700 decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
701
702 ipuRegs.top = 0;
703 ipuRegs.ctrl.ECD = 0;
704
705 case 1:
706 ipu_cmd.pos[0] = 1;
707 if (!bitstream_init())
708 {
709 return false;
710 }
711
712 case 2:
713 ipu_cmd.pos[0] = 2;
714 while (1)
715 {
716 macroblock_8& mb8 = decoder.mb8;
717 macroblock_rgb16& rgb16 = decoder.rgb16;
718 macroblock_rgb32& rgb32 = decoder.rgb32;
719
720 int DCT_offset, DCT_stride;
721 const MBAtab * mba;
722
723 switch (ipu_cmd.pos[1])
724 {
725 case 0:
726 decoder.macroblock_modes = get_macroblock_modes();
727
728 if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC
729 {
730 decoder.quantizer_scale = get_quantizer_scale();
731 }
732
733 decoder.coded_block_pattern = 0x3F;//all 6 blocks
734 memzero_sse_a(mb8);
735 memzero_sse_a(rgb32);
736
737 case 1:
738 ipu_cmd.pos[1] = 1;
739
740 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
741 {
742 DCT_offset = decoder_stride;
743 DCT_stride = decoder_stride * 2;
744 }
745 else
746 {
747 DCT_offset = decoder_stride * 8;
748 DCT_stride = decoder_stride;
749 }
750
751 switch (ipu_cmd.pos[2])
752 {
753 case 0:
754 case 1:
755 if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1))
756 {
757 ipu_cmd.pos[2] = 1;
758 return false;
759 }
760 case 2:
761 if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
762 {
763 ipu_cmd.pos[2] = 2;
764 return false;
765 }
766 case 3:
767 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
768 {
769 ipu_cmd.pos[2] = 3;
770 return false;
771 }
772 case 4:
773 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
774 {
775 ipu_cmd.pos[2] = 4;
776 return false;
777 }
778 case 5:
779 if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5))
780 {
781 ipu_cmd.pos[2] = 5;
782 return false;
783 }
784 case 6:
785 if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6))
786 {
787 ipu_cmd.pos[2] = 6;
788 return false;
789 }
790 break;
791
792 jNO_DEFAULT;
793 }
794
795 // Send The MacroBlock via DmaIpuFrom
796 ipu_csc(mb8, rgb32, decoder.sgn);
797
798 if (decoder.ofm == 0)
799 decoder.SetOutputTo(rgb32);
800 else
801 {
802 ipu_dither(rgb32, rgb16, decoder.dte);
803 decoder.SetOutputTo(rgb16);
804 }
805
806 case 2:
807 {
808 pxAssume(decoder.ipu0_data > 0);
809
810 uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
811 decoder.AdvanceIpuDataBy(read);
812
813 if (decoder.ipu0_data != 0)
814 {
815 // IPU FIFO filled up -- Will have to finish transferring later.
816 ipu_cmd.pos[1] = 2;
817 return false;
818 }
819
820 mbaCount = 0;
821 }
822
823 case 3:
824 while (1)
825 {
826 if (!GETWORD())
827 {
828 ipu_cmd.pos[1] = 3;
829 return false;
830 }
831
832 code = UBITS(16);
833 if (code >= 0x1000)
834 {
835 mba = MBA.mba5 + (UBITS(5) - 2);
836 break;
837 }
838 else if (code >= 0x0300)
839 {
840 mba = MBA.mba11 + (UBITS(11) - 24);
841 break;
842 }
843 else switch (UBITS(11))
844 {
845 case 8: /* macroblock_escape */
846 mbaCount += 33;
847 /* pass through */
848
849 case 15: /* macroblock_stuffing (MPEG1 only) */
850 DUMPBITS(11);
851 continue;
852
853 default: /* end of slice/frame, or error? */
854 {
855 goto finish_idec;
856 }
857 }
858 }
859
860 DUMPBITS(mba->len);
861 mbaCount += mba->mba;
862
863 if (mbaCount)
864 {
865 decoder.dc_dct_pred[0] =
866 decoder.dc_dct_pred[1] =
867 decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
868 }
869
870 case 4:
871 if (!GETWORD())
872 {
873 ipu_cmd.pos[1] = 4;
874 return false;
875 }
876
877 break;
878
879 jNO_DEFAULT;
880 }
881
882 ipu_cmd.pos[1] = 0;
883 ipu_cmd.pos[2] = 0;
884 }
885
886 finish_idec:
887 finishmpeg2sliceIDEC();
888
889 case 3:
890 {
891 u8 bit8;
892 if (!getBits8((u8*)&bit8, 0))
893 {
894 ipu_cmd.pos[0] = 3;
895 return false;
896 }
897
898 if (bit8 == 0)
899 {
900 g_BP.Align();
901 ipuRegs.ctrl.SCD = 1;
902 }
903 }
904
905 case 4:
906 if (!getBits32((u8*)&ipuRegs.top, 0))
907 {
908 ipu_cmd.pos[0] = 4;
909 return false;
910 }
911
912 ipuRegs.top = BigEndian(ipuRegs.top);
913 break;
914
915 jNO_DEFAULT;
916 }
917
918 return true;
919 }
920
921 __fi bool mpeg2_slice()
922 {
923 int DCT_offset, DCT_stride;
924
925 macroblock_8& mb8 = decoder.mb8;
926 macroblock_16& mb16 = decoder.mb16;
927
928 switch (ipu_cmd.pos[0])
929 {
930 case 0:
931 if (decoder.dcr)
932 {
933 decoder.dc_dct_pred[0] =
934 decoder.dc_dct_pred[1] =
935 decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
936 }
937
938 ipuRegs.ctrl.ECD = 0;
939 ipuRegs.top = 0;
940 memzero_sse_a(mb8);
941 memzero_sse_a(mb16);
942 case 1:
943 if (!bitstream_init())
944 {
945 ipu_cmd.pos[0] = 1;
946 return false;
947 }
948
949 case 2:
950 ipu_cmd.pos[0] = 2;
951
952 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
953 {
954 DCT_offset = decoder_stride;
955 DCT_stride = decoder_stride * 2;
956 }
957 else
958 {
959 DCT_offset = decoder_stride * 8;
960 DCT_stride = decoder_stride;
961 }
962
963 if (decoder.macroblock_modes & MACROBLOCK_INTRA)
964 {
965 switch(ipu_cmd.pos[1])
966 {
967 case 0:
968 decoder.coded_block_pattern = 0x3F;
969 case 1:
970 if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1))
971 {
972 ipu_cmd.pos[1] = 1;
973 return false;
974 }
975 case 2:
976 if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
977 {
978 ipu_cmd.pos[1] = 2;
979 return false;
980 }
981 case 3:
982 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
983 {
984 ipu_cmd.pos[1] = 3;
985 return false;
986 }
987 case 4:
988 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
989 {
990 ipu_cmd.pos[1] = 4;
991 return false;
992 }
993 case 5:
994 if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
995 {
996 ipu_cmd.pos[1] = 5;
997 return false;
998 }
999 case 6:
1000 if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1001 {
1002 ipu_cmd.pos[1] = 6;
1003 return false;
1004 }
1005 break;
1006
1007 jNO_DEFAULT;
1008 }
1009
1010 // Copy macroblock8 to macroblock16 - without sign extension.
1011 // Manually inlined due to MSVC refusing to inline the SSE-optimized version.
1012 {
1013 const u8 *s = (const u8*)&mb8;
1014 u16 *d = (u16*)&mb16;
1015
1016 //Y bias - 16 * 16
1017 //Cr bias - 8 * 8
1018 //Cb bias - 8 * 8
1019
1020 __m128i zeroreg = _mm_setzero_si128();
1021
1022 for (uint i = 0; i < (256+64+64) / 32; ++i)
1023 {
1024 //*d++ = *s++;
1025 __m128i woot1 = _mm_load_si128((__m128i*)s);
1026 __m128i woot2 = _mm_load_si128((__m128i*)s+1);
1027 _mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg));
1028 _mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg));
1029 _mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg));
1030 _mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg));
1031 s += 32;
1032 d += 32;
1033 }
1034 }
1035 }
1036 else
1037 {
1038 if (decoder.macroblock_modes & MACROBLOCK_PATTERN)
1039 {
1040 switch(ipu_cmd.pos[1])
1041 {
1042 case 0:
1043 decoder.coded_block_pattern = get_coded_block_pattern(); // max 9bits
1044 case 1:
1045 if (decoder.coded_block_pattern & 0x20)
1046 {
1047 if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1))
1048 {
1049 ipu_cmd.pos[1] = 1;
1050 return false;
1051 }
1052 }
1053 case 2:
1054 if (decoder.coded_block_pattern & 0x10)
1055 {
1056 if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
1057 {
1058 ipu_cmd.pos[1] = 2;
1059 return false;
1060 }
1061 }
1062 case 3:
1063 if (decoder.coded_block_pattern & 0x08)
1064 {
1065 if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
1066 {
1067 ipu_cmd.pos[1] = 3;
1068 return false;
1069 }
1070 }
1071 case 4:
1072 if (decoder.coded_block_pattern & 0x04)
1073 {
1074 if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
1075 {
1076 ipu_cmd.pos[1] = 4;
1077 return false;
1078 }
1079 }
1080 case 5:
1081 if (decoder.coded_block_pattern & 0x2)
1082 {
1083 if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
1084 {
1085 ipu_cmd.pos[1] = 5;
1086 return false;
1087 }
1088 }
1089 case 6:
1090 if (decoder.coded_block_pattern & 0x1)
1091 {
1092 if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1093 {
1094 ipu_cmd.pos[1] = 6;
1095 return false;
1096 }
1097 }
1098 break;
1099
1100 jNO_DEFAULT;
1101 }
1102 }
1103 }
1104
1105 // Send The MacroBlock via DmaIpuFrom
1106 ipuRegs.ctrl.SCD = 0;
1107 coded_block_pattern = decoder.coded_block_pattern;
1108
1109 decoder.SetOutputTo(mb16);
1110
1111 case 3:
1112 {
1113 pxAssume(decoder.ipu0_data > 0);
1114
1115 uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
1116 decoder.AdvanceIpuDataBy(read);
1117
1118 if (decoder.ipu0_data != 0)
1119 {
1120 // IPU FIFO filled up -- Will have to finish transferring later.
1121 ipu_cmd.pos[0] = 3;
1122 return false;
1123 }
1124
1125 mbaCount = 0;
1126 }
1127
1128 case 4:
1129 {
1130 u8 bit8;
1131 if (!getBits8((u8*)&bit8, 0))
1132 {
1133 ipu_cmd.pos[0] = 4;
1134 return false;
1135 }
1136
1137 if (bit8 == 0)
1138 {
1139 g_BP.Align();
1140 ipuRegs.ctrl.SCD = 1;
1141 }
1142 }
1143
1144 case 5:
1145 if (!getBits32((u8*)&ipuRegs.top, 0))
1146 {
1147 ipu_cmd.pos[0] = 5;
1148 return false;
1149 }
1150
1151 ipuRegs.top = BigEndian(ipuRegs.top);
1152 break;
1153 }
1154
1155 return true;
1156 }

  ViewVC Help
Powered by ViewVC 1.1.22