/[pcsx2_0.9.7]/trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp
ViewVC logotype

Contents of /trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 191 - (show annotations) (download)
Mon Sep 20 05:35:51 2010 UTC (10 years ago) by william
File size: 22807 byte(s)
Auto Commited Import of: pcsx2-0.9.7-DEBUG (upstream: v0.9.7.3795 local: v0.9.7.186-latest) in ./trunk
1 /*
2 * Mpeg.c
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
5 * Modified by Florin for PCSX2 emu
6 *
7 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
8 * See http://libmpeg2.sourceforge.net/ for updates.
9 *
10 * mpeg2dec is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * mpeg2dec is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 */
24
25 // [Air] Note: many functions in this module are large and only used once, so they
26 // have been forced to inline since it won't bloat the program and gets rid of
27 // some call overhead.
28
29 #include "PrecompiledHeader.h"
30
31 #include "Common.h"
32 #include "IPU/IPU.h"
33 #include "Mpeg.h"
34 #include "Vlc.h"
35
// Quantizer scale values indexed by the 5-bit quantizer_scale_code; used by
// get_quantizer_scale() when decoder.q_scale_type is set.
const int non_linear_quantizer_scale [] =
{
	// codes 0..7: step 1
	 0,   1,   2,   3,
	 4,   5,   6,   7,
	// codes 8..15: step 2
	 8,  10,  12,  14,
	16,  18,  20,  22,
	// codes 16..23: step 4
	24,  28,  32,  36,
	40,  44,  48,  52,
	// codes 24..31: step 8
	56,  64,  72,  80,
	88,  96, 104, 112
};
43
44 /* Bitstream and buffer need to be reallocated in order for successful
45 reading of the old data. Here the old data stored in the 2nd slot
46 of the internal buffer is copied to the 1st slot, and the new data read
47 into the 1st slot is copied to the 2nd slot, which will later be copied
48 back to the 1st slot when 128 bits have been read.
49 */
// NOTE(review): the comment above does not describe either declaration below; it
// presumably belonged to bitstream code that is no longer in this file -- confirm.

// DCT table entry selected by get_intra_block() / get_non_intra_block(). It lives at
// file scope because those functions can return early and later resume at a point
// that still reads tab->run and tab->level.
50 const DCTtab * tab;
// Macroblock address increment accumulated by mpeg2sliceIDEC(); reset to 0 after each
// macroblock has been written to the output FIFO (in mpeg2sliceIDEC and mpeg2_slice).
51 int mbaCount = 0;
52
53 int bitstream_init ()
54 {
55 return g_BP.FillBuffer(32);
56 }
57
58 int get_macroblock_modes()
59 {
60 int macroblock_modes;
61 const MBtab * tab;
62
63 switch (decoder.coding_type)
64 {
65 case I_TYPE:
66 macroblock_modes = UBITS(2);
67
68 if (macroblock_modes == 0) return 0; // error
69
70 tab = MB_I + (macroblock_modes >> 1);
71 DUMPBITS(tab->len);
72 macroblock_modes = tab->modes;
73
74 if ((!(decoder.frame_pred_frame_dct)) &&
75 (decoder.picture_structure == FRAME_PICTURE))
76 {
77 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
78 }
79 return macroblock_modes;
80
81 case P_TYPE:
82 macroblock_modes = UBITS(6);
83
84 if (macroblock_modes == 0) return 0; // error
85
86 tab = MB_P + (macroblock_modes >> 1);
87 DUMPBITS(tab->len);
88 macroblock_modes = tab->modes;
89
90 if (decoder.picture_structure != FRAME_PICTURE)
91 {
92 if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
93 {
94 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
95 }
96
97 return macroblock_modes;
98 }
99 else if (decoder.frame_pred_frame_dct)
100 {
101 if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
102 macroblock_modes |= MC_FRAME;
103
104 return macroblock_modes;
105 }
106 else
107 {
108 if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
109 {
110 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
111 }
112
113 if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
114 {
115 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
116 }
117
118 return macroblock_modes;
119 }
120
121 case B_TYPE:
122 macroblock_modes = UBITS(6);
123
124 if (macroblock_modes == 0) return 0; // error
125
126 tab = MB_B + macroblock_modes;
127 DUMPBITS(tab->len);
128 macroblock_modes = tab->modes;
129
130 if (decoder.picture_structure != FRAME_PICTURE)
131 {
132 if (!(macroblock_modes & MACROBLOCK_INTRA))
133 {
134 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
135 }
136
137 return macroblock_modes;
138 }
139 else if (decoder.frame_pred_frame_dct)
140 {
141 /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
142 macroblock_modes |= MC_FRAME;
143 return macroblock_modes;
144 }
145 else
146 {
147 if (macroblock_modes & MACROBLOCK_INTRA) goto intra;
148
149 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
150
151 if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
152 {
153 intra:
154 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
155 }
156
157 return macroblock_modes;
158 }
159
160 case D_TYPE:
161 macroblock_modes = GETBITS(1);
162
163 if (macroblock_modes == 0) return 0; // error
164 return MACROBLOCK_INTRA;
165
166 default:
167 return 0;
168 }
169 }
170
171 static __fi int get_quantizer_scale()
172 {
173 int quantizer_scale_code;
174
175 quantizer_scale_code = GETBITS(5);
176
177 if (decoder.q_scale_type)
178 return non_linear_quantizer_scale [quantizer_scale_code];
179 else
180 return quantizer_scale_code << 1;
181 }
182
183 static __fi int get_coded_block_pattern()
184 {
185 const CBPtab * tab;
186 u16 code = UBITS(16);
187
188 if (code >= 0x2000)
189 tab = CBP_7 + (UBITS(7) - 16);
190 else
191 tab = CBP_9 + UBITS(9);
192
193 DUMPBITS(tab->len);
194 return tab->cbp;
195 }
196
197 int __fi get_motion_delta(const int f_code)
198 {
199 int delta;
200 int sign;
201 const MVtab * tab;
202 u16 code = UBITS(16);
203
204 if ((code & 0x8000))
205 {
206 DUMPBITS(1);
207 return 0x00010000;
208 }
209 else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00))
210 {
211 tab = MV_4 + UBITS(4);
212 }
213 else
214 {
215 tab = MV_10 + UBITS(10);
216 }
217
218 delta = tab->delta + 1;
219 DUMPBITS(tab->len);
220
221 sign = SBITS(1);
222 DUMPBITS(1);
223 return (delta ^ sign) - sign;
224 }
225
226 int __fi get_dmv()
227 {
228 const DMVtab* tab = DMV_2 + UBITS(2);
229 DUMPBITS(tab->len);
230 return tab->dmv;
231 }
232
233 int get_macroblock_address_increment()
234 {
235 const MBAtab *mba;
236
237 u16 code = UBITS(16);
238
239 if (code >= 4096)
240 mba = MBA.mba5 + (UBITS(5) - 2);
241 else if (code >= 768)
242 mba = MBA.mba11 + (UBITS(11) - 24);
243 else switch (UBITS(11))
244 {
245 case 8: /* macroblock_escape */
246 DUMPBITS(11);
247 return 0x23;
248
249 case 15: /* macroblock_stuffing (MPEG1 only) */
250 if (decoder.mpeg1)
251 {
252 DUMPBITS(11);
253 return 0x22;
254 }
255
256 default:
257 return 0;//error
258 }
259
260 DUMPBITS(mba->len);
261
262 return mba->mba + 1;
263 }
264
265 static __fi int get_luma_dc_dct_diff()
266 {
267 int size;
268 int dc_diff;
269 u16 code = UBITS(5);
270
271 if (code < 31)
272 {
273 size = DCtable.lum0[code].size;
274 DUMPBITS(DCtable.lum0[code].len);
275
276 // 5 bits max
277 }
278 else
279 {
280 code = UBITS(9) - 0x1f0;
281 size = DCtable.lum1[code].size;
282 DUMPBITS(DCtable.lum1[code].len);
283
284 // 9 bits max
285 }
286
287 if (size==0)
288 dc_diff = 0;
289 else
290 {
291 dc_diff = GETBITS(size);
292
293 // 6 for tab0 and 11 for tab1
294 if ((dc_diff & (1<<(size-1)))==0)
295 dc_diff-= (1<<size) - 1;
296 }
297
298 return dc_diff;
299 }
300
301 static __fi int get_chroma_dc_dct_diff()
302 {
303 int size;
304 int dc_diff;
305 u16 code = UBITS(5);
306
307 if (code<31)
308 {
309 size = DCtable.chrom0[code].size;
310 DUMPBITS(DCtable.chrom0[code].len);
311 }
312 else
313 {
314 code = UBITS(10) - 0x3e0;
315 size = DCtable.chrom1[code].size;
316 DUMPBITS(DCtable.chrom1[code].len);
317 }
318
319 if (size==0)
320 dc_diff = 0;
321 else
322 {
323 dc_diff = GETBITS(size);
324
325 if ((dc_diff & (1<<(size-1)))==0)
326 {
327 dc_diff-= (1<<size) - 1;
328 }
329 }
330
331 return dc_diff;
332 }
333
// Clamps the lvalue `val` to the range [-2048, 2047] in place.
//
// Adding 2048 and comparing as unsigned detects both underflow and overflow with a
// single test; the replacement value is 2047 for positive inputs and -2048 for
// negative ones (sign mask xor 2047).
//
// `val` must be a modifiable lvalue and is evaluated more than once, so it must
// not have side effects. Every use of the argument is parenthesized so that
// non-trivial lvalue expressions expand correctly.
#define SATURATE(val)							\
do {									\
	if (((u32)((val) + 2048) > 4095))				\
		(val) = (((s32)(val)) >> 31) ^ 2047;			\
} while (0)
339
// Decodes the AC coefficients of one intra block into decoder.DCTblock.
//
// The function is resumable. It returns false when GETWORD() reports failure, after
// recording where to continue:
//   ipu_cmd.pos[4] - coefficient index to resume from, stored as i - 1 because the
//                    loop initializer adds 1 back;
//   ipu_cmd.pos[5] - 0 to resume at code lookup, 1 to resume at the level stage.
// It returns true when the block is finished: end_of_block entry (run == 64),
// index reaching 64, or a 16-bit peek below 16 (no table matches).
//
// The selected table entry is kept in the file-scope `tab` so that a resume at
// case 1 still sees the entry chosen before the early return.
340 static bool get_intra_block()
341 {
342 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
343 const u8 (&quant_matrix)[64] = decoder.iq;
344 int quantizer_scale = decoder.quantizer_scale;
345 s16 * dest = decoder.DCTblock;
346 u16 code;
347
348 /* decode AC coefficients */
349 for (int i=1 + ipu_cmd.pos[4]; ; i++)
350 {
351 switch (ipu_cmd.pos[5])
352 {
// Stage 0: pick the table entry for the next code.
353 case 0:
354 if (!GETWORD())
355 {
356 ipu_cmd.pos[4] = i - 1;
357 return false;
358 }
359
360 code = UBITS(16);
361
// The alternate tables (tab0a / tab1a) are used only when intra_vlc_format is set
// and the stream is not MPEG-1.
362 if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
363 {
364 tab = &DCT.next[(code >> 12) - 4];
365 }
366 else if (code >= 1024)
367 {
368 if (decoder.intra_vlc_format && !decoder.mpeg1)
369 {
370 tab = &DCT.tab0a[(code >> 8) - 4];
371 }
372 else
373 {
374 tab = &DCT.tab0[(code >> 8) - 4];
375 }
376 }
377 else if (code >= 512)
378 {
379 if (decoder.intra_vlc_format && !decoder.mpeg1)
380 {
381 tab = &DCT.tab1a[(code >> 6) - 8];
382 }
383 else
384 {
385 tab = &DCT.tab1[(code >> 6) - 8];
386 }
387 }
388
389 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
390 // that should use a single unrolled DCT table instead of five separate tables used
391 // here. Multiple conditional statements are very slow, while modern CPU data caches
392 // have lots of room to spare.
393
394 else if (code >= 256)
395 {
396 tab = &DCT.tab2[(code >> 4) - 16];
397 }
398 else if (code >= 128)
399 {
400 tab = &DCT.tab3[(code >> 3) - 16];
401 }
402 else if (code >= 64)
403 {
404 tab = &DCT.tab4[(code >> 2) - 16];
405 }
406 else if (code >= 32)
407 {
408 tab = &DCT.tab5[(code >> 1) - 16];
409 }
410 else if (code >= 16)
411 {
412 tab = &DCT.tab6[code - 16];
413 }
414 else
415 {
// No table covers codes below 16: the block is reported as finished.
416 ipu_cmd.pos[4] = 0;
417 return true;
418 }
419
420 DUMPBITS(tab->len);
421
422 if (tab->run==64) /* end_of_block */
423 {
424 ipu_cmd.pos[4] = 0;
425 return true;
426 }
427
// run == 65 marks an escape: the run length is read explicitly as 6 bits.
428 i += (tab->run == 65) ? GETBITS(6) : tab->run;
429 if (i >= 64)
430 {
431 ipu_cmd.pos[4] = 0;
432 return true;
433 }
434
// Falls through into stage 1.
// Stage 1: read the level, dequantize, saturate and store the coefficient.
435 case 1:
436 {
437 if (!GETWORD())
438 {
439 ipu_cmd.pos[4] = i - 1;
440 ipu_cmd.pos[5] = 1;
441 return false;
442 }
443
// The destination index comes from the scan order; the quantizer matrix is
// indexed by i, not by j.
444 uint j = scan[i];
445 int val;
446
447 if (tab->run==65) /* escape */
448 {
449 if(!decoder.mpeg1)
450 {
451 val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
452 DUMPBITS(12);
453 }
454 else
455 {
456 val = SBITS(8);
457 DUMPBITS(8);
458
// Low 7 bits all zero: a further 8 bits are read and combined with 2 * val.
459 if (!(val & 0x7f))
460 {
461 val = GETBITS(8) + 2 * val;
462 }
463
464 val = (val * quantizer_scale * quant_matrix[i]) >> 4;
465 val = (val + ~ (((s32)val) >> 31)) | 1;
466 }
467 }
468 else
469 {
470 val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
471 if(decoder.mpeg1)
472 {
473 /* oddification */
474 val = (val - 1) | 1;
475 }
476
477 /* if (bitstream_get (1)) val = -val; */
478 int bit1 = SBITS(1);
479 val = (val ^ bit1) - bit1;
480 DUMPBITS(1);
481 }
482
483 SATURATE(val);
484 dest[j] = val;
// Coefficient stored: the next loop iteration starts again at stage 0.
485 ipu_cmd.pos[5] = 0;
486 }
487 }
488 }
489
// Not reached: the loop above has no exit condition and leaves only via return.
490 ipu_cmd.pos[4] = 0;
491 return true;
492 }
493
// Decodes the coefficients of one non-intra block into decoder.DCTblock.
//
// The function is resumable. It returns false when GETWORD() reports failure, after
// recording where to continue:
//   ipu_cmd.pos[4] - coefficient index to resume from;
//   ipu_cmd.pos[5] - 0 to resume at code lookup, 1 to resume at the level stage.
// It returns true when the block is finished.
//
// *last receives the final coefficient index on the end_of_block and i >= 64 exits.
// It is NOT written on the exit taken when the 16-bit peek is below 16, so the
// caller's variable keeps whatever value it had on that path.
//
// The selected table entry is kept in the file-scope `tab` so that a resume at
// case 1 still sees the entry chosen before the early return.
494 static bool get_non_intra_block(int * last)
495 {
496 int i;
497 int j;
498 int val;
499 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
500 const u8 (&quant_matrix)[64] = decoder.niq;
501 int quantizer_scale = decoder.quantizer_scale;
502 s16 * dest = decoder.DCTblock;
503 u16 code;
504
505 /* decode AC coefficients */
506 for (i= ipu_cmd.pos[4] ; ; i++)
507 {
508 switch (ipu_cmd.pos[5])
509 {
// Stage 0: pick the table entry for the next code.
510 case 0:
511 if (!GETWORD())
512 {
513 ipu_cmd.pos[4] = i;
514 return false;
515 }
516
517 code = UBITS(16);
518
519 if (code >= 16384)
520 {
// The very first coefficient (i == 0) uses DCT.first instead of DCT.next.
521 if (i==0)
522 {
523 tab = &DCT.first[(code >> 12) - 4];
524 }
525 else
526 {
527 tab = &DCT.next[(code >> 12)- 4];
528 }
529 }
530 else if (code >= 1024)
531 {
532 tab = &DCT.tab0[(code >> 8) - 4];
533 }
534 else if (code >= 512)
535 {
536 tab = &DCT.tab1[(code >> 6) - 8];
537 }
538
539 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
540 // that should use a single unrolled DCT table instead of five separate tables used
541 // here. Multiple conditional statements are very slow, while modern CPU data caches
542 // have lots of room to spare.
543
544 else if (code >= 256)
545 {
546 tab = &DCT.tab2[(code >> 4) - 16];
547 }
548 else if (code >= 128)
549 {
550 tab = &DCT.tab3[(code >> 3) - 16];
551 }
552 else if (code >= 64)
553 {
554 tab = &DCT.tab4[(code >> 2) - 16];
555 }
556 else if (code >= 32)
557 {
558 tab = &DCT.tab5[(code >> 1) - 16];
559 }
560 else if (code >= 16)
561 {
562 tab = &DCT.tab6[code - 16];
563 }
564 else
565 {
// No table covers codes below 16: the block is reported as finished and *last is
// left untouched.
566 ipu_cmd.pos[4] = 0;
567 return true;
568 }
569
570 DUMPBITS(tab->len);
571
572 if (tab->run==64) /* end_of_block */
573 {
574 *last = i;
575 ipu_cmd.pos[4] = 0;
576 return true;
577 }
578
// run == 65 marks an escape: the run length is read explicitly as 6 bits.
579 i += (tab->run == 65) ? GETBITS(6) : tab->run;
580 if (i >= 64)
581 {
582 *last = i;
583 ipu_cmd.pos[4] = 0;
584 return true;
585 }
586
// Falls through into stage 1.
// Stage 1: read the level, dequantize, saturate and store the coefficient.
587 case 1:
588 if (!GETWORD())
589 {
590 ipu_cmd.pos[4] = i;
591 ipu_cmd.pos[5] = 1;
592 return false;
593 }
594
// The destination index comes from the scan order; the quantizer matrix is
// indexed by i, not by j.
595 j = scan[i];
596
597 if (tab->run==65) /* escape */
598 {
599 if (!decoder.mpeg1)
600 {
601 val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5;
602 DUMPBITS(12);
603 }
604 else
605 {
606 val = SBITS(8);
607 DUMPBITS(8);
608
// Low 7 bits all zero: a further 8 bits are read and combined with 2 * val.
609 if (!(val & 0x7f))
610 {
611 val = GETBITS(8) + 2 * val;
612 }
613
614 val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32;
615 val = (val + ~ (((s32)val) >> 31)) | 1;
616 }
617 }
618 else
619 {
// bit1 is used as a mask: xor-then-subtract negates val when the mask is -1.
620 int bit1 = SBITS(1);
621 val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
622 val = (val ^ bit1) - bit1;
623 DUMPBITS(1);
624 }
625
626 SATURATE(val);
627 dest[j] = val;
// Coefficient stored: the next loop iteration starts again at stage 0.
628 ipu_cmd.pos[5] = 0;
629 }
630 }
631
// Not reached: the loop above has no exit condition and leaves only via return.
632 ipu_cmd.pos[4] = 0;
633 return true;
634 }
635
636 static __fi bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
637 {
638 if (!skip || ipu_cmd.pos[3])
639 {
640 ipu_cmd.pos[3] = 0;
641 if (!GETWORD())
642 {
643 ipu_cmd.pos[3] = 1;
644 return false;
645 }
646
647 /* Get the intra DC coefficient and inverse quantize it */
648 if (cc == 0)
649 decoder.dc_dct_pred[0] += get_luma_dc_dct_diff();
650 else
651 decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff();
652
653 decoder.DCTblock[0] = decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision);
654 }
655
656 if (!get_intra_block())
657 {
658 return false;
659 }
660
661 mpeg2_idct_copy(decoder.DCTblock, dest, stride);
662
663 return true;
664 }
665
666 static __fi bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
667 {
668 int last;
669
670 if (!skip)
671 {
672 memzero_sse_a(decoder.DCTblock);
673 }
674
675 if (!get_non_intra_block(&last))
676 {
677 return false;
678 }
679
680 mpeg2_idct_add(last, decoder.DCTblock, dest, stride);
681
682 return true;
683 }
684
685 void __fi finishmpeg2sliceIDEC()
686 {
687 ipuRegs.ctrl.SCD = 0;
688 coded_block_pattern = decoder.coded_block_pattern;
689 }
690
// Decodes intra macroblocks in a loop, converts each one to RGB and pushes it to the
// IPU output FIFO, until an address-increment code that is neither a table code nor
// escape/stuffing is found.
//
// The function is resumable: whenever a helper reports that it cannot proceed, the
// current stage is stored in ipu_cmd.pos[] and false is returned; calling again
// continues from that stage. Returns true when the whole command has completed.
//
//   ipu_cmd.pos[0] - outer stage: 0 reset state, 1 fill the bitstream, 2 macroblock
//                    loop, 3 read 8 bits and test them for zero, 4 read the 32 bits
//                    stored in ipuRegs.top.
//   ipu_cmd.pos[1] - stage inside one macroblock: 0 read modes, 1 decode the six
//                    blocks and convert, 2 write to the output FIFO, 3 parse the
//                    address increment, 4 wait for input.
//   ipu_cmd.pos[2] - block currently being decoded (1..6); 0 means none started.
//
// Most cases deliberately fall through into the next stage.
691 bool mpeg2sliceIDEC()
692 {
693 u16 code;
694
695 switch (ipu_cmd.pos[0])
696 {
697 case 0:
698 decoder.dc_dct_pred[0] =
699 decoder.dc_dct_pred[1] =
700 decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
701
702 decoder.mbc = 0;
703 ipuRegs.top = 0;
704 ipuRegs.ctrl.ECD = 0;
705
706 case 1:
707 ipu_cmd.pos[0] = 1;
708 if (!bitstream_init())
709 {
710 return false;
711 }
712
713 case 2:
714 ipu_cmd.pos[0] = 2;
715 while (1)
716 {
717 macroblock_8& mb8 = decoder.mb8;
718 macroblock_rgb16& rgb16 = decoder.rgb16;
719 macroblock_rgb32& rgb32 = decoder.rgb32;
720
721 int DCT_offset, DCT_stride;
722 const MBAtab * mba;
723
724 switch (ipu_cmd.pos[1])
725 {
726 case 0:
727 decoder.macroblock_modes = get_macroblock_modes();
728
729 if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC
730 {
731 decoder.quantizer_scale = get_quantizer_scale();
732 }
733
734 decoder.coded_block_pattern = 0x3F;//all 6 blocks
735 memzero_sse_a(mb8);
736 memzero_sse_a(rgb32);
737
738 case 1:
739 ipu_cmd.pos[1] = 1;
740
// The offset/stride pair is recomputed on every entry, including resumes, because
// both are locals of the loop body.
741 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
742 {
743 DCT_offset = decoder_stride;
744 DCT_stride = decoder_stride * 2;
745 }
746 else
747 {
748 DCT_offset = decoder_stride * 8;
749 DCT_stride = decoder_stride;
750 }
751
// Four luma blocks, then Cb, then Cr. The last argument tells slice_intra_DCT
// whether this particular block is the one being resumed.
752 switch (ipu_cmd.pos[2])
753 {
754 case 0:
755 case 1:
756 if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1))
757 {
758 ipu_cmd.pos[2] = 1;
759 return false;
760 }
761 case 2:
762 if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
763 {
764 ipu_cmd.pos[2] = 2;
765 return false;
766 }
767 case 3:
768 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
769 {
770 ipu_cmd.pos[2] = 3;
771 return false;
772 }
773 case 4:
774 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
775 {
776 ipu_cmd.pos[2] = 4;
777 return false;
778 }
779 case 5:
780 if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5))
781 {
782 ipu_cmd.pos[2] = 5;
783 return false;
784 }
785 case 6:
786 if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6))
787 {
788 ipu_cmd.pos[2] = 6;
789 return false;
790 }
791 break;
792
793 jNO_DEFAULT;
794 }
795
796 // Send The MacroBlock via DmaIpuFrom
797 ipu_csc(mb8, rgb32, decoder.sgn);
798
// decoder.ofm == 0 outputs the 32-bit macroblock; otherwise it is dithered into
// the 16-bit one first.
799 if (decoder.ofm == 0)
800 decoder.SetOutputTo(rgb32);
801 else
802 {
803 ipu_dither(rgb32, rgb16, decoder.dte);
804 decoder.SetOutputTo(rgb16);
805 }
806
807 case 2:
808 {
809 pxAssume(decoder.ipu0_data > 0);
810
811 uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
812 decoder.AdvanceIpuDataBy(read);
813
814 if (decoder.ipu0_data != 0)
815 {
816 // IPU FIFO filled up -- Will have to finish transferring later.
817 ipu_cmd.pos[1] = 2;
818 return false;
819 }
820
821 decoder.mbc++;
822 mbaCount = 0;
823 }
824
// Parse the macroblock address increment. Escape codes add 33 to mbaCount and
// stuffing codes are dropped; both keep looping until a table code is found.
825 case 3:
826 while (1)
827 {
828 if (!GETWORD())
829 {
830 ipu_cmd.pos[1] = 3;
831 return false;
832 }
833
834 code = UBITS(16);
835 if (code >= 0x1000)
836 {
837 mba = MBA.mba5 + (UBITS(5) - 2);
838 break;
839 }
840 else if (code >= 0x0300)
841 {
842 mba = MBA.mba11 + (UBITS(11) - 24);
843 break;
844 }
845 else switch (UBITS(11))
846 {
847 case 8: /* macroblock_escape */
848 mbaCount += 33;
849 /* pass through */
850
851 case 15: /* macroblock_stuffing (MPEG1 only) */
852 DUMPBITS(11);
853 continue;
854
855 default: /* end of slice/frame, or error? */
856 {
857 goto finish_idec;
858 }
859 }
860 }
861
862 DUMPBITS(mba->len);
863 mbaCount += mba->mba;
864
// A non-zero count resets the DC predictors and advances the macroblock counter
// by that amount (presumably the number of skipped macroblocks -- confirm).
865 if (mbaCount)
866 {
867 decoder.dc_dct_pred[0] =
868 decoder.dc_dct_pred[1] =
869 decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
870
871 decoder.mbc += mbaCount;
872 }
873
874 case 4:
875 if (!GETWORD())
876 {
877 ipu_cmd.pos[1] = 4;
878 return false;
879 }
880
881 break;
882
883 jNO_DEFAULT;
884 }
885
// Macroblock finished: restart the per-macroblock stages for the next one.
886 ipu_cmd.pos[1] = 0;
887 ipu_cmd.pos[2] = 0;
888 }
889
// Reached only through the goto in the address-increment parser above.
890 finish_idec:
891 finishmpeg2sliceIDEC();
892
893 case 3:
894 {
895 u8 bit8;
896 if (!getBits8((u8*)&bit8, 0))
897 {
898 ipu_cmd.pos[0] = 3;
899 return false;
900 }
901
// A zero byte aligns the bit pointer and sets the SCD flag.
902 if (bit8 == 0)
903 {
904 g_BP.Align();
905 ipuRegs.ctrl.SCD = 1;
906 }
907 }
908
909 case 4:
910 if (!getBits32((u8*)&ipuRegs.top, 0))
911 {
912 ipu_cmd.pos[0] = 4;
913 return false;
914 }
915
916 ipuRegs.top = BigEndian(ipuRegs.top);
917 break;
918
919 jNO_DEFAULT;
920 }
921
922 return true;
923 }
924
// Decodes a single macroblock into decoder.mb16 and pushes it to the IPU output FIFO.
//
// The function is resumable: whenever a helper reports that it cannot proceed, the
// current stage is stored in ipu_cmd.pos[] and false is returned; calling again
// continues from that stage. Returns true when the whole command has completed.
//
//   ipu_cmd.pos[0] - outer stage: 0 reset state, 1 fill the bitstream, 2 decode the
//                    blocks, 3 write to the output FIFO, 4 read 8 bits and test them
//                    for zero, 5 read the 32 bits stored in ipuRegs.top.
//   ipu_cmd.pos[1] - block currently being decoded (1..6); 0 means none started.
//
// decoder.macroblock_modes is read here but not assigned anywhere in this function.
// Most cases deliberately fall through into the next stage. Unlike mpeg2sliceIDEC,
// the outer switch has no jNO_DEFAULT.
925 bool mpeg2_slice()
926 {
927 int DCT_offset, DCT_stride;
928
929 macroblock_8& mb8 = decoder.mb8;
930 macroblock_16& mb16 = decoder.mb16;
931
932 switch (ipu_cmd.pos[0])
933 {
934 case 0:
// The DC predictors are reset only when decoder.dcr is set.
935 if (decoder.dcr)
936 {
937 decoder.dc_dct_pred[0] =
938 decoder.dc_dct_pred[1] =
939 decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
940 }
941
942 ipuRegs.ctrl.ECD = 0;
943 ipuRegs.top = 0;
944 memzero_sse_a(mb8);
945 memzero_sse_a(mb16);
946 case 1:
947 if (!bitstream_init())
948 {
949 ipu_cmd.pos[0] = 1;
950 return false;
951 }
952
953 case 2:
954 ipu_cmd.pos[0] = 2;
955
// The offset/stride pair is recomputed on every entry, including resumes, because
// both are locals of this function.
956 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
957 {
958 DCT_offset = decoder_stride;
959 DCT_stride = decoder_stride * 2;
960 }
961 else
962 {
963 DCT_offset = decoder_stride * 8;
964 DCT_stride = decoder_stride;
965 }
966
967 if (decoder.macroblock_modes & MACROBLOCK_INTRA)
968 {
// Intra: all six blocks are decoded into mb8. The last argument tells
// slice_intra_DCT whether this particular block is the one being resumed.
969 switch(ipu_cmd.pos[1])
970 {
971 case 0:
972 decoder.coded_block_pattern = 0x3F;
973 case 1:
974 if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1))
975 {
976 ipu_cmd.pos[1] = 1;
977 return false;
978 }
979 case 2:
980 if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
981 {
982 ipu_cmd.pos[1] = 2;
983 return false;
984 }
985 case 3:
986 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
987 {
988 ipu_cmd.pos[1] = 3;
989 return false;
990 }
991 case 4:
992 if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
993 {
994 ipu_cmd.pos[1] = 4;
995 return false;
996 }
997 case 5:
998 if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
999 {
1000 ipu_cmd.pos[1] = 5;
1001 return false;
1002 }
1003 case 6:
1004 if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1005 {
1006 ipu_cmd.pos[1] = 6;
1007 return false;
1008 }
1009 break;
1010
1011 jNO_DEFAULT;
1012 }
1013
1014 // Copy macroblock8 to macroblock16 - without sign extension.
1015 // Manually inlined due to MSVC refusing to inline the SSE-optimized version.
1016 {
1017 const u8 *s = (const u8*)&mb8;
1018 u16 *d = (u16*)&mb16;
1019
1020 //Y bias - 16 * 16
1021 //Cr bias - 8 * 8
1022 //Cb bias - 8 * 8
1023
1024 __m128i zeroreg = _mm_setzero_si128();
1025
// Each iteration widens 32 source bytes to 32 u16 values by interleaving them with
// zero bytes; (256+64+64)/32 = 12 iterations cover the 384 bytes of Y, Cb and Cr.
1026 for (uint i = 0; i < (256+64+64) / 32; ++i)
1027 {
1028 //*d++ = *s++;
1029 __m128i woot1 = _mm_load_si128((__m128i*)s);
1030 __m128i woot2 = _mm_load_si128((__m128i*)s+1);
1031 _mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg));
1032 _mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg));
1033 _mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg));
1034 _mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg));
1035 s += 32;
1036 d += 32;
1037 }
1038 }
1039 }
1040 else
1041 {
1042 if (decoder.macroblock_modes & MACROBLOCK_PATTERN)
1043 {
// Non-intra: a block is decoded only when its bit is set in the coded block
// pattern (0x20 first luma block ... 0x04 fourth, 0x2 Cb, 0x1 Cr).
1044 switch(ipu_cmd.pos[1])
1045 {
1046 case 0:
1047 decoder.coded_block_pattern = get_coded_block_pattern(); // max 9bits
1048 case 1:
1049 if (decoder.coded_block_pattern & 0x20)
1050 {
1051 if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1))
1052 {
1053 ipu_cmd.pos[1] = 1;
1054 return false;
1055 }
1056 }
1057 case 2:
1058 if (decoder.coded_block_pattern & 0x10)
1059 {
1060 if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
1061 {
1062 ipu_cmd.pos[1] = 2;
1063 return false;
1064 }
1065 }
1066 case 3:
1067 if (decoder.coded_block_pattern & 0x08)
1068 {
1069 if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
1070 {
1071 ipu_cmd.pos[1] = 3;
1072 return false;
1073 }
1074 }
1075 case 4:
1076 if (decoder.coded_block_pattern & 0x04)
1077 {
1078 if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
1079 {
1080 ipu_cmd.pos[1] = 4;
1081 return false;
1082 }
1083 }
1084 case 5:
1085 if (decoder.coded_block_pattern & 0x2)
1086 {
1087 if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
1088 {
1089 ipu_cmd.pos[1] = 5;
1090 return false;
1091 }
1092 }
1093 case 6:
1094 if (decoder.coded_block_pattern & 0x1)
1095 {
1096 if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1097 {
1098 ipu_cmd.pos[1] = 6;
1099 return false;
1100 }
1101 }
1102 break;
1103
1104 jNO_DEFAULT;
1105 }
1106 }
1107 }
1108
1109 // Send The MacroBlock via DmaIpuFrom
1110 ipuRegs.ctrl.SCD = 0;
1111 coded_block_pattern = decoder.coded_block_pattern;
1112
1113 decoder.mbc = 1;
1114 decoder.SetOutputTo(mb16);
1115
1116 case 3:
1117 {
1118 pxAssume(decoder.ipu0_data > 0);
1119
1120 uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
1121 decoder.AdvanceIpuDataBy(read);
1122
1123 if (decoder.ipu0_data != 0)
1124 {
1125 // IPU FIFO filled up -- Will have to finish transferring later.
1126 ipu_cmd.pos[0] = 3;
1127 return false;
1128 }
1129
1130 decoder.mbc++;
1131 mbaCount = 0;
1132 }
1133
1134 case 4:
1135 {
1136 u8 bit8;
1137 if (!getBits8((u8*)&bit8, 0))
1138 {
1139 ipu_cmd.pos[0] = 4;
1140 return false;
1141 }
1142
// A zero byte aligns the bit pointer and sets the SCD flag.
1143 if (bit8 == 0)
1144 {
1145 g_BP.Align();
1146 ipuRegs.ctrl.SCD = 1;
1147 }
1148 }
1149
1150 case 5:
1151 if (!getBits32((u8*)&ipuRegs.top, 0))
1152 {
1153 ipu_cmd.pos[0] = 5;
1154 return false;
1155 }
1156
1157 ipuRegs.top = BigEndian(ipuRegs.top);
1158 break;
1159 }
1160
1161 return true;
1162 }

  ViewVC Help
Powered by ViewVC 1.1.22