/[pcsx2_0.9.7]/trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp
ViewVC logotype

Annotation of /trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 191 - (hide annotations) (download)
Mon Sep 20 05:35:51 2010 UTC (10 years ago) by william
File size: 22807 byte(s)
Auto Commited Import of: pcsx2-0.9.7-DEBUG (upstream: v0.9.7.3795 local: v0.9.7.186-latest) in ./trunk
1 william 31 /*
2     * Mpeg.c
3     * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
4     * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
5     * Modified by Florin for PCSX2 emu
6     *
7     * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
8     * See http://libmpeg2.sourceforge.net/ for updates.
9     *
10     * mpeg2dec is free software; you can redistribute it and/or modify
11     * it under the terms of the GNU General Public License as published by
12     * the Free Software Foundation; either version 2 of the License, or
13     * (at your option) any later version.
14     *
15     * mpeg2dec is distributed in the hope that it will be useful,
16     * but WITHOUT ANY WARRANTY; without even the implied warranty of
17     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18     * GNU General Public License for more details.
19     *
20     * You should have received a copy of the GNU General Public License
21     * along with this program; if not, write to the Free Software
22     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23     */
24    
25     // [Air] Note: many functions in this module are large and only used once, so they
26     // have been forced to inline since it won't bloat the program and gets rid of
27     // some call overhead.
28    
29     #include "PrecompiledHeader.h"
30    
31     #include "Common.h"
32     #include "IPU/IPU.h"
33     #include "Mpeg.h"
34     #include "Vlc.h"
35    
// Quantizer scale lookup used by get_quantizer_scale() when
// decoder.q_scale_type is set; indexed by the 5-bit quantizer_scale_code.
const int non_linear_quantizer_scale[] =
{
	// codes 0-7: consecutive values
	 0,  1,  2,  3,  4,  5,   6,   7,
	// codes 8-15: steps of 2
	 8, 10, 12, 14, 16, 18,  20,  22,
	// codes 16-23: steps of 4
	24, 28, 32, 36, 40, 44,  48,  52,
	// codes 24-31: steps of 8
	56, 64, 72, 80, 88, 96, 104, 112
};
43    
44     /* The bitstream and buffer need to be reallocated for the old data to be
45     read successfully. Here the old data stored in the 2nd slot
46     of the internal buffer is copied to the 1st slot, and the new data read
47     into the 1st slot is copied to the 2nd slot, which will later be copied
48     back to the 1st slot when 128 bits have been read.
49     */
50 william 62 const DCTtab * tab;
51     int mbaCount = 0;
52 william 31
53 william 191 int bitstream_init ()
54     {
55     return g_BP.FillBuffer(32);
56     }
57    
58 william 62 int get_macroblock_modes()
59 william 31 {
60     int macroblock_modes;
61     const MBtab * tab;
62    
63 william 62 switch (decoder.coding_type)
64 william 31 {
65     case I_TYPE:
66 william 62 macroblock_modes = UBITS(2);
67 william 31
68     if (macroblock_modes == 0) return 0; // error
69    
70     tab = MB_I + (macroblock_modes >> 1);
71 william 62 DUMPBITS(tab->len);
72 william 31 macroblock_modes = tab->modes;
73    
74 william 62 if ((!(decoder.frame_pred_frame_dct)) &&
75     (decoder.picture_structure == FRAME_PICTURE))
76 william 31 {
77 william 62 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
78 william 31 }
79     return macroblock_modes;
80    
81     case P_TYPE:
82 william 62 macroblock_modes = UBITS(6);
83 william 31
84     if (macroblock_modes == 0) return 0; // error
85    
86     tab = MB_P + (macroblock_modes >> 1);
87 william 62 DUMPBITS(tab->len);
88 william 31 macroblock_modes = tab->modes;
89    
90 william 62 if (decoder.picture_structure != FRAME_PICTURE)
91 william 31 {
92     if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
93     {
94 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
95 william 31 }
96    
97     return macroblock_modes;
98     }
99 william 62 else if (decoder.frame_pred_frame_dct)
100 william 31 {
101     if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
102     macroblock_modes |= MC_FRAME;
103    
104     return macroblock_modes;
105     }
106     else
107     {
108     if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
109     {
110 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
111 william 31 }
112    
113     if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
114     {
115 william 62 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
116 william 31 }
117    
118     return macroblock_modes;
119     }
120    
121     case B_TYPE:
122 william 62 macroblock_modes = UBITS(6);
123 william 31
124     if (macroblock_modes == 0) return 0; // error
125    
126     tab = MB_B + macroblock_modes;
127 william 62 DUMPBITS(tab->len);
128 william 31 macroblock_modes = tab->modes;
129    
130 william 62 if (decoder.picture_structure != FRAME_PICTURE)
131 william 31 {
132     if (!(macroblock_modes & MACROBLOCK_INTRA))
133     {
134 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
135 william 31 }
136    
137     return macroblock_modes;
138     }
139 william 62 else if (decoder.frame_pred_frame_dct)
140 william 31 {
141     /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
142     macroblock_modes |= MC_FRAME;
143     return macroblock_modes;
144     }
145     else
146     {
147     if (macroblock_modes & MACROBLOCK_INTRA) goto intra;
148    
149 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
150 william 31
151     if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
152     {
153     intra:
154 william 62 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
155 william 31 }
156    
157     return macroblock_modes;
158     }
159    
160     case D_TYPE:
161 william 62 macroblock_modes = GETBITS(1);
162 william 31
163     if (macroblock_modes == 0) return 0; // error
164     return MACROBLOCK_INTRA;
165    
166     default:
167     return 0;
168     }
169     }
170    
171 william 62 static __fi int get_quantizer_scale()
172 william 31 {
173     int quantizer_scale_code;
174    
175 william 62 quantizer_scale_code = GETBITS(5);
176 william 31
177 william 62 if (decoder.q_scale_type)
178 william 31 return non_linear_quantizer_scale [quantizer_scale_code];
179     else
180     return quantizer_scale_code << 1;
181     }
182    
183 william 62 static __fi int get_coded_block_pattern()
184 william 31 {
185     const CBPtab * tab;
186 william 62 u16 code = UBITS(16);
187 william 31
188 william 62 if (code >= 0x2000)
189     tab = CBP_7 + (UBITS(7) - 16);
190 william 31 else
191 william 62 tab = CBP_9 + UBITS(9);
192 william 31
193 william 62 DUMPBITS(tab->len);
194 william 31 return tab->cbp;
195     }
196    
197 william 62 int __fi get_motion_delta(const int f_code)
198 william 31 {
199 william 62 int delta;
200     int sign;
201     const MVtab * tab;
202     u16 code = UBITS(16);
203 william 31
204 william 62 if ((code & 0x8000))
205 william 31 {
206 william 62 DUMPBITS(1);
207     return 0x00010000;
208 william 31 }
209 william 62 else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00))
210     {
211     tab = MV_4 + UBITS(4);
212     }
213     else
214     {
215     tab = MV_10 + UBITS(10);
216     }
217 william 31
218 william 62 delta = tab->delta + 1;
219     DUMPBITS(tab->len);
220 william 31
221 william 62 sign = SBITS(1);
222     DUMPBITS(1);
223     return (delta ^ sign) - sign;
224 william 31 }
225    
226 william 62 int __fi get_dmv()
227 william 31 {
228 william 191 const DMVtab* tab = DMV_2 + UBITS(2);
229 william 62 DUMPBITS(tab->len);
230     return tab->dmv;
231 william 31 }
232    
233 william 62 int get_macroblock_address_increment()
234 william 31 {
235 william 62 const MBAtab *mba;
236    
237     u16 code = UBITS(16);
238 william 31
239 william 62 if (code >= 4096)
240     mba = MBA.mba5 + (UBITS(5) - 2);
241     else if (code >= 768)
242     mba = MBA.mba11 + (UBITS(11) - 24);
243     else switch (UBITS(11))
244 william 191 {
245     case 8: /* macroblock_escape */
246     DUMPBITS(11);
247     return 0x23;
248 william 31
249 william 191 case 15: /* macroblock_stuffing (MPEG1 only) */
250     if (decoder.mpeg1)
251     {
252 william 62 DUMPBITS(11);
253 william 191 return 0x22;
254     }
255 william 31
256 william 191 default:
257     return 0;//error
258     }
259 william 31
260 william 62 DUMPBITS(mba->len);
261 william 31
262 william 62 return mba->mba + 1;
263     }
264 william 31
265 william 62 static __fi int get_luma_dc_dct_diff()
266     {
267     int size;
268     int dc_diff;
269     u16 code = UBITS(5);
270 william 31
271 william 62 if (code < 31)
272     {
273     size = DCtable.lum0[code].size;
274     DUMPBITS(DCtable.lum0[code].len);
275 william 31
276 william 62 // 5 bits max
277     }
278     else
279     {
280     code = UBITS(9) - 0x1f0;
281     size = DCtable.lum1[code].size;
282     DUMPBITS(DCtable.lum1[code].len);
283 william 31
284 william 62 // 9 bits max
285     }
286    
287     if (size==0)
288     dc_diff = 0;
289     else
290     {
291     dc_diff = GETBITS(size);
292 william 31
293 william 62 // 6 for tab0 and 11 for tab1
294     if ((dc_diff & (1<<(size-1)))==0)
295     dc_diff-= (1<<size) - 1;
296     }
297 william 31
298 william 62 return dc_diff;
299     }
300 william 31
301 william 62 static __fi int get_chroma_dc_dct_diff()
302     {
303     int size;
304     int dc_diff;
305     u16 code = UBITS(5);
306 william 31
307 william 62 if (code<31)
308     {
309     size = DCtable.chrom0[code].size;
310     DUMPBITS(DCtable.chrom0[code].len);
311     }
312     else
313     {
314     code = UBITS(10) - 0x3e0;
315     size = DCtable.chrom1[code].size;
316     DUMPBITS(DCtable.chrom1[code].len);
317     }
318    
319     if (size==0)
320     dc_diff = 0;
321     else
322     {
323     dc_diff = GETBITS(size);
324 william 31
325 william 62 if ((dc_diff & (1<<(size-1)))==0)
326 william 31 {
327 william 62 dc_diff-= (1<<size) - 1;
328 william 31 }
329     }
330 william 62
331     return dc_diff;
332     }
333 william 31
// Clamps the lvalue `val` to the signed 12-bit range [-2048, 2047] in place.
// The unsigned compare catches both out-of-range directions at once; the
// sign mask (val >> 31) xor 2047 yields 2047 for positive overflow and -2048
// for negative overflow.
// `val` is evaluated more than once, so it must not have side effects. Every
// use of the parameter is parenthesized so that lvalue expressions containing
// low-precedence operators expand correctly.
#define SATURATE(val) \
	do { \
		if ((u32)((val) + 2048) > 4095) \
			(val) = (((s32)(val)) >> 31) ^ 2047; \
	} while (0)
339 william 31
// Decodes the AC coefficients of one intra block into decoder.DCTblock,
// dequantizing each level with decoder.quantizer_scale and decoder.iq.
//
// The function is resumable. Each coefficient is handled in two stages that
// are selected by ipu_cmd.pos[5]:
//   stage 0 - look up the run/level VLC entry (stored in the file-scope `tab`)
//             and advance the scan index by the run;
//   stage 1 - read the level/sign, dequantize, saturate and store it.
// Whenever GETWORD() fails, the current index is saved in ipu_cmd.pos[4]
// (as i - 1, because the loop restarts from 1 + pos[4]) and false is returned
// so the caller can retry later; stage 1 additionally records pos[5] = 1.
//
// Returns true when the block is complete (end-of-block entry, scan index
// reaching 64, or a 16-bit code below 16 that matches no table); false when
// decoding was suspended.
340 william 191 static bool get_intra_block()
341 william 31 {
342 william 62 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
343     const u8 (&quant_matrix)[64] = decoder.iq;
344     int quantizer_scale = decoder.quantizer_scale;
345     s16 * dest = decoder.DCTblock;
346     u16 code;
347 william 31
348 william 62 /* decode AC coefficients */
// Index 0 is skipped here; slice_intra_DCT() writes DCTblock[0] itself.
349 william 191 for (int i=1 + ipu_cmd.pos[4]; ; i++)
350 william 62 {
351     switch (ipu_cmd.pos[5])
352     {
// Stage 0: pick the VLC table from the magnitude of the next 16 bits.
353     case 0:
354     if (!GETWORD())
355     {
356     ipu_cmd.pos[4] = i - 1;
357     return false;
358     }
359 william 31
360 william 62 code = UBITS(16);
361 william 31
// The alternate tables (tab0a / tab1a) are used only when intra_vlc_format
// is set and the stream is not MPEG-1.
362 william 62 if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
363 william 31 {
364 william 62 tab = &DCT.next[(code >> 12) - 4];
365     }
366     else if (code >= 1024)
367     {
368     if (decoder.intra_vlc_format && !decoder.mpeg1)
369 william 31 {
370 william 62 tab = &DCT.tab0a[(code >> 8) - 4];
371 william 31 }
372     else
373     {
374 william 62 tab = &DCT.tab0[(code >> 8) - 4];
375 william 31 }
376     }
377 william 62 else if (code >= 512)
378 william 31 {
379 william 62 if (decoder.intra_vlc_format && !decoder.mpeg1)
380     {
381     tab = &DCT.tab1a[(code >> 6) - 8];
382     }
383     else
384     {
385     tab = &DCT.tab1[(code >> 6) - 8];
386     }
387 william 31 }
388    
389 william 62 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
390     // that should use a single unrolled DCT table instead of five separate tables used
391     // here. Multiple conditional statements are very slow, while modern CPU data caches
392     // have lots of room to spare.
393    
394     else if (code >= 256)
395 william 31 {
396 william 62 tab = &DCT.tab2[(code >> 4) - 16];
397 william 31 }
398 william 62 else if (code >= 128)
399     {
400     tab = &DCT.tab3[(code >> 3) - 16];
401     }
402     else if (code >= 64)
403     {
404     tab = &DCT.tab4[(code >> 2) - 16];
405     }
406     else if (code >= 32)
407     {
408     tab = &DCT.tab5[(code >> 1) - 16];
409     }
410     else if (code >= 16)
411     {
412     tab = &DCT.tab6[code - 16];
413     }
414 william 31 else
415     {
// No table covers codes below 16: finish the block without consuming bits.
416 william 62 ipu_cmd.pos[4] = 0;
417     return true;
418 william 31 }
419    
420 william 62 DUMPBITS(tab->len);
421 william 31
422 william 62 if (tab->run==64) /* end_of_block */
423 william 31 {
424 william 62 ipu_cmd.pos[4] = 0;
425     return true;
426 william 31 }
427 william 62
// run == 65 marks an escape: the real run length follows as 6 explicit bits.
428 william 191 i += (tab->run == 65) ? GETBITS(6) : tab->run;
429 william 62 if (i >= 64)
430 william 31 {
431 william 62 ipu_cmd.pos[4] = 0;
432     return true;
433 william 31 }
434 william 191
// No break: stage 0 deliberately falls through into stage 1.
435 william 62 case 1:
436 william 191 {
437     if (!GETWORD())
438     {
439     ipu_cmd.pos[4] = i - 1;
440     ipu_cmd.pos[5] = 1;
441     return false;
442     }
443 william 31
// j is the storage position for scan index i; note that the quantizer
// matrix below is indexed with i, while the result is stored at j.
444 william 191 uint j = scan[i];
445     int val;
446 william 62
447 william 191 if (tab->run==65) /* escape */
448     {
449     if(!decoder.mpeg1)
450     {
451     val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
452     DUMPBITS(12);
453     }
454     else
455     {
456     val = SBITS(8);
457     DUMPBITS(8);
458 william 31
// Low seven bits clear: the level continues in a further 8 bits.
459 william 191 if (!(val & 0x7f))
460     {
461     val = GETBITS(8) + 2 * val;
462     }
463 william 31
464 william 191 val = (val * quantizer_scale * quant_matrix[i]) >> 4;
// Oddification: (val - 1) | 1 for non-negative val, val | 1 for negative val.
465     val = (val + ~ (((s32)val) >> 31)) | 1;
466     }
467     }
468     else
469     {
470     val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
471     if(decoder.mpeg1)
472     {
473     /* oddification */
474     val = (val - 1) | 1;
475     }
476    
477     /* if (bitstream_get (1)) val = -val; */
478     int bit1 = SBITS(1);
479     val = (val ^ bit1) - bit1;
480     DUMPBITS(1);
481     }
482    
483     SATURATE(val);
484     dest[j] = val;
// Coefficient stored: the next loop iteration starts again at stage 0.
485     ipu_cmd.pos[5] = 0;
486 william 31 }
487 william 62 }
488     }
489 william 31
// Not reached: the loop above has no condition and is only left via return.
490 william 62 ipu_cmd.pos[4] = 0;
491     return true;
492 william 31 }
493    
// Decodes the coefficients of one non-intra block into decoder.DCTblock,
// dequantizing each level with decoder.quantizer_scale and decoder.niq.
//
// Like get_intra_block() this is resumable: ipu_cmd.pos[5] selects the stage
// (0 = VLC lookup into the file-scope `tab`, 1 = level decode and store) and
// ipu_cmd.pos[4] holds the scan index to resume from. When GETWORD() fails
// the state is saved and false is returned.
//
// last: receives the scan index at which decoding stopped. It is written
//       only on the end-of-block and i >= 64 exits; the exit taken for a
//       16-bit code below 16 returns true WITHOUT writing *last.
//
// Returns true when the block is complete, false when decoding was suspended.
494 william 191 static bool get_non_intra_block(int * last)
495 william 31 {
496     int i;
497     int j;
498     int val;
499 william 62 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
500     const u8 (&quant_matrix)[64] = decoder.niq;
501     int quantizer_scale = decoder.quantizer_scale;
502     s16 * dest = decoder.DCTblock;
503     u16 code;
504 william 31
505 william 62 /* decode AC coefficients */
// Unlike the intra variant, the scan starts at index 0 (pos[4] is 0 on a fresh block).
506     for (i= ipu_cmd.pos[4] ; ; i++)
507     {
508     switch (ipu_cmd.pos[5])
509 william 31 {
// Stage 0: pick the VLC table from the magnitude of the next 16 bits.
510 william 62 case 0:
511     if (!GETWORD())
512     {
513     ipu_cmd.pos[4] = i;
514     return false;
515     }
516 william 31
517 william 62 code = UBITS(16);
518 william 31
519 william 62 if (code >= 16384)
520     {
// The very first coefficient of the block uses its own table.
521     if (i==0)
522     {
523     tab = &DCT.first[(code >> 12) - 4];
524     }
525     else
526     {
527     tab = &DCT.next[(code >> 12)- 4];
528     }
529     }
530     else if (code >= 1024)
531     {
532     tab = &DCT.tab0[(code >> 8) - 4];
533     }
534     else if (code >= 512)
535     {
536     tab = &DCT.tab1[(code >> 6) - 8];
537     }
538 william 31
539 william 62 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
540     // that should use a single unrolled DCT table instead of five separate tables used
541     // here. Multiple conditional statements are very slow, while modern CPU data caches
542     // have lots of room to spare.
543 william 31
544 william 62 else if (code >= 256)
545     {
546     tab = &DCT.tab2[(code >> 4) - 16];
547     }
548     else if (code >= 128)
549     {
550     tab = &DCT.tab3[(code >> 3) - 16];
551     }
552     else if (code >= 64)
553     {
554     tab = &DCT.tab4[(code >> 2) - 16];
555     }
556     else if (code >= 32)
557     {
558     tab = &DCT.tab5[(code >> 1) - 16];
559     }
560     else if (code >= 16)
561     {
562     tab = &DCT.tab6[code - 16];
563     }
564     else
565     {
// No table covers codes below 16: finish the block. *last is left untouched here.
566     ipu_cmd.pos[4] = 0;
567     return true;
568     }
569 william 31
570 william 62 DUMPBITS(tab->len);
571 william 31
572 william 62 if (tab->run==64) /* end_of_block */
573     {
574     *last = i;
575     ipu_cmd.pos[4] = 0;
576     return true;
577     }
578 william 31
// run == 65 marks an escape: the real run length follows as 6 explicit bits.
579 william 62 i += (tab->run == 65) ? GETBITS(6) : tab->run;
580     if (i >= 64)
581     {
582     *last = i;
583     ipu_cmd.pos[4] = 0;
584     return true;
585     }
586 william 31
// No break: stage 0 deliberately falls through into stage 1.
587 william 62 case 1:
588     if (!GETWORD())
589     {
590     ipu_cmd.pos[4] = i;
591     ipu_cmd.pos[5] = 1;
592     return false;
593     }
594 william 31
// j is the storage position for scan index i; the quantizer matrix below
// is indexed with i, while the result is stored at j.
595 william 62 j = scan[i];
596 william 31
597 william 62 if (tab->run==65) /* escape */
598     {
599     if (!decoder.mpeg1)
600     {
601     val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5;
602     DUMPBITS(12);
603     }
604     else
605     {
606     val = SBITS(8);
607     DUMPBITS(8);
608 william 31
// Low seven bits clear: the level continues in a further 8 bits.
609 william 62 if (!(val & 0x7f))
610     {
611     val = GETBITS(8) + 2 * val;
612     }
613 william 31
614 william 62 val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32;
// Oddification: (val - 1) | 1 for non-negative val, val | 1 for negative val.
615     val = (val + ~ (((s32)val) >> 31)) | 1;
616     }
617     }
618     else
619 william 31 {
// The sign bit is sampled before the magnitude is computed and only
// dumped afterwards; val is negated when bit1 is non-zero.
620 william 191 int bit1 = SBITS(1);
621 william 62 val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
622 william 191 val = (val ^ bit1) - bit1;
623 william 62 DUMPBITS(1);
624 william 31 }
625    
626     SATURATE(val);
627     dest[j] = val;
// Coefficient stored: the next loop iteration starts again at stage 0.
628 william 62 ipu_cmd.pos[5] = 0;
629 william 31 }
630     }
631    
// Not reached: the loop above has no condition and is only left via return.
632 william 62 ipu_cmd.pos[4] = 0;
633     return true;
634 william 31 }
635    
636 william 62 static __fi bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
637 william 31 {
638 william 62 if (!skip || ipu_cmd.pos[3])
639 william 31 {
640 william 62 ipu_cmd.pos[3] = 0;
641     if (!GETWORD())
642 william 31 {
643 william 62 ipu_cmd.pos[3] = 1;
644     return false;
645 william 31 }
646    
647 william 62 /* Get the intra DC coefficient and inverse quantize it */
648     if (cc == 0)
649     decoder.dc_dct_pred[0] += get_luma_dc_dct_diff();
650 william 31 else
651 william 62 decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff();
652 william 31
653 william 62 decoder.DCTblock[0] = decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision);
654 william 31 }
655    
656 william 62 if (!get_intra_block())
657 william 31 {
658 william 62 return false;
659 william 31 }
660    
661 william 62 mpeg2_idct_copy(decoder.DCTblock, dest, stride);
662    
663     return true;
664 william 31 }
665    
666 william 62 static __fi bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
667 william 31 {
668     int last;
669    
670 william 62 if (!skip)
671 william 31 {
672 william 62 memzero_sse_a(decoder.DCTblock);
673 william 31 }
674    
675 william 62 if (!get_non_intra_block(&last))
676 william 31 {
677 william 62 return false;
678 william 31 }
679    
680 william 62 mpeg2_idct_add(last, decoder.DCTblock, dest, stride);
681 william 31
682 william 62 return true;
683 william 31 }
684    
685 william 62 void __fi finishmpeg2sliceIDEC()
686 william 31 {
687 william 62 ipuRegs.ctrl.SCD = 0;
688     coded_block_pattern = decoder.coded_block_pattern;
689 william 31 }
690    
// Decodes a run of intra macroblocks (IDEC), converting each to RGB and
// pushing it to the IPU output FIFO.
//
// The function is a resumable state machine. Every `case` below falls
// through into the next one on purpose; whenever input or FIFO space runs
// out, the current stage is stored in ipu_cmd.pos[] and false is returned so
// that a later call continues from that stage:
//   ipu_cmd.pos[0] - outer stage: 0 reset, 1 bitstream init, 2 macroblock
//                    loop, 3 read 8 bits after the slice, 4 read 32 bits into ipuRegs.top
//   ipu_cmd.pos[1] - stage inside the macroblock loop: 0 modes/quantizer,
//                    1 block decode + colour conversion, 2 FIFO output,
//                    3 address increment, 4 wait for input
//   ipu_cmd.pos[2] - which of the six blocks of the macroblock is in progress
//
// Returns true when the slice has been finished, false when suspended.
691 william 62 bool mpeg2sliceIDEC()
692 william 31 {
693 william 62 u16 code;
694 william 31
695 william 62 switch (ipu_cmd.pos[0])
696     {
697     case 0:
// Reset the DC predictors to mid-range for the configured precision.
698     decoder.dc_dct_pred[0] =
699     decoder.dc_dct_pred[1] =
700     decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
701 william 31
702 william 62 decoder.mbc = 0;
703     ipuRegs.top = 0;
704     ipuRegs.ctrl.ECD = 0;
705 william 31
706 william 62 case 1:
// The stage is recorded before the call, so a failed init simply returns.
707     ipu_cmd.pos[0] = 1;
708     if (!bitstream_init())
709     {
710     return false;
711     }
712 william 31
713 william 62 case 2:
714     ipu_cmd.pos[0] = 2;
// One iteration per macroblock; left only via return or `goto finish_idec`.
715 william 31 while (1)
716     {
717 william 62 macroblock_8& mb8 = decoder.mb8;
718     macroblock_rgb16& rgb16 = decoder.rgb16;
719     macroblock_rgb32& rgb32 = decoder.rgb32;
720    
721 william 31 int DCT_offset, DCT_stride;
722     const MBAtab * mba;
723    
724 william 62 switch (ipu_cmd.pos[1])
725 william 31 {
726 william 62 case 0:
727     decoder.macroblock_modes = get_macroblock_modes();
728 william 31
729 william 62 if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC
730     {
731     decoder.quantizer_scale = get_quantizer_scale();
732     }
733 william 31
734 william 62 decoder.coded_block_pattern = 0x3F;//all 6 blocks
735     memzero_sse_a(mb8);
736     memzero_sse_a(rgb32);
737 william 31
738 william 62 case 1:
739     ipu_cmd.pos[1] = 1;
740 william 31
// Interlaced DCT: the two luma block rows are interleaved line by line
// (offset of one line, double stride); otherwise they sit 8 lines apart.
741 william 62 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
742 william 31 {
743 william 62 DCT_offset = decoder_stride;
744     DCT_stride = decoder_stride * 2;
745 william 31 }
746     else
747     {
748 william 62 DCT_offset = decoder_stride * 8;
749     DCT_stride = decoder_stride;
750     }
751 william 31
// Six blocks: four luma, then Cb, then Cr. The last argument tells
// slice_intra_DCT whether this call resumes the block that was suspended
// (pos[2] still equals that block's number).
752 william 62 switch (ipu_cmd.pos[2])
753     {
754     case 0:
755     case 1:
756     if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1))
757     {
758     ipu_cmd.pos[2] = 1;
759     return false;
760     }
761     case 2:
762     if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
763     {
764     ipu_cmd.pos[2] = 2;
765     return false;
766     }
767     case 3:
768     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
769     {
770     ipu_cmd.pos[2] = 3;
771     return false;
772     }
773     case 4:
774     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
775     {
776     ipu_cmd.pos[2] = 4;
777     return false;
778     }
779     case 5:
780     if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5))
781     {
782     ipu_cmd.pos[2] = 5;
783     return false;
784     }
785     case 6:
786     if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6))
787     {
788     ipu_cmd.pos[2] = 6;
789     return false;
790     }
791 william 191 break;
792    
// jNO_DEFAULT is a project macro; presumably it marks the default case as
// unreachable -- confirm against its definition.
793     jNO_DEFAULT;
794 william 31 }
795    
796 william 62 // Send The MacroBlock via DmaIpuFrom
797     ipu_csc(mb8, rgb32, decoder.sgn);
798    
// ofm == 0 outputs the 32-bit RGB macroblock; otherwise it is dithered
// down to the 16-bit form first.
799     if (decoder.ofm == 0)
800     decoder.SetOutputTo(rgb32);
801     else
802 william 31 {
803 william 62 ipu_dither(rgb32, rgb16, decoder.dte);
804     decoder.SetOutputTo(rgb16);
805     }
806 william 31
807 william 62 case 2:
808 william 191 {
809     pxAssume(decoder.ipu0_data > 0);
810    
811     uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
812     decoder.AdvanceIpuDataBy(read);
813    
814     if (decoder.ipu0_data != 0)
815 william 62 {
816 william 191 // IPU FIFO filled up -- Will have to finish transferring later.
817     ipu_cmd.pos[1] = 2;
818     return false;
819 william 31 }
820    
821 william 62 decoder.mbc++;
822     mbaCount = 0;
823 william 191 }
824    
825 william 62 case 3:
// Parse the macroblock address increment that precedes the next
// macroblock, accumulating escapes into mbaCount.
826     while (1)
827 william 31 {
828 william 62 if (!GETWORD())
829 william 31 {
830 william 62 ipu_cmd.pos[1] = 3;
831     return false;
832     }
833 william 31
834 william 62 code = UBITS(16);
835     if (code >= 0x1000)
836     {
837     mba = MBA.mba5 + (UBITS(5) - 2);
838     break;
839     }
840     else if (code >= 0x0300)
841     {
842     mba = MBA.mba11 + (UBITS(11) - 24);
843     break;
844     }
845     else switch (UBITS(11))
846     {
847 william 191 case 8: /* macroblock_escape */
848     mbaCount += 33;
849     /* pass through */
850 william 31
851 william 191 case 15: /* macroblock_stuffing (MPEG1 only) */
// Unlike get_macroblock_address_increment(), decoder.mpeg1 is not checked here.
852     DUMPBITS(11);
853     continue;
854 william 31
855 william 191 default: /* end of slice/frame, or error? */
856     {
857     goto finish_idec;
858     }
859 william 31 }
860 william 62 }
861 william 31
862 william 62 DUMPBITS(mba->len);
863     mbaCount += mba->mba;
864 william 31
// A non-zero increment means macroblocks were skipped: reset the DC
// predictors and account for the skipped macroblocks.
865 william 62 if (mbaCount)
866     {
867     decoder.dc_dct_pred[0] =
868     decoder.dc_dct_pred[1] =
869     decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
870 william 31
871 william 62 decoder.mbc += mbaCount;
872     }
873    
874     case 4:
875     if (!GETWORD())
876 william 31 {
877 william 62 ipu_cmd.pos[1] = 4;
878     return false;
879 william 31 }
880 william 62
881     break;
882 william 191
883     jNO_DEFAULT;
884 william 31 }
885 william 62
// Macroblock done: restart the per-macroblock stages for the next one.
886     ipu_cmd.pos[1] = 0;
887     ipu_cmd.pos[2] = 0;
888 william 31 }
889 william 191
// Reached only through the goto above.
890 william 62 finish_idec:
891     finishmpeg2sliceIDEC();
892 william 31
893 william 62 case 3:
894 william 191 {
895     u8 bit8;
896 william 62 if (!getBits8((u8*)&bit8, 0))
897     {
898     ipu_cmd.pos[0] = 3;
899     return false;
900     }
901 william 31
// A zero byte triggers alignment and sets the SCD flag; presumably it is
// the start of a start code -- confirm against the IPU documentation.
902 william 62 if (bit8 == 0)
903     {
904 william 191 g_BP.Align();
905 william 62 ipuRegs.ctrl.SCD = 1;
906     }
907 william 191 }
908 william 62
909     case 4:
910     if (!getBits32((u8*)&ipuRegs.top, 0))
911     {
912     ipu_cmd.pos[0] = 4;
913     return false;
914     }
915    
916 william 191 ipuRegs.top = BigEndian(ipuRegs.top);
917 william 62 break;
918 william 191
919     jNO_DEFAULT;
920 william 62 }
921    
922     return true;
923 william 31 }
924    
// Decodes a single macroblock (intra or non-intra, according to
// decoder.macroblock_modes) into decoder.mb16 and pushes it to the IPU
// output FIFO.
//
// Like mpeg2sliceIDEC() this is a resumable state machine whose cases fall
// through on purpose. ipu_cmd.pos[0] holds the outer stage (0 reset,
// 1 bitstream init, 2 block decode, 3 FIFO output, 4 read 8 bits,
// 5 read 32 bits into ipuRegs.top) and ipu_cmd.pos[1] the block in progress.
// When input or FIFO space runs out the stage is saved and false is returned.
//
// Returns true when the macroblock has been finished, false when suspended.
925 william 62 bool mpeg2_slice()
926 william 31 {
927     int DCT_offset, DCT_stride;
928    
929 william 62 macroblock_8& mb8 = decoder.mb8;
930     macroblock_16& mb16 = decoder.mb16;
931 william 31
// NOTE(review): unlike the switches in mpeg2sliceIDEC(), this outer switch
// has no jNO_DEFAULT; an unexpected pos[0] skips straight to `return true`.
932 william 62 switch (ipu_cmd.pos[0])
933 william 31 {
934 william 62 case 0:
// The DC predictors are reset only when decoder.dcr is set.
935     if (decoder.dcr)
936 william 31 {
937 william 62 decoder.dc_dct_pred[0] =
938     decoder.dc_dct_pred[1] =
939     decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
940 william 31 }
941 william 62
942     ipuRegs.ctrl.ECD = 0;
943     ipuRegs.top = 0;
944     memzero_sse_a(mb8);
945     memzero_sse_a(mb16);
946     case 1:
947     if (!bitstream_init())
948     {
949     ipu_cmd.pos[0] = 1;
950     return false;
951     }
952 william 31
953 william 62 case 2:
954     ipu_cmd.pos[0] = 2;
955 william 31
// Interlaced DCT: the two luma block rows are interleaved line by line
// (offset of one line, double stride); otherwise they sit 8 lines apart.
956 william 62 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
957 william 31 {
958 william 62 DCT_offset = decoder_stride;
959     DCT_stride = decoder_stride * 2;
960 william 31 }
961     else
962     {
963 william 62 DCT_offset = decoder_stride * 8;
964     DCT_stride = decoder_stride;
965 william 31 }
966    
967 william 62 if (decoder.macroblock_modes & MACROBLOCK_INTRA)
968     {
// Intra: all six blocks are coded. The last argument tells slice_intra_DCT
// whether this call resumes the block that was suspended.
969     switch(ipu_cmd.pos[1])
970     {
971     case 0:
972     decoder.coded_block_pattern = 0x3F;
973     case 1:
974     if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1))
975     {
976     ipu_cmd.pos[1] = 1;
977     return false;
978     }
979     case 2:
980     if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
981     {
982     ipu_cmd.pos[1] = 2;
983     return false;
984     }
985     case 3:
986     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
987     {
988     ipu_cmd.pos[1] = 3;
989     return false;
990     }
991     case 4:
992     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
993     {
994     ipu_cmd.pos[1] = 4;
995     return false;
996     }
997     case 5:
998     if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
999     {
1000     ipu_cmd.pos[1] = 5;
1001     return false;
1002     }
1003     case 6:
1004     if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1005     {
1006     ipu_cmd.pos[1] = 6;
1007     return false;
1008     }
1009     break;
1010 william 191
1011     jNO_DEFAULT;
1012 william 62 }
1013 william 31
1014 william 191 // Copy macroblock8 to macroblock16 - without sign extension.
1015     // Manually inlined due to MSVC refusing to inline the SSE-optimized version.
1016     {
1017     const u8 *s = (const u8*)&mb8;
1018     u16 *d = (u16*)&mb16;
1019    
1020     //Y bias - 16 * 16
1021     //Cr bias - 8 * 8
1022     //Cb bias - 8 * 8
1023    
1024     __m128i zeroreg = _mm_setzero_si128();
1025    
// 12 iterations; each widens 32 source bytes into 32 u16 values by
// interleaving them with zero bytes. The aligned load/store intrinsics
// require mb8 and mb16 to be 16-byte aligned.
1026     for (uint i = 0; i < (256+64+64) / 32; ++i)
1027     {
1028     //*d++ = *s++;
1029     __m128i woot1 = _mm_load_si128((__m128i*)s);
1030     __m128i woot2 = _mm_load_si128((__m128i*)s+1);
1031     _mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg));
1032     _mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg));
1033     _mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg));
1034     _mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg));
1035     s += 32;
1036     d += 32;
1037     }
1038     }
1039 william 62 }
1040     else
1041     {
1042     if (decoder.macroblock_modes & MACROBLOCK_PATTERN)
1043     {
// Non-intra: only the blocks flagged in coded_block_pattern are decoded
// (bit 0x20 = first luma block ... bit 0x01 = Cr).
1044     switch(ipu_cmd.pos[1])
1045     {
1046     case 0:
1047     decoder.coded_block_pattern = get_coded_block_pattern(); // max 9bits
1048     case 1:
1049     if (decoder.coded_block_pattern & 0x20)
1050     {
1051     if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1))
1052     {
1053     ipu_cmd.pos[1] = 1;
1054     return false;
1055     }
1056     }
1057     case 2:
1058     if (decoder.coded_block_pattern & 0x10)
1059     {
1060     if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
1061     {
1062     ipu_cmd.pos[1] = 2;
1063     return false;
1064     }
1065     }
1066     case 3:
1067     if (decoder.coded_block_pattern & 0x08)
1068     {
1069     if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
1070     {
1071     ipu_cmd.pos[1] = 3;
1072     return false;
1073     }
1074     }
1075     case 4:
1076     if (decoder.coded_block_pattern & 0x04)
1077     {
1078     if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
1079     {
1080     ipu_cmd.pos[1] = 4;
1081     return false;
1082     }
1083     }
1084     case 5:
1085     if (decoder.coded_block_pattern & 0x2)
1086     {
1087     if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
1088     {
1089     ipu_cmd.pos[1] = 5;
1090     return false;
1091     }
1092     }
1093     case 6:
1094     if (decoder.coded_block_pattern & 0x1)
1095     {
1096     if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1097     {
1098     ipu_cmd.pos[1] = 6;
1099     return false;
1100     }
1101     }
1102     break;
1103 william 191
1104     jNO_DEFAULT;
1105 william 62 }
1106     }
1107     }
1108 william 31
1109 william 62 // Send The MacroBlock via DmaIpuFrom
1110     ipuRegs.ctrl.SCD = 0;
1111     coded_block_pattern = decoder.coded_block_pattern;
1112 william 31
1113 william 62 decoder.mbc = 1;
1114     decoder.SetOutputTo(mb16);
1115 william 31
1116 william 62 case 3:
1117 william 191 {
1118     pxAssume(decoder.ipu0_data > 0);
1119    
1120     uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
1121     decoder.AdvanceIpuDataBy(read);
1122    
1123     if (decoder.ipu0_data != 0)
1124 william 62 {
1125 william 191 // IPU FIFO filled up -- Will have to finish transferring later.
1126     ipu_cmd.pos[0] = 3;
1127     return false;
1128 william 62 }
1129 william 31
// NOTE(review): mbc was set to 1 just before stage 3 and is incremented
// again here once the transfer completes -- confirm that 2 is intended.
1130 william 191 decoder.mbc++;
1131     mbaCount = 0;
1132     }
1133    
1134 william 62 case 4:
1135 william 191 {
1136     u8 bit8;
1137 william 62 if (!getBits8((u8*)&bit8, 0))
1138 william 31 {
1139 william 62 ipu_cmd.pos[0] = 4;
1140     return false;
1141 william 31 }
1142    
// A zero byte triggers alignment and sets the SCD flag; presumably it is
// the start of a start code -- confirm against the IPU documentation.
1143 william 62 if (bit8 == 0)
1144     {
1145 william 191 g_BP.Align();
1146 william 62 ipuRegs.ctrl.SCD = 1;
1147     }
1148 william 191 }
1149    
1150 william 62 case 5:
1151     if (!getBits32((u8*)&ipuRegs.top, 0))
1152 william 31 {
1153 william 62 ipu_cmd.pos[0] = 5;
1154     return false;
1155 william 31 }
1156    
1157 william 191 ipuRegs.top = BigEndian(ipuRegs.top);
1158 william 62 break;
1159     }
1160 william 31
1161 william 62 return true;
1162     }

  ViewVC Help
Powered by ViewVC 1.1.22