/[pcsx2_0.9.7]/trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp
ViewVC logotype

Annotation of /trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 273 - (hide annotations) (download)
Fri Nov 12 01:10:22 2010 UTC (9 years, 10 months ago) by william
File size: 22712 byte(s)
Auto Commited Import of: pcsx2-0.9.7-DEBUG (upstream: v0.9.7.4013 local: v0.9.7.197-latest) in ./trunk
1 william 31 /*
2     * Mpeg.c
3     * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
4     * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
5     * Modified by Florin for PCSX2 emu
6     *
7     * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
8     * See http://libmpeg2.sourceforge.net/ for updates.
9     *
10     * mpeg2dec is free software; you can redistribute it and/or modify
11     * it under the terms of the GNU General Public License as published by
12     * the Free Software Foundation; either version 2 of the License, or
13     * (at your option) any later version.
14     *
15     * mpeg2dec is distributed in the hope that it will be useful,
16     * but WITHOUT ANY WARRANTY; without even the implied warranty of
17     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18     * GNU General Public License for more details.
19     *
20     * You should have received a copy of the GNU General Public License
21     * along with this program; if not, write to the Free Software
22     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23     */
24    
25     // [Air] Note: many functions in this module are large and only used once, so they
26     // have been forced to inline since it won't bloat the program and gets rid of
27     // some call overhead.
28    
29     #include "PrecompiledHeader.h"
30    
31     #include "Common.h"
32     #include "IPU/IPU.h"
33     #include "Mpeg.h"
34     #include "Vlc.h"
35    
// Lookup table read by get_quantizer_scale() when decoder.q_scale_type is set.
// It is indexed by the 5-bit quantizer scale code taken from the bitstream.
// Within each row of eight entries the step between values is 1, 2, 4 and 8 respectively.
const int non_linear_quantizer_scale[] =
{
	 0,  1,  2,  3,  4,  5,   6,   7,	// codes  0 ..  7
	 8, 10, 12, 14, 16, 18,  20,  22,	// codes  8 .. 15
	24, 28, 32, 36, 40, 44,  48,  52,	// codes 16 .. 23
	56, 64, 72, 80, 88, 96, 104, 112	// codes 24 .. 31
};
43    
44     /* The bitstream and buffer need to be reallocated so that the old data
45     can still be read successfully. The old data stored in the 2nd slot
46     of the internal buffer is copied to the 1st slot, and the new data read
47     into the 1st slot is copied to the 2nd slot, which is later copied
48     back to the 1st slot once 128 bits have been read.
49     */
// Most recently selected DCT coefficient table entry. It lives at file scope rather than in
// get_intra_block() / get_non_intra_block() because those functions can return early and, when
// called again, resume at a point where they still read tab->run and tab->level.
50 william 62 const DCTtab * tab;
// Running macroblock address increment. mpeg2sliceIDEC() accumulates into it while parsing the
// address increment codes; it is reset to 0 after a macroblock has been written to the output FIFO.
51     int mbaCount = 0;
52 william 31
53 william 191 int bitstream_init ()
54     {
55     return g_BP.FillBuffer(32);
56     }
57    
// Decodes the macroblock modes for the current picture coding type (decoder.coding_type).
//
// For I/P/B pictures a few bits are peeked with UBITS, used to index the matching MB_* table,
// and the table entry supplies both the number of bits to consume (len) and the base mode flags.
// Depending on decoder.picture_structure and decoder.frame_pred_frame_dct, extra bits are then
// read to add a motion type (GETBITS(2) * MOTION_TYPE_BASE) and/or the DCT_TYPE_INTERLACED flag.
//
// Returns the combined mode flags, or 0 when the peeked code is zero or the coding type is not
// one of the handled cases (the paths marked "error" below).
58 william 62 int get_macroblock_modes()
59 william 31 {
60     int macroblock_modes;
// Local table pointer; it hides the file-scope `tab` (a DCTtab pointer) inside this function.
61     const MBtab * tab;
62    
63 william 62 switch (decoder.coding_type)
64 william 31 {
65     case I_TYPE:
66 william 62 macroblock_modes = UBITS(2);
67 william 31
68     if (macroblock_modes == 0) return 0; // error
69    
70     tab = MB_I + (macroblock_modes >> 1);
71 william 62 DUMPBITS(tab->len);
72 william 31 macroblock_modes = tab->modes;
73    
// The DCT type bit is only read for frame pictures that do not use frame_pred_frame_dct.
74 william 62 if ((!(decoder.frame_pred_frame_dct)) &&
75     (decoder.picture_structure == FRAME_PICTURE))
76 william 31 {
77 william 62 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
78 william 31 }
79     return macroblock_modes;
80    
81     case P_TYPE:
82 william 62 macroblock_modes = UBITS(6);
83 william 31
84     if (macroblock_modes == 0) return 0; // error
85    
86     tab = MB_P + (macroblock_modes >> 1);
87 william 62 DUMPBITS(tab->len);
88 william 31 macroblock_modes = tab->modes;
89    
// Field picture: only a 2-bit motion type may follow, and only for forward-predicted macroblocks.
90 william 62 if (decoder.picture_structure != FRAME_PICTURE)
91 william 31 {
92     if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
93     {
94 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
95 william 31 }
96    
97     return macroblock_modes;
98     }
// Frame picture with frame_pred_frame_dct: no extra bits are read, the motion type is MC_FRAME.
99 william 62 else if (decoder.frame_pred_frame_dct)
100 william 31 {
101     if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
102     macroblock_modes |= MC_FRAME;
103    
104     return macroblock_modes;
105     }
// Frame picture without frame_pred_frame_dct: motion type and DCT type are both read from the stream.
106     else
107     {
108     if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
109     {
110 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
111 william 31 }
112    
113     if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
114     {
115 william 62 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
116 william 31 }
117    
118     return macroblock_modes;
119     }
120    
121     case B_TYPE:
122 william 62 macroblock_modes = UBITS(6);
123 william 31
124     if (macroblock_modes == 0) return 0; // error
125    
// Unlike the I and P cases, the B table is indexed by the peeked value without a shift.
126     tab = MB_B + macroblock_modes;
127 william 62 DUMPBITS(tab->len);
128 william 31 macroblock_modes = tab->modes;
129    
130 william 62 if (decoder.picture_structure != FRAME_PICTURE)
131 william 31 {
132     if (!(macroblock_modes & MACROBLOCK_INTRA))
133     {
134 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
135 william 31 }
136    
137     return macroblock_modes;
138     }
139 william 62 else if (decoder.frame_pred_frame_dct)
140 william 31 {
// MC_FRAME is set unconditionally here; the intra check is left commented out in the original.
141     /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
142     macroblock_modes |= MC_FRAME;
143     return macroblock_modes;
144     }
145     else
146     {
// Intra macroblocks skip the motion type bits and jump straight to the DCT type bit below.
147     if (macroblock_modes & MACROBLOCK_INTRA) goto intra;
148    
149 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
150 william 31
151     if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
152     {
// Target of the goto above: the label sits inside the if-body, so the condition is bypassed.
153     intra:
154 william 62 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
155 william 31 }
156    
157     return macroblock_modes;
158     }
159    
160     case D_TYPE:
// A single bit is consumed; 0 is treated as an error, anything else yields an intra macroblock.
161 william 62 macroblock_modes = GETBITS(1);
162 william 31
163     if (macroblock_modes == 0) return 0; // error
164     return MACROBLOCK_INTRA;
165    
166     default:
167     return 0;
168     }
169     }
170    
171 william 62 static __fi int get_quantizer_scale()
172 william 31 {
173     int quantizer_scale_code;
174    
175 william 62 quantizer_scale_code = GETBITS(5);
176 william 31
177 william 62 if (decoder.q_scale_type)
178 william 31 return non_linear_quantizer_scale [quantizer_scale_code];
179     else
180     return quantizer_scale_code << 1;
181     }
182    
183 william 62 static __fi int get_coded_block_pattern()
184 william 31 {
185     const CBPtab * tab;
186 william 62 u16 code = UBITS(16);
187 william 31
188 william 62 if (code >= 0x2000)
189     tab = CBP_7 + (UBITS(7) - 16);
190 william 31 else
191 william 62 tab = CBP_9 + UBITS(9);
192 william 31
193 william 62 DUMPBITS(tab->len);
194 william 31 return tab->cbp;
195     }
196    
197 william 62 int __fi get_motion_delta(const int f_code)
198 william 31 {
199 william 62 int delta;
200     int sign;
201     const MVtab * tab;
202     u16 code = UBITS(16);
203 william 31
204 william 62 if ((code & 0x8000))
205 william 31 {
206 william 62 DUMPBITS(1);
207     return 0x00010000;
208 william 31 }
209 william 62 else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00))
210     {
211     tab = MV_4 + UBITS(4);
212     }
213     else
214     {
215     tab = MV_10 + UBITS(10);
216     }
217 william 31
218 william 62 delta = tab->delta + 1;
219     DUMPBITS(tab->len);
220 william 31
221 william 62 sign = SBITS(1);
222     DUMPBITS(1);
223     return (delta ^ sign) - sign;
224 william 31 }
225    
226 william 62 int __fi get_dmv()
227 william 31 {
228 william 191 const DMVtab* tab = DMV_2 + UBITS(2);
229 william 62 DUMPBITS(tab->len);
230     return tab->dmv;
231 william 31 }
232    
233 william 62 int get_macroblock_address_increment()
234 william 31 {
235 william 62 const MBAtab *mba;
236    
237     u16 code = UBITS(16);
238 william 31
239 william 62 if (code >= 4096)
240     mba = MBA.mba5 + (UBITS(5) - 2);
241     else if (code >= 768)
242     mba = MBA.mba11 + (UBITS(11) - 24);
243     else switch (UBITS(11))
244 william 191 {
245     case 8: /* macroblock_escape */
246     DUMPBITS(11);
247     return 0x23;
248 william 31
249 william 191 case 15: /* macroblock_stuffing (MPEG1 only) */
250     if (decoder.mpeg1)
251     {
252 william 62 DUMPBITS(11);
253 william 191 return 0x22;
254     }
255 william 31
256 william 191 default:
257     return 0;//error
258     }
259 william 31
260 william 62 DUMPBITS(mba->len);
261 william 31
262 william 62 return mba->mba + 1;
263     }
264 william 31
265 william 62 static __fi int get_luma_dc_dct_diff()
266     {
267     int size;
268     int dc_diff;
269     u16 code = UBITS(5);
270 william 31
271 william 62 if (code < 31)
272     {
273     size = DCtable.lum0[code].size;
274     DUMPBITS(DCtable.lum0[code].len);
275 william 31
276 william 62 // 5 bits max
277     }
278     else
279     {
280     code = UBITS(9) - 0x1f0;
281     size = DCtable.lum1[code].size;
282     DUMPBITS(DCtable.lum1[code].len);
283 william 31
284 william 62 // 9 bits max
285     }
286    
287     if (size==0)
288     dc_diff = 0;
289     else
290     {
291     dc_diff = GETBITS(size);
292 william 31
293 william 62 // 6 for tab0 and 11 for tab1
294     if ((dc_diff & (1<<(size-1)))==0)
295     dc_diff-= (1<<size) - 1;
296     }
297 william 31
298 william 62 return dc_diff;
299     }
300 william 31
301 william 62 static __fi int get_chroma_dc_dct_diff()
302     {
303     int size;
304     int dc_diff;
305     u16 code = UBITS(5);
306 william 31
307 william 62 if (code<31)
308     {
309     size = DCtable.chrom0[code].size;
310     DUMPBITS(DCtable.chrom0[code].len);
311     }
312     else
313     {
314     code = UBITS(10) - 0x3e0;
315     size = DCtable.chrom1[code].size;
316     DUMPBITS(DCtable.chrom1[code].len);
317     }
318    
319     if (size==0)
320     dc_diff = 0;
321     else
322     {
323     dc_diff = GETBITS(size);
324 william 31
325 william 62 if ((dc_diff & (1<<(size-1)))==0)
326 william 31 {
327 william 62 dc_diff-= (1<<size) - 1;
328 william 31 }
329     }
330 william 62
331     return dc_diff;
332     }
333 william 31
// Clamps `val` in place to the range [-2048, 2047].
//
// `val` must be a modifiable integer lvalue; it is evaluated more than once, so it must not
// have side effects. The range test adds 2048 and compares as unsigned, which catches values
// below -2048 and above 2047 with a single comparison. The replacement value is built from the
// sign: (val >> 31) is 0 for non-negative and -1 for negative values (this relies on an
// arithmetic right shift of signed integers), so XOR with 2047 gives 2047 or -2048.
//
// Every use of the parameter is parenthesized so that compound lvalue expressions
// (e.g. `cond ? a : b`) expand correctly.
#define SATURATE(val) \
	do { \
		if ((u32)((val) + 2048) > 4095) \
			(val) = (((s32)(val)) >> 31) ^ 2047; \
	} while (0)
339 william 31
// Decodes the AC coefficients of one intra block into decoder.DCTblock.
//
// The function is resumable. Whenever GETWORD() reports that no more input is available it
// stores the current coefficient index in ipu_cmd.pos[4] and the phase in ipu_cmd.pos[5]
// (0 = about to decode a run/level code, 1 = code decoded, value still to be read) and
// returns false. The next call continues from that saved state. Because of this, the table
// entry selected in phase 0 is kept in the file-scope `tab` pointer, not in a local.
//
// Returns true when the block is finished: on an end_of_block entry (run == 64), when the
// coefficient index reaches 64, or when the peeked code is below 16 (no table matches).
// ipu_cmd.pos[4] is reset to 0 on every such return.
340 william 191 static bool get_intra_block()
341 william 31 {
342 william 62 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
343     const u8 (&quant_matrix)[64] = decoder.iq;
344     int quantizer_scale = decoder.quantizer_scale;
345     s16 * dest = decoder.DCTblock;
346     u16 code;
347 william 31
348 william 62 /* decode AC coefficients */
// Index 0 is the DC coefficient, so AC decoding starts at 1 plus the saved resume offset.
349 william 191 for (int i=1 + ipu_cmd.pos[4]; ; i++)
350 william 62 {
351     switch (ipu_cmd.pos[5])
352     {
353     case 0:
354     if (!GETWORD())
355     {
// Saved as i - 1 because the loop initializer adds 1 again on resume.
356     ipu_cmd.pos[4] = i - 1;
357     return false;
358     }
359 william 31
360 william 62 code = UBITS(16);
361 william 31
// Table selection by magnitude of the peeked 16 bits. The "a" tables are used only when
// decoder.intra_vlc_format is set and the stream is not MPEG1.
362 william 62 if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
363 william 31 {
364 william 62 tab = &DCT.next[(code >> 12) - 4];
365     }
366     else if (code >= 1024)
367     {
368     if (decoder.intra_vlc_format && !decoder.mpeg1)
369 william 31 {
370 william 62 tab = &DCT.tab0a[(code >> 8) - 4];
371 william 31 }
372     else
373     {
374 william 62 tab = &DCT.tab0[(code >> 8) - 4];
375 william 31 }
376     }
377 william 62 else if (code >= 512)
378 william 31 {
379 william 62 if (decoder.intra_vlc_format && !decoder.mpeg1)
380     {
381     tab = &DCT.tab1a[(code >> 6) - 8];
382     }
383     else
384     {
385     tab = &DCT.tab1[(code >> 6) - 8];
386     }
387 william 31 }
388    
389 william 62 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
390     // that should use a single unrolled DCT table instead of five separate tables used
391     // here. Multiple conditional statements are very slow, while modern CPU data caches
392     // have lots of room to spare.
393    
394     else if (code >= 256)
395 william 31 {
396 william 62 tab = &DCT.tab2[(code >> 4) - 16];
397 william 31 }
398 william 62 else if (code >= 128)
399     {
400     tab = &DCT.tab3[(code >> 3) - 16];
401     }
402     else if (code >= 64)
403     {
404     tab = &DCT.tab4[(code >> 2) - 16];
405     }
406     else if (code >= 32)
407     {
408     tab = &DCT.tab5[(code >> 1) - 16];
409     }
410     else if (code >= 16)
411     {
412     tab = &DCT.tab6[code - 16];
413     }
414 william 31 else
415     {
// No table covers codes below 16: the block is treated as finished without consuming bits.
416 william 62 ipu_cmd.pos[4] = 0;
417     return true;
418 william 31 }
419    
420 william 62 DUMPBITS(tab->len);
421 william 31
422 william 62 if (tab->run==64) /* end_of_block */
423 william 31 {
424 william 62 ipu_cmd.pos[4] = 0;
425     return true;
426 william 31 }
427 william 62
// run == 65 marks an escape code: the run length is then read explicitly as 6 bits.
428 william 191 i += (tab->run == 65) ? GETBITS(6) : tab->run;
429 william 62 if (i >= 64)
430 william 31 {
431 william 62 ipu_cmd.pos[4] = 0;
432     return true;
433 william 31 }
434 william 191
// Intentional fall-through from case 0: the coefficient value is read next.
435 william 62 case 1:
436 william 191 {
437     if (!GETWORD())
438     {
439     ipu_cmd.pos[4] = i - 1;
440     ipu_cmd.pos[5] = 1;
441     return false;
442     }
443 william 31
// Destination index comes from the scan table; the quantizer matrix is indexed by i.
444 william 191 uint j = scan[i];
445     int val;
446 william 62
447 william 191 if (tab->run==65) /* escape */
448     {
449     if(!decoder.mpeg1)
450     {
451     val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
452     DUMPBITS(12);
453     }
454     else
455     {
456     val = SBITS(8);
457     DUMPBITS(8);
458 william 31
// An 8-bit level with its low 7 bits clear is extended by a further 8 bits.
459 william 191 if (!(val & 0x7f))
460     {
461     val = GETBITS(8) + 2 * val;
462     }
463 william 31
464 william 191 val = (val * quantizer_scale * quant_matrix[i]) >> 4;
465     val = (val + ~ (((s32)val) >> 31)) | 1;
466     }
467     }
468     else
469     {
470     val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
471     if(decoder.mpeg1)
472     {
473     /* oddification */
474     val = (val - 1) | 1;
475     }
476    
477     /* if (bitstream_get (1)) val = -val; */
// Branch-free negate: bit1 is 0 or -1, so (val ^ bit1) - bit1 is val or -val.
478     int bit1 = SBITS(1);
479     val = (val ^ bit1) - bit1;
480     DUMPBITS(1);
481     }
482    
483     SATURATE(val);
484     dest[j] = val;
// Coefficient stored; the next loop iteration starts again with a run/level code.
485     ipu_cmd.pos[5] = 0;
486 william 31 }
487 william 62 }
488     }
489 william 31
// Not reachable in practice: the loop above has no exit condition and only leaves via return.
490 william 62 ipu_cmd.pos[4] = 0;
491     return true;
492 william 31 }
493    
// Decodes the coefficients of one non-intra block into decoder.DCTblock.
//
// Resumable in the same way as the intra variant: when GETWORD() reports that no more input
// is available, the coefficient index is saved in ipu_cmd.pos[4], the phase in ipu_cmd.pos[5]
// (0 = about to decode a run/level code, 1 = value still to be read), and false is returned.
// The table entry chosen in phase 0 is kept in the file-scope `tab` pointer across calls.
//
// Returns true when the block is finished. On the end_of_block and "index >= 64" exits the
// final coefficient index is written to *last.
// NOTE(review): the exit taken when the peeked code is below 16 returns true without writing
// *last, so the caller's variable keeps whatever value it had — confirm this is intended.
494 william 191 static bool get_non_intra_block(int * last)
495 william 31 {
496     int i;
497     int j;
498     int val;
499 william 62 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
500     const u8 (&quant_matrix)[64] = decoder.niq;
501     int quantizer_scale = decoder.quantizer_scale;
502     s16 * dest = decoder.DCTblock;
503     u16 code;
504 william 31
505 william 62 /* decode AC coefficients */
// Unlike the intra variant, decoding starts at index 0 (plus the saved resume offset).
506     for (i= ipu_cmd.pos[4] ; ; i++)
507     {
508     switch (ipu_cmd.pos[5])
509 william 31 {
510 william 62 case 0:
511     if (!GETWORD())
512     {
513     ipu_cmd.pos[4] = i;
514     return false;
515     }
516 william 31
517 william 62 code = UBITS(16);
518 william 31
519 william 62 if (code >= 16384)
520     {
// The very first coefficient uses its own table (DCT.first); later ones use DCT.next.
521     if (i==0)
522     {
523     tab = &DCT.first[(code >> 12) - 4];
524     }
525     else
526     {
527     tab = &DCT.next[(code >> 12)- 4];
528     }
529     }
530     else if (code >= 1024)
531     {
532     tab = &DCT.tab0[(code >> 8) - 4];
533     }
534     else if (code >= 512)
535     {
536     tab = &DCT.tab1[(code >> 6) - 8];
537     }
538 william 31
539 william 62 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
540     // that should use a single unrolled DCT table instead of five separate tables used
541     // here. Multiple conditional statements are very slow, while modern CPU data caches
542     // have lots of room to spare.
543 william 31
544 william 62 else if (code >= 256)
545     {
546     tab = &DCT.tab2[(code >> 4) - 16];
547     }
548     else if (code >= 128)
549     {
550     tab = &DCT.tab3[(code >> 3) - 16];
551     }
552     else if (code >= 64)
553     {
554     tab = &DCT.tab4[(code >> 2) - 16];
555     }
556     else if (code >= 32)
557     {
558     tab = &DCT.tab5[(code >> 1) - 16];
559     }
560     else if (code >= 16)
561     {
562     tab = &DCT.tab6[code - 16];
563     }
564     else
565     {
// No table covers codes below 16: finish the block without consuming bits (*last untouched).
566     ipu_cmd.pos[4] = 0;
567     return true;
568     }
569 william 31
570 william 62 DUMPBITS(tab->len);
571 william 31
572 william 62 if (tab->run==64) /* end_of_block */
573     {
574     *last = i;
575     ipu_cmd.pos[4] = 0;
576     return true;
577     }
578 william 31
// run == 65 marks an escape code: the run length is then read explicitly as 6 bits.
579 william 62 i += (tab->run == 65) ? GETBITS(6) : tab->run;
580     if (i >= 64)
581     {
582     *last = i;
583     ipu_cmd.pos[4] = 0;
584     return true;
585     }
586 william 31
// Intentional fall-through from case 0: the coefficient value is read next.
587 william 62 case 1:
588     if (!GETWORD())
589     {
590     ipu_cmd.pos[4] = i;
591     ipu_cmd.pos[5] = 1;
592     return false;
593     }
594 william 31
// Destination index comes from the scan table; the quantizer matrix is indexed by i.
595 william 62 j = scan[i];
596 william 31
597 william 62 if (tab->run==65) /* escape */
598     {
599     if (!decoder.mpeg1)
600     {
// SBITS(1) is evaluated before the 12 bits are dumped, so it reflects the sign of the level.
601     val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5;
602     DUMPBITS(12);
603     }
604     else
605     {
606     val = SBITS(8);
607     DUMPBITS(8);
608 william 31
// An 8-bit level with its low 7 bits clear is extended by a further 8 bits.
609 william 62 if (!(val & 0x7f))
610     {
611     val = GETBITS(8) + 2 * val;
612     }
613 william 31
614 william 62 val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32;
615     val = (val + ~ (((s32)val) >> 31)) | 1;
616     }
617     }
618     else
619 william 31 {
// Branch-free negate: bit1 is 0 or -1, so (val ^ bit1) - bit1 is val or -val.
620 william 191 int bit1 = SBITS(1);
621 william 62 val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
622 william 191 val = (val ^ bit1) - bit1;
623 william 62 DUMPBITS(1);
624 william 31 }
625    
626     SATURATE(val);
627     dest[j] = val;
// Coefficient stored; the next loop iteration starts again with a run/level code.
628 william 62 ipu_cmd.pos[5] = 0;
629 william 31 }
630     }
631    
// Not reachable in practice: the loop above has no exit condition and only leaves via return.
632 william 62 ipu_cmd.pos[4] = 0;
633     return true;
634 william 31 }
635    
636 william 62 static __fi bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
637 william 31 {
638 william 62 if (!skip || ipu_cmd.pos[3])
639 william 31 {
640 william 62 ipu_cmd.pos[3] = 0;
641     if (!GETWORD())
642 william 31 {
643 william 62 ipu_cmd.pos[3] = 1;
644     return false;
645 william 31 }
646    
647 william 62 /* Get the intra DC coefficient and inverse quantize it */
648     if (cc == 0)
649     decoder.dc_dct_pred[0] += get_luma_dc_dct_diff();
650 william 31 else
651 william 62 decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff();
652 william 31
653 william 62 decoder.DCTblock[0] = decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision);
654 william 31 }
655    
656 william 62 if (!get_intra_block())
657 william 31 {
658 william 62 return false;
659 william 31 }
660    
661 william 62 mpeg2_idct_copy(decoder.DCTblock, dest, stride);
662    
663     return true;
664 william 31 }
665    
666 william 62 static __fi bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
667 william 31 {
668     int last;
669    
670 william 62 if (!skip)
671 william 31 {
672 william 62 memzero_sse_a(decoder.DCTblock);
673 william 31 }
674    
675 william 62 if (!get_non_intra_block(&last))
676 william 31 {
677 william 62 return false;
678 william 31 }
679    
680 william 62 mpeg2_idct_add(last, decoder.DCTblock, dest, stride);
681 william 31
682 william 62 return true;
683 william 31 }
684    
685 william 62 void __fi finishmpeg2sliceIDEC()
686 william 31 {
687 william 62 ipuRegs.ctrl.SCD = 0;
688     coded_block_pattern = decoder.coded_block_pattern;
689 william 31 }
690    
// Resumable macroblock decoding loop for the IDEC path.
//
// Each iteration decodes the six intra blocks of one macroblock into decoder.mb8, converts
// them with ipu_csc() to rgb32 (and optionally dithers to rgb16), writes the result to
// ipu_fifo.out, then parses the following macroblock address increment.
//
// Progress is recorded in ipu_cmd.pos[]: pos[0] is the top-level stage, pos[1] the stage
// within one macroblock, pos[2] the block currently being decoded. The function returns
// false whenever it has to wait (input bits exhausted or output FIFO full) and is expected
// to be called again; the switch statements then jump back to the saved stage. The case
// labels fall through into each other on purpose.
//
// Returns true after the trailing stages have run: the start-code check (stage 3) and the
// load of ipuRegs.top (stage 4).
691 william 273 __fi bool mpeg2sliceIDEC()
692 william 31 {
693 william 62 u16 code;
694 william 31
695 william 62 switch (ipu_cmd.pos[0])
696     {
697     case 0:
// Fresh start: reset all three DC predictors and clear the top / ECD registers.
698     decoder.dc_dct_pred[0] =
699     decoder.dc_dct_pred[1] =
700     decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
701 william 31
702 william 62 ipuRegs.top = 0;
703     ipuRegs.ctrl.ECD = 0;
704 william 31
705 william 62 case 1:
706     ipu_cmd.pos[0] = 1;
707     if (!bitstream_init())
708     {
709     return false;
710     }
711 william 31
712 william 62 case 2:
713     ipu_cmd.pos[0] = 2;
// One iteration per macroblock; the loop is only left via `return` or `goto finish_idec`.
714 william 31 while (1)
715     {
716 william 62 macroblock_8& mb8 = decoder.mb8;
717     macroblock_rgb16& rgb16 = decoder.rgb16;
718     macroblock_rgb32& rgb32 = decoder.rgb32;
719    
720 william 31 int DCT_offset, DCT_stride;
721     const MBAtab * mba;
722    
723 william 62 switch (ipu_cmd.pos[1])
724 william 31 {
725 william 62 case 0:
726     decoder.macroblock_modes = get_macroblock_modes();
727 william 31
728 william 62 if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC
729     {
730     decoder.quantizer_scale = get_quantizer_scale();
731     }
732 william 31
733 william 62 decoder.coded_block_pattern = 0x3F;//all 6 blocks
734     memzero_sse_a(mb8);
735     memzero_sse_a(rgb32);
736 william 31
737 william 62 case 1:
738     ipu_cmd.pos[1] = 1;
739 william 31
// Interlaced DCT: the lower luma blocks start one row down and rows are two strides apart;
// otherwise they start eight rows down with a single-stride step.
740 william 62 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
741 william 31 {
742 william 62 DCT_offset = decoder_stride;
743     DCT_stride = decoder_stride * 2;
744 william 31 }
745     else
746     {
747 william 62 DCT_offset = decoder_stride * 8;
748     DCT_stride = decoder_stride;
749     }
750 william 31
// Blocks 1-4 are luma (Y), 5 is Cb, 6 is Cr. The last argument tells slice_intra_DCT whether
// this is a re-entry into the block that was interrupted (pos[2] equals that block's number).
751 william 62 switch (ipu_cmd.pos[2])
752     {
753     case 0:
754     case 1:
755     if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1))
756     {
757     ipu_cmd.pos[2] = 1;
758     return false;
759     }
760     case 2:
761     if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
762     {
763     ipu_cmd.pos[2] = 2;
764     return false;
765     }
766     case 3:
767     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
768     {
769     ipu_cmd.pos[2] = 3;
770     return false;
771     }
772     case 4:
773     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
774     {
775     ipu_cmd.pos[2] = 4;
776     return false;
777     }
778     case 5:
779     if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5))
780     {
781     ipu_cmd.pos[2] = 5;
782     return false;
783     }
784     case 6:
785     if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6))
786     {
787     ipu_cmd.pos[2] = 6;
788     return false;
789     }
790 william 191 break;
791    
792     jNO_DEFAULT;
793 william 31 }
794    
795 william 62 // Send The MacroBlock via DmaIpuFrom
796     ipu_csc(mb8, rgb32, decoder.sgn);
797    
// decoder.ofm selects the output: 0 sends rgb32 as is, anything else dithers to rgb16 first.
798     if (decoder.ofm == 0)
799     decoder.SetOutputTo(rgb32);
800     else
801 william 31 {
802 william 62 ipu_dither(rgb32, rgb16, decoder.dte);
803     decoder.SetOutputTo(rgb16);
804     }
805 william 31
806 william 62 case 2:
807 william 191 {
808     pxAssume(decoder.ipu0_data > 0);
809    
// `read` is the amount the FIFO accepted; the remaining amount is tracked in decoder.ipu0_data.
810     uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
811     decoder.AdvanceIpuDataBy(read);
812    
813     if (decoder.ipu0_data != 0)
814 william 62 {
815 william 191 // IPU FIFO filled up -- Will have to finish transferring later.
816     ipu_cmd.pos[1] = 2;
817     return false;
818 william 31 }
819    
820 william 62 mbaCount = 0;
821 william 191 }
822    
823 william 62 case 3:
// Parse the macroblock address increment. Escape codes add 33 each and stuffing codes are
// skipped (both `continue`); a regular code selects `mba` and leaves this loop via `break`.
824     while (1)
825 william 31 {
826 william 62 if (!GETWORD())
827 william 31 {
828 william 62 ipu_cmd.pos[1] = 3;
829     return false;
830     }
831 william 31
832 william 62 code = UBITS(16);
833     if (code >= 0x1000)
834     {
835     mba = MBA.mba5 + (UBITS(5) - 2);
836     break;
837     }
838     else if (code >= 0x0300)
839     {
840     mba = MBA.mba11 + (UBITS(11) - 24);
841     break;
842     }
843     else switch (UBITS(11))
844     {
845 william 191 case 8: /* macroblock_escape */
846     mbaCount += 33;
847     /* pass through */
848 william 31
849 william 191 case 15: /* macroblock_stuffing (MPEG1 only) */
850     DUMPBITS(11);
851     continue;
852 william 31
853 william 191 default: /* end of slice/frame, or error? */
854     {
855     goto finish_idec;
856     }
857 william 31 }
858 william 62 }
859 william 31
860 william 62 DUMPBITS(mba->len);
861     mbaCount += mba->mba;
862 william 31
// A non-zero increment resets the DC predictors before the next macroblock.
863 william 62 if (mbaCount)
864     {
865     decoder.dc_dct_pred[0] =
866     decoder.dc_dct_pred[1] =
867     decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
868     }
869    
870     case 4:
871     if (!GETWORD())
872 william 31 {
873 william 62 ipu_cmd.pos[1] = 4;
874     return false;
875 william 31 }
876 william 62
877     break;
878 william 191
879     jNO_DEFAULT;
880 william 31 }
881 william 62
// Macroblock done: reset the per-macroblock stages before the next loop iteration.
882     ipu_cmd.pos[1] = 0;
883     ipu_cmd.pos[2] = 0;
884 william 31 }
885 william 191
// Reached only through the goto in the address-increment parser above.
886 william 62 finish_idec:
887     finishmpeg2sliceIDEC();
888 william 31
889 william 62 case 3:
890 william 191 {
891     u8 bit8;
// NOTE(review): the second argument (0) presumably requests a read without advancing the
// stream — confirm against getBits8.
892 william 62 if (!getBits8((u8*)&bit8, 0))
893     {
894     ipu_cmd.pos[0] = 3;
895     return false;
896     }
897 william 31
// A zero byte: align the bit pointer and flag SCD in the control register.
898 william 62 if (bit8 == 0)
899     {
900 william 191 g_BP.Align();
901 william 62 ipuRegs.ctrl.SCD = 1;
902     }
903 william 191 }
904 william 62
905     case 4:
906     if (!getBits32((u8*)&ipuRegs.top, 0))
907     {
908     ipu_cmd.pos[0] = 4;
909     return false;
910     }
911    
// The 32 bits were stored as raw bytes; convert them with BigEndian() before use.
912 william 191 ipuRegs.top = BigEndian(ipuRegs.top);
913 william 62 break;
914 william 191
915     jNO_DEFAULT;
916 william 62 }
917    
918     return true;
919 william 31 }
920    
// Resumable decoder for a single macroblock whose result is output as 16-bit samples (mb16).
//
// decoder.macroblock_modes is only read here (it is not set in this function). It selects
// between the intra path (six blocks decoded into mb8, then widened into mb16) and the
// non-intra path (blocks selected by the coded block pattern, decoded into mb16).
// The finished macroblock is written to ipu_fifo.out.
//
// Progress is recorded in ipu_cmd.pos[0] (top-level stage) and ipu_cmd.pos[1] (block being
// decoded). The function returns false whenever it has to wait (input bits exhausted or
// output FIFO full) and is expected to be called again; the switch statements then jump back
// to the saved stage. The case labels fall through into each other on purpose.
//
// Returns true after the trailing stages have run: the start-code check (stage 4) and the
// load of ipuRegs.top (stage 5).
921 william 273 __fi bool mpeg2_slice()
922 william 31 {
923     int DCT_offset, DCT_stride;
924    
925 william 62 macroblock_8& mb8 = decoder.mb8;
926     macroblock_16& mb16 = decoder.mb16;
927 william 31
928 william 62 switch (ipu_cmd.pos[0])
929 william 31 {
930 william 62 case 0:
// The DC predictors are only reset when decoder.dcr is set.
931     if (decoder.dcr)
932 william 31 {
933 william 62 decoder.dc_dct_pred[0] =
934     decoder.dc_dct_pred[1] =
935     decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
936 william 31 }
937 william 62
938     ipuRegs.ctrl.ECD = 0;
939     ipuRegs.top = 0;
940     memzero_sse_a(mb8);
941     memzero_sse_a(mb16);
942     case 1:
943     if (!bitstream_init())
944     {
945     ipu_cmd.pos[0] = 1;
946     return false;
947     }
948 william 31
949 william 62 case 2:
950     ipu_cmd.pos[0] = 2;
951 william 31
// Interlaced DCT: the lower luma blocks start one row down and rows are two strides apart;
// otherwise they start eight rows down with a single-stride step.
952 william 62 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
953 william 31 {
954 william 62 DCT_offset = decoder_stride;
955     DCT_stride = decoder_stride * 2;
956 william 31 }
957     else
958     {
959 william 62 DCT_offset = decoder_stride * 8;
960     DCT_stride = decoder_stride;
961 william 31 }
962    
963 william 62 if (decoder.macroblock_modes & MACROBLOCK_INTRA)
964     {
// Intra path: blocks 1-4 are luma (Y), 5 is Cb, 6 is Cr. The last argument tells
// slice_intra_DCT whether this is a re-entry into the block that was interrupted.
965     switch(ipu_cmd.pos[1])
966     {
967     case 0:
968     decoder.coded_block_pattern = 0x3F;
969     case 1:
970     if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1))
971     {
972     ipu_cmd.pos[1] = 1;
973     return false;
974     }
975     case 2:
976     if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
977     {
978     ipu_cmd.pos[1] = 2;
979     return false;
980     }
981     case 3:
982     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
983     {
984     ipu_cmd.pos[1] = 3;
985     return false;
986     }
987     case 4:
988     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
989     {
990     ipu_cmd.pos[1] = 4;
991     return false;
992     }
993     case 5:
994     if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
995     {
996     ipu_cmd.pos[1] = 5;
997     return false;
998     }
999     case 6:
1000     if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1001     {
1002     ipu_cmd.pos[1] = 6;
1003     return false;
1004     }
1005     break;
1006 william 191
1007     jNO_DEFAULT;
1008 william 62 }
1009 william 31
1010 william 191 // Copy macroblock8 to macroblock16 - without sign extension.
1011     // Manually inlined due to MSVC refusing to inline the SSE-optimized version.
1012     {
1013     const u8 *s = (const u8*)&mb8;
1014     u16 *d = (u16*)&mb16;
1015    
1016     //Y bias - 16 * 16
1017     //Cr bias - 8 * 8
1018     //Cb bias - 8 * 8
1019    
1020     __m128i zeroreg = _mm_setzero_si128();
1021    
// (256+64+64) / 32 = 12 iterations. Each one loads 32 source bytes and stores them as 32
// zero-extended 16-bit values (unpack with a zero register). The aligned load/store
// intrinsics require both buffers to be 16-byte aligned.
1022     for (uint i = 0; i < (256+64+64) / 32; ++i)
1023     {
1024     //*d++ = *s++;
1025     __m128i woot1 = _mm_load_si128((__m128i*)s);
1026     __m128i woot2 = _mm_load_si128((__m128i*)s+1);
1027     _mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg));
1028     _mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg));
1029     _mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg));
1030     _mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg));
1031     s += 32;
1032     d += 32;
1033     }
1034     }
1035 william 62 }
1036     else
1037     {
// Non-intra path: blocks are decoded only when MACROBLOCK_PATTERN is set, and then only
// those whose bit is set in the coded block pattern (0x20 = first luma block ... 0x01 = Cr).
1038     if (decoder.macroblock_modes & MACROBLOCK_PATTERN)
1039     {
1040     switch(ipu_cmd.pos[1])
1041     {
1042     case 0:
1043     decoder.coded_block_pattern = get_coded_block_pattern(); // max 9bits
1044     case 1:
1045     if (decoder.coded_block_pattern & 0x20)
1046     {
1047     if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1))
1048     {
1049     ipu_cmd.pos[1] = 1;
1050     return false;
1051     }
1052     }
1053     case 2:
1054     if (decoder.coded_block_pattern & 0x10)
1055     {
1056     if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
1057     {
1058     ipu_cmd.pos[1] = 2;
1059     return false;
1060     }
1061     }
1062     case 3:
1063     if (decoder.coded_block_pattern & 0x08)
1064     {
1065     if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
1066     {
1067     ipu_cmd.pos[1] = 3;
1068     return false;
1069     }
1070     }
1071     case 4:
1072     if (decoder.coded_block_pattern & 0x04)
1073     {
1074     if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
1075     {
1076     ipu_cmd.pos[1] = 4;
1077     return false;
1078     }
1079     }
1080     case 5:
1081     if (decoder.coded_block_pattern & 0x2)
1082     {
1083     if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
1084     {
1085     ipu_cmd.pos[1] = 5;
1086     return false;
1087     }
1088     }
1089     case 6:
1090     if (decoder.coded_block_pattern & 0x1)
1091     {
1092     if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1093     {
1094     ipu_cmd.pos[1] = 6;
1095     return false;
1096     }
1097     }
1098     break;
1099 william 191
1100     jNO_DEFAULT;
1101 william 62 }
1102     }
1103     }
1104 william 31
1105 william 62 // Send The MacroBlock via DmaIpuFrom
1106     ipuRegs.ctrl.SCD = 0;
1107     coded_block_pattern = decoder.coded_block_pattern;
1108 william 31
1109 william 62 decoder.SetOutputTo(mb16);
1110 william 31
1111 william 62 case 3:
1112 william 191 {
1113     pxAssume(decoder.ipu0_data > 0);
1114    
// `read` is the amount the FIFO accepted; the remaining amount is tracked in decoder.ipu0_data.
1115     uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
1116     decoder.AdvanceIpuDataBy(read);
1117    
1118     if (decoder.ipu0_data != 0)
1119 william 62 {
1120 william 191 // IPU FIFO filled up -- Will have to finish transferring later.
1121     ipu_cmd.pos[0] = 3;
1122     return false;
1123 william 62 }
1124 william 31
1125 william 191 mbaCount = 0;
1126     }
1127    
1128 william 62 case 4:
1129 william 191 {
1130     u8 bit8;
// NOTE(review): the second argument (0) presumably requests a read without advancing the
// stream — confirm against getBits8.
1131 william 62 if (!getBits8((u8*)&bit8, 0))
1132 william 31 {
1133 william 62 ipu_cmd.pos[0] = 4;
1134     return false;
1135 william 31 }
1136    
// A zero byte: align the bit pointer and flag SCD in the control register.
1137 william 62 if (bit8 == 0)
1138     {
1139 william 191 g_BP.Align();
1140 william 62 ipuRegs.ctrl.SCD = 1;
1141     }
1142 william 191 }
1143    
1144 william 62 case 5:
1145     if (!getBits32((u8*)&ipuRegs.top, 0))
1146 william 31 {
1147 william 62 ipu_cmd.pos[0] = 5;
1148     return false;
1149 william 31 }
1150    
// The 32 bits were stored as raw bytes; convert them with BigEndian() before use.
1151 william 191 ipuRegs.top = BigEndian(ipuRegs.top);
1152 william 62 break;
1153     }
1154 william 31
1155 william 62 return true;
1156     }

  ViewVC Help
Powered by ViewVC 1.1.22