/[pcsx2_0.9.7]/trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp
ViewVC logotype

Annotation of /trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 280 - (hide annotations) (download)
Thu Dec 23 12:02:12 2010 UTC (9 years, 9 months ago) by william
File size: 23135 byte(s)
re-commit (had local access denied errors when committing)
1 william 31 /*
2     * Mpeg.c
3     * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
4     * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
5     * Modified by Florin for PCSX2 emu
6     *
7     * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
8     * See http://libmpeg2.sourceforge.net/ for updates.
9     *
10     * mpeg2dec is free software; you can redistribute it and/or modify
11     * it under the terms of the GNU General Public License as published by
12     * the Free Software Foundation; either version 2 of the License, or
13     * (at your option) any later version.
14     *
15     * mpeg2dec is distributed in the hope that it will be useful,
16     * but WITHOUT ANY WARRANTY; without even the implied warranty of
17     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18     * GNU General Public License for more details.
19     *
20     * You should have received a copy of the GNU General Public License
21     * along with this program; if not, write to the Free Software
22     * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23     */
24    
25     // [Air] Note: many functions in this module are large and only used once, so they
26     // have been forced to inline since it won't bloat the program and gets rid of
27     // some call overhead.
28    
29     #include "PrecompiledHeader.h"
30    
31     #include "Common.h"
32     #include "IPU/IPU.h"
33     #include "Mpeg.h"
34     #include "Vlc.h"
35    
36 william 280 #include "Utilities/MemsetFast.inl"
37    
// Lookup table indexed by the 5-bit quantizer scale code (0..31).
// get_quantizer_scale() uses it when decoder.q_scale_type is set; the step
// between neighbouring entries grows from 1 to 2, 4 and finally 8.
const int non_linear_quantizer_scale [] =
{
    // codes 0..7 (step 1)
     0,   1,   2,   3,
     4,   5,   6,   7,
    // codes 8..15 (step 2)
     8,  10,  12,  14,
    16,  18,  20,  22,
    // codes 16..23 (step 4)
    24,  28,  32,  36,
    40,  44,  48,  52,
    // codes 24..31 (step 8)
    56,  64,  72,  80,
    88,  96, 104, 112
};
45    
/* The bitstream and its buffer need to be reallocated for the old data to be
   read successfully. The old data stored in the 2nd slot of the internal
   buffer is copied to the 1st slot, and the new data read into the 1st slot
   is copied to the 2nd slot, which is later copied back to the 1st slot once
   128 bits have been read.
*/
52 william 62 const DCTtab * tab;
53     int mbaCount = 0;
54 william 31
55 william 191 int bitstream_init ()
56     {
57     return g_BP.FillBuffer(32);
58     }
59    
60 william 62 int get_macroblock_modes()
61 william 31 {
62     int macroblock_modes;
63     const MBtab * tab;
64    
65 william 62 switch (decoder.coding_type)
66 william 31 {
67     case I_TYPE:
68 william 62 macroblock_modes = UBITS(2);
69 william 31
70     if (macroblock_modes == 0) return 0; // error
71    
72     tab = MB_I + (macroblock_modes >> 1);
73 william 62 DUMPBITS(tab->len);
74 william 31 macroblock_modes = tab->modes;
75    
76 william 62 if ((!(decoder.frame_pred_frame_dct)) &&
77     (decoder.picture_structure == FRAME_PICTURE))
78 william 31 {
79 william 62 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
80 william 31 }
81     return macroblock_modes;
82    
83     case P_TYPE:
84 william 62 macroblock_modes = UBITS(6);
85 william 31
86     if (macroblock_modes == 0) return 0; // error
87    
88     tab = MB_P + (macroblock_modes >> 1);
89 william 62 DUMPBITS(tab->len);
90 william 31 macroblock_modes = tab->modes;
91    
92 william 62 if (decoder.picture_structure != FRAME_PICTURE)
93 william 31 {
94     if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
95     {
96 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
97 william 31 }
98    
99     return macroblock_modes;
100     }
101 william 62 else if (decoder.frame_pred_frame_dct)
102 william 31 {
103     if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
104     macroblock_modes |= MC_FRAME;
105    
106     return macroblock_modes;
107     }
108     else
109     {
110     if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
111     {
112 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
113 william 31 }
114    
115     if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
116     {
117 william 62 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
118 william 31 }
119    
120     return macroblock_modes;
121     }
122    
123     case B_TYPE:
124 william 62 macroblock_modes = UBITS(6);
125 william 31
126     if (macroblock_modes == 0) return 0; // error
127    
128     tab = MB_B + macroblock_modes;
129 william 62 DUMPBITS(tab->len);
130 william 31 macroblock_modes = tab->modes;
131    
132 william 62 if (decoder.picture_structure != FRAME_PICTURE)
133 william 31 {
134     if (!(macroblock_modes & MACROBLOCK_INTRA))
135     {
136 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
137 william 31 }
138 william 280 return (macroblock_modes | (tab->len << 16));
139 william 31 }
140 william 62 else if (decoder.frame_pred_frame_dct)
141 william 31 {
142     /* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
143     macroblock_modes |= MC_FRAME;
144 william 280 return (macroblock_modes | (tab->len << 16));
145 william 31 }
146     else
147     {
148     if (macroblock_modes & MACROBLOCK_INTRA) goto intra;
149    
150 william 62 macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
151 william 31
152     if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
153     {
154     intra:
155 william 62 macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
156 william 31 }
157 william 280 return (macroblock_modes | (tab->len << 16));
158 william 31 }
159    
160     case D_TYPE:
161 william 62 macroblock_modes = GETBITS(1);
162 william 280 //I suspect (as this is actually a 2 bit command) that this should be getbits(2)
163     //additionally, we arent dumping any bits here when i think we should be, need a game to test. (Refraction)
164     DevCon.Warning(" Rare MPEG command! ");
165 william 31 if (macroblock_modes == 0) return 0; // error
166 william 280 return (MACROBLOCK_INTRA | (1 << 16));
167 william 31
168     default:
169     return 0;
170     }
171     }
172    
173 william 62 static __fi int get_quantizer_scale()
174 william 31 {
175     int quantizer_scale_code;
176    
177 william 62 quantizer_scale_code = GETBITS(5);
178 william 31
179 william 62 if (decoder.q_scale_type)
180 william 31 return non_linear_quantizer_scale [quantizer_scale_code];
181     else
182     return quantizer_scale_code << 1;
183     }
184    
185 william 62 static __fi int get_coded_block_pattern()
186 william 31 {
187     const CBPtab * tab;
188 william 62 u16 code = UBITS(16);
189 william 31
190 william 62 if (code >= 0x2000)
191     tab = CBP_7 + (UBITS(7) - 16);
192 william 31 else
193 william 62 tab = CBP_9 + UBITS(9);
194 william 31
195 william 62 DUMPBITS(tab->len);
196 william 31 return tab->cbp;
197     }
198    
199 william 62 int __fi get_motion_delta(const int f_code)
200 william 31 {
201 william 62 int delta;
202     int sign;
203     const MVtab * tab;
204     u16 code = UBITS(16);
205 william 31
206 william 62 if ((code & 0x8000))
207 william 31 {
208 william 62 DUMPBITS(1);
209     return 0x00010000;
210 william 31 }
211 william 62 else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00))
212     {
213     tab = MV_4 + UBITS(4);
214     }
215     else
216     {
217     tab = MV_10 + UBITS(10);
218     }
219 william 31
220 william 62 delta = tab->delta + 1;
221     DUMPBITS(tab->len);
222 william 31
223 william 62 sign = SBITS(1);
224     DUMPBITS(1);
225 william 280
226     return (((delta ^ sign) - sign) | (tab->len << 16));
227 william 31 }
228    
229 william 62 int __fi get_dmv()
230 william 31 {
231 william 191 const DMVtab* tab = DMV_2 + UBITS(2);
232 william 62 DUMPBITS(tab->len);
233 william 280 return (tab->dmv | (tab->len << 16));
234 william 31 }
235    
236 william 62 int get_macroblock_address_increment()
237 william 31 {
238 william 62 const MBAtab *mba;
239    
240     u16 code = UBITS(16);
241 william 31
242 william 62 if (code >= 4096)
243     mba = MBA.mba5 + (UBITS(5) - 2);
244     else if (code >= 768)
245     mba = MBA.mba11 + (UBITS(11) - 24);
246     else switch (UBITS(11))
247 william 191 {
248     case 8: /* macroblock_escape */
249     DUMPBITS(11);
250 william 280 return 0xb0023;
251 william 31
252 william 191 case 15: /* macroblock_stuffing (MPEG1 only) */
253     if (decoder.mpeg1)
254     {
255 william 62 DUMPBITS(11);
256 william 280 return 0xb0022;
257 william 191 }
258 william 31
259 william 191 default:
260     return 0;//error
261     }
262 william 31
263 william 62 DUMPBITS(mba->len);
264 william 31
265 william 280 return ((mba->mba + 1) | (mba->len << 16));
266 william 62 }
267 william 31
268 william 62 static __fi int get_luma_dc_dct_diff()
269     {
270     int size;
271     int dc_diff;
272     u16 code = UBITS(5);
273 william 31
274 william 62 if (code < 31)
275     {
276     size = DCtable.lum0[code].size;
277     DUMPBITS(DCtable.lum0[code].len);
278 william 31
279 william 62 // 5 bits max
280     }
281     else
282     {
283     code = UBITS(9) - 0x1f0;
284     size = DCtable.lum1[code].size;
285     DUMPBITS(DCtable.lum1[code].len);
286 william 31
287 william 62 // 9 bits max
288     }
289    
290     if (size==0)
291     dc_diff = 0;
292     else
293     {
294     dc_diff = GETBITS(size);
295 william 31
296 william 62 // 6 for tab0 and 11 for tab1
297     if ((dc_diff & (1<<(size-1)))==0)
298     dc_diff-= (1<<size) - 1;
299     }
300 william 31
301 william 62 return dc_diff;
302     }
303 william 31
304 william 62 static __fi int get_chroma_dc_dct_diff()
305     {
306     int size;
307     int dc_diff;
308     u16 code = UBITS(5);
309 william 31
310 william 62 if (code<31)
311     {
312     size = DCtable.chrom0[code].size;
313     DUMPBITS(DCtable.chrom0[code].len);
314     }
315     else
316     {
317     code = UBITS(10) - 0x3e0;
318     size = DCtable.chrom1[code].size;
319     DUMPBITS(DCtable.chrom1[code].len);
320     }
321    
322     if (size==0)
323     dc_diff = 0;
324     else
325     {
326     dc_diff = GETBITS(size);
327 william 31
328 william 62 if ((dc_diff & (1<<(size-1)))==0)
329 william 31 {
330 william 62 dc_diff-= (1<<size) - 1;
331 william 31 }
332     }
333 william 62
334     return dc_diff;
335     }
336 william 31
// Clamps the integer lvalue `val` in place to the range [-2048, 2047].
// The unsigned compare catches both overflow directions in one test; the
// replacement value is 2047 for positive inputs and -2048 for negative ones
// (sign mask from the arithmetic shift, XOR'ed with 2047).
// Every use of the argument is parenthesised so that an lvalue expression
// (not just a plain identifier) expands correctly.  The argument is evaluated
// more than once, so it must be free of side effects.
#define SATURATE(val) \
    do { \
        if ((u32)((val) + 2048) > 4095) \
            (val) = (((s32)(val)) >> 31) ^ 2047; \
    } while (0)
342 william 31
343 william 191 static bool get_intra_block()
344 william 31 {
345 william 62 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
346     const u8 (&quant_matrix)[64] = decoder.iq;
347     int quantizer_scale = decoder.quantizer_scale;
348     s16 * dest = decoder.DCTblock;
349     u16 code;
350 william 31
351 william 62 /* decode AC coefficients */
352 william 191 for (int i=1 + ipu_cmd.pos[4]; ; i++)
353 william 62 {
354     switch (ipu_cmd.pos[5])
355     {
356     case 0:
357     if (!GETWORD())
358     {
359     ipu_cmd.pos[4] = i - 1;
360     return false;
361     }
362 william 31
363 william 62 code = UBITS(16);
364 william 31
365 william 62 if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
366 william 31 {
367 william 62 tab = &DCT.next[(code >> 12) - 4];
368     }
369     else if (code >= 1024)
370     {
371     if (decoder.intra_vlc_format && !decoder.mpeg1)
372 william 31 {
373 william 62 tab = &DCT.tab0a[(code >> 8) - 4];
374 william 31 }
375     else
376     {
377 william 62 tab = &DCT.tab0[(code >> 8) - 4];
378 william 31 }
379     }
380 william 62 else if (code >= 512)
381 william 31 {
382 william 62 if (decoder.intra_vlc_format && !decoder.mpeg1)
383     {
384     tab = &DCT.tab1a[(code >> 6) - 8];
385     }
386     else
387     {
388     tab = &DCT.tab1[(code >> 6) - 8];
389     }
390 william 31 }
391    
392 william 62 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
393     // that should use a single unrolled DCT table instead of five separate tables used
394     // here. Multiple conditional statements are very slow, while modern CPU data caches
395     // have lots of room to spare.
396    
397     else if (code >= 256)
398 william 31 {
399 william 62 tab = &DCT.tab2[(code >> 4) - 16];
400 william 31 }
401 william 62 else if (code >= 128)
402     {
403     tab = &DCT.tab3[(code >> 3) - 16];
404     }
405     else if (code >= 64)
406     {
407     tab = &DCT.tab4[(code >> 2) - 16];
408     }
409     else if (code >= 32)
410     {
411     tab = &DCT.tab5[(code >> 1) - 16];
412     }
413     else if (code >= 16)
414     {
415     tab = &DCT.tab6[code - 16];
416     }
417 william 31 else
418     {
419 william 62 ipu_cmd.pos[4] = 0;
420     return true;
421 william 31 }
422    
423 william 62 DUMPBITS(tab->len);
424 william 31
425 william 62 if (tab->run==64) /* end_of_block */
426 william 31 {
427 william 62 ipu_cmd.pos[4] = 0;
428     return true;
429 william 31 }
430 william 62
431 william 191 i += (tab->run == 65) ? GETBITS(6) : tab->run;
432 william 62 if (i >= 64)
433 william 31 {
434 william 62 ipu_cmd.pos[4] = 0;
435     return true;
436 william 31 }
437 william 191
438 william 62 case 1:
439 william 191 {
440     if (!GETWORD())
441     {
442     ipu_cmd.pos[4] = i - 1;
443     ipu_cmd.pos[5] = 1;
444     return false;
445     }
446 william 31
447 william 191 uint j = scan[i];
448     int val;
449 william 62
450 william 191 if (tab->run==65) /* escape */
451     {
452     if(!decoder.mpeg1)
453     {
454     val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
455     DUMPBITS(12);
456     }
457     else
458     {
459     val = SBITS(8);
460     DUMPBITS(8);
461 william 31
462 william 191 if (!(val & 0x7f))
463     {
464     val = GETBITS(8) + 2 * val;
465     }
466 william 31
467 william 191 val = (val * quantizer_scale * quant_matrix[i]) >> 4;
468     val = (val + ~ (((s32)val) >> 31)) | 1;
469     }
470     }
471     else
472     {
473     val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
474     if(decoder.mpeg1)
475     {
476     /* oddification */
477     val = (val - 1) | 1;
478     }
479    
480     /* if (bitstream_get (1)) val = -val; */
481     int bit1 = SBITS(1);
482     val = (val ^ bit1) - bit1;
483     DUMPBITS(1);
484     }
485    
486     SATURATE(val);
487     dest[j] = val;
488     ipu_cmd.pos[5] = 0;
489 william 31 }
490 william 62 }
491     }
492 william 31
493 william 62 ipu_cmd.pos[4] = 0;
494     return true;
495 william 31 }
496    
497 william 191 static bool get_non_intra_block(int * last)
498 william 31 {
499     int i;
500     int j;
501     int val;
502 william 62 const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
503     const u8 (&quant_matrix)[64] = decoder.niq;
504     int quantizer_scale = decoder.quantizer_scale;
505     s16 * dest = decoder.DCTblock;
506     u16 code;
507 william 31
508 william 62 /* decode AC coefficients */
509     for (i= ipu_cmd.pos[4] ; ; i++)
510     {
511     switch (ipu_cmd.pos[5])
512 william 31 {
513 william 62 case 0:
514     if (!GETWORD())
515     {
516     ipu_cmd.pos[4] = i;
517     return false;
518     }
519 william 31
520 william 62 code = UBITS(16);
521 william 31
522 william 62 if (code >= 16384)
523     {
524     if (i==0)
525     {
526     tab = &DCT.first[(code >> 12) - 4];
527     }
528     else
529     {
530     tab = &DCT.next[(code >> 12)- 4];
531     }
532     }
533     else if (code >= 1024)
534     {
535     tab = &DCT.tab0[(code >> 8) - 4];
536     }
537     else if (code >= 512)
538     {
539     tab = &DCT.tab1[(code >> 6) - 8];
540     }
541 william 31
542 william 62 // [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
543     // that should use a single unrolled DCT table instead of five separate tables used
544     // here. Multiple conditional statements are very slow, while modern CPU data caches
545     // have lots of room to spare.
546 william 31
547 william 62 else if (code >= 256)
548     {
549     tab = &DCT.tab2[(code >> 4) - 16];
550     }
551     else if (code >= 128)
552     {
553     tab = &DCT.tab3[(code >> 3) - 16];
554     }
555     else if (code >= 64)
556     {
557     tab = &DCT.tab4[(code >> 2) - 16];
558     }
559     else if (code >= 32)
560     {
561     tab = &DCT.tab5[(code >> 1) - 16];
562     }
563     else if (code >= 16)
564     {
565     tab = &DCT.tab6[code - 16];
566     }
567     else
568     {
569     ipu_cmd.pos[4] = 0;
570     return true;
571     }
572 william 31
573 william 62 DUMPBITS(tab->len);
574 william 31
575 william 62 if (tab->run==64) /* end_of_block */
576     {
577     *last = i;
578     ipu_cmd.pos[4] = 0;
579     return true;
580     }
581 william 31
582 william 62 i += (tab->run == 65) ? GETBITS(6) : tab->run;
583     if (i >= 64)
584     {
585     *last = i;
586     ipu_cmd.pos[4] = 0;
587     return true;
588     }
589 william 31
590 william 62 case 1:
591     if (!GETWORD())
592     {
593     ipu_cmd.pos[4] = i;
594     ipu_cmd.pos[5] = 1;
595     return false;
596     }
597 william 31
598 william 62 j = scan[i];
599 william 31
600 william 62 if (tab->run==65) /* escape */
601     {
602     if (!decoder.mpeg1)
603     {
604     val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5;
605     DUMPBITS(12);
606     }
607     else
608     {
609     val = SBITS(8);
610     DUMPBITS(8);
611 william 31
612 william 62 if (!(val & 0x7f))
613     {
614     val = GETBITS(8) + 2 * val;
615     }
616 william 31
617 william 62 val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32;
618     val = (val + ~ (((s32)val) >> 31)) | 1;
619     }
620     }
621     else
622 william 31 {
623 william 191 int bit1 = SBITS(1);
624 william 62 val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
625 william 191 val = (val ^ bit1) - bit1;
626 william 62 DUMPBITS(1);
627 william 31 }
628    
629     SATURATE(val);
630     dest[j] = val;
631 william 62 ipu_cmd.pos[5] = 0;
632 william 31 }
633     }
634    
635 william 62 ipu_cmd.pos[4] = 0;
636     return true;
637 william 31 }
638    
639 william 62 static __fi bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
640 william 31 {
641 william 62 if (!skip || ipu_cmd.pos[3])
642 william 31 {
643 william 62 ipu_cmd.pos[3] = 0;
644     if (!GETWORD())
645 william 31 {
646 william 62 ipu_cmd.pos[3] = 1;
647     return false;
648 william 31 }
649    
650 william 62 /* Get the intra DC coefficient and inverse quantize it */
651     if (cc == 0)
652     decoder.dc_dct_pred[0] += get_luma_dc_dct_diff();
653 william 31 else
654 william 62 decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff();
655 william 31
656 william 62 decoder.DCTblock[0] = decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision);
657 william 31 }
658    
659 william 62 if (!get_intra_block())
660 william 31 {
661 william 62 return false;
662 william 31 }
663    
664 william 62 mpeg2_idct_copy(decoder.DCTblock, dest, stride);
665    
666     return true;
667 william 31 }
668    
669 william 62 static __fi bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
670 william 31 {
671     int last;
672    
673 william 62 if (!skip)
674 william 31 {
675 william 62 memzero_sse_a(decoder.DCTblock);
676 william 31 }
677    
678 william 62 if (!get_non_intra_block(&last))
679 william 31 {
680 william 62 return false;
681 william 31 }
682    
683 william 62 mpeg2_idct_add(last, decoder.DCTblock, dest, stride);
684 william 31
685 william 62 return true;
686 william 31 }
687    
688 william 62 void __fi finishmpeg2sliceIDEC()
689 william 31 {
690 william 62 ipuRegs.ctrl.SCD = 0;
691     coded_block_pattern = decoder.coded_block_pattern;
692 william 31 }
693    
694 william 273 __fi bool mpeg2sliceIDEC()
695 william 31 {
696 william 62 u16 code;
697 william 31
698 william 62 switch (ipu_cmd.pos[0])
699     {
700     case 0:
701     decoder.dc_dct_pred[0] =
702     decoder.dc_dct_pred[1] =
703     decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
704 william 31
705 william 62 ipuRegs.top = 0;
706     ipuRegs.ctrl.ECD = 0;
707 william 31
708 william 62 case 1:
709     ipu_cmd.pos[0] = 1;
710     if (!bitstream_init())
711     {
712     return false;
713     }
714 william 31
715 william 62 case 2:
716     ipu_cmd.pos[0] = 2;
717 william 31 while (1)
718     {
719 william 62 macroblock_8& mb8 = decoder.mb8;
720     macroblock_rgb16& rgb16 = decoder.rgb16;
721     macroblock_rgb32& rgb32 = decoder.rgb32;
722    
723 william 31 int DCT_offset, DCT_stride;
724     const MBAtab * mba;
725    
726 william 62 switch (ipu_cmd.pos[1])
727 william 31 {
728 william 62 case 0:
729     decoder.macroblock_modes = get_macroblock_modes();
730 william 31
731 william 62 if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC
732     {
733     decoder.quantizer_scale = get_quantizer_scale();
734     }
735 william 31
736 william 62 decoder.coded_block_pattern = 0x3F;//all 6 blocks
737     memzero_sse_a(mb8);
738     memzero_sse_a(rgb32);
739 william 31
740 william 62 case 1:
741     ipu_cmd.pos[1] = 1;
742 william 31
743 william 62 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
744 william 31 {
745 william 62 DCT_offset = decoder_stride;
746     DCT_stride = decoder_stride * 2;
747 william 31 }
748     else
749     {
750 william 62 DCT_offset = decoder_stride * 8;
751     DCT_stride = decoder_stride;
752     }
753 william 31
754 william 62 switch (ipu_cmd.pos[2])
755     {
756     case 0:
757     case 1:
758     if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1))
759     {
760     ipu_cmd.pos[2] = 1;
761     return false;
762     }
763     case 2:
764     if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
765     {
766     ipu_cmd.pos[2] = 2;
767     return false;
768     }
769     case 3:
770     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
771     {
772     ipu_cmd.pos[2] = 3;
773     return false;
774     }
775     case 4:
776     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
777     {
778     ipu_cmd.pos[2] = 4;
779     return false;
780     }
781     case 5:
782     if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5))
783     {
784     ipu_cmd.pos[2] = 5;
785     return false;
786     }
787     case 6:
788     if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6))
789     {
790     ipu_cmd.pos[2] = 6;
791     return false;
792     }
793 william 191 break;
794    
795     jNO_DEFAULT;
796 william 31 }
797    
798 william 62 // Send The MacroBlock via DmaIpuFrom
799     ipu_csc(mb8, rgb32, decoder.sgn);
800    
801     if (decoder.ofm == 0)
802     decoder.SetOutputTo(rgb32);
803     else
804 william 31 {
805 william 62 ipu_dither(rgb32, rgb16, decoder.dte);
806     decoder.SetOutputTo(rgb16);
807     }
808 william 31
809 william 62 case 2:
810 william 191 {
811     pxAssume(decoder.ipu0_data > 0);
812    
813     uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
814     decoder.AdvanceIpuDataBy(read);
815    
816     if (decoder.ipu0_data != 0)
817 william 62 {
818 william 191 // IPU FIFO filled up -- Will have to finish transferring later.
819     ipu_cmd.pos[1] = 2;
820     return false;
821 william 31 }
822    
823 william 62 mbaCount = 0;
824 william 191 }
825    
826 william 62 case 3:
827     while (1)
828 william 31 {
829 william 62 if (!GETWORD())
830 william 31 {
831 william 62 ipu_cmd.pos[1] = 3;
832     return false;
833     }
834 william 31
835 william 62 code = UBITS(16);
836     if (code >= 0x1000)
837     {
838     mba = MBA.mba5 + (UBITS(5) - 2);
839     break;
840     }
841     else if (code >= 0x0300)
842     {
843     mba = MBA.mba11 + (UBITS(11) - 24);
844     break;
845     }
846     else switch (UBITS(11))
847     {
848 william 191 case 8: /* macroblock_escape */
849     mbaCount += 33;
850     /* pass through */
851 william 31
852 william 191 case 15: /* macroblock_stuffing (MPEG1 only) */
853     DUMPBITS(11);
854     continue;
855 william 31
856 william 191 default: /* end of slice/frame, or error? */
857     {
858     goto finish_idec;
859     }
860 william 31 }
861 william 62 }
862 william 31
863 william 62 DUMPBITS(mba->len);
864     mbaCount += mba->mba;
865 william 31
866 william 62 if (mbaCount)
867     {
868     decoder.dc_dct_pred[0] =
869     decoder.dc_dct_pred[1] =
870     decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
871     }
872    
873     case 4:
874     if (!GETWORD())
875 william 31 {
876 william 62 ipu_cmd.pos[1] = 4;
877     return false;
878 william 31 }
879 william 62
880     break;
881 william 191
882     jNO_DEFAULT;
883 william 31 }
884 william 62
885     ipu_cmd.pos[1] = 0;
886     ipu_cmd.pos[2] = 0;
887 william 31 }
888 william 191
889 william 62 finish_idec:
890     finishmpeg2sliceIDEC();
891 william 31
892 william 62 case 3:
893 william 191 {
894     u8 bit8;
895 william 62 if (!getBits8((u8*)&bit8, 0))
896     {
897     ipu_cmd.pos[0] = 3;
898     return false;
899     }
900 william 31
901 william 62 if (bit8 == 0)
902     {
903 william 191 g_BP.Align();
904 william 62 ipuRegs.ctrl.SCD = 1;
905     }
906 william 191 }
907 william 62
908     case 4:
909     if (!getBits32((u8*)&ipuRegs.top, 0))
910     {
911     ipu_cmd.pos[0] = 4;
912     return false;
913     }
914    
915 william 191 ipuRegs.top = BigEndian(ipuRegs.top);
916 william 62 break;
917 william 191
918     jNO_DEFAULT;
919 william 62 }
920    
921     return true;
922 william 31 }
923    
924 william 273 __fi bool mpeg2_slice()
925 william 31 {
926     int DCT_offset, DCT_stride;
927    
928 william 62 macroblock_8& mb8 = decoder.mb8;
929     macroblock_16& mb16 = decoder.mb16;
930 william 31
931 william 62 switch (ipu_cmd.pos[0])
932 william 31 {
933 william 62 case 0:
934     if (decoder.dcr)
935 william 31 {
936 william 62 decoder.dc_dct_pred[0] =
937     decoder.dc_dct_pred[1] =
938     decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
939 william 31 }
940 william 62
941     ipuRegs.ctrl.ECD = 0;
942     ipuRegs.top = 0;
943     memzero_sse_a(mb8);
944     memzero_sse_a(mb16);
945     case 1:
946     if (!bitstream_init())
947     {
948     ipu_cmd.pos[0] = 1;
949     return false;
950     }
951 william 31
952 william 62 case 2:
953     ipu_cmd.pos[0] = 2;
954 william 31
955 william 62 if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
956 william 31 {
957 william 62 DCT_offset = decoder_stride;
958     DCT_stride = decoder_stride * 2;
959 william 31 }
960     else
961     {
962 william 62 DCT_offset = decoder_stride * 8;
963     DCT_stride = decoder_stride;
964 william 31 }
965    
966 william 62 if (decoder.macroblock_modes & MACROBLOCK_INTRA)
967     {
968     switch(ipu_cmd.pos[1])
969     {
970     case 0:
971     decoder.coded_block_pattern = 0x3F;
972     case 1:
973     if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1))
974     {
975     ipu_cmd.pos[1] = 1;
976     return false;
977     }
978     case 2:
979     if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
980     {
981     ipu_cmd.pos[1] = 2;
982     return false;
983     }
984     case 3:
985     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
986     {
987     ipu_cmd.pos[1] = 3;
988     return false;
989     }
990     case 4:
991     if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
992     {
993     ipu_cmd.pos[1] = 4;
994     return false;
995     }
996     case 5:
997     if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
998     {
999     ipu_cmd.pos[1] = 5;
1000     return false;
1001     }
1002     case 6:
1003     if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1004     {
1005     ipu_cmd.pos[1] = 6;
1006     return false;
1007     }
1008     break;
1009 william 191
1010     jNO_DEFAULT;
1011 william 62 }
1012 william 31
1013 william 191 // Copy macroblock8 to macroblock16 - without sign extension.
1014     // Manually inlined due to MSVC refusing to inline the SSE-optimized version.
1015     {
1016     const u8 *s = (const u8*)&mb8;
1017     u16 *d = (u16*)&mb16;
1018    
1019     //Y bias - 16 * 16
1020     //Cr bias - 8 * 8
1021     //Cb bias - 8 * 8
1022    
1023     __m128i zeroreg = _mm_setzero_si128();
1024    
1025     for (uint i = 0; i < (256+64+64) / 32; ++i)
1026     {
1027     //*d++ = *s++;
1028     __m128i woot1 = _mm_load_si128((__m128i*)s);
1029     __m128i woot2 = _mm_load_si128((__m128i*)s+1);
1030     _mm_store_si128((__m128i*)d, _mm_unpacklo_epi8(woot1, zeroreg));
1031     _mm_store_si128((__m128i*)d+1, _mm_unpackhi_epi8(woot1, zeroreg));
1032     _mm_store_si128((__m128i*)d+2, _mm_unpacklo_epi8(woot2, zeroreg));
1033     _mm_store_si128((__m128i*)d+3, _mm_unpackhi_epi8(woot2, zeroreg));
1034     s += 32;
1035     d += 32;
1036     }
1037     }
1038 william 62 }
1039     else
1040     {
1041     if (decoder.macroblock_modes & MACROBLOCK_PATTERN)
1042     {
1043     switch(ipu_cmd.pos[1])
1044     {
1045     case 0:
1046     decoder.coded_block_pattern = get_coded_block_pattern(); // max 9bits
1047     case 1:
1048     if (decoder.coded_block_pattern & 0x20)
1049     {
1050     if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1))
1051     {
1052     ipu_cmd.pos[1] = 1;
1053     return false;
1054     }
1055     }
1056     case 2:
1057     if (decoder.coded_block_pattern & 0x10)
1058     {
1059     if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
1060     {
1061     ipu_cmd.pos[1] = 2;
1062     return false;
1063     }
1064     }
1065     case 3:
1066     if (decoder.coded_block_pattern & 0x08)
1067     {
1068     if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
1069     {
1070     ipu_cmd.pos[1] = 3;
1071     return false;
1072     }
1073     }
1074     case 4:
1075     if (decoder.coded_block_pattern & 0x04)
1076     {
1077     if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
1078     {
1079     ipu_cmd.pos[1] = 4;
1080     return false;
1081     }
1082     }
1083     case 5:
1084     if (decoder.coded_block_pattern & 0x2)
1085     {
1086     if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
1087     {
1088     ipu_cmd.pos[1] = 5;
1089     return false;
1090     }
1091     }
1092     case 6:
1093     if (decoder.coded_block_pattern & 0x1)
1094     {
1095     if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
1096     {
1097     ipu_cmd.pos[1] = 6;
1098     return false;
1099     }
1100     }
1101     break;
1102 william 191
1103     jNO_DEFAULT;
1104 william 62 }
1105     }
1106     }
1107 william 31
1108 william 62 // Send The MacroBlock via DmaIpuFrom
1109     ipuRegs.ctrl.SCD = 0;
1110     coded_block_pattern = decoder.coded_block_pattern;
1111 william 31
1112 william 62 decoder.SetOutputTo(mb16);
1113 william 31
1114 william 62 case 3:
1115 william 191 {
1116     pxAssume(decoder.ipu0_data > 0);
1117    
1118     uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
1119     decoder.AdvanceIpuDataBy(read);
1120    
1121     if (decoder.ipu0_data != 0)
1122 william 62 {
1123 william 191 // IPU FIFO filled up -- Will have to finish transferring later.
1124     ipu_cmd.pos[0] = 3;
1125     return false;
1126 william 62 }
1127 william 31
1128 william 191 mbaCount = 0;
1129     }
1130    
1131 william 62 case 4:
1132 william 191 {
1133     u8 bit8;
1134 william 62 if (!getBits8((u8*)&bit8, 0))
1135 william 31 {
1136 william 62 ipu_cmd.pos[0] = 4;
1137     return false;
1138 william 31 }
1139    
1140 william 62 if (bit8 == 0)
1141     {
1142 william 191 g_BP.Align();
1143 william 62 ipuRegs.ctrl.SCD = 1;
1144     }
1145 william 191 }
1146    
1147 william 62 case 5:
1148     if (!getBits32((u8*)&ipuRegs.top, 0))
1149 william 31 {
1150 william 62 ipu_cmd.pos[0] = 5;
1151     return false;
1152 william 31 }
1153    
1154 william 191 ipuRegs.top = BigEndian(ipuRegs.top);
1155 william 62 break;
1156     }
1157 william 31
1158 william 62 return true;
1159     }

  ViewVC Help
Powered by ViewVC 1.1.22