/[pcsx2_0.9.7]/trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp
ViewVC logotype

Diff of /trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

--- trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp	2010/09/07 03:24:11	31
+++ trunk/pcsx2/IPU/mpeg2lib/Mpeg.cpp	2010/09/07 11:08:22	62
@@ -33,7 +33,7 @@
 #include "Mpeg.h"
 #include "Vlc.h"
 
-int non_linear_quantizer_scale [] =
+const int non_linear_quantizer_scale [] =
 {
 	0,  1,  2,  3,  4,  5,	6,	7,
 	8, 10, 12, 14, 16, 18,  20,  22,
@@ -48,55 +48,51 @@
 	back to the 1st slot when 128bits have been read.
 */
 extern void ReorderBitstream();
+const DCTtab * tab;
+int mbaCount = 0;
 
-int get_macroblock_modes(decoder_t * const decoder)
+int get_macroblock_modes()
 {
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
 	int macroblock_modes;
 	const MBtab * tab;
 
-	switch (decoder->coding_type)
+	switch (decoder.coding_type)
 	{
-
 		case I_TYPE:
-			macroblock_modes = UBITS(bit_buf, 2);
+			macroblock_modes = UBITS(2);
 
 			if (macroblock_modes == 0) return 0;   // error
 
 			tab = MB_I + (macroblock_modes >> 1);
-			DUMPBITS(bit_buf, bits, tab->len);
+			DUMPBITS(tab->len);
 			macroblock_modes = tab->modes;
 
-			if ((!(decoder->frame_pred_frame_dct)) &&
-			        (decoder->picture_structure == FRAME_PICTURE))
+			if ((!(decoder.frame_pred_frame_dct)) &&
+			        (decoder.picture_structure == FRAME_PICTURE))
 			{
-				macroblock_modes |= UBITS(bit_buf, 1) * DCT_TYPE_INTERLACED;
-				DUMPBITS(bit_buf, bits, 1);
+				macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
 			}
 			return macroblock_modes;
 
 		case P_TYPE:
-			macroblock_modes = UBITS(bit_buf, 6);
+			macroblock_modes = UBITS(6);
 
 			if (macroblock_modes == 0) return 0;   // error
 
 			tab = MB_P + (macroblock_modes >> 1);
-			DUMPBITS(bit_buf, bits, tab->len);
+			DUMPBITS(tab->len);
 			macroblock_modes = tab->modes;
 
-			if (decoder->picture_structure != FRAME_PICTURE)
+			if (decoder.picture_structure != FRAME_PICTURE)
 			{
 				if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
 				{
-					macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE;
-					DUMPBITS(bit_buf, bits, 2);
+					macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
 				}
 
 				return macroblock_modes;
 			}
-			else if (decoder->frame_pred_frame_dct)
+			else if (decoder.frame_pred_frame_dct)
 			{
 				if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
 					macroblock_modes |= MC_FRAME;
@@ -107,39 +103,36 @@
 			{
 				if (macroblock_modes & MACROBLOCK_MOTION_FORWARD)
 				{
-					macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE;
-					DUMPBITS(bit_buf, bits, 2);
+					macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
 				}
 
 				if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
 				{
-					macroblock_modes |= UBITS(bit_buf, 1) * DCT_TYPE_INTERLACED;
-					DUMPBITS(bit_buf, bits, 1);
+					macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
 				}
 
 				return macroblock_modes;
 			}
 
 		case B_TYPE:
-			macroblock_modes = UBITS(bit_buf, 6);
+			macroblock_modes = UBITS(6);
 
 			if (macroblock_modes == 0) return 0;   // error
 
 			tab = MB_B + macroblock_modes;
-			DUMPBITS(bit_buf, bits, tab->len);
+			DUMPBITS(tab->len);
 			macroblock_modes = tab->modes;
 
-			if (decoder->picture_structure != FRAME_PICTURE)
+			if (decoder.picture_structure != FRAME_PICTURE)
 			{
 				if (!(macroblock_modes & MACROBLOCK_INTRA))
 				{
-					macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE;
-					DUMPBITS(bit_buf, bits, 2);
+					macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
 				}
 
 				return macroblock_modes;
 			}
-			else if (decoder->frame_pred_frame_dct)
+			else if (decoder.frame_pred_frame_dct)
 			{
 				/* if (! (macroblock_modes & MACROBLOCK_INTRA)) */
 				macroblock_modes |= MC_FRAME;
@@ -149,968 +142,548 @@
 			{
 				if (macroblock_modes & MACROBLOCK_INTRA) goto intra;
 
-				macroblock_modes |= UBITS(bit_buf, 2) * MOTION_TYPE_BASE;
-				DUMPBITS(bit_buf, bits, 2);
+				macroblock_modes |= GETBITS(2) * MOTION_TYPE_BASE;
 
 				if (macroblock_modes & (MACROBLOCK_INTRA | MACROBLOCK_PATTERN))
 				{
 intra:
-					macroblock_modes |= UBITS(bit_buf, 1) * DCT_TYPE_INTERLACED;
-					DUMPBITS(bit_buf, bits, 1);
+					macroblock_modes |= GETBITS(1) * DCT_TYPE_INTERLACED;
 				}
 
 				return macroblock_modes;
 			}
 
 		case D_TYPE:
-			macroblock_modes = UBITS(bit_buf, 1);
+			macroblock_modes = GETBITS(1);
 
 			if (macroblock_modes == 0) return 0;   // error
-
-			DUMPBITS(bit_buf, bits, 1);
 			return MACROBLOCK_INTRA;
 
 		default:
 			return 0;
 	}
-
-#undef bit_buf
-#undef bits
-#undef bit_ptr
 }
 
-static __forceinline int get_quantizer_scale(decoder_t * const decoder)
+static __fi int get_quantizer_scale()
 {
 	int quantizer_scale_code;
 
-	quantizer_scale_code = UBITS(decoder->bitstream_buf, 5);
-	DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 5);
+	quantizer_scale_code = GETBITS(5);
 
-	if (decoder->q_scale_type)
+	if (decoder.q_scale_type)
 		return non_linear_quantizer_scale [quantizer_scale_code];
 	else
 		return quantizer_scale_code << 1;
 }
 
-static __forceinline int get_coded_block_pattern(decoder_t * const decoder)
+static __fi int get_coded_block_pattern()
 {
 	const CBPtab * tab;
+	u16 code = UBITS(16);
 
-	NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr);
-
-	if (decoder->bitstream_buf >= 0x20000000)
-		tab = CBP_7 + (UBITS(decoder->bitstream_buf, 7) - 16);
+	if (code >= 0x2000)
+		tab = CBP_7 + (UBITS(7) - 16);
 	else
-		tab = CBP_9 + UBITS(decoder->bitstream_buf, 9);
+		tab = CBP_9 + UBITS(9);
 
-	DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, tab->len);
+	DUMPBITS(tab->len);
 	return tab->cbp;
 }
 
-static __forceinline int get_luma_dc_dct_diff(decoder_t * const decoder)
+int __fi get_motion_delta(const int f_code)
 {
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
+	int delta;
+	int sign;
+	const MVtab * tab;
+	u16 code = UBITS(16);
+
+	if ((code & 0x8000))
+	{
+		DUMPBITS(1);
+		return 0x00010000;
+	}
+	else if ((code & 0xf000) || ((code & 0xfc00) == 0x0c00))
+	{
+		tab = MV_4 + UBITS(4);
+	}
+	else
+	{
+		tab = MV_10 + UBITS(10);
+	}
+
+	delta = tab->delta + 1;
+	DUMPBITS(tab->len);
+
+	sign = SBITS(1);
+	DUMPBITS(1);
+	return (delta ^ sign) - sign;
+}
+
+int __fi get_dmv()
+{
+	const DMVtab * tab;
+
+	tab = DMV_2 + UBITS(2);
+	DUMPBITS(tab->len);
+	return tab->dmv;
+}
+
+int get_macroblock_address_increment()
+{
+	const MBAtab *mba;
+	
+	u16 code = UBITS(16);
+
+	if (code >= 4096)
+		mba = MBA.mba5 + (UBITS(5) - 2);
+	else if (code >= 768)
+		mba = MBA.mba11 + (UBITS(11) - 24);
+	else switch (UBITS(11))
+		{
+
+			case 8:		/* macroblock_escape */
+				DUMPBITS(11);
+				return 0x23;
+
+			case 15:	/* macroblock_stuffing (MPEG1 only) */
+				if (decoder.mpeg1)
+				{
+					DUMPBITS(11);
+					return 0x22;
+				}
+
+			default:
+				return 0;//error
+		}
+
+	DUMPBITS(mba->len);
+
+	return mba->mba + 1;
+}
 
-	const DCtab * tab;
+static __fi int get_luma_dc_dct_diff()
+{
 	int size;
 	int dc_diff;
+	u16 code = UBITS(5);
 
-	if (bit_buf < 0xf8000000)
+	if (code < 31)
 	{
-		tab = DC_lum_5 + UBITS(bit_buf, 5);
-		size = tab->size;
+		size = DCtable.lum0[code].size;
+		DUMPBITS(DCtable.lum0[code].len);
 
-		if (size)
-		{
-			DUMPBITS(bit_buf, bits, tab->len);
-			bits += size;
-			dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size);
-			bit_buf <<= size;
-			return dc_diff;
-		}
-		else
-		{
-			DUMPBITS(bit_buf, bits, 3);
-			return 0;
-		}
+		// 5 bits max
 	}
+	else
+	{
+		code = UBITS(9) - 0x1f0;
+		size = DCtable.lum1[code].size;
+		DUMPBITS(DCtable.lum1[code].len);
+
+		// 9 bits max
+	}
+	
+	if (size==0)
+		dc_diff = 0;
+	else
+	{
+		dc_diff = GETBITS(size);
 
-	tab = DC_long + (UBITS(bit_buf, 9) - 0x1e0); //0x1e0);
+		// 6 for tab0 and 11 for tab1
+		if ((dc_diff & (1<<(size-1)))==0)
+		  dc_diff-= (1<<size) - 1;
+	}
 
-	size = tab->size;
-	DUMPBITS(bit_buf, bits, tab->len);
-	NEEDBITS(bit_buf, bits, bit_ptr);
-	dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size);
-	DUMPBITS(bit_buf, bits, size);
 	return dc_diff;
-#undef bit_buf
-#undef bits
-#undef bit_ptr
 }
 
-static __forceinline int get_chroma_dc_dct_diff(decoder_t * const decoder)
+static __fi int get_chroma_dc_dct_diff()
 {
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-
-	const DCtab * tab;
 	int size;
 	int dc_diff;
+	u16 code = UBITS(5);
 
-	if (bit_buf < 0xf8000000)
+    if (code<31)
+	{
+		size = DCtable.chrom0[code].size;
+		DUMPBITS(DCtable.chrom0[code].len);
+	}
+	else
 	{
-		tab = DC_chrom_5 + UBITS(bit_buf, 5);
-		size = tab->size;
+	    code = UBITS(10) - 0x3e0;
+	    size = DCtable.chrom1[code].size;
+		DUMPBITS(DCtable.chrom1[code].len);
+	}
+	
+	if (size==0)
+	    dc_diff = 0;
+	else
+	{
+		dc_diff = GETBITS(size);
 
-		if (size)
-		{
-			DUMPBITS(bit_buf, bits, tab->len);
-			bits += size;
-			dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size);
-			bit_buf <<= size;
-			return dc_diff;
-		}
-		else
+		if ((dc_diff & (1<<(size-1)))==0)
 		{
-			DUMPBITS(bit_buf, bits, 2);
-			return 0;
+			dc_diff-= (1<<size) - 1;
 		}
 	}
-
-	tab = DC_long + (UBITS(bit_buf, 10) - 0x3e0);
-
-	size = tab->size;
-	DUMPBITS(bit_buf, bits, tab->len + 1);
-	NEEDBITS(bit_buf, bits, bit_ptr);
-	dc_diff = UBITS(bit_buf, size) - UBITS(SBITS(~bit_buf, 1), size);
-	DUMPBITS(bit_buf, bits, size);
+  
 	return dc_diff;
-#undef bit_buf
-#undef bits
-#undef bit_ptr
 }
 
 #define SATURATE(val)					\
 do {							\
 	 if (((u32)(val + 2048) > 4095))	\
-	val = SBITS (val, 1) ^ 2047;			\
+	val = (((s32)val) >> 31) ^ 2047;			\
 } while (0)
 
-static __forceinline void get_intra_block_B14(decoder_t * const decoder)
+static __fi bool get_intra_block()
 {
 	int i;
 	int j;
 	int val;
-	const u8 * scan = decoder->scan;
-	const u8 * quant_matrix = decoder->intra_quantizer_matrix;
-	int quantizer_scale = decoder->quantizer_scale;
-	int mismatch;
-	const DCTtab * tab;
-	u32 bit_buf;
-	u8 * bit_ptr;
-	int bits;
-	s16 * dest;
-
-	dest = decoder->DCTblock;
-	i = 0;
-	mismatch = ~dest[0];
-
-	bit_buf = decoder->bitstream_buf;
-	bits = decoder->bitstream_bits;
-	bit_ptr = decoder->bitstream_ptr;
+	const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
+	const u8 (&quant_matrix)[64] = decoder.iq;
+	int quantizer_scale = decoder.quantizer_scale;
+	s16 * dest = decoder.DCTblock;
+	u16 code; 
 
-	NEEDBITS(bit_buf, bits, bit_ptr);
-
-	while (1)
-	{
-		if (bit_buf >= 0x28000000)
+	/* decode AC coefficients */
+  for (i=1 + ipu_cmd.pos[4]; ; i++)
+  {
+	  switch (ipu_cmd.pos[5])
+	  {
+	  case 0:
+		if (!GETWORD())
 		{
-			tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5);
-			i += tab->run;
-			if (i >= 64) break;	/* end of block */
-
-normal_code:
-			j = scan[i];
-			bit_buf <<= tab->len;
-			bits += tab->len + 1;
-
-			/* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
-			val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
-
-			/* if (bitstream_get (1)) val = -val; */
-			val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1);
-
-			SATURATE(val);
-			dest[j] = val;
-			mismatch ^= val;
-			bit_buf <<= 1;
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			continue;
+		  ipu_cmd.pos[4] = i - 1;
+		  return false;
 		}
-		else if (bit_buf >= 0x04000000)
-		{
-			tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-
-			/* escape code */
 
-			i += UBITS(bit_buf << 6, 6) - 64;
+		code = UBITS(16);
 
-			if (i >= 64) break;	/* illegal, check needed to avoid buffer overflow */
-
-			j = scan[i];
-
-			DUMPBITS(bit_buf, bits, 12);
-			NEEDBITS(bit_buf, bits, bit_ptr);
-
-			/* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
-			val = (SBITS(bit_buf, 12) * quantizer_scale * quant_matrix[i]) / 16;
-
-			SATURATE(val);
-			dest[j] = val;
-			mismatch ^= val;
-			DUMPBITS(bit_buf, bits, 12);
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			continue;
-
-		}
-		else if (bit_buf >= 0x02000000)
+		if (code >= 16384 && (!decoder.intra_vlc_format || decoder.mpeg1))
 		{
-			tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
+		  tab = &DCT.next[(code >> 12) - 4];
 		}
-		else if (bit_buf >= 0x00800000)
-		{
-			tab = DCT_13 + (UBITS(bit_buf, 13) - 16);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-		}
-		else if (bit_buf >= 0x00200000)
-		{
-			tab = DCT_15 + (UBITS(bit_buf, 15) - 16);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-		}
-		else
-		{
-			tab = DCT_16 + UBITS(bit_buf, 16);
-			bit_buf <<= 16;
-			GETWORD(&bit_buf, bits + 16);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-		}
-
-		break;	/* illegal, check needed to avoid buffer overflow */
-	}
-
-	dest[63] ^= mismatch & 1;
-
-	if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1;
-
-	DUMPBITS(bit_buf, bits, tab->len);	/* dump end of block code */
-
-	decoder->bitstream_buf = bit_buf;
-	decoder->bitstream_bits = bits;
-}
-
-static __forceinline void get_intra_block_B15(decoder_t * const decoder)
-{
-	int i;
-	int j;
-	int val;
-	const u8 * scan = decoder->scan;
-	const u8 * quant_matrix = decoder->intra_quantizer_matrix;
-	int quantizer_scale = decoder->quantizer_scale;
-	int mismatch;
-	const DCTtab * tab;
-	u32 bit_buf;
-	u8 * bit_ptr;
-	int bits;
-	s16 * dest;
-
-	dest = decoder->DCTblock;
-	i = 0;
-	mismatch = ~dest[0];
-
-	bit_buf = decoder->bitstream_buf;
-	bits = decoder->bitstream_bits;
-	bit_ptr = decoder->bitstream_ptr;
-
-	NEEDBITS(bit_buf, bits, bit_ptr);
-
-	while (1)
-	{
-		if (bit_buf >= 0x04000000)
+		else if (code >= 1024)
 		{
-			tab = DCT_B15_8 + (UBITS(bit_buf, 8) - 4);
-			i += tab->run;
-
-			if (i < 64)
+			if (decoder.intra_vlc_format && !decoder.mpeg1)
 			{
-normal_code:
-				j = scan[i];
-				bit_buf <<= tab->len;
-				bits += tab->len + 1;
-				/* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
-				val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
-
-				/* if (bitstream_get (1)) val = -val; */
-				val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1);
-
-				SATURATE(val);
-				dest[j] = val;
-				mismatch ^= val;
-
-				bit_buf <<= 1;
-				NEEDBITS(bit_buf, bits, bit_ptr);
-
-				continue;
+				tab = &DCT.tab0a[(code >> 8) - 4];
 			}
 			else
 			{
-				/* end of block. I commented out this code because if we */
-				/* dont exit here we will still exit at the later test :) */
-				//if (i >= 128) break;		/* end of block */
-				/* escape code */
-
-				i += UBITS(bit_buf << 6, 6) - 64;
-
-				if (i >= 64)  break;	/* illegal, check against buffer overflow */
-
-				j = scan[i];
-				DUMPBITS(bit_buf, bits, 12);
-				NEEDBITS(bit_buf, bits, bit_ptr);
-
-				/* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
-				val = (SBITS(bit_buf, 12) * quantizer_scale * quant_matrix[i]) / 16;
-
-				SATURATE(val);
-				dest[j] = val;
-				mismatch ^= val;
-				DUMPBITS(bit_buf, bits, 12);
-				NEEDBITS(bit_buf, bits, bit_ptr);
-				continue;
+				tab = &DCT.tab0[(code >> 8) - 4];
 			}
 		}
-		else if (bit_buf >= 0x02000000)
+		else if (code >= 512)
 		{
-			tab = DCT_B15_10 + (UBITS(bit_buf, 10) - 8);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-		}
-		else if (bit_buf >= 0x00800000)
-		{
-			tab = DCT_13 + (UBITS(bit_buf, 13) - 16);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
+			if (decoder.intra_vlc_format && !decoder.mpeg1)
+			{
+				tab = &DCT.tab1a[(code >> 6) - 8];
+			}
+			else
+			{
+				tab = &DCT.tab1[(code >> 6) - 8];
+			}
 		}
-		else if (bit_buf >= 0x00200000)
-		{
-			tab = DCT_15 + (UBITS(bit_buf, 15) - 16);
-			i += tab->run;
 
-			if (i < 64) goto normal_code;
+		// [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
+		// that should use a single unrolled DCT table instead of five separate tables used
+		// here.  Multiple conditional statements are very slow, while modern CPU data caches
+		// have lots of room to spare.
+
+		else if (code >= 256)
+		{
+			tab = &DCT.tab2[(code >> 4) - 16];
+		}
+		else if (code >= 128)
+		{    
+			tab = &DCT.tab3[(code >> 3) - 16];
+		}
+		else if (code >= 64)
+		{    
+			tab = &DCT.tab4[(code >> 2) - 16];
+		}
+		else if (code >= 32)
+		{    
+			tab = &DCT.tab5[(code >> 1) - 16];
+		}
+		else if (code >= 16)
+		{    
+			tab = &DCT.tab6[code - 16];
 		}
 		else
 		{
-			tab = DCT_16 + UBITS(bit_buf, 16);
-			bit_buf <<= 16;
-			GETWORD(&bit_buf, bits + 16);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
+		  ipu_cmd.pos[4] = 0;
+		  return true;
 		}
 
-		break;	/* illegal, check needed to avoid buffer overflow */
-	}
-
-	dest[63] ^= mismatch & 1;
+		DUMPBITS(tab->len);
 
-	if ((bit_buf >> 28) != 0x6)
-		ipuRegs->ctrl.ECD = 1;
-
-	DUMPBITS(bit_buf, bits, tab->len);	/* dump end of block code */
-
-	decoder->bitstream_buf = bit_buf;
-
-	decoder->bitstream_bits = bits;
-}
-
-static __forceinline int get_non_intra_block(decoder_t * const decoder)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-	int i;
-	int j;
-	int val;
-	const u8 * scan = decoder->scan;
-	const u8 * quant_matrix = decoder->non_intra_quantizer_matrix;
-	int quantizer_scale = decoder->quantizer_scale;
-	int mismatch;
-	const DCTtab * tab;
-	s16 * dest;
-
-	i = -1;
-	mismatch = -1;
-	dest = decoder->DCTblock;
-	NEEDBITS(bit_buf, bits, bit_ptr);
-
-	if (bit_buf >= 0x28000000)
-	{
-		tab = DCT_B14DC_5 + (UBITS(bit_buf, 5) - 5);
-		goto entry_1;
-	}
-	else
-		goto entry_2;
-
-	while (1)
-	{
-		if (bit_buf >= 0x28000000)
+		if (tab->run==64) /* end_of_block */
 		{
-			tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5);
-entry_1:
-			i += tab->run;
-
-			if (i >= 64) break;	/* end of block */
-normal_code:
-			j = scan[i];
-			bit_buf <<= tab->len;
-			bits += tab->len + 1;
-
-			/* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
-			val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
-
-			/* if (bitstream_get (1)) val = -val; */
-			val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1);
-
-			SATURATE(val);
-			dest[j] = val;
-			mismatch ^= val;
-			bit_buf <<= 1;
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			continue;
+			ipu_cmd.pos[4] = 0;
+			return true;
 		}
-entry_2:
-
-		if (bit_buf >= 0x04000000)
+		
+		i+= tab->run == 65 ? GETBITS(6) : tab->run;
+		if (i >= 64)
 		{
-			tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-
-			/* escape code */
-
-			i += UBITS(bit_buf << 6, 6) - 64;
-
-			if (i >= 64) break;	/* illegal, check needed to avoid buffer overflow */
-
-			j = scan[i];
-			DUMPBITS(bit_buf, bits, 12);
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			val = 2 * (SBITS(bit_buf, 12) + SBITS(bit_buf, 1)) + 1;
-
-			/* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
-			val = (val * quantizer_scale * quant_matrix[i]) / 32;
-
-			SATURATE(val);
-			dest[j] = val;
-			mismatch ^= val;
-			DUMPBITS(bit_buf, bits, 12);
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			continue;
+			ipu_cmd.pos[4] = 0;
+			return true;
 		}
-		else if (bit_buf >= 0x02000000)
+	  case 1:
+		if (!GETWORD())
 		{
-			tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
+		  ipu_cmd.pos[4] = i - 1;
+		  ipu_cmd.pos[5] = 1;
+		  return false;
 		}
-		else if (bit_buf >= 0x00800000)
-		{
-			tab = DCT_13 + (UBITS(bit_buf, 13) - 16);
-			i += tab->run;
 
-			if (i < 64) goto normal_code;
-		}
-		else if (bit_buf >= 0x00200000)
+		j = scan[i];
+
+		if (tab->run==65) /* escape */
 		{
-			tab = DCT_15 + (UBITS(bit_buf, 15) - 16);
-			i += tab->run;
+		  if(!decoder.mpeg1)
+		  {
+			  val = (SBITS(12) * quantizer_scale * quant_matrix[i]) >> 4;
+			  DUMPBITS(12);
+		  }
+		  else
+		  {
+			  val = SBITS(8);
+			  DUMPBITS(8);
 
-			if (i < 64) goto normal_code;
+			  if (!(val & 0x7f))
+			  {
+				val = GETBITS(8) + 2 * val;
+			  }
+			
+			  val = (val * quantizer_scale * quant_matrix[i]) >> 4;
+			  val = (val + ~ (((s32)val) >> 31)) | 1;
+		  }
 		}
 		else
 		{
-			tab = DCT_16 + UBITS(bit_buf, 16);
-			bit_buf <<= 16;
-			GETWORD(&bit_buf, bits + 16);
-			i += tab->run;
+		  val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
+		  if(decoder.mpeg1)
+		  {
+			/* oddification */
+			val = (val - 1) | 1;
+		  }
 
-			if (i < 64) goto normal_code;
+ 		  /* if (bitstream_get (1)) val = -val; */
+		  val = (val ^ SBITS(1)) - SBITS(1);
+		  DUMPBITS(1);
 		}
-		break;	/* illegal, check needed to avoid buffer overflow */
-	}
 
-	dest[63] ^= mismatch & 1;
+		SATURATE(val);
+		dest[j] = val;
+		ipu_cmd.pos[5] = 0;
+	 }
+  }
 
-	if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1;
-
-	DUMPBITS(bit_buf, bits, tab->len);	/* dump end of block code */
-
-	decoder->bitstream_buf = bit_buf;
-	decoder->bitstream_bits = bits;
-	return i;
-
-#undef bit_buf
-#undef bits
-#undef bit_ptr
+  ipu_cmd.pos[4] = 0;
+  return true;
 }
 
-static __forceinline void get_mpeg1_intra_block(decoder_t * const decoder)
+static __fi bool get_non_intra_block(int * last)
 {
 	int i;
 	int j;
 	int val;
-	const u8 * scan = decoder->scan;
-	const u8 * quant_matrix = decoder->intra_quantizer_matrix;
-	int quantizer_scale = decoder->quantizer_scale;
-	const DCTtab * tab;
-	u32 bit_buf;
-	int bits;
-	u8 * bit_ptr;
-	s16 * dest;
-
-	i = 0;
-	dest = decoder->DCTblock;
-	bit_buf = decoder->bitstream_buf;
-	bits = decoder->bitstream_bits;
-	bit_ptr = decoder->bitstream_ptr;
-	NEEDBITS(bit_buf, bits, bit_ptr);
-
-	while (1)
-	{
-		if (bit_buf >= 0x28000000)
+	const u8 * scan = decoder.scantype ? mpeg2_scan.alt : mpeg2_scan.norm;
+	const u8 (&quant_matrix)[64] = decoder.niq;
+	int quantizer_scale = decoder.quantizer_scale;
+	s16 * dest = decoder.DCTblock;
+	u16 code;
+
+    /* decode AC coefficients */
+    for (i= ipu_cmd.pos[4] ; ; i++)
+    {
+		switch (ipu_cmd.pos[5])
 		{
-			tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5);
-			i += tab->run;
-
-			if (i >= 64) break;	/* end of block */
-
-normal_code:
-			j = scan[i];
-			bit_buf <<= tab->len;
-			bits += tab->len + 1;
-
-			/* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
-			val = (tab->level * quantizer_scale * quant_matrix[i]) >> 4;
-
-			/* oddification */
-			val = (val - 1) | 1;
+		case 0:
+			if (!GETWORD())
+			{
+				ipu_cmd.pos[4] = i;
+				return false;
+			}
 
-			/* if (bitstream_get (1)) val = -val; */
-			val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1);
+			code = UBITS(16);
 
-			SATURATE(val);
-			dest[j] = val;
-			bit_buf <<= 1;
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			continue;
+			if (code >= 16384)
+			{
+				if (i==0)
+				{
+					tab = &DCT.first[(code >> 12) - 4];
+				}
+				else
+				{			
+					tab = &DCT.next[(code >> 12)- 4];
+				}
+			}
+			else if (code >= 1024)
+			{
+				tab = &DCT.tab0[(code >> 8) - 4];
+			}
+			else if (code >= 512)
+			{		
+				tab = &DCT.tab1[(code >> 6) - 8];
+			}
 
-		}
-		else if (bit_buf >= 0x04000000)
-		{
-			tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4);
-			i += tab->run;
+			// [TODO] Optimization: Following codes can all be done by a single "expedited" lookup
+			// that should use a single unrolled DCT table instead of five separate tables used
+			// here.  Multiple conditional statements are very slow, while modern CPU data caches
+			// have lots of room to spare.
+
+			else if (code >= 256)
+			{		
+				tab = &DCT.tab2[(code >> 4) - 16];
+			}
+			else if (code >= 128)
+			{		
+				tab = &DCT.tab3[(code >> 3) - 16];
+			}
+			else if (code >= 64)
+			{		
+				tab = &DCT.tab4[(code >> 2) - 16];
+			}
+			else if (code >= 32)
+			{		
+				tab = &DCT.tab5[(code >> 1) - 16];
+			}
+			else if (code >= 16)
+			{		
+				tab = &DCT.tab6[code - 16];
+			}
+			else
+			{
+				ipu_cmd.pos[4] = 0;
+				return true;
+			}
 
-			if (i < 64) goto normal_code;
+			DUMPBITS(tab->len);
 
-			/* escape code */
+			if (tab->run==64) /* end_of_block */
+			{
+				*last = i;
+				ipu_cmd.pos[4] = 0;
+				return true;
+			}
 
-			i += UBITS(bit_buf << 6, 6) - 64;
+			i += (tab->run == 65) ? GETBITS(6) : tab->run;
+			if (i >= 64)
+			{
+				*last = i;
+				ipu_cmd.pos[4] = 0;
+				return true;
+			}
 
-			if (i >= 64) break;	/* illegal, check needed to avoid buffer overflow */
+		case 1:
+			if (!GETWORD())
+			{
+			  ipu_cmd.pos[4] = i;
+			  ipu_cmd.pos[5] = 1;
+			  return false;
+			}
 
 			j = scan[i];
-			DUMPBITS(bit_buf, bits, 12);
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			val = SBITS(bit_buf, 8);
 
-			if (!(val & 0x7f))
+			if (tab->run==65) /* escape */
 			{
-				DUMPBITS(bit_buf, bits, 8);
-				val = UBITS(bit_buf, 8) + 2 * val;
-			}
+				if (!decoder.mpeg1)
+				{
+					val = ((2 * (SBITS(12) + SBITS(1)) + 1) * quantizer_scale * quant_matrix[i]) >> 5;
+					DUMPBITS(12);
+				}
+				else
+				{
+				  val = SBITS(8);
+				  DUMPBITS(8);
 
-			/* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
-			val = (val * quantizer_scale * quant_matrix[i]) >> 4;
+				  if (!(val & 0x7f))
+				  {
+					val = GETBITS(8) + 2 * val;
+				  }
 
-			/* oddification */
-			val = (val + ~SBITS(val, 1)) | 1;
+				  val = ((2 * (val + (((s32)val) >> 31)) + 1) * quantizer_scale * quant_matrix[i]) / 32;
+				  val = (val + ~ (((s32)val) >> 31)) | 1;
+				}
+			}
+			else
+			{
+				val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
+				val = (val ^ SBITS(1)) - SBITS(1);
+				DUMPBITS(1);
+			}
 
 			SATURATE(val);
 			dest[j] = val;
-			DUMPBITS(bit_buf, bits, 8);
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			continue;
+			ipu_cmd.pos[5] = 0;
 		}
-		else if (bit_buf >= 0x02000000)
-		{
-			tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-		}
-		else if (bit_buf >= 0x00800000)
-		{
-			tab = DCT_13 + (UBITS(bit_buf, 13) - 16);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-		}
-		else if (bit_buf >= 0x00200000)
-		{
-			tab = DCT_15 + (UBITS(bit_buf, 15) - 16);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-		}
-		else
-		{
-			tab = DCT_16 + UBITS(bit_buf, 16);
-			bit_buf <<= 16;
-			GETWORD(&bit_buf, bits + 16);
-			i += tab->run;
- goto normal_code;
-		}
-
-		break;	/* illegal, check needed to avoid buffer overflow */
 	}
 
-	if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1;
-
-	DUMPBITS(bit_buf, bits, 2);	/* dump end of block code */
-	decoder->bitstream_buf = bit_buf;
-	decoder->bitstream_bits = bits;
+	ipu_cmd.pos[4] = 0;
+	return true;
 }
 
-static __forceinline int get_mpeg1_non_intra_block(decoder_t * const decoder)
+static __fi bool slice_intra_DCT(const int cc, u8 * const dest, const int stride, const bool skip)
 {
-	int i;
-	int j;
-	int val;
-	const u8 * scan = decoder->scan;
-	const u8 * quant_matrix = decoder->non_intra_quantizer_matrix;
-	int quantizer_scale = decoder->quantizer_scale;
-	const DCTtab * tab;
-	u32 bit_buf;
-	int bits;
-	u8 * bit_ptr;
-	s16 * dest;
-
-	i = -1;
-	dest = decoder->DCTblock;
-
-	bit_buf = decoder->bitstream_buf;
-	bits = decoder->bitstream_bits;
-	bit_ptr = decoder->bitstream_ptr;
-
-	NEEDBITS(bit_buf, bits, bit_ptr);
-
-	if (bit_buf >= 0x28000000)
-	{
-		tab = DCT_B14DC_5 + (UBITS(bit_buf, 5) - 5);
-		goto entry_1;
-	}
-	else
-		goto entry_2;
-
-	while (1)
+	if (!skip || ipu_cmd.pos[3])
 	{
-		if (bit_buf >= 0x28000000)
+		ipu_cmd.pos[3] = 0;
+		if (!GETWORD())
 		{
-			tab = DCT_B14AC_5 + (UBITS(bit_buf, 5) - 5);
-entry_1:
-			i += tab->run;
-
-			if (i >= 64) break;	/* end of block */
-
-normal_code:
-			j = scan[i];
-			bit_buf <<= tab->len;
-			bits += tab->len + 1;
-
-			/* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
-			val = ((2 * tab->level + 1) * quantizer_scale * quant_matrix[i]) >> 5;
-
-			/* oddification */
-			val = (val - 1) | 1;
-
-			/* if (bitstream_get (1)) val = -val; */
-			val = (val ^ SBITS(bit_buf, 1)) - SBITS(bit_buf, 1);
-
-			SATURATE(val);
-			dest[j] = val;
-			bit_buf <<= 1;
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			continue;
+			ipu_cmd.pos[3] = 1;
+			return false;
 		}
-entry_2:
-		if (bit_buf >= 0x04000000)
-		{
-			tab = DCT_B14_8 + (UBITS(bit_buf, 8) - 4);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-
-			/* escape code */
-
-			i += UBITS(bit_buf << 6, 6) - 64;
-
-			if (i >= 64) break;	/* illegal, check needed to avoid buffer overflow */
-
-			j = scan[i];
-			DUMPBITS(bit_buf, bits, 12);
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			val = SBITS(bit_buf, 8);
-
-			if (!(val & 0x7f))
-			{
-				DUMPBITS(bit_buf, bits, 8);
-				val = UBITS(bit_buf, 8) + 2 * val;
-			}
-
-			val = 2 * (val + SBITS(val, 1)) + 1;
-
-			/* JayteeMaster: 10 points! Replaced quant_matrix[j] by quant_matrix[i] as should be */
-			val = (val * quantizer_scale * quant_matrix[i]) / 32;
-
-			/* oddification */
-			val = (val + ~SBITS(val, 1)) | 1;
 
-			SATURATE(val);
-			dest[j] = val;
-			DUMPBITS(bit_buf, bits, 8);
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			continue;
-		}
-		else if (bit_buf >= 0x02000000)
-		{
-			tab = DCT_B14_10 + (UBITS(bit_buf, 10) - 8);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-		}
-		else if (bit_buf >= 0x00800000)
-		{
-			tab = DCT_13 + (UBITS(bit_buf, 13) - 16);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-		}
-		else if (bit_buf >= 0x00200000)
-		{
-			tab = DCT_15 + (UBITS(bit_buf, 15) - 16);
-			i += tab->run;
-
-			if (i < 64) goto normal_code;
-		}
+		/* Get the intra DC coefficient and inverse quantize it */
+		if (cc == 0)
+			decoder.dc_dct_pred[0] += get_luma_dc_dct_diff();
 		else
-		{
-			tab = DCT_16 + UBITS(bit_buf, 16);
-			bit_buf <<= 16;
-			GETWORD(&bit_buf, bits + 16);
-			i += tab->run;
+			decoder.dc_dct_pred[cc] += get_chroma_dc_dct_diff();
 
-			if (i < 64) goto normal_code;
-		}
-
-		break;	/* illegal, check needed to avoid buffer overflow */
+		decoder.DCTblock[0] = decoder.dc_dct_pred[cc] << (3 - decoder.intra_dc_precision);
 	}
 
-	if ((bit_buf >> 30) != 0x2) ipuRegs->ctrl.ECD = 1;
-
-	DUMPBITS(bit_buf, bits, 2);	/* dump end of block code */
-	decoder->bitstream_buf = bit_buf;
-	decoder->bitstream_bits = bits;
-	return i;
-}
-
-static void __fastcall slice_intra_DCT(decoder_t * const decoder, const int cc,
-                                       u8 * const dest, const int stride)
-{
-	NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr);
-	/* Get the intra DC coefficient and inverse quantize it */
-
-	if (cc == 0)
-		decoder->dc_dct_pred[0] += get_luma_dc_dct_diff(decoder);
-	else
-		decoder->dc_dct_pred[cc] += get_chroma_dc_dct_diff(decoder);
-
-	decoder->DCTblock[0] = decoder->dc_dct_pred[cc] << (3 - decoder->intra_dc_precision);
-
-	if (decoder->mpeg1)
+	if (!get_intra_block())
 	{
-		get_mpeg1_intra_block(decoder);
+		return false;
 	}
-	else if (decoder->intra_vlc_format)
-	{
-		get_intra_block_B15(decoder);
-	}
-	else
-	{
-		get_intra_block_B14(decoder);
-	}
-
-	mpeg2_idct_copy(decoder->DCTblock, dest, stride);
-}
-
-/* JayteeMaster: changed dest to 16 bit signed */
-static void __fastcall slice_non_intra_DCT(decoder_t * const decoder,
-        /*u8*/s16 * const dest, const int stride)
-{
-	int last;
-	memzero(decoder->DCTblock);
-
-	if (decoder->mpeg1)
-		last = get_mpeg1_non_intra_block(decoder);
-	else
-		last = get_non_intra_block(decoder);
 
-	mpeg2_idct_add(last, decoder->DCTblock, dest, stride);
-}
-
-#if defined(_MSC_VER)
-#pragma pack(1)
-#endif
-
-struct TGA_HEADER
-{
-	u8  identsize;		// size of ID field that follows 18 u8 header (0 usually)
-	u8  colourmaptype;	 // type of colour map 0=none, 1=has palette
-	u8  imagetype;		// type of image 0=none,1=indexed,2=rgb,3=grey,+8=rle packed
-
-	s16 colourmapstart;	// first colour map entry in palette
-	s16 colourmaplength;	 // number of colours in palette
-	u8  colourmapbits;	 // number of bits per palette entry 15,16,24,32
-
-    s16 xstart;             // image x origin
-    s16 ystart;             // image y origin
-    s16 width;              // image width in pixels
-    s16 height;             // image height in pixels
-    u8  bits;               // image bits per pixel 8,16,24,32
-    u8  descriptor;         // image descriptor bits (vh flip bits)
-
-    // pixel data follows header
-} __packed;
-
-#if defined(_MSC_VER)
-#	pragma pack()
-#endif
-
-void SaveTGA(const char* filename, int width, int height, void* pdata)
-{
-	TGA_HEADER hdr;
-	FILE* f = fopen(filename, "wb");
-
-	if (f == NULL) return;
-
-	assert(sizeof(TGA_HEADER) == 18 && sizeof(hdr) == 18);
-
-	memzero(hdr);
-	hdr.imagetype = 2;
-	hdr.bits = 32;
-	hdr.width = width;
-	hdr.height = height;
-	hdr.descriptor |= 8 | (1 << 5); // 8bit alpha, flip vertical
-	fwrite(&hdr, sizeof(hdr), 1, f);
-	fwrite(pdata, width*height*4, 1, f);
-	fclose(f);
-}
-
-static int s_index = 0; //, s_frame = 0;
+	mpeg2_idct_copy(decoder.DCTblock, dest, stride);
 
-void SaveRGB32(u8* ptr)
-{
-	char filename[255];
-	sprintf(filename, "frames/frame%.4d.tga", s_index++);
-	SaveTGA(filename, 16, 16, ptr);
+	return true;
 }
 
-void waitForSCD()
+static __fi bool slice_non_intra_DCT(s16 * const dest, const int stride, const bool skip)
 {
-	u8 bit8 = 1;
-
-	while (!getBits8((u8*)&bit8, 0))
-	{
-		so_resume();
-	}
+	int last;
 
-	if (bit8 == 0)
+	if (!skip)
 	{
-		if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7);
-
-		ipuRegs->ctrl.SCD = 1;
+		memzero_sse_a(decoder.DCTblock);
 	}
 
-	while (!getBits32((u8*)&ipuRegs->top, 0))
+	if (!get_non_intra_block(&last))
 	{
-		so_resume();
+		return false;
 	}
 
-	BigEndian(ipuRegs->top, ipuRegs->top);
+	mpeg2_idct_add(last, decoder.DCTblock, dest, stride);
 
-	/*if(ipuRegs->ctrl.SCD)
-	{
-		switch(ipuRegs->top & 0xFFFFFFF0)
-		{
-			case 0x100:
-			case 0x1A0:
-				break;
-			case 0x1B0:
-				ipuRegs->ctrl.SCD = 0;
-				if(ipuRegs->top == 0x1b4) ipuRegs->ctrl.ECD = 1;
-				//else
-				//{
-				//	do
-				//	{
-				//		while(!getBits32((u8*)&ipuRegs->top, 1))
-				//		{
-				//			so_resume();
-				//		}
-
-				//		BigEndian(ipuRegs->top, ipuRegs->top);
-				//	}
-				//	while((ipuRegs->top & 0xfffffff0) != 0x100);
-				//}
-				break;
-			default:
-				ipuRegs->ctrl.SCD = 0;
-				break;
-		}
-	}*/
+	return true;
 }
 
-void __forceinline finishmpeg2sliceIDEC(decoder_t* &decoder)
+void __fi finishmpeg2sliceIDEC()
 {
-	ipuRegs->ctrl.SCD = 0;
-	coded_block_pattern = decoder->coded_block_pattern;
+	ipuRegs.ctrl.SCD = 0;
+	coded_block_pattern = decoder.coded_block_pattern;
 
-	g_BP.BP += decoder->bitstream_bits - 16;
+	g_BP.BP += decoder.bitstream_bits - 16;
 
 	if ((int)g_BP.BP < 0)
 	{
@@ -1122,395 +695,453 @@
 	}
 
 	FillInternalBuffer(&g_BP.BP, 1, 0);
-
-	waitForSCD();
 }
 
-void mpeg2sliceIDEC(void* pdone)
+bool mpeg2sliceIDEC()
 {
-	u32 read;
+	u16 code;
+	u8 bit8;
 
-	bool resumed = false;
-	decoder_t *decoder = &g_decoder;
-
-	*(int*)pdone = 0;
-	bitstream_init(decoder);
+	switch (ipu_cmd.pos[0])
+	{
+	case 0:
+		decoder.dc_dct_pred[0] =
+		decoder.dc_dct_pred[1] =
+		decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
 
-	decoder->dc_dct_pred[0] =
-	decoder->dc_dct_pred[1] =
-	decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;
+		decoder.mbc = 0;
+		ipuRegs.top = 0;
+		ipuRegs.ctrl.ECD = 0;
 
-	decoder->mbc = 0;
-	ipuRegs->ctrl.ECD = 0;
+	case 1:
+		ipu_cmd.pos[0] = 1;
+		if (!bitstream_init())
+		{
+			return false;
+		}
 
-	if (UBITS(decoder->bitstream_buf, 2) == 0)
-	{
-		ipuRegs->ctrl.SCD = 0;
-	}
-	else
-	{
+	case 2:
+		ipu_cmd.pos[0] = 2;
 		while (1)
 		{
+			macroblock_8& mb8 = decoder.mb8;
+			macroblock_rgb16& rgb16 = decoder.rgb16;
+			macroblock_rgb32& rgb32 = decoder.rgb32;
+
 			int DCT_offset, DCT_stride;
-			int mba_inc;
 			const MBAtab * mba;
 
-			NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr);
-			decoder->macroblock_modes = get_macroblock_modes(decoder);
-
-			/* maybe integrate MACROBLOCK_QUANT test into get_macroblock_modes ? */
-
-			if (decoder->macroblock_modes & MACROBLOCK_QUANT) //only IDEC
-			{
-				decoder->quantizer_scale = get_quantizer_scale(decoder);
-			}
-
-			if (decoder->macroblock_modes & DCT_TYPE_INTERLACED)
-			{
-				DCT_offset = decoder->stride;
-				DCT_stride = decoder->stride * 2;
-			}
-			else
-			{
-				DCT_offset = decoder->stride * 8;
-				DCT_stride = decoder->stride;
-			}
-
-			if (decoder->macroblock_modes & MACROBLOCK_INTRA)
+			switch (ipu_cmd.pos[1])
 			{
-				decoder->coded_block_pattern = 0x3F;//all 6 blocks
-				//ipuRegs->ctrl.CBP = 0x3f;
+			case 0:
+				decoder.macroblock_modes = get_macroblock_modes();
 
-				memzero(*decoder->mb8);
-				memzero(*decoder->rgb32);
+				if (decoder.macroblock_modes & MACROBLOCK_QUANT) //only IDEC
+				{
+					decoder.quantizer_scale = get_quantizer_scale();
+				}
 
-				slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y, DCT_stride);
-				slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + 8, DCT_stride);
-				slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset, DCT_stride);
-				slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset + 8, DCT_stride);
-				slice_intra_DCT(decoder, 1, (u8*)decoder->mb8->Cb, decoder->stride >> 1);
-				slice_intra_DCT(decoder, 2, (u8*)decoder->mb8->Cr, decoder->stride >> 1);
+				decoder.coded_block_pattern = 0x3F;//all 6 blocks
+				memzero_sse_a(mb8);
+				memzero_sse_a(rgb32);
 
-				// Send The MacroBlock via DmaIpuFrom
+			case 1:
+				ipu_cmd.pos[1] = 1;
 
-				if (decoder->ofm == 0)
+				if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
 				{
-					ipu_csc(decoder->mb8, decoder->rgb32, decoder->sgn);
-
-					g_nIPU0Data = 64;
-					g_pIPU0Pointer = (u8*)decoder->rgb32;
-					//if ( s_frame >= 39 ) SaveRGB32(g_pIPU0Pointer);
+					DCT_offset = decoder_stride;
+					DCT_stride = decoder_stride * 2;
 				}
 				else
 				{
-					ipu_csc(decoder->mb8, decoder->rgb32, decoder->sgn);
-					ipu_dither(decoder->rgb32, decoder->rgb16, decoder->dte);
+					DCT_offset = decoder_stride * 8;
+					DCT_stride = decoder_stride;
+				}
 
-					g_nIPU0Data = 32;
-					g_pIPU0Pointer = (u8*)decoder->rgb16;
-					//if ( s_frame >= 39 ) SaveRGB32(g_pIPU0Pointer);
+				switch (ipu_cmd.pos[2])
+				{
+				case 0:
+				case 1:
+					if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[2] == 1))
+					{
+						ipu_cmd.pos[2] = 1;
+						return false;
+					}
+				case 2:
+					if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[2] == 2))
+					{
+						ipu_cmd.pos[2] = 2;
+						return false;
+					}
+				case 3:
+					if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[2] == 3))
+					{
+						ipu_cmd.pos[2] = 3;
+						return false;
+					}
+				case 4:
+					if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[2] == 4))
+					{
+						ipu_cmd.pos[2] = 4;
+						return false;
+					}
+				case 5:
+					if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[2] == 5))
+					{
+						ipu_cmd.pos[2] = 5;
+						return false;
+					}
+				case 6:
+					if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[2] == 6))
+					{
+						ipu_cmd.pos[2] = 6;
+						return false;
+					}
+				}
+
+				// Send The MacroBlock via DmaIpuFrom
+				ipu_csc(mb8, rgb32, decoder.sgn);
+
+				if (decoder.ofm == 0)
+					decoder.SetOutputTo(rgb32);
+				else
+				{
+					ipu_dither(rgb32, rgb16, decoder.dte);
+					decoder.SetOutputTo(rgb16);
 				}
 
-				while (g_nIPU0Data > 0)
+			case 2:
+				while (decoder.ipu0_data > 0)
 				{
-					read = ipu_fifo.out.write((u32*)g_pIPU0Pointer, g_nIPU0Data);
+					uint read = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
 
 					if (read == 0)
 					{
-						so_resume();
-						resumed = true;
+						ipu_cmd.pos[1] = 2;
+						return false;
 					}
 					else
 					{
-						g_pIPU0Pointer += read * 16;
-						g_nIPU0Data -= read;
-
+						decoder.AdvanceIpuDataBy(read);
 					}
 				}
 
-				decoder->mbc++;
-			}
+				decoder.mbc++;
+				mbaCount = 0;
+			case 3:
+				while (1)
+				{
+					if (!GETWORD())
+					{
+						ipu_cmd.pos[1] = 3;
+						return false;
+					}
+
+					code = UBITS(16);
+					if (code >= 0x1000)
+					{
+						mba = MBA.mba5 + (UBITS(5) - 2);
+						break;
+					}
+					else if (code >= 0x0300)
+					{
+						mba = MBA.mba11 + (UBITS(11) - 24);
+						break;
+					}
+					else switch (UBITS(11))
+					{
+							case 8:		/* macroblock_escape */
+								mbaCount += 33;
+								/* fall through */
+
+							case 15:	/* macroblock_stuffing (MPEG1 only) */
+								DUMPBITS(11);
+								continue;
+
+							default:	/* end of slice/frame, or error? */
+							{
+								goto finish_idec;	
+							}
+					}
+				}
 
-			NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr);
-			mba_inc = 0;
+				DUMPBITS(mba->len);
+				mbaCount += mba->mba;
 
-			while (1)
-			{
-				if (decoder->bitstream_buf >= 0x10000000)
+				if (mbaCount)
 				{
-					mba = MBA_5 + (UBITS(decoder->bitstream_buf, 5) - 2);
-					break;
+					decoder.dc_dct_pred[0] =
+					decoder.dc_dct_pred[1] =
+					decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
+
+					decoder.mbc += mbaCount;
 				}
-				else if (decoder->bitstream_buf >= 0x03000000)
+
+			case 4:
+				if (!GETWORD())
 				{
-					mba = MBA_11 + (UBITS(decoder->bitstream_buf, 11) - 24);
-					break;
+					ipu_cmd.pos[1] = 4;
+					return false;
 				}
-				else switch (UBITS(decoder->bitstream_buf, 11))
-					{
 
-						case 8:		/* macroblock_escape */
-							mba_inc += 33;
-							/* pass through */
-
-						case 15:	/* macroblock_stuffing (MPEG1 only) */
-							DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 11);
-							NEEDBITS(decoder->bitstream_buf, decoder->bitstream_bits, decoder->bitstream_ptr);
-							continue;
-
-						default:	/* end of slice/frame, or error? */
-						{
-#ifdef MPEGHACK
-							if (!resumed) so_resume();
-#endif
-							finishmpeg2sliceIDEC(decoder);
-
-							*(int*)pdone = 1;
-							so_exit();
-						}
-					}
+				break;
 			}
 
-			DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, mba->len);
-			mba_inc += mba->mba;
+			ipu_cmd.pos[1] = 0;
+			ipu_cmd.pos[2] = 0;
+		}
+		
+finish_idec:
+		finishmpeg2sliceIDEC();
 
-			if (mba_inc)
-			{
-				decoder->dc_dct_pred[0] =
-				decoder->dc_dct_pred[1] =
-				decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;
+	case 3:
+		bit8 = 1;
+		if (!getBits8((u8*)&bit8, 0))
+		{
+			ipu_cmd.pos[0] = 3;
+			return false;
+		}
 
-				do
-				{
-					decoder->mbc++;
-				}
-				while (--mba_inc);
-			}
+		if (bit8 == 0)
+		{
+			if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7);
+
+			ipuRegs.ctrl.SCD = 1;
 		}
-	}
 
-#ifdef MPEGHACK
-	if (!resumed) so_resume();
-#endif
+	case 4:
+		if (!getBits32((u8*)&ipuRegs.top, 0))
+		{
+			ipu_cmd.pos[0] = 4;
+			return false;
+		}
 
-	finishmpeg2sliceIDEC(decoder);
+		BigEndian(ipuRegs.top, ipuRegs.top);
+		break;
+	}
 
-	*(int*)pdone = 1;
-	so_exit();
+	return true;
 }
 
-void mpeg2_slice(void* pdone)
+bool mpeg2_slice()
 {
 	int DCT_offset, DCT_stride;
-	//u8 bit8=0;
-	//u32 fp = g_BP.FP;
-	u32 bp;
-	decoder_t * decoder = &g_decoder;
-	u32 size = 0;
-
-	*(int*)pdone = 0;
-	ipuRegs->ctrl.ECD = 0;
-
-	memzero(*decoder->mb8);
-	memzero(*decoder->mb16);
-
-	bitstream_init(decoder);
+	u8 bit8;
 
-	if (decoder->dcr)
-	{
-		decoder->dc_dct_pred[0] =
-		decoder->dc_dct_pred[1] =
-		decoder->dc_dct_pred[2] = 128 << decoder->intra_dc_precision;
-	}
-
-	if (decoder->macroblock_modes & DCT_TYPE_INTERLACED)
-	{
-		DCT_offset = decoder->stride;
-		DCT_stride = decoder->stride * 2;
-	}
-	else
-	{
-		DCT_offset = decoder->stride * 8;
-		DCT_stride = decoder->stride;
-	}
+	macroblock_8& mb8 = decoder.mb8;
+	macroblock_16& mb16 = decoder.mb16;
 
-	if (decoder->macroblock_modes & MACROBLOCK_INTRA)
+	switch (ipu_cmd.pos[0])
 	{
-		decoder->coded_block_pattern = 0x3F;//all 6 blocks
-		slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y, DCT_stride);
-		slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + 8, DCT_stride);
-		slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset, DCT_stride);
-		slice_intra_DCT(decoder, 0, (u8*)decoder->mb8->Y + DCT_offset + 8, DCT_stride);
-		slice_intra_DCT(decoder, 1, (u8*)decoder->mb8->Cb, decoder->stride >> 1);
-		slice_intra_DCT(decoder, 2, (u8*)decoder->mb8->Cr, decoder->stride >> 1);
-		ipu_copy(decoder->mb8, decoder->mb16);
-	}
-	else
-	{
-		if (decoder->macroblock_modes & MACROBLOCK_PATTERN)
+	case 0:
+		if (decoder.dcr)
 		{
-			decoder->coded_block_pattern = get_coded_block_pattern(decoder);
-			/* JayteeMaster: changed from mb8 to mb16 and from u8 to s16 */
-
-			if (decoder->coded_block_pattern & 0x20) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y, DCT_stride);
-			if (decoder->coded_block_pattern & 0x10) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y + 8, DCT_stride);
-			if (decoder->coded_block_pattern & 0x08) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y + DCT_offset,	 DCT_stride);
-			if (decoder->coded_block_pattern & 0x04) slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Y + DCT_offset + 8, DCT_stride);
-			if (decoder->coded_block_pattern & 0x2)  slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Cb, decoder->stride >> 1);
-			if (decoder->coded_block_pattern & 0x1)  slice_non_intra_DCT(decoder, (s16*)decoder->mb16->Cr, decoder->stride >> 1);
-
+			decoder.dc_dct_pred[0] =
+			decoder.dc_dct_pred[1] =
+			decoder.dc_dct_pred[2] = 128 << decoder.intra_dc_precision;
+		}
+			
+		ipuRegs.ctrl.ECD = 0;
+		ipuRegs.top = 0;
+		memzero_sse_a(mb8);
+		memzero_sse_a(mb16);
+	case 1:
+		if (!bitstream_init())
+		{
+			ipu_cmd.pos[0] = 1;
+			return false;
 		}
-	}
-
-	//Send The MacroBlock via DmaIpuFrom
-
-	size = 0;	// Reset
-	ipuRegs->ctrl.SCD = 0;
-	coded_block_pattern = decoder->coded_block_pattern;
-	bp = g_BP.BP;
-	g_BP.BP += ((int)decoder->bitstream_bits - 16);
-
-	// BP goes from 0 to 128, so negative values mean to read old buffer
-	// so we minus from 128 to get the correct BP
-	if ((int)g_BP.BP < 0)
-	{
-		g_BP.BP = 128 + (int)g_BP.BP;
-
-		// After BP is positioned correctly, we need to reload the old buffer
-		// so that reading may continue properly
-		ReorderBitstream();
-	}
-
-	FillInternalBuffer(&g_BP.BP, 1, 0);
-
-	decoder->mbc = 1;
-	g_nIPU0Data = 48;
-	g_pIPU0Pointer = (u8*)decoder->mb16;
 
-	while (g_nIPU0Data > 0)
-	{
-		size = ipu_fifo.out.write((u32*)g_pIPU0Pointer, g_nIPU0Data);
+	case 2:
+		ipu_cmd.pos[0] = 2;
 
-		if (size == 0)
+		if (decoder.macroblock_modes & DCT_TYPE_INTERLACED)
 		{
-			so_resume();
+			DCT_offset = decoder_stride;
+			DCT_stride = decoder_stride * 2;
 		}
 		else
 		{
-			g_pIPU0Pointer += size * 16;
-			g_nIPU0Data -= size;
+			DCT_offset = decoder_stride * 8;
+			DCT_stride = decoder_stride;
 		}
-	}
-	waitForSCD();
-
-	decoder->bitstream_bits = 0;
-	*(int*)pdone = 1;
-	so_exit();
-}
-
-int __forceinline get_motion_delta(decoder_t * const decoder,
-                                   const int f_code)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
-
-	int delta;
-	int sign;
-	const MVtab * tab;
-
-	if ((bit_buf & 0x80000000))
-	{
-		DUMPBITS(bit_buf, bits, 1);
-		return 0x00010000;
-	}
-	else if ((bit_buf & 0xf0000000) || ((bit_buf & 0xfc000000) == 0x0c000000))
-	{
-
-		tab = MV_4 + UBITS(bit_buf, 4);
-		delta = (tab->delta << f_code) + 1;
-		bits += tab->len + f_code + 1;
-		bit_buf <<= tab->len;
-
-		sign = SBITS(bit_buf, 1);
-		bit_buf <<= 1;
-
-		if (f_code) delta += UBITS(bit_buf, f_code);
 
-		bit_buf <<= f_code;
-
-		return (delta ^ sign) - sign;
-
-	}
-	else
-	{
-		tab = MV_10 + UBITS(bit_buf, 10);
-		delta = (tab->delta << f_code) + 1;
-		bits += tab->len + 1;
-		bit_buf <<= tab->len;
-
-		sign = SBITS(bit_buf, 1);
-		bit_buf <<= 1;
+		if (decoder.macroblock_modes & MACROBLOCK_INTRA)
+		{
+			switch(ipu_cmd.pos[1])
+			{
+			case 0:
+				decoder.coded_block_pattern = 0x3F;
+			case 1:
+				if (!slice_intra_DCT(0, (u8*)mb8.Y, DCT_stride, ipu_cmd.pos[1] == 1))
+				{
+					ipu_cmd.pos[1] = 1;
+					return false;
+				}
+			case 2:
+				if (!slice_intra_DCT(0, (u8*)mb8.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
+				{
+					ipu_cmd.pos[1] = 2;
+					return false;
+				}
+			case 3:
+				if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
+				{
+					ipu_cmd.pos[1] = 3;
+					return false;
+				}
+			case 4:
+				if (!slice_intra_DCT(0, (u8*)mb8.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
+				{
+					ipu_cmd.pos[1] = 4;
+					return false;
+				}
+			case 5:
+				if (!slice_intra_DCT(1, (u8*)mb8.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
+				{
+					ipu_cmd.pos[1] = 5;
+					return false;
+				}
+			case 6:
+				if (!slice_intra_DCT(2, (u8*)mb8.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
+				{
+					ipu_cmd.pos[1] = 6;
+					return false;
+				}
+				break;
+			}
 
-		if (f_code)
+			ipu_copy(mb8, mb16);
+		}
+		else
 		{
-			NEEDBITS(bit_buf, bits, bit_ptr);
-			delta += UBITS(bit_buf, f_code);
-			DUMPBITS(bit_buf, bits, f_code);
+			if (decoder.macroblock_modes & MACROBLOCK_PATTERN)
+			{
+				switch(ipu_cmd.pos[1])
+				{
+				case 0:
+					decoder.coded_block_pattern = get_coded_block_pattern();  // max 9bits
+				case 1:
+					if (decoder.coded_block_pattern & 0x20)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Y, DCT_stride, ipu_cmd.pos[1] == 1))
+						{
+							ipu_cmd.pos[1] = 1;
+							return false;
+						}
+					}
+				case 2:
+					if (decoder.coded_block_pattern & 0x10)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Y + 8, DCT_stride, ipu_cmd.pos[1] == 2))
+						{
+							ipu_cmd.pos[1] = 2;
+							return false;
+						}
+					}
+				case 3:
+					if (decoder.coded_block_pattern & 0x08)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset, DCT_stride, ipu_cmd.pos[1] == 3))
+						{
+							ipu_cmd.pos[1] = 3;
+							return false;
+						}
+					}
+				case 4:
+					if (decoder.coded_block_pattern & 0x04)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Y + DCT_offset + 8, DCT_stride, ipu_cmd.pos[1] == 4))
+						{
+							ipu_cmd.pos[1] = 4;
+							return false;
+						}
+					}
+				case 5:
+					if (decoder.coded_block_pattern & 0x2)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Cb, decoder_stride >> 1, ipu_cmd.pos[1] == 5))
+						{
+							ipu_cmd.pos[1] = 5;
+							return false;
+						}
+					}
+				case 6:
+					if (decoder.coded_block_pattern & 0x1)
+					{
+						if (!slice_non_intra_DCT((s16*)mb16.Cr, decoder_stride >> 1, ipu_cmd.pos[1] == 6))
+						{
+							ipu_cmd.pos[1] = 6;
+							return false;
+						}
+					}
+					break;
+				}
+			}
 		}
 
-		return (delta ^ sign) - sign;
-
-	}
+		// Send The MacroBlock via DmaIpuFrom
+		ipuRegs.ctrl.SCD = 0;
+		coded_block_pattern = decoder.coded_block_pattern;
+		g_BP.BP += (int)decoder.bitstream_bits - 16;
 
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
+		// BP ranges from 0 to 128, so a negative value means the read position
+		// lies in the old buffer; add 128 to wrap it back to the correct BP
+		if ((int)g_BP.BP < 0)
+		{
+			g_BP.BP = 128 + (int)g_BP.BP;
 
-int __forceinline get_dmv(decoder_t * const decoder)
-{
-#define bit_buf (decoder->bitstream_buf)
-#define bits (decoder->bitstream_bits)
-#define bit_ptr (decoder->bitstream_ptr)
+			// After BP is positioned correctly, we need to reload the old buffer
+			// so that reading may continue properly
+			ReorderBitstream();
+		}
 
-	const DMVtab * tab;
+		decoder.mbc = 1;
+		decoder.SetOutputTo(mb16);
 
-	tab = DMV_2 + UBITS(bit_buf, 2);
-	DUMPBITS(bit_buf, bits, tab->len);
-	return tab->dmv;
-#undef bit_buf
-#undef bits
-#undef bit_ptr
-}
+	case 3:
+		while (decoder.ipu0_data > 0)
+		{
+			uint size = ipu_fifo.out.write((u32*)decoder.GetIpuDataPtr(), decoder.ipu0_data);
 
-int get_macroblock_address_increment(decoder_t * const decoder)
-{
-	const MBAtab *mba;
+			if (size == 0)
+			{
+				ipu_cmd.pos[0] = 3;
+				return false;
+			}
+			else
+			{
+				decoder.AdvanceIpuDataBy(size);
+			}
+		}
 
-	if (decoder->bitstream_buf >= 0x10000000)
-		mba = MBA_5 + (UBITS(decoder->bitstream_buf, 5) - 2);
-	else if (decoder->bitstream_buf >= 0x03000000)
-		mba = MBA_11 + (UBITS(decoder->bitstream_buf, 11) - 24);
-	else switch (UBITS(decoder->bitstream_buf, 11))
+	case 4:
+		bit8 = 1;
+		if (!getBits8((u8*)&bit8, 0))
 		{
+			ipu_cmd.pos[0] = 4;
+			return false;
+		}
 
-			case 8:		/* macroblock_escape */
-				DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 11);
-				return 0x23;
+		if (bit8 == 0)
+		{
+			if (g_BP.BP & 7) g_BP.BP += 8 - (g_BP.BP & 7);
 
-			case 15:	/* macroblock_stuffing (MPEG1 only) */
-				if (decoder->mpeg1)
-				{
-					DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, 11);
-					return 0x22;
-				}
+			ipuRegs.ctrl.SCD = 1;
+		}
 
-			default:
-				return 0;//error
+	case 5:
+		if (!getBits32((u8*)&ipuRegs.top, 0))
+		{
+			ipu_cmd.pos[0] = 5;
+			return false;
 		}
 
-	DUMPBITS(decoder->bitstream_buf, decoder->bitstream_bits, mba->len);
+		BigEndian(ipuRegs.top, ipuRegs.top);
+		decoder.bitstream_bits = 0;
+		break;
+	}
 
-	return mba->mba + 1;
-}
+	return true;
+}
\ No newline at end of file

 

  ViewVC Help
Powered by ViewVC 1.1.22