/[pcsx2_0.9.7]/trunk/pcsx2/IPU/mpeg2lib/Idct.cpp
ViewVC logotype

Contents of /trunk/pcsx2/IPU/mpeg2lib/Idct.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 10 - (show annotations) (download)
Mon Sep 6 11:40:06 2010 UTC (9 years, 5 months ago) by william
File size: 8050 byte(s)
exported r3113 from ./upstream/trunk
1 /*
2 * idct.c
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
5 * Modified by Florin for PCSX2 emu
6 *
7 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
8 * See http://libmpeg2.sourceforge.net/ for updates.
9 *
10 * mpeg2dec is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
14 *
15 * mpeg2dec is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 */
24
25 #include "PrecompiledHeader.h"
26
27 #include "Common.h"
28 #include "IPU/IPU.h"
29
/*
 * Fixed-point cosine weights used by the IDCT butterflies.
 * Each Wk is 2048 * sqrt(2) * cos(k * pi / 16), rounded to an integer.
 */
#define W1  2841    /* k = 1 */
#define W2  2676    /* k = 2 */
#define W3  2408    /* k = 3 */
#define W5  1609    /* k = 5 */
#define W6  1108    /* k = 6 */
#define W7  565     /* k = 7 */
/*
 * clp (val, res):  res = val saturated to [0, 255].
 * clp2(val, res):  res = val saturated to [-255, 255].
 *
 * Every argument is parenthesized so that expression arguments (for example
 * "c ? a : b" or "x & mask") are clamped as a whole instead of being re-parsed
 * against the comparison operators.  'val' is still evaluated more than once,
 * so it must not have side effects.
 *
 * The expansion keeps its historical trailing ';' so that existing call
 * sites, written with or without their own semicolon, keep compiling.
 */
#define clp(val,res)  (res) = ((val) < 0) ? 0 : (((val) > 255) ? 255 : (val));
#define clp2(val,res) (res) = ((val) < -255) ? -255 : (((val) > 255) ? 255 : (val));
38
39 /* idct main entry point */
40 void (__fastcall *mpeg2_idct_copy) (s16 * block, u8 * dest, int stride);
41 /* JayteeMaster: changed dest to 16 bit signed */
42 void (__fastcall *mpeg2_idct_add) (int last, s16 * block,
43 /*u8*/s16 * dest, int stride);
44
45 /*
46 * In legal streams, the IDCT output should be between -384 and +384.
47 * In corrupted streams, it is possible to force the IDCT output to go
48 * to +-3826 - this is the worst case for a column IDCT where the
49 * column inputs are 16-bit values.
50 */
51 static u8 clip_lut[1024];
52 #define CLIP(i) ((clip_lut+384)[(i)])
53
/*
 * BUTTERFLY (t0, t1, W0, W1, d0, d1) computes the rotation
 *
 *     t0 = W0*d0 + W1*d1
 *     t1 = W0*d1 - W1*d0
 *
 * using the shared product W0*(d0 + d1), so only three multiplications are
 * written out instead of four.
 *
 * All arguments are parenthesized so expression arguments expand correctly,
 * and the scratch variable has a name that will not collide with a caller's
 * own locals.  Arguments are evaluated more than once, so they must be free
 * of side effects; t0 and t1 must be assignable.
 */
#define BUTTERFLY(t0,t1,W0,W1,d0,d1)                            \
do {                                                            \
    int butterfly_tmp_ = (W0) * ((d0) + (d1));                  \
    (t0) = butterfly_tmp_ + ((W1) - (W0)) * (d1);               \
    (t1) = butterfly_tmp_ - ((W1) + (W0)) * (d0);               \
} while (0)
68
69 static __forceinline void idct_row (s16 * const block)
70 {
71 int d0, d1, d2, d3;
72 int a0, a1, a2, a3, b0, b1, b2, b3;
73 int t0, t1, t2, t3;
74
75 /* shortcut */
76 if (!(block[1] | ((s32 *)block)[1] | ((s32 *)block)[2] |
77 ((s32 *)block)[3])) {
78 u32 tmp = (u16) (block[0] << 3);
79 tmp |= tmp << 16;
80 ((s32 *)block)[0] = tmp;
81 ((s32 *)block)[1] = tmp;
82 ((s32 *)block)[2] = tmp;
83 ((s32 *)block)[3] = tmp;
84 return;
85 }
86
87 d0 = (block[0] << 11) + 128;
88 d1 = block[1];
89 d2 = block[2] << 11;
90 d3 = block[3];
91 t0 = d0 + d2;
92 t1 = d0 - d2;
93 BUTTERFLY (t2, t3, W6, W2, d3, d1);
94 a0 = t0 + t2;
95 a1 = t1 + t3;
96 a2 = t1 - t3;
97 a3 = t0 - t2;
98
99 d0 = block[4];
100 d1 = block[5];
101 d2 = block[6];
102 d3 = block[7];
103 BUTTERFLY (t0, t1, W7, W1, d3, d0);
104 BUTTERFLY (t2, t3, W3, W5, d1, d2);
105 b0 = t0 + t2;
106 b3 = t1 + t3;
107 t0 -= t2;
108 t1 -= t3;
109 b1 = ((t0 + t1) * 181) >> 8;
110 b2 = ((t0 - t1) * 181) >> 8;
111
112 block[0] = (a0 + b0) >> 8;
113 block[1] = (a1 + b1) >> 8;
114 block[2] = (a2 + b2) >> 8;
115 block[3] = (a3 + b3) >> 8;
116 block[4] = (a3 - b3) >> 8;
117 block[5] = (a2 - b2) >> 8;
118 block[6] = (a1 - b1) >> 8;
119 block[7] = (a0 - b0) >> 8;
120 }
121
122 static __forceinline void idct_col (s16 * const block)
123 {
124 int d0, d1, d2, d3;
125 int a0, a1, a2, a3, b0, b1, b2, b3;
126 int t0, t1, t2, t3;
127
128 d0 = (block[8*0] << 11) + 65536;
129 d1 = block[8*1];
130 d2 = block[8*2] << 11;
131 d3 = block[8*3];
132 t0 = d0 + d2;
133 t1 = d0 - d2;
134 BUTTERFLY (t2, t3, W6, W2, d3, d1);
135 a0 = t0 + t2;
136 a1 = t1 + t3;
137 a2 = t1 - t3;
138 a3 = t0 - t2;
139
140 d0 = block[8*4];
141 d1 = block[8*5];
142 d2 = block[8*6];
143 d3 = block[8*7];
144 BUTTERFLY (t0, t1, W7, W1, d3, d0);
145 BUTTERFLY (t2, t3, W3, W5, d1, d2);
146 b0 = t0 + t2;
147 b3 = t1 + t3;
148 t0 = (t0 - t2) >> 8;
149 t1 = (t1 - t3) >> 8;
150 b1 = (t0 + t1) * 181;
151 b2 = (t0 - t1) * 181;
152
153 block[8*0] = (a0 + b0) >> 17;
154 block[8*1] = (a1 + b1) >> 17;
155 block[8*2] = (a2 + b2) >> 17;
156 block[8*3] = (a3 + b3) >> 17;
157 block[8*4] = (a3 - b3) >> 17;
158 block[8*5] = (a2 - b2) >> 17;
159 block[8*6] = (a1 - b1) >> 17;
160 block[8*7] = (a0 - b0) >> 17;
161 }
162
163 static void __fastcall mpeg2_idct_copy_c (s16 * block, u8 * dest,
164 const int stride)
165 {
166 int i;
167
168 for (i = 0; i < 8; i++)
169 idct_row (block + 8 * i);
170 for (i = 0; i < 8; i++)
171 idct_col (block + i);
172 do {
173 dest[0] = CLIP (block[0]);
174 dest[1] = CLIP (block[1]);
175 dest[2] = CLIP (block[2]);
176 dest[3] = CLIP (block[3]);
177 dest[4] = CLIP (block[4]);
178 dest[5] = CLIP (block[5]);
179 dest[6] = CLIP (block[6]);
180 dest[7] = CLIP (block[7]);
181
182 block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0;
183 block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0;
184
185 dest += stride;
186 block += 8;
187 } while (--i);
188 }
189
190 /* JayteeMaster: changed dest to 16 bit signed */
191 static void __fastcall mpeg2_idct_add_c (const int last, s16 * block,
192 /*u8*/s16 * dest, const int stride)
193 {
194 int i;
195
196 if (last != 129 || (block[0] & 7) == 4) {
197 for (i = 0; i < 8; i++)
198 idct_row (block + 8 * i);
199 for (i = 0; i < 8; i++)
200 idct_col (block + i);
201 do {
202 dest[0] = block[0];
203 dest[1] = block[1];
204 dest[2] = block[2];
205 dest[3] = block[3];
206 dest[4] = block[4];
207 dest[5] = block[5];
208 dest[6] = block[6];
209 dest[7] = block[7];
210
211 block[0] = 0; block[1] = 0; block[2] = 0; block[3] = 0;
212 block[4] = 0; block[5] = 0; block[6] = 0; block[7] = 0;
213
214 dest += stride;
215 block += 8;
216 } while (--i);
217 } else {
218 int DC;
219
220 DC = (block[0] + 4) >> 3;
221 block[0] = block[63] = 0;
222 i = 8;
223 do {
224 dest[0] = DC;
225 dest[1] = DC;
226 dest[2] = DC;
227 dest[3] = DC;
228 dest[4] = DC;
229 dest[5] = DC;
230 dest[6] = DC;
231 dest[7] = DC;
232 dest += stride;
233 } while (--i);
234 }
235 }
236
237 extern "C"
238 {
239 u8 mpeg2_scan_norm[64] = {
240 /* Zig-Zag scan pattern */
241 0, 1, 8, 16, 9, 2, 3, 10, 17, 24, 32, 25, 18, 11, 4, 5,
242 12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13, 6, 7, 14, 21, 28,
243 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
244 58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
245 };
246
247 u8 mpeg2_scan_alt[64] = {
248 /* Alternate scan pattern */
249 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49,
250 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43,
251 51, 59, 20, 28, 5, 13, 6, 14, 21, 29, 36, 44, 52, 60, 37, 45,
252 53, 61, 22, 30, 7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63
253 };
254 };
255
// The MMX version wasn't being used and it was only available as a .obj,
// so I removed it (gigaherz).
258 ///* idct_mmx.c */
259 //void mpeg2_idct_copy_mmxext (s16 * block, u8 * dest, int stride);
260 //void mpeg2_idct_add_mmxext (int last, s16 * block,
261 // s16 * dest, int stride);
262 //void mpeg2_idct_copy_mmx (s16 * block, u8 * dest, int stride);
263 //void mpeg2_idct_add_mmx (int last, s16 * block,
264 // s16 * dest, int stride);
265 //void mpeg2_idct_mmx_init (void);
266
267 void mpeg2_idct_init()
268 {
269 int i, j;
270
271 mpeg2_idct_copy = mpeg2_idct_copy_c;
272 mpeg2_idct_add = mpeg2_idct_add_c;
273 for (i = -384; i < 640; i++)
274 clip_lut[i+384] = (i < 0) ? 0 : ((i > 255) ? 255 : i);
275 for (i = 0; i < 64; i++) {
276 j = mpeg2_scan_norm[i];
277 mpeg2_scan_norm[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
278 j = mpeg2_scan_alt[i];
279 mpeg2_scan_alt[i] = ((j & 0x36) >> 1) | ((j & 0x09) << 2);
280 }
281 }

  ViewVC Help
Powered by ViewVC 1.1.22