/[pcsx2_0.9.7]/trunk/plugins/zzogl-pg/opengl/Mem.cpp
ViewVC logotype

Contents of /trunk/plugins/zzogl-pg/opengl/Mem.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 280 - (show annotations) (download)
Thu Dec 23 12:02:12 2010 UTC (9 years, 2 months ago) by william
File size: 13132 byte(s)
re-commit (had local access denied errors when committing)
1 /* ZZ Open GL graphics plugin
2 * Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
3 * Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
18 */
19
20 #include "GS.h"
21 #include "Mem.h"
22 #include "targets.h"
23 #include "x86.h"
24
25 #include "Mem_Transmit.h"
26 #include "Mem_Swizzle.h"
27 #ifdef ZEROGS_SSE2
28 #include <emmintrin.h>
29 #endif
30
// Block layout descriptors indexed directly by PSM format value
// (all PSM ids fit below 0x40, hence the array size).
BLOCK m_Blocks[0x40]; // do so blocks are indexable

// 16-byte-aligned scratch block used by the swizzling routines.
PCSX2_ALIGNED16(u32 tempblock[64]);

// Add a bunch of local variables that used to be in the TransferHostLocal
// functions, in order to de-macro the TransmitHostLocal macros.
// May be in a class or namespace eventually.
int tempX, tempY;       // current write position within the destination image
int pitch, area, fracX; // per-transfer geometry, set in TransferAligningToBlocks
int nSize;              // number of transfer units left in the current transfer
u8* pstart;             // base address of the destination buffer in GS memory
42
// ------------------------
// |          Y           |
// ------------------------
// |  block        |      |
// |  aligned area |  X   |
// |               |      |
// ------------------------
// |          Y           |
// ------------------------

// Handles the top, non-block-aligned strip of the transfer (the upper "Y"
// region in the diagram): decides whether the rest of the image can be
// swizzled a whole block at a time, and transfers scanlines until the write
// position sits on a block boundary.
//
// Returns the advanced source pointer, or NULL when the transfer is finished
// (completed or aborted); the caller then wraps up via FinishTransfer.
// Reads/updates the module-level transfer state (nSize, tempY, gs.*).
template <class T>
static __forceinline const T* AlignOnBlockBoundry(TransferData data, TransferFuncts fun, Point alignedPt, int& endY, const T* pbuf)
{
	// Block-wise transfer requires the x origin to be block-aligned, the
	// current column to be back at the start, and a non-empty aligned area.
	bool bCanAlign = ((MOD_POW2(gs.trxpos.dx, data.blockwidth) == 0) && (gs.imageX == gs.trxpos.dx) &&
					  (alignedPt.y > endY) && (alignedPt.x > gs.trxpos.dx));

	if ((gs.imageEndX - gs.trxpos.dx) % data.widthlimit)
	{
		/* hack */
		// Estimate how far the remaining data diverges from the image area;
		// when it is within one width limit, the sizes sent are bogus
		// (some games do this) and the transfer is dropped.
		int testwidth = (int)nSize -
						(gs.imageEndY - gs.imageY) * (gs.imageEndX - gs.trxpos.dx)
						+ (gs.imageX - gs.trxpos.dx);

		if ((testwidth <= data.widthlimit) && (testwidth >= -data.widthlimit))
		{
			/* don't transfer */
			/*ZZLog::Debug_Log("Bad texture %s: %d %d %d", #psm, gs.trxpos.dx, gs.imageEndX, nQWordSize);*/
			//ZZLog::Error_Log("Bad texture: testwidth = %d; data.widthlimit = %d", testwidth, data.widthlimit);
			gs.imageTransfer = -1;
		}

		bCanAlign = false;
	}

	/* first align on block boundary */
	if (MOD_POW2(gs.imageY, data.blockheight) || !bCanAlign)
	{
		u32 transwidth;

		if (!bCanAlign)
			endY = gs.imageEndY; /* transfer the whole image */
		else
			assert(endY < gs.imageEndY); /* part of alignment condition */

		if (((gs.imageEndX - gs.trxpos.dx) % data.widthlimit) || ((gs.imageEndX - gs.imageX) % data.widthlimit))
		{
			/* transmit with a width of 1 */
			// PSMT4 packs two pixels per byte, so its minimum width is 2.
			transwidth = (1 + (DSTPSM == PSMT4));
		}
		else
		{
			transwidth = data.widthlimit;
		}

		pbuf = TransmitHostLocalY<T>(data.psm, fun.wp, transwidth, endY, pbuf);

		if (pbuf == NULL) return NULL;

		if (nSize == 0 || tempY == gs.imageEndY) return NULL;
	}

	return pbuf;
}
106
// Bulk path of the transfer: swizzles whole blocks at a time from the source
// buffer into GS memory, walking the block-aligned area one row of blocks at
// a time, then hands the unaligned right-hand remainder (the "X" strip) to
// TransmitHostLocalX. Returns the advanced source pointer, or NULL when the
// transfer has been consumed. Updates pitch/area/fracX, tempX/tempY, nSize.
template <class T>
static __forceinline const T* TransferAligningToBlocks(TransferData data, TransferFuncts fun, Point alignedPt, const T* pbuf)
{
	bool bAligned;
	const u32 TSize = sizeof(T);
	_SwizzleBlock swizzle;

	/* can align! */
	pitch = gs.imageEndX - gs.trxpos.dx;  // pixels per source row
	area = pitch * data.blockheight;      // pixels per row of blocks
	fracX = gs.imageEndX - alignedPt.x;   // width of the unaligned right strip

	/* on top of checking whether pbuf is aligned, make sure that the width is at least aligned to its limits (due to bugs in pcsx2) */
	bAligned = !((uptr)pbuf & 0xf) && (TransPitch(pitch, data.transfersize) & 0xf) == 0;

	// The 24-bit and partial (8H/4HH/4HL) formats only update part of each
	// destination word, so they always take the aligned swizzle path.
	if (bAligned || ((DSTPSM == PSMCT24) || (DSTPSM == PSMT8H) || (DSTPSM == PSMT4HH) || (DSTPSM == PSMT4HL)))
		swizzle = (fun.Swizzle);
	else
		swizzle = (fun.Swizzle_u);

	//Transfer aligning to blocks.
	for (; tempY < alignedPt.y && nSize >= area; tempY += data.blockheight, nSize -= area)
	{
		// Swizzle one full row of blocks; fun.gp maps (x, y, bw) to the
		// block's offset inside the destination buffer.
		for (int tempj = gs.trxpos.dx; tempj < alignedPt.x; tempj += data.blockwidth, pbuf += TransPitch(data.blockwidth, data.transfersize) / TSize)
		{
			u8 *temp = pstart + fun.gp(tempj, tempY, gs.dstbuf.bw) * data.blockbits / 8;
			swizzle(temp, (u8*)pbuf, TransPitch(pitch, data.transfersize));
		}
#ifdef ZEROGS_SSE2
		// Note: swizzle function uses some non temporal move (mm_stream) instruction.
		// store fence insures that previous store are finish before execute new one.
		_mm_sfence();

#endif

		/* transfer the rest */
		if (alignedPt.x < gs.imageEndX)
		{
			pbuf = TransmitHostLocalX<T>(data.psm, fun.wp, data.widthlimit, data.blockheight, alignedPt.x, pbuf);

			if (pbuf == NULL) return NULL;

			// Rewind to the start of the next row of blocks.
			pbuf -= TransPitch((alignedPt.x - gs.trxpos.dx), data.transfersize) / TSize;
		}
		else
		{
			// No remainder: skip the rows the block loop already consumed.
			pbuf += (data.blockheight - 1) * TransPitch(pitch, data.transfersize) / TSize;
		}

		tempX = gs.trxpos.dx;
	}

	return pbuf;
}
161
162 static __forceinline int FinishTransfer(TransferData data, int nLeftOver)
163 {
164 if (tempY >= gs.imageEndY)
165 {
166 assert(gs.imageTransfer == -1 || tempY == gs.imageEndY);
167 gs.imageTransfer = -1;
168 /*int start, end;
169 GetRectMemAddress(start, end, gs.dstbuf.psm, gs.trxpos.dx, gs.trxpos.dy, gs.imageWnew, gs.imageHnew, gs.dstbuf.bp, gs.dstbuf.bw);
170 g_MemTargs.ClearRange(start, end);*/
171 }
172 else
173 {
174 /* update new params */
175 gs.imageY = tempY;
176 gs.imageX = tempX;
177 }
178
179 return (nSize * TransPitch(2, data.transfersize) + nLeftOver) / 2;
180 }
181
// Generic host->local (EE memory -> GS memory) image transfer for pixel
// storage format 'psm'. T is the unit the per-format transmit routines
// consume (u32/u16/u8); nQWordSize is the amount of source data in
// quadwords (16 bytes). Returns the leftover count for the next call.
template <class T>
static __forceinline int RealTransfer(u32 psm, const void* pbyMem, u32 nQWordSize)
{
	assert(gs.imageTransfer == 0); // 0 = host->local transfer in progress
	TransferData data = tData[psm];
	TransferFuncts fun(psm);
	pstart = g_pbyGSMemory + gs.dstbuf.bp * 256; // dest base (GS blocks are 256 bytes)
	const T* pbuf = (const T*)pbyMem;
	const int tp2 = TransPitch(2, data.transfersize);
	// Data that does not fill a whole transfer unit is carried over.
	int nLeftOver = (nQWordSize * 4 * 2) % tp2;
	tempY = gs.imageY;
	tempX = gs.imageX;
	Point alignedPt;

	nSize = (nQWordSize * 4 * 2) / tp2;
	nSize = min(nSize, gs.imageWnew * gs.imageHnew); // clamp to the image area

	// Block-aligned bounds of the destination rectangle.
	int endY = ROUND_UPPOW2(gs.imageY, data.blockheight);
	alignedPt.y = ROUND_DOWNPOW2(gs.imageEndY, data.blockheight);
	alignedPt.x = ROUND_DOWNPOW2(gs.imageEndX, data.blockwidth);

	// 1) transfer rows up to the first block boundary
	pbuf = AlignOnBlockBoundry<T>(data, fun, alignedPt, endY, pbuf);

	if (pbuf == NULL) return FinishTransfer(data, nLeftOver);

	// 2) transfer the block-aligned bulk of the image
	pbuf = TransferAligningToBlocks<T>(data, fun, alignedPt, pbuf);

	if (pbuf == NULL) return FinishTransfer(data, nLeftOver);

	// 3) transfer whatever remains below the aligned area
	if (TransPitch(nSize, data.transfersize) / 4 > 0)
	{
		pbuf = TransmitHostLocalY<T>(psm, fun.wp, data.widthlimit, gs.imageEndY, pbuf);

		if (pbuf == NULL) return FinishTransfer(data, nLeftOver);

		/* sometimes wrong sizes are sent (tekken tag) */
		assert(gs.imageTransfer == -1 || TransPitch(nSize, data.transfersize) / 4 <= 2);
	}

	return FinishTransfer(data, nLeftOver);
}
223
// Host->local transfer entry points, one per pixel storage format.
// Each just instantiates RealTransfer with the transfer unit (u32/u16/u8)
// appropriate to that format.
int TransferHostLocal32(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u32>(PSMCT32, pbyMem, nQWordSize); }
int TransferHostLocal32Z(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u32>(PSMT32Z, pbyMem, nQWordSize); }
int TransferHostLocal24(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u8>(PSMCT24, pbyMem, nQWordSize); }
int TransferHostLocal24Z(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u8>(PSMT24Z, pbyMem, nQWordSize); }
int TransferHostLocal16(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u16>(PSMCT16, pbyMem, nQWordSize); }
int TransferHostLocal16S(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u16>(PSMCT16S, pbyMem, nQWordSize); }
int TransferHostLocal16Z(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u16>(PSMT16Z, pbyMem, nQWordSize); }
int TransferHostLocal16SZ(const void* pbyMem, u32 nQWordSize){ return RealTransfer<u16>(PSMT16SZ, pbyMem, nQWordSize); }
int TransferHostLocal8(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u8>(PSMT8, pbyMem, nQWordSize); }
int TransferHostLocal4(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u8>(PSMT4, pbyMem, nQWordSize); }
int TransferHostLocal8H(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u8>(PSMT8H, pbyMem, nQWordSize); }
int TransferHostLocal4HL(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u8>(PSMT4HL, pbyMem, nQWordSize); }
int TransferHostLocal4HH(const void* pbyMem, u32 nQWordSize) { return RealTransfer<u8>(PSMT4HH, pbyMem, nQWordSize); }
237
// Local->host (GS memory -> EE) transfers are not implemented in this
// plugin; these stubs only emit the function-entry log via FUNCLOG.
void TransferLocalHost32(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost24(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost16(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost16S(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost8(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost4(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost8H(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost4HL(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost4HH(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost32Z(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost24Z(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost16Z(void* pbyMem, u32 nQWordSize) {FUNCLOG}
void TransferLocalHost16SZ(void* pbyMem, u32 nQWordSize) {FUNCLOG}
251
252 void fill_block(BLOCK b, vector<char>& vBlockData, vector<char>& vBilinearData, int floatfmt)
253 {
254 float* psrcf = (float*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH;
255 u16* psrcw = NULL;
256 if (!floatfmt)
257 psrcw = (u16*)&vBlockData[0] + b.ox + b.oy * BLOCK_TEXWIDTH;
258
259 for(int i = 0; i < b.height; ++i)
260 {
261 u32 i_width = i*BLOCK_TEXWIDTH;
262 for(int j = 0; j < b.width; ++j)
263 {
264 /* fill the table */
265 u32 bt = b.blockTable[(i / b.colheight)*(b.width/b.colwidth) + (j / b.colwidth)];
266 u32 ct = b.columnTable[(i%b.colheight)*b.colwidth + (j%b.colwidth)];
267 u32 u = bt * 64 * b.mult + ct;
268 b.pageTable[i * b.width + j] = u;
269 if (floatfmt)
270 psrcf[i_width + j] = (float)(u) / (float)(GPU_TEXWIDTH * b.mult);
271 else
272 psrcw[i_width + j] = u;
273
274 }
275 }
276
277 if (floatfmt) {
278 float4* psrcv = (float4*)&vBilinearData[0] + b.ox + b.oy * BLOCK_TEXWIDTH;
279
280 for(int i = 0; i < b.height; ++i)
281 {
282 u32 i_width = i*BLOCK_TEXWIDTH;
283 u32 i_width2 = ((i+1)%b.height)*BLOCK_TEXWIDTH;
284 for(int j = 0; j < b.width; ++j)
285 {
286 u32 temp = ((j + 1) % b.width);
287 float4* pv = &psrcv[i_width + j];
288 pv->x = psrcf[i_width + j];
289 pv->y = psrcf[i_width + temp];
290 pv->z = psrcf[i_width2 + j];
291 pv->w = psrcf[i_width2 + temp];
292 }
293 }
294 }
295 }
296
297 void BLOCK::FillBlocks(vector<char>& vBlockData, vector<char>& vBilinearData, int floatfmt)
298 {
299 FUNCLOG
300 if (floatfmt) {
301 vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 4);
302 vBilinearData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * sizeof(float4));
303 } else {
304 vBlockData.resize(BLOCK_TEXWIDTH * BLOCK_TEXHEIGHT * 2);
305 }
306
307 BLOCK b;
308
309 memset(m_Blocks, 0, sizeof(m_Blocks));
310
311 // 32
312 b.SetDim(64, 32, 0, 0, 1);
313 b.SetTable(PSMCT32);
314 fill_block(b, vBlockData, vBilinearData, floatfmt);
315 m_Blocks[PSMCT32] = b;
316 m_Blocks[PSMCT32].SetFun(PSMCT32);
317
318 // 24 (same as 32 except write/readPixel are different)
319 m_Blocks[PSMCT24] = b;
320 m_Blocks[PSMCT24].SetFun(PSMCT24);
321
322 // 8H (same as 32 except write/readPixel are different)
323 m_Blocks[PSMT8H] = b;
324 m_Blocks[PSMT8H].SetFun(PSMT8H);
325
326 m_Blocks[PSMT4HL] = b;
327 m_Blocks[PSMT4HL].SetFun(PSMT4HL);
328
329 m_Blocks[PSMT4HH] = b;
330 m_Blocks[PSMT4HH].SetFun(PSMT4HH);
331
332 // 32z
333 b.SetDim(64, 32, 64, 0, 1);
334 b.SetTable(PSMT32Z);
335 fill_block(b, vBlockData, vBilinearData, floatfmt);
336 m_Blocks[PSMT32Z] = b;
337 m_Blocks[PSMT32Z].SetFun(PSMT32Z);
338
339 // 24Z (same as 32Z except write/readPixel are different)
340 m_Blocks[PSMT24Z] = b;
341 m_Blocks[PSMT24Z].SetFun(PSMT24Z);
342
343 // 16
344 b.SetDim(64, 64, 0, 32, 2);
345 b.SetTable(PSMCT16);
346 fill_block(b, vBlockData, vBilinearData, floatfmt);
347 m_Blocks[PSMCT16] = b;
348 m_Blocks[PSMCT16].SetFun(PSMCT16);
349
350 // 16s
351 b.SetDim(64, 64, 64, 32, 2);
352 b.SetTable(PSMCT16S);
353 fill_block(b, vBlockData, vBilinearData, floatfmt);
354 m_Blocks[PSMCT16S] = b;
355 m_Blocks[PSMCT16S].SetFun(PSMCT16S);
356
357 // 16z
358 b.SetDim(64, 64, 0, 96, 2);
359 b.SetTable(PSMT16Z);
360 fill_block(b, vBlockData, vBilinearData, floatfmt);
361 m_Blocks[PSMT16Z] = b;
362 m_Blocks[PSMT16Z].SetFun(PSMT16Z);
363
364 // 16sz
365 b.SetDim(64, 64, 64, 96, 2);
366 b.SetTable(PSMT16SZ);
367 fill_block(b, vBlockData, vBilinearData, floatfmt);
368 m_Blocks[PSMT16SZ] = b;
369 m_Blocks[PSMT16SZ].SetFun(PSMT16SZ);
370
371 // 8
372 b.SetDim(128, 64, 0, 160, 4);
373 b.SetTable(PSMT8);
374 fill_block(b, vBlockData, vBilinearData, floatfmt);
375 m_Blocks[PSMT8] = b;
376 m_Blocks[PSMT8].SetFun(PSMT8);
377
378 // 4
379 b.SetDim(128, 128, 0, 224, 8);
380 b.SetTable(PSMT4);
381 fill_block(b, vBlockData, vBilinearData, floatfmt);
382 m_Blocks[PSMT4] = b;
383 m_Blocks[PSMT4].SetFun(PSMT4);
384 }

  ViewVC Help
Powered by ViewVC 1.1.22