/[pcsx2_0.9.7]/trunk/plugins/zzogl-pg/opengl/zerogs.cpp
ViewVC logotype

Contents of /trunk/plugins/zzogl-pg/opengl/zerogs.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 273 - (show annotations) (download)
Fri Nov 12 01:10:22 2010 UTC (9 years, 8 months ago) by william
File size: 28885 byte(s)
Auto Commited Import of: pcsx2-0.9.7-DEBUG (upstream: v0.9.7.4013 local: v0.9.7.197-latest) in ./trunk
1 /* ZZ Open GL graphics plugin
2 * Copyright (c)2009-2010 zeydlitz@gmail.com, arcum42@gmail.com
3 * Based on Zerofrog's ZeroGS KOSMOS (c)2005-2008
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
18 */
19
20 //-------------------------- Includes
21 #if defined(_WIN32)
22 # include <windows.h>
23 # include "resource.h"
24 #endif
25
26 #include <stdlib.h>
27
28 #include "GS.h"
29 #include "Mem.h"
30 #include "x86.h"
31 #include "zerogs.h"
32 #include "targets.h"
33 #include "GLWin.h"
34 #include "ZZoglShaders.h"
35 #ifdef ZEROGS_SSE2
36 #include <emmintrin.h>
37 #endif
38
39 //----------------------- Defines
40
41 //-------------------------- Typedefs
42 typedef void (APIENTRYP _PFNSWAPINTERVAL)(int);
43
44 //-------------------------- Extern variables
45
46 using namespace ZeroGS;
47
48 extern u32 g_nGenVars, g_nTexVars, g_nAlphaVars, g_nResolve;
49 extern char *libraryName;
50 extern int g_nFrame, g_nRealFrame;
51
52 //extern int s_nFullscreen;
53 //-------------------------- Variables
54
55 primInfo *prim;
56
57 inline u32 FtoDW(float f) { return (*((u32*)&f)); }
58
59 int g_nDepthUpdateCount = 0;
60
61 // Consts
62 const GLenum primtype[8] = { GL_POINTS, GL_LINES, GL_LINES, GL_TRIANGLES, GL_TRIANGLES, GL_TRIANGLES, GL_TRIANGLES, 0xffffffff };
63 static const int PRIMMASK = 0x0e; // for now ignore 0x10 (AA)
64
65 PFNGLISRENDERBUFFEREXTPROC glIsRenderbufferEXT = NULL;
66 PFNGLBINDRENDERBUFFEREXTPROC glBindRenderbufferEXT = NULL;
67 PFNGLDELETERENDERBUFFERSEXTPROC glDeleteRenderbuffersEXT = NULL;
68 PFNGLGENRENDERBUFFERSEXTPROC glGenRenderbuffersEXT = NULL;
69 PFNGLRENDERBUFFERSTORAGEEXTPROC glRenderbufferStorageEXT = NULL;
70 PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC glGetRenderbufferParameterivEXT = NULL;
71 PFNGLISFRAMEBUFFEREXTPROC glIsFramebufferEXT = NULL;
72 PFNGLBINDFRAMEBUFFEREXTPROC glBindFramebufferEXT = NULL;
73 PFNGLDELETEFRAMEBUFFERSEXTPROC glDeleteFramebuffersEXT = NULL;
74 PFNGLGENFRAMEBUFFERSEXTPROC glGenFramebuffersEXT = NULL;
75 PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC glCheckFramebufferStatusEXT = NULL;
76 PFNGLFRAMEBUFFERTEXTURE1DEXTPROC glFramebufferTexture1DEXT = NULL;
77 PFNGLFRAMEBUFFERTEXTURE2DEXTPROC glFramebufferTexture2DEXT = NULL;
78 PFNGLFRAMEBUFFERTEXTURE3DEXTPROC glFramebufferTexture3DEXT = NULL;
79 PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC glFramebufferRenderbufferEXT = NULL;
80 PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC glGetFramebufferAttachmentParameterivEXT = NULL;
81 PFNGLGENERATEMIPMAPEXTPROC glGenerateMipmapEXT = NULL;
82 PFNGLDRAWBUFFERSPROC glDrawBuffers = NULL;
83
84 /////////////////////
85 // graphics resources
86
87 bool s_bTexFlush = false;
88 int s_nLastResolveReset = 0;
89 int s_nResolveCounts[30] = {0}; // resolve counts for last 30 frames
90
91 ////////////////////
92 // State parameters
93 int nBackbufferWidth, nBackbufferHeight; // ZZ
94
95 namespace ZeroGS
96 {
97 // = float4( 255.0 /256.0f, 255.0/65536.0f, 255.0f/(65535.0f*256.0f), 1.0f/(65536.0f*65536.0f));
98 // float4 g_vdepth = float4( 65536.0f*65536.0f, 256.0f*65536.0f, 65536.0f, 256.0f);
99
100 extern CRangeManager s_RangeMngr; // manages overwritten memory
101
102 // returns the first and last addresses aligned to a page that cover
103 void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
104
105 int s_nNewWidth = -1, s_nNewHeight = -1;
106 void ChangeDeviceSize(int nNewWidth, int nNewHeight);
107
108 void ProcessMessages();
109 void RenderCustom(float fAlpha); // intro anim
110
111 struct MESSAGE
112 {
113 MESSAGE() {}
114
115 MESSAGE(const char* p, u32 dw) { strcpy(str, p); dwTimeStamp = dw; }
116
117 char str[255];
118 u32 dwTimeStamp;
119 };
120
121 static list<MESSAGE> listMsgs;
122
123 ///////////////////////
124 // Method Prototypes //
125 ///////////////////////
126
127 void KickPoint();
128 void KickLine();
129 void KickTriangle();
130 void KickTriangleFan();
131 void KickSprite();
132 void KickDummy();
133
134 void ResolveInRange(int start, int end);
135
136 void ExtWrite();
137
138 void ResetRenderTarget(int index)
139 {
140 FBTexture(index);
141 }
142
143 DrawFn drawfn[8] = { KickDummy, KickDummy, KickDummy, KickDummy,
144 KickDummy, KickDummy, KickDummy, KickDummy
145 };
146
147 }; // end namespace
148
149 // does one time only initializing/destruction
150
151 class ZeroGSInit
152 {
153
154 public:
155 ZeroGSInit()
156 {
157 const u32 mem_size = MEMORY_END + 0x10000; // leave some room for out of range accesses (saves on the checks)
158 // clear
159 g_pbyGSMemory = (u8*)_aligned_malloc(mem_size, 1024);
160 memset(g_pbyGSMemory, 0, mem_size);
161
162 g_pbyGSClut = (u8*)_aligned_malloc(256 * 8, 1024); // need 512 alignment!
163 memset(g_pbyGSClut, 0, 256*8);
164 memset(&GLWin, 0, sizeof(GLWin));
165 }
166
167 ~ZeroGSInit()
168 {
169 _aligned_free(g_pbyGSMemory);
170 g_pbyGSMemory = NULL;
171
172 _aligned_free(g_pbyGSClut);
173 g_pbyGSClut = NULL;
174 }
175 };
176
177 static ZeroGSInit s_ZeroGSInit;
178
179 #ifndef GL_FRAMEBUFFER_INCOMPLETE_DUPLICATE_ATTACHMENT_EXT
180 #define GL_FRAMEBUFFER_INCOMPLETE_DUPLICATE_ATTACHMENT_EXT 0x8CD8
181 #endif
182
183 void ZeroGS::HandleGLError()
184 {
185 FUNCLOG
186 // check the error status of this framebuffer */
187 GLenum error = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
188
189 // if error != GL_FRAMEBUFFER_COMPLETE_EXT, there's an error of some sort
190
191 if (error != 0)
192 {
193 int w = 0;
194 int h = 0;
195 GLint fmt;
196 glGetRenderbufferParameterivEXT(GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_INTERNAL_FORMAT_EXT, &fmt);
197 glGetRenderbufferParameterivEXT(GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_WIDTH_EXT, &w);
198 glGetRenderbufferParameterivEXT(GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_HEIGHT_EXT, &h);
199
200 switch (error)
201 {
202 case GL_FRAMEBUFFER_COMPLETE_EXT:
203 break;
204
205 case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_EXT:
206 ZZLog::Error_Log("Error! missing a required image/buffer attachment!");
207 break;
208
209 case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT:
210 ZZLog::Error_Log("Error! has no images/buffers attached!");
211 break;
212
213 // case GL_FRAMEBUFFER_INCOMPLETE_DUPLICATE_ATTACHMENT_EXT:
214 // ZZLog::Error_Log("Error! has an image/buffer attached in multiple locations!");
215 // break;
216
217 case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_EXT:
218 ZZLog::Error_Log("Error! has mismatched image/buffer dimensions!");
219 break;
220
221 case GL_FRAMEBUFFER_INCOMPLETE_FORMATS_EXT:
222 ZZLog::Error_Log("Error! colorbuffer attachments have different types!");
223 break;
224
225 case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_EXT:
226 ZZLog::Error_Log("Error! trying to draw to non-attached color buffer!");
227 break;
228
229 case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER_EXT:
230 ZZLog::Error_Log("Error! trying to read from a non-attached color buffer!");
231 break;
232
233 case GL_FRAMEBUFFER_UNSUPPORTED_EXT:
234 ZZLog::Error_Log("Error! format is not supported by current graphics card/driver!");
235 break;
236
237 default:
238 ZZLog::Error_Log("*UNKNOWN ERROR* reported from glCheckFramebufferStatusEXT(0x%x)!", error);
239 break;
240 }
241 }
242 }
243
244 void ZeroGS::GSStateReset()
245 {
246 FUNCLOG
247 icurctx = -1;
248
249 for (int i = 0; i < 2; ++i)
250 {
251 vb[i].Destroy();
252 memset(&vb[i], 0, sizeof(ZeroGS::VB));
253
254 vb[i].tex0.tw = 1;
255 vb[i].tex0.th = 1;
256 vb[i].scissor.x1 = 639;
257 vb[i].scissor.y1 = 479;
258 vb[i].tex0.tbw = 64;
259 vb[i].Init(VB_BUFFERSIZE);
260 }
261
262 s_RangeMngr.Clear();
263
264 g_MemTargs.Destroy();
265 s_RTs.Destroy();
266 s_DepthRTs.Destroy();
267 s_BitwiseTextures.Destroy();
268
269 vb[0].ictx = 0;
270 vb[1].ictx = 1;
271 }
272
273 void ZeroGS::Reset()
274 {
275 FUNCLOG
276 s_RTs.ResolveAll();
277 s_DepthRTs.ResolveAll();
278
279 vb[0].nCount = 0;
280 vb[1].nCount = 0;
281
282 memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts));
283 s_nLastResolveReset = 0;
284
285 icurctx = -1;
286 g_vsprog = g_psprog = 0;
287
288 GSStateReset();
289 Destroy(0);
290
291 drawfn[0] = KickDummy;
292 drawfn[1] = KickDummy;
293 drawfn[2] = KickDummy;
294 drawfn[3] = KickDummy;
295 drawfn[4] = KickDummy;
296 drawfn[5] = KickDummy;
297 drawfn[6] = KickDummy;
298 drawfn[7] = KickDummy;
299 }
300
301 void ZeroGS::GSReset()
302 {
303 FUNCLOG
304
305 memset(&gs, 0, sizeof(gs));
306
307 ZeroGS::GSStateReset();
308
309 gs.prac = 1;
310 prim = &gs._prim[0];
311 gs.nTriFanVert = -1;
312 gs.imageTransfer = -1;
313 gs.q = 1;
314 }
315
316 void ZeroGS::GSSoftReset(u32 mask)
317 {
318 FUNCLOG
319
320 if (mask & 1) memset(&gs.path[0], 0, sizeof(gs.path[0]));
321 if (mask & 2) memset(&gs.path[1], 0, sizeof(gs.path[1]));
322 if (mask & 4) memset(&gs.path[2], 0, sizeof(gs.path[2]));
323
324 gs.imageTransfer = -1;
325 gs.q = 1;
326 gs.nTriFanVert = -1;
327 }
328
329 void ZeroGS::AddMessage(const char* pstr, u32 ms)
330 {
331 FUNCLOG
332 listMsgs.push_back(MESSAGE(pstr, timeGetTime() + ms));
333 ZZLog::Log("%s\n", pstr);
334 }
335
336 extern RasterFont* font_p;
337 void ZeroGS::DrawText(const char* pstr, int left, int top, u32 color)
338 {
339 FUNCLOG
340 ZZshGLDisableProfile();
341
342 float4 v;
343 v.SetColor(color);
344 glColor3f(v.z, v.y, v.x);
345 //glColor3f(((color >> 16) & 0xff) / 255.0f, ((color >> 8) & 0xff)/ 255.0f, (color & 0xff) / 255.0f);
346
347 font_p->printString(pstr, left * 2.0f / (float)nBackbufferWidth - 1, 1 - top * 2.0f / (float)nBackbufferHeight, 0);
348 ZZshGLEnableProfile();
349 }
350
351 void ZeroGS::ChangeWindowSize(int nNewWidth, int nNewHeight)
352 {
353 FUNCLOG
354 nBackbufferWidth = max(nNewWidth, 16);
355 nBackbufferHeight = max(nNewHeight, 16);
356
357 if (!(conf.fullscreen()))
358 {
359 conf.width = nNewWidth;
360 conf.height = nNewHeight;
361 }
362 }
363
364 void ZeroGS::SetChangeDeviceSize(int nNewWidth, int nNewHeight)
365 {
366 FUNCLOG
367 s_nNewWidth = nNewWidth;
368 s_nNewHeight = nNewHeight;
369
370 if (!(conf.fullscreen()))
371 {
372 conf.width = nNewWidth;
373 conf.height = nNewHeight;
374 }
375 }
376
377 void ZeroGS::ChangeDeviceSize(int nNewWidth, int nNewHeight)
378 {
379 FUNCLOG
380 //int oldscreen = s_nFullscreen;
381
382 int oldwidth = nBackbufferWidth, oldheight = nBackbufferHeight;
383
384 if (!Create(nNewWidth&~7, nNewHeight&~7))
385 {
386 ZZLog::Error_Log("Failed to recreate, changing to old device.");
387
388 if (Create(oldwidth, oldheight))
389 {
390 SysMessage("Failed to create device, exiting...");
391 exit(0);
392 }
393 }
394
395 for (int i = 0; i < 2; ++i)
396 {
397 vb[i].bNeedFrameCheck = vb[i].bNeedZCheck = 1;
398 vb[i].CheckFrame(0);
399 }
400
401 assert(vb[0].pBufferData != NULL && vb[1].pBufferData != NULL);
402 }
403
404 void ZeroGS::SetAA(int mode)
405 {
406 FUNCLOG
407 float f = 1.0f;
408
409 // need to flush all targets
410 s_RTs.ResolveAll();
411 s_RTs.Destroy();
412 s_DepthRTs.ResolveAll();
413 s_DepthRTs.Destroy();
414
415 AA.x = AA.y = 0; // This is code for x0, x2, x4, x8 and x16 anti-aliasing.
416
417 if (mode > 0)
418 {
419 // ( 1, 0 ) ; ( 1, 1 ) ; ( 2, 1 ) ; ( 2, 2 )
420 // it's used as a binary shift, so x >> AA.x, y >> AA.y
421 AA.x = (mode + 1) / 2;
422 AA.y = mode / 2;
423 f = 2.0f;
424 }
425
426 memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts));
427 s_nLastResolveReset = 0;
428
429 vb[0].prndr = NULL;
430 vb[0].pdepth = NULL;
431 vb[1].prndr = NULL;
432 vb[1].pdepth = NULL;
433
434 vb[0].bNeedFrameCheck = vb[0].bNeedZCheck = 1;
435 vb[1].bNeedFrameCheck = vb[1].bNeedZCheck = 1;
436
437 glPointSize(f);
438 }
439
440 void ZeroGS::Prim()
441 {
442 FUNCLOG
443
444 VB& curvb = vb[prim->ctxt];
445
446 if (curvb.CheckPrim()) Flush(prim->ctxt);
447
448 curvb.curprim._val = prim->_val;
449 curvb.curprim.prim = prim->prim;
450 }
451
452 void ZeroGS::ProcessMessages()
453 {
454 FUNCLOG
455
456 if (listMsgs.size() > 0)
457 {
458 int left = 25, top = 15;
459 list<MESSAGE>::iterator it = listMsgs.begin();
460
461 while (it != listMsgs.end())
462 {
463 DrawText(it->str, left + 1, top + 1, 0xff000000);
464 DrawText(it->str, left, top, 0xffffff30);
465 top += 15;
466
467 if ((int)(it->dwTimeStamp - timeGetTime()) < 0)
468 it = listMsgs.erase(it);
469 else ++it;
470 }
471 }
472 }
473
474 void ZeroGS::RenderCustom(float fAlpha)
475 {
476 FUNCLOG
477 GL_REPORT_ERROR();
478
479 fAlpha = 1;
480 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); // switch to the backbuffer
481
482 DisableAllgl() ;
483 SetShaderCaller("RenderCustom");
484
485 glViewport(0, 0, nBackbufferWidth, nBackbufferHeight);
486
487 // play custom animation
488 glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
489
490 // tex coords
491 float4 v = float4(1 / 32767.0f, 1 / 32767.0f, 0, 0);
492 ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltPos, v, "g_fBitBltPos");
493 v.x = (float)nLogoWidth;
494 v.y = (float)nLogoHeight;
495 ZZshSetParameter4fv(pvsBitBlt.prog, pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
496
497 v.x = v.y = v.z = v.w = fAlpha;
498 ZZshSetParameter4fv(ppsBaseTexture.prog, ppsBaseTexture.sOneColor, v, "g_fOneColor");
499
500 if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
501
502 // inside vhDCb[0]'s target area, so render that region only
503 ZZshGLSetTextureParameter(ppsBaseTexture.prog, ppsBaseTexture.sFinal, ptexLogo, "Logo");
504 glBindBuffer(GL_ARRAY_BUFFER, vboRect);
505
506 SET_STREAM();
507
508 ZZshSetVertexShader(pvsBitBlt.prog);
509 ZZshSetPixelShader(ppsBaseTexture.prog);
510 DrawTriangleArray();
511
512 // restore
513 if (conf.wireframe()) glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
514
515 ProcessMessages();
516
517 GLWin.SwapGLBuffers();
518
519 glEnable(GL_SCISSOR_TEST);
520 glEnable(GL_STENCIL_TEST);
521
522 vb[0].bSyncVars = 0;
523 vb[1].bSyncVars = 0;
524
525 GL_REPORT_ERROR();
526 }
527
528 //////////////////////////
529 // Internal Definitions //
530 //////////////////////////
531
532
533 __forceinline void MOVZ(VertexGPU *p, u32 gsz, const VB& curvb)
534 {
535 p->z = (curvb.zprimmask == 0xffff) ? min((u32)0xffff, gsz) : gsz;
536 }
537
538 __forceinline void MOVFOG(VertexGPU *p, Vertex gsf)
539 {
540 p->f = ((s16)(gsf).f << 7) | 0x7f;
541 }
542
543
544 int Values[100] = {0, };
545
546 inline void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb)
547 {
548 int index = Index;
549 p->x = ((((int)gs.gsvertex[index].x - curvb.offset.x) >> 1) & 0xffff);
550 p->y = ((((int)gs.gsvertex[index].y - curvb.offset.y) >> 1) & 0xffff);
551 p->f = ((s16)gs.gsvertex[index].f << 7) | 0x7f;
552
553 MOVZ(p, gs.gsvertex[index].z, curvb);
554
555 p->rgba = prim->iip ? gs.gsvertex[index].rgba : gs.rgba;
556
557 // This code is somehow incorrect
558 // if ((gs.texa.aem) && ((p->rgba & 0xffffff ) == 0))
559 // p->rgba = 0;
560
561 if (conf.settings().texa)
562 {
563 u32 B = ((p->rgba & 0xfe000000) >> 1) +
564 (0x01000000 * curvb.fba.fba) ;
565 p->rgba = (p->rgba & 0xffffff) + B;
566 }
567
568 if (prim->tme)
569 {
570 if (prim->fst)
571 {
572 p->s = (float)gs.gsvertex[index].u * fiTexWidth[prim->ctxt];
573 p->t = (float)gs.gsvertex[index].v * fiTexHeight[prim->ctxt];
574 p->q = 1;
575 }
576 else
577 {
578 p->s = gs.gsvertex[index].s;
579 p->t = gs.gsvertex[index].t;
580 p->q = gs.gsvertex[index].q;
581 }
582 }
583 }
584
585 static __forceinline void OUTPUT_VERT(VertexGPU vert, u32 id)
586 {
587 #ifdef WRITE_PRIM_LOGS
588 ZZLog::Prim_Log("%c%d(%d): xyzf=(%4d,%4d,0x%x,%3d), rgba=0x%8.8x, stq = (%2.5f,%2.5f,%2.5f)\n",
589 id == 0 ? '*' : ' ', id, prim->prim, vert.x / 8, vert.y / 8, vert.z, vert.f / 128,
590 vert.rgba, Clamp(vert.s, -10, 10), Clamp(vert.t, -10, 10), Clamp(vert.q, -10, 10));
591 #endif
592 }
593
594 void ZeroGS::KickPoint()
595 {
596 FUNCLOG
597 assert(gs.primC >= 1);
598
599 VB& curvb = vb[prim->ctxt];
600
601 curvb.FlushTexData();
602
603 if ((vb[!prim->ctxt].nCount > 0) && (vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp))
604 {
605 assert(vb[prim->ctxt].nCount == 0);
606 Flush(!prim->ctxt);
607 }
608
609 curvb.NotifyWrite(1);
610
611 int last = gs.primNext(2);
612
613 VertexGPU* p = curvb.pBufferData + curvb.nCount;
614 SET_VERTEX(&p[0], last, curvb);
615 curvb.nCount++;
616
617 OUTPUT_VERT(p[0], 0);
618 }
619
620 void ZeroGS::KickLine()
621 {
622 FUNCLOG
623 assert(gs.primC >= 2);
624 VB& curvb = vb[prim->ctxt];
625
626 curvb.FlushTexData();
627
628 if ((vb[!prim->ctxt].nCount > 0) && (vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp))
629 {
630 assert(vb[prim->ctxt].nCount == 0);
631 Flush(!prim->ctxt);
632 }
633
634 curvb.NotifyWrite(2);
635
636 int next = gs.primNext();
637 int last = gs.primNext(2);
638
639 VertexGPU* p = curvb.pBufferData + curvb.nCount;
640 SET_VERTEX(&p[0], next, curvb);
641 SET_VERTEX(&p[1], last, curvb);
642
643 curvb.nCount += 2;
644
645 OUTPUT_VERT(p[0], 0);
646 OUTPUT_VERT(p[1], 1);
647 }
648
649 void ZeroGS::KickTriangle()
650 {
651 FUNCLOG
652 assert(gs.primC >= 3);
653 VB& curvb = vb[prim->ctxt];
654
655 curvb.FlushTexData();
656
657 if ((vb[!prim->ctxt].nCount > 0) && (vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp))
658 {
659 assert(vb[prim->ctxt].nCount == 0);
660 Flush(!prim->ctxt);
661 }
662
663 curvb.NotifyWrite(3);
664
665 VertexGPU* p = curvb.pBufferData + curvb.nCount;
666 SET_VERTEX(&p[0], 0, curvb);
667 SET_VERTEX(&p[1], 1, curvb);
668 SET_VERTEX(&p[2], 2, curvb);
669
670 curvb.nCount += 3;
671
672 OUTPUT_VERT(p[0], 0);
673 OUTPUT_VERT(p[1], 1);
674 OUTPUT_VERT(p[2], 2);
675 }
676
677 void ZeroGS::KickTriangleFan()
678 {
679 FUNCLOG
680 assert(gs.primC >= 3);
681 VB& curvb = vb[prim->ctxt];
682
683 curvb.FlushTexData();
684
685 if ((vb[!prim->ctxt].nCount > 0) && (vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp))
686 {
687 assert(vb[prim->ctxt].nCount == 0);
688 Flush(!prim->ctxt);
689 }
690
691 curvb.NotifyWrite(3);
692
693 VertexGPU* p = curvb.pBufferData + curvb.nCount;
694 SET_VERTEX(&p[0], 0, curvb);
695 SET_VERTEX(&p[1], 1, curvb);
696 SET_VERTEX(&p[2], 2, curvb);
697
698 curvb.nCount += 3;
699
700 // add 1 to skip the first vertex
701
702 if (gs.primIndex == gs.nTriFanVert) gs.primIndex = gs.primNext();
703
704 OUTPUT_VERT(p[0], 0);
705 OUTPUT_VERT(p[1], 1);
706 OUTPUT_VERT(p[2], 2);
707 }
708
709 void SetKickVertex(VertexGPU *p, Vertex v, int next, const VB& curvb)
710 {
711 SET_VERTEX(p, next, curvb);
712 MOVZ(p, v.z, curvb);
713 MOVFOG(p, v);
714 }
715
716 void ZeroGS::KickSprite()
717 {
718 FUNCLOG
719 assert(gs.primC >= 2);
720 VB& curvb = vb[prim->ctxt];
721
722 curvb.FlushTexData();
723
724 if ((vb[!prim->ctxt].nCount > 0) && (vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp))
725 {
726 assert(vb[prim->ctxt].nCount == 0);
727 Flush(!prim->ctxt);
728 }
729
730 curvb.NotifyWrite(6);
731 int next = gs.primNext();
732 int last = gs.primNext(2);
733
734 // sprite is too small and AA shows lines (tek4, Mana Khemia)
735 gs.gsvertex[last].x += (4 * AA.x);
736 gs.gsvertex[last].y += (4 * AA.y);
737
738 // might be bad sprite (KH dialog text)
739 //if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y )
740 //return;
741
742 VertexGPU* p = curvb.pBufferData + curvb.nCount;
743
744 SetKickVertex(&p[0], gs.gsvertex[last], next, curvb);
745 SetKickVertex(&p[3], gs.gsvertex[last], next, curvb);
746 SetKickVertex(&p[1], gs.gsvertex[last], last, curvb);
747 SetKickVertex(&p[4], gs.gsvertex[last], last, curvb);
748 SetKickVertex(&p[2], gs.gsvertex[last], next, curvb);
749
750 p[2].s = p[1].s;
751 p[2].x = p[1].x;
752
753 SetKickVertex(&p[5], gs.gsvertex[last], last, curvb);
754
755 p[5].s = p[0].s;
756 p[5].x = p[0].x;
757
758 curvb.nCount += 6;
759
760 OUTPUT_VERT(p[0], 0);
761 OUTPUT_VERT(p[1], 1);
762 }
763
764 void ZeroGS::KickDummy()
765 {
766 FUNCLOG
767 //ZZLog::Greg_Log("Kicking bad primitive: %.8x\n", *(u32*)prim);
768 }
769
770 void ZeroGS::SetFogColor(u32 fog)
771 {
772 FUNCLOG
773
774 // Always set the fog color, even if it was already set.
775 // if (gs.fogcol != fog)
776 // {
777 gs.fogcol = fog;
778
779 ZeroGS::FlushBoth();
780
781 SetShaderCaller("SetFogColor");
782 float4 v;
783
784 // set it immediately
785 v.SetColor(gs.fogcol);
786 ZZshSetParameter4fv(g_fparamFogColor, v, "g_fParamFogColor");
787
788 // }
789 }
790
791 void ZeroGS::SetFogColor(GIFRegFOGCOL* fog)
792 {
793 FUNCLOG
794
795 SetShaderCaller("SetFogColor");
796 float4 v;
797
798 v.x = fog->FCR / 255.0f;
799 v.y = fog->FCG / 255.0f;
800 v.z = fog->FCB / 255.0f;
801 ZZshSetParameter4fv(g_fparamFogColor, v, "g_fParamFogColor");
802 }
803
804 void ZeroGS::ExtWrite()
805 {
806 FUNCLOG
807 ZZLog::Warn_Log("A hollow voice says 'EXTWRITE'! Nothing happens.");
808
809 // use local DISPFB, EXTDATA, EXTBUF, and PMODE
810 // int bpp, start, end;
811 // tex0Info texframe;
812
813 // bpp = 4;
814 // if( texframe.psm == PSMT16S ) bpp = 3;
815 // else if (PSMT_ISHALF(texframe.psm)) bpp = 2;
816 //
817 // // get the start and end addresses of the buffer
818 // GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
819 }
820
821 ////////////
822 // Caches //
823 ////////////
824
825
826 // case 0: return false;
827 // case 1: break;
828 // case 2: m_CBP[0] = TEX0.CBP; break;
829 // case 3: m_CBP[1] = TEX0.CBP; break;
830 // case 4: if(m_CBP[0] == TEX0.CBP) return false; m_CBP[0] = TEX0.CBP; break;
831 // case 5: if(m_CBP[1] == TEX0.CBP) return false; m_CBP[1] = TEX0.CBP; break;
832 // case 6: ASSERT(0); return false; // ffx2 menu
833 // case 7: ASSERT(0); return false;
834 // default: __assume(0);
835
836 bool IsDirty(u32 highdword, u32 psm, int cld, int cbp)
837 {
838 int cpsm = ZZOglGet_cpsm_TexBits(highdword);
839 int csm = ZZOglGet_csm_TexBits(highdword);
840
841 if (cpsm > 1 || csm)
842 {
843 // Mana Khemia triggers this.
844 //ZZLog::Error_Log("16 bit clut not supported.");
845 return true;
846 }
847
848 int csa = ZZOglGet_csa_TexBits(highdword);
849
850 int entries = PSMT_IS8CLUT(psm) ? 256 : 16;
851
852 u64* src = (u64*)(g_pbyGSMemory + cbp * 256);
853 u64* dst = (u64*)(g_pbyGSClut + 64 * csa);
854
855 bool bRet = false;
856
857 // FIXME code generated by intrinsics is the same as the linux asm.
858 // However there is no "cmp %%esi, 0x90" equivalent in the windows asm !!!
859 // So control flow must be check
860 #define TEST_THIS
861 #ifdef TEST_THIS
862 while(entries != 0) {
863 #ifdef ZEROGS_SSE2
864 // Note: local memory datas are swizzles
865 __m128i src_0 = _mm_load_si128((__m128i*)src); // 9 8 1 0
866 __m128i src_1 = _mm_load_si128((__m128i*)src+1); // 11 10 3 2
867 __m128i src_2 = _mm_load_si128((__m128i*)src+2); // 13 12 5 4
868 __m128i src_3 = _mm_load_si128((__m128i*)src+3); // 15 14 7 6
869
870 __m128i dst_0 = _mm_load_si128((__m128i*)dst);
871 __m128i dst_1 = _mm_load_si128((__m128i*)dst+1);
872 __m128i dst_2 = _mm_load_si128((__m128i*)dst+2);
873 __m128i dst_3 = _mm_load_si128((__m128i*)dst+3);
874
875 __m128i result = _mm_cmpeq_epi32(_mm_unpacklo_epi64(src_0, src_1), dst_0);
876
877 __m128i result_tmp = _mm_cmpeq_epi32(_mm_unpacklo_epi64(src_2, src_3), dst_1);
878 result = _mm_and_si128(result, result_tmp);
879
880 result_tmp = _mm_cmpeq_epi32(_mm_unpackhi_epi64(src_0, src_1), dst_2);
881 result = _mm_and_si128(result, result_tmp);
882
883 result_tmp = _mm_cmpeq_epi32(_mm_unpackhi_epi64(src_2, src_3), dst_3);
884 result = _mm_and_si128(result, result_tmp);
885
886 u32 result_int = _mm_movemask_epi8(result);
887 if (result_int != 0xFFFF) {
888 bRet = true;
889 break;
890 }
891 #else
892 // I see no point to keep an mmx version. SSE2 versions is probably faster.
893 // Keep a slow portable C version for reference/debug
894 // Note: local memory datas are swizzles
895 if (dst[0] != src[0] || dst[1] != src[2] || dst[2] != src[4] || dst[3] != src[6]
896 || dst[4] != src[1] || dst[5] != src[3] || dst[6] != src[5] || dst[7] != src[7]) {
897 bRet = true;
898 break;
899 }
900 #endif
901
902 if (entries & 0x10) {
903 src -= 56; // go back and down one column
904 }
905
906 src += 32; // go to the right block
907
908 if (entries == 0x90) {
909 src += 32; // skip whole block
910 }
911
912 dst += 8;
913 entries -= 16;
914 }
915 #else
916
917 // do a fast test with MMX
918 #ifdef _MSC_VER
919 int storeebx;
920 __asm
921 {
922 mov storeebx, ebx
923 mov edx, dst
924 mov ecx, src
925 mov ebx, entries
926
927 Start:
928 movq mm0, [edx]
929 movq mm1, [edx+8]
930 pcmpeqd mm0, [ecx]
931 pcmpeqd mm1, [ecx+16]
932
933 movq mm2, [edx+16]
934 movq mm3, [edx+24]
935 pcmpeqd mm2, [ecx+32]
936 pcmpeqd mm3, [ecx+48]
937
938 pand mm0, mm1
939 pand mm2, mm3
940 movq mm4, [edx+32]
941 movq mm5, [edx+40]
942 pcmpeqd mm4, [ecx+8]
943 pcmpeqd mm5, [ecx+24]
944
945 pand mm0, mm2
946 pand mm4, mm5
947 movq mm6, [edx+48]
948 movq mm7, [edx+56]
949 pcmpeqd mm6, [ecx+40]
950 pcmpeqd mm7, [ecx+56]
951
952 pand mm0, mm4
953 pand mm6, mm7
954 pand mm0, mm6
955
956 pmovmskb eax, mm0
957 cmp eax, 0xff
958 je Continue
959 mov bRet, 1
960 jmp Return
961
962 Continue:
963 cmp ebx, 16
964 jle Return
965
966 test ebx, 0x10
967 jz AddEcx
968 sub ecx, 448 // go back and down one column,
969
970 AddEcx:
971 add ecx, 256 // go to the right block
972
973
974 jne Continue1
975 add ecx, 256 // skip whole block
976
977 Continue1:
978 add edx, 64
979 sub ebx, 16
980 jmp Start
981
982 Return:
983 emms
984 mov ebx, storeebx
985 }
986
987 #else // linux
988 // do a fast test with MMX
989 __asm__(
990 ".intel_syntax\n"
991 "Start:\n"
992 "movq %%mm0, [%%ecx]\n"
993 "movq %%mm1, [%%ecx+8]\n"
994 "pcmpeqd %%mm0, [%%edx]\n"
995 "pcmpeqd %%mm1, [%%edx+16]\n"
996 "movq %%mm2, [%%ecx+16]\n"
997 "movq %%mm3, [%%ecx+24]\n"
998 "pcmpeqd %%mm2, [%%edx+32]\n"
999 "pcmpeqd %%mm3, [%%edx+48]\n"
1000 "pand %%mm0, %%mm1\n"
1001 "pand %%mm2, %%mm3\n"
1002 "movq %%mm4, [%%ecx+32]\n"
1003 "movq %%mm5, [%%ecx+40]\n"
1004 "pcmpeqd %%mm4, [%%edx+8]\n"
1005 "pcmpeqd %%mm5, [%%edx+24]\n"
1006 "pand %%mm0, %%mm2\n"
1007 "pand %%mm4, %%mm5\n"
1008 "movq %%mm6, [%%ecx+48]\n"
1009 "movq %%mm7, [%%ecx+56]\n"
1010 "pcmpeqd %%mm6, [%%edx+40]\n"
1011 "pcmpeqd %%mm7, [%%edx+56]\n"
1012 "pand %%mm0, %%mm4\n"
1013 "pand %%mm6, %%mm7\n"
1014 "pand %%mm0, %%mm6\n"
1015 "pmovmskb %%eax, %%mm0\n"
1016 "cmp %%eax, 0xff\n"
1017 "je Continue\n"
1018 ".att_syntax\n"
1019 "movb $1, %0\n"
1020 ".intel_syntax\n"
1021 "jmp Return\n"
1022 "Continue:\n"
1023 "cmp %%esi, 16\n"
1024 "jle Return\n"
1025 "test %%esi, 0x10\n"
1026 "jz AddEcx\n"
1027 "sub %%edx, 448\n" // go back and down one column
1028 "AddEcx:\n"
1029 "add %%edx, 256\n" // go to the right block
1030 "cmp %%esi, 0x90\n"
1031 "jne Continue1\n"
1032 "add %%edx, 256\n" // skip whole block
1033 "Continue1:\n"
1034 "add %%ecx, 64\n"
1035 "sub %%esi, 16\n"
1036 "jmp Start\n"
1037 "Return:\n"
1038 "emms\n"
1039
1040 ".att_syntax\n" : "=m"(bRet) : "c"(dst), "d"(src), "S"(entries) : "eax", "memory");
1041
1042 #endif // _WIN32
1043 #endif
1044 return bRet;
1045 }
1046
1047 // cld state:
1048 // 000 - clut data is not loaded; data in the temp buffer is stored
1049 // 001 - clut data is always loaded.
1050 // 010 - clut data is always loaded; cbp0 = cbp.
1051 // 011 - clut data is always loaded; cbp1 = cbp.
1052 // 100 - cbp0 is compared with cbp. if different, clut data is loaded.
1053 // 101 - cbp1 is compared with cbp. if different, clut data is loaded.
1054
1055 // GSdx sets cbp0 & cbp1 when checking for clut changes. ZeroGS sets them in texClutWrite.
1056 bool ZeroGS::CheckChangeInClut(u32 highdword, u32 psm)
1057 {
1058 FUNCLOG
1059 int cld = ZZOglGet_cld_TexBits(highdword);
1060 int cbp = ZZOglGet_cbp_TexBits(highdword);
1061
1062 // processing the CLUT after tex0/2 are written
1063 //ZZLog::Error_Log("high == 0x%x; cld == %d", highdword, cld);
1064
1065 switch (cld)
1066 {
1067 case 0:
1068 return false;
1069
1070 case 1:
1071 break;
1072
1073 case 2:
1074 break;
1075
1076 case 3:
1077 break;
1078
1079 case 4:
1080 if (gs.cbp[0] == cbp) return false;
1081 break;
1082
1083 case 5:
1084 if (gs.cbp[1] == cbp) return false;
1085 break;
1086
1087 //case 4: return gs.cbp[0] != cbp;
1088 //case 5: return gs.cbp[1] != cbp;
1089
1090 // default: load
1091
1092 default:
1093 break;
1094 }
1095
1096 return IsDirty(highdword, psm, cld, cbp);
1097 }
1098
1099 void ZeroGS::texClutWrite(int ctx)
1100 {
1101 FUNCLOG
1102 s_bTexFlush = false;
1103
1104 tex0Info& tex0 = vb[ctx].tex0;
1105
1106 assert(PSMT_ISCLUT(tex0.psm));
1107
1108 // processing the CLUT after tex0/2 are written
1109 switch (tex0.cld)
1110 {
1111 case 0:
1112 return;
1113
1114 case 1:
1115 break; // tex0.cld is usually 1.
1116
1117 case 2:
1118 gs.cbp[0] = tex0.cbp;
1119 break;
1120
1121 case 3:
1122 gs.cbp[1] = tex0.cbp;
1123 break;
1124
1125 case 4:
1126 if (gs.cbp[0] == tex0.cbp) return;
1127 gs.cbp[0] = tex0.cbp;
1128 break;
1129
1130 case 5:
1131 if (gs.cbp[1] == tex0.cbp) return;
1132 gs.cbp[1] = tex0.cbp;
1133 break;
1134
1135 default: //ZZLog::Debug_Log("cld isn't 0-5!");
1136 break;
1137 }
1138
1139 Flush(!ctx);
1140
1141 int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
1142
1143 if (tex0.csm)
1144 {
1145 switch (tex0.cpsm)
1146 {
1147 // 16bit psm
1148 // eggomania uses non16bit textures for csm2
1149
1150 case PSMCT16:
1151 {
1152 u16* src = (u16*)g_pbyGSMemory + tex0.cbp * 128;
1153 u16 *dst = (u16*)(g_pbyGSClut + 32 * (tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0));
1154
1155 for (int i = 0; i < entries; ++i)
1156 {
1157 *dst = src[getPixelAddress16_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
1158 dst += 2;
1159
1160 // check for wrapping
1161
1162 if (((u32)(uptr)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut + 2);
1163 }
1164 break;
1165 }
1166
1167 case PSMCT16S:
1168 {
1169 u16* src = (u16*)g_pbyGSMemory + tex0.cbp * 128;
1170 u16 *dst = (u16*)(g_pbyGSClut + 32 * (tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0));
1171
1172 for (int i = 0; i < entries; ++i)
1173 {
1174 *dst = src[getPixelAddress16S_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
1175 dst += 2;
1176
1177 // check for wrapping
1178
1179 if (((u32)(uptr)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut + 2);
1180 }
1181 break;
1182 }
1183
1184 case PSMCT32:
1185 case PSMCT24:
1186 {
1187 u32* src = (u32*)g_pbyGSMemory + tex0.cbp * 64;
1188 u32 *dst = (u32*)(g_pbyGSClut + 64 * tex0.csa);
1189
1190 // check if address exceeds src
1191
1192 if (src + getPixelAddress32_0(gs.clut.cou + entries - 1, gs.clut.cov, gs.clut.cbw) >= (u32*)g_pbyGSMemory + 0x00100000)
1193 ZZLog::Error_Log("texClutWrite out of bounds.");
1194 else
1195 for (int i = 0; i < entries; ++i)
1196 {
1197 *dst = src[getPixelAddress32_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
1198 dst++;
1199 }
1200 break;
1201 }
1202
1203 default:
1204 {
1205 //ZZLog::Debug_Log("Unknown cpsm: %x (%x).", tex0.cpsm, tex0.psm);
1206 break;
1207 }
1208 }
1209 }
1210 else
1211 {
1212 u32* src = (u32*)(g_pbyGSMemory + 256 * tex0.cbp);
1213
1214 if (entries == 16)
1215 {
1216 switch (tex0.cpsm)
1217 {
1218 case PSMCT24:
1219 case PSMCT32:
1220 WriteCLUT_T32_I4_CSM1(src, (u32*)(g_pbyGSClut + 64 * tex0.csa));
1221 break;
1222
1223 default:
1224 WriteCLUT_T16_I4_CSM1(src, (u32*)(g_pbyGSClut + 32*(tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0)));
1225 break;
1226 }
1227 }
1228 else
1229 {
1230 switch (tex0.cpsm)
1231 {
1232 case PSMCT24:
1233 case PSMCT32:
1234 WriteCLUT_T32_I8_CSM1(src, (u32*)(g_pbyGSClut + 64 * tex0.csa));
1235 break;
1236
1237 default:
1238 // sse2 for 256 is more complicated, so use regular
1239 WriteCLUT_T16_I8_CSM1_c(src, (u32*)(g_pbyGSClut + 32*(tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0)));
1240 break;
1241 }
1242
1243 }
1244 }
1245 }
1246
1247

  ViewVC Help
Powered by ViewVC 1.1.22