/[pcsx2_0.9.7]/trunk/plugins/zzogl-pg/opengl/zerogs.cpp
ViewVC logotype

Contents of /trunk/plugins/zzogl-pg/opengl/zerogs.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 31 - (show annotations) (download)
Tue Sep 7 03:24:11 2010 UTC (9 years, 10 months ago) by william
File size: 29031 byte(s)
committing r3113 initial commit again...
1 /* ZeroGS KOSMOS
2 * Copyright (C) 2005-2006 zerofrog@gmail.com
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18
19 //-------------------------- Includes
20 #if defined(_WIN32)
21 # include <windows.h>
22 //# include <aviUtil.h>
23 # include "resource.h"
24 #endif
25
26 #include <stdio.h>
27
28 #include <malloc.h>
29 #include <assert.h>
30 #include <stdlib.h>
31 #include <string.h>
32
33 #include "GS.h"
34 #include "Mem.h"
35 #include "x86.h"
36 #include "zerogs.h"
37 #include "zpipe.h"
38
39 #include "ZeroGSShaders/zerogsshaders.h"
40 #include "targets.h"
41
42 //----------------------- Defines
43
44 //-------------------------- Typedefs
45 typedef void (APIENTRYP _PFNSWAPINTERVAL)(int);
46
47 //-------------------------- Extern variables
48
49 using namespace ZeroGS;
50
51 extern u32 g_nGenVars, g_nTexVars, g_nAlphaVars, g_nResolve;
52 extern char *libraryName;
53 extern int g_nFrame, g_nRealFrame;
54
55 //-------------------------- Variables
56
57 #ifdef _WIN32
58 HDC hDC = NULL; // Private GDI Device Context
59 HGLRC hRC = NULL; // Permanent Rendering Context
60 #endif
61
62 bool g_bIsLost = 0; // ZZ
63
64 bool g_bMakeSnapshot = 0;
65 string strSnapshot;
66
67 CGprogram g_vsprog = 0, g_psprog = 0; // 2 -- ZZ
68 // AVI Capture
69 int s_avicapturing = 0;
70
71 inline u32 FtoDW(float f) { return (*((u32*)&f)); }
72
73 int g_nDepthUpdateCount = 0;
74
75 // Consts
76 const GLenum primtype[8] = { GL_POINTS, GL_LINES, GL_LINES, GL_TRIANGLES, GL_TRIANGLES, GL_TRIANGLES, GL_TRIANGLES, 0xffffffff };
77 static const int PRIMMASK = 0x0e; // for now ignore 0x10 (AA)
78
79 PFNGLISRENDERBUFFEREXTPROC glIsRenderbufferEXT = NULL;
80 PFNGLBINDRENDERBUFFEREXTPROC glBindRenderbufferEXT = NULL;
81 PFNGLDELETERENDERBUFFERSEXTPROC glDeleteRenderbuffersEXT = NULL;
82 PFNGLGENRENDERBUFFERSEXTPROC glGenRenderbuffersEXT = NULL;
83 PFNGLRENDERBUFFERSTORAGEEXTPROC glRenderbufferStorageEXT = NULL;
84 PFNGLGETRENDERBUFFERPARAMETERIVEXTPROC glGetRenderbufferParameterivEXT = NULL;
85 PFNGLISFRAMEBUFFEREXTPROC glIsFramebufferEXT = NULL;
86 PFNGLBINDFRAMEBUFFEREXTPROC glBindFramebufferEXT = NULL;
87 PFNGLDELETEFRAMEBUFFERSEXTPROC glDeleteFramebuffersEXT = NULL;
88 PFNGLGENFRAMEBUFFERSEXTPROC glGenFramebuffersEXT = NULL;
89 PFNGLCHECKFRAMEBUFFERSTATUSEXTPROC glCheckFramebufferStatusEXT = NULL;
90 PFNGLFRAMEBUFFERTEXTURE1DEXTPROC glFramebufferTexture1DEXT = NULL;
91 PFNGLFRAMEBUFFERTEXTURE2DEXTPROC glFramebufferTexture2DEXT = NULL;
92 PFNGLFRAMEBUFFERTEXTURE3DEXTPROC glFramebufferTexture3DEXT = NULL;
93 PFNGLFRAMEBUFFERRENDERBUFFEREXTPROC glFramebufferRenderbufferEXT = NULL;
94 PFNGLGETFRAMEBUFFERATTACHMENTPARAMETERIVEXTPROC glGetFramebufferAttachmentParameterivEXT = NULL;
95 PFNGLGENERATEMIPMAPEXTPROC glGenerateMipmapEXT = NULL;
96 PFNGLDRAWBUFFERSPROC glDrawBuffers = NULL;
97
98 /////////////////////
99 // graphics resources
100 CGparameter g_vparamPosXY[2] = {0}, g_fparamFogColor = 0;
101
102 map<int, SHADERHEADER*> mapShaderResources;
103
104 bool s_bTexFlush = false;
105 int s_nLastResolveReset = 0;
106 int s_nWireframeCount = 0;
107 int s_nResolveCounts[30] = {0}; // resolve counts for last 30 frames
108
109 ////////////////////
110 // State parameters
111 CGcontext g_cgcontext;
112 int nBackbufferWidth, nBackbufferHeight;
113
114 u8* g_pbyGSMemory = NULL; // 4Mb GS system mem
115 u8* g_pbyGSClut = NULL; // ZZ
116
117 namespace ZeroGS
118 {
119 Vector g_vdepth, vlogz;
120
121 // = Vector( 255.0 /256.0f, 255.0/65536.0f, 255.0f/(65535.0f*256.0f), 1.0f/(65536.0f*65536.0f));
122 // Vector g_vdepth = Vector( 65536.0f*65536.0f, 256.0f*65536.0f, 65536.0f, 256.0f);
123
124 extern CRangeManager s_RangeMngr; // manages overwritten memory
125 GLenum GetRenderTargetFormat() { return GetRenderFormat() == RFT_byte8 ? 4 : g_internalRGBAFloat16Fmt; }
126
127 // returns the first and last addresses aligned to a page that cover
128 void GetRectMemAddress(int& start, int& end, int psm, int x, int y, int w, int h, int bp, int bw);
129
130 // bool LoadEffects();
131 // bool LoadExtraEffects();
132 // FRAGMENTSHADER* LoadShadeEffect(int type, int texfilter, int fog, int testaem, int exactcolor, const clampInfo& clamp, int context, bool* pbFailed);
133
134 int s_nNewWidth = -1, s_nNewHeight = -1;
135 void ChangeDeviceSize(int nNewWidth, int nNewHeight);
136
137 void ProcessMessages();
138 void RenderCustom(float fAlpha); // intro anim
139
140 struct MESSAGE
141 {
142 MESSAGE() {}
143
144 MESSAGE(const char* p, u32 dw) { strcpy(str, p); dwTimeStamp = dw; }
145
146 char str[255];
147 u32 dwTimeStamp;
148 };
149
150 static list<MESSAGE> listMsgs;
151
152 ///////////////////////
153 // Method Prototypes //
154 ///////////////////////
155
156 void KickPoint();
157 void KickLine();
158 void KickTriangle();
159 void KickTriangleFan();
160 void KickSprite();
161 void KickDummy();
162
163 void ResolveInRange(int start, int end);
164
165 void ExtWrite();
166
167 void ResetRenderTarget(int index)
168 {
169 glFramebufferTexture2DEXT(GL_FRAMEBUFFER_EXT, GL_COLOR_ATTACHMENT0_EXT + index, GL_TEXTURE_RECTANGLE_NV, 0, 0);
170 }
171
172 DrawFn drawfn[8] = { KickDummy, KickDummy, KickDummy, KickDummy,
173 KickDummy, KickDummy, KickDummy, KickDummy
174 };
175
176 }; // end namespace
177
178 // does one time only initializing/destruction
179
180 class ZeroGSInit
181 {
182
183 public:
184 ZeroGSInit()
185 {
186 const u32 mem_size = 0x00400000 + 0x10000; // leave some room for out of range accesses (saves on the checks)
187 // clear
188 g_pbyGSMemory = (u8*)_aligned_malloc(mem_size, 1024);
189 memset(g_pbyGSMemory, 0, mem_size);
190
191 g_pbyGSClut = (u8*)_aligned_malloc(256 * 8, 1024); // need 512 alignment!
192 memset(g_pbyGSClut, 0, 256*8);
193 memset(&GLWin, 0, sizeof(GLWin));
194 }
195
196 ~ZeroGSInit()
197 {
198 _aligned_free(g_pbyGSMemory);
199 g_pbyGSMemory = NULL;
200
201 _aligned_free(g_pbyGSClut);
202 g_pbyGSClut = NULL;
203 }
204 };
205
206 static ZeroGSInit s_ZeroGSInit;
207
208 #ifndef GL_FRAMEBUFFER_INCOMPLETE_DUPLICATE_ATTACHMENT_EXT
209 #define GL_FRAMEBUFFER_INCOMPLETE_DUPLICATE_ATTACHMENT_EXT 0x8CD8
210 #endif
211
212 void ZeroGS::HandleGLError()
213 {
214 FUNCLOG
215 // check the error status of this framebuffer */
216 GLenum error = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
217
218 // if error != GL_FRAMEBUFFER_COMPLETE_EXT, there's an error of some sort
219
220 if (error != 0)
221 {
222 int w, h;
223 GLint fmt;
224 glGetRenderbufferParameterivEXT(GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_INTERNAL_FORMAT_EXT, &fmt);
225 glGetRenderbufferParameterivEXT(GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_WIDTH_EXT, &w);
226 glGetRenderbufferParameterivEXT(GL_COLOR_ATTACHMENT0_EXT, GL_RENDERBUFFER_HEIGHT_EXT, &h);
227
228 switch (error)
229 {
230 case GL_FRAMEBUFFER_COMPLETE_EXT:
231 break;
232
233 case GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT_EXT:
234 ZZLog::Error_Log("Error! missing a required image/buffer attachment!");
235 break;
236
237 case GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT:
238 ZZLog::Error_Log("Error! has no images/buffers attached!");
239 break;
240
241 // case GL_FRAMEBUFFER_INCOMPLETE_DUPLICATE_ATTACHMENT_EXT:
242 // ZZLog::Error_Log("Error! has an image/buffer attached in multiple locations!");
243 // break;
244
245 case GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS_EXT:
246 ZZLog::Error_Log("Error! has mismatched image/buffer dimensions!");
247 break;
248
249 case GL_FRAMEBUFFER_INCOMPLETE_FORMATS_EXT:
250 ZZLog::Error_Log("Error! colorbuffer attachments have different types!");
251 break;
252
253 case GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER_EXT:
254 ZZLog::Error_Log("Error! trying to draw to non-attached color buffer!");
255 break;
256
257 case GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER_EXT:
258 ZZLog::Error_Log("Error! trying to read from a non-attached color buffer!");
259 break;
260
261 case GL_FRAMEBUFFER_UNSUPPORTED_EXT:
262 ZZLog::Error_Log("Error! format is not supported by current graphics card/driver!");
263 break;
264
265 default:
266 ZZLog::Error_Log("*UNKNOWN ERROR* reported from glCheckFramebufferStatusEXT() for %s!");
267 break;
268 }
269 }
270 }
271
272
273 void ZeroGS::GSStateReset()
274 {
275 FUNCLOG
276 icurctx = -1;
277
278 for (int i = 0; i < 2; ++i)
279 {
280 vb[i].Destroy();
281 memset(&vb[i], 0, sizeof(ZeroGS::VB));
282
283 vb[i].tex0.tw = 1;
284 vb[i].tex0.th = 1;
285 vb[i].scissor.x1 = 639;
286 vb[i].scissor.y1 = 479;
287 vb[i].tex0.tbw = 64;
288 vb[i].Init(VB_BUFFERSIZE);
289 }
290
291 s_RangeMngr.Clear();
292
293 g_MemTargs.Destroy();
294 s_RTs.Destroy();
295 s_DepthRTs.Destroy();
296 s_BitwiseTextures.Destroy();
297
298 vb[0].ictx = 0;
299 vb[1].ictx = 1;
300 }
301
302 void ZeroGS::AddMessage(const char* pstr, u32 ms)
303 {
304 FUNCLOG
305 listMsgs.push_back(MESSAGE(pstr, timeGetTime() + ms));
306 ZZLog::Log("%s\n", pstr);
307 }
308
309 void ZeroGS::DrawText(const char* pstr, int left, int top, u32 color)
310 {
311 FUNCLOG
312 cgGLDisableProfile(cgvProf);
313 cgGLDisableProfile(cgfProf);
314
315 Vector v;
316 v.SetColor(color);
317 glColor3f(v.z, v.y, v.x);
318 //glColor3f(((color >> 16) & 0xff) / 255.0f, ((color >> 8) & 0xff)/ 255.0f, (color & 0xff) / 255.0f);
319
320 font_p->printString(pstr, left * 2.0f / (float)nBackbufferWidth - 1, 1 - top * 2.0f / (float)nBackbufferHeight, 0);
321 cgGLEnableProfile(cgvProf);
322 cgGLEnableProfile(cgfProf);
323 }
324
325 void ZeroGS::ChangeWindowSize(int nNewWidth, int nNewHeight)
326 {
327 FUNCLOG
328 nBackbufferWidth = max(nNewWidth, 16);
329 nBackbufferHeight = max(nNewHeight, 16);
330
331 if (!(conf.options & GSOPTION_FULLSCREEN))
332 {
333 conf.width = nNewWidth;
334 conf.height = nNewHeight;
335 //SaveConfig();
336 }
337 }
338
339 void ZeroGS::SetChangeDeviceSize(int nNewWidth, int nNewHeight)
340 {
341 FUNCLOG
342 s_nNewWidth = nNewWidth;
343 s_nNewHeight = nNewHeight;
344
345 if (!(conf.options & GSOPTION_FULLSCREEN))
346 {
347 conf.width = nNewWidth;
348 conf.height = nNewHeight;
349 //SaveConfig();
350 }
351 }
352
353 void ZeroGS::Reset()
354 {
355 FUNCLOG
356 s_RTs.ResolveAll();
357 s_DepthRTs.ResolveAll();
358
359 vb[0].nCount = 0;
360 vb[1].nCount = 0;
361
362 memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts));
363 s_nLastResolveReset = 0;
364
365 icurctx = -1;
366 g_vsprog = g_psprog = 0;
367
368 GSStateReset();
369 Destroy(0);
370
371 drawfn[0] = KickDummy;
372 drawfn[1] = KickDummy;
373 drawfn[2] = KickDummy;
374 drawfn[3] = KickDummy;
375 drawfn[4] = KickDummy;
376 drawfn[5] = KickDummy;
377 drawfn[6] = KickDummy;
378 drawfn[7] = KickDummy;
379 }
380
381 void ZeroGS::ChangeDeviceSize(int nNewWidth, int nNewHeight)
382 {
383 FUNCLOG
384 //int oldscreen = s_nFullscreen;
385
386 int oldwidth = nBackbufferWidth, oldheight = nBackbufferHeight;
387
388 if (!Create(nNewWidth&~7, nNewHeight&~7))
389 {
390 ZZLog::Error_Log("Failed to recreate, changing to old device.");
391
392 if (Create(oldwidth, oldheight))
393 {
394 SysMessage("Failed to create device, exiting...");
395 exit(0);
396 }
397 }
398
399 for (int i = 0; i < 2; ++i)
400 {
401 vb[i].bNeedFrameCheck = vb[i].bNeedZCheck = 1;
402 vb[i].CheckFrame(0);
403 }
404
405 assert(vb[0].pBufferData != NULL && vb[1].pBufferData != NULL);
406 }
407
408
409 void ZeroGS::SetNegAA(int mode)
410 {
411 FUNCLOG
412 // need to flush all targets
413 s_RTs.ResolveAll();
414 s_RTs.Destroy();
415 s_DepthRTs.ResolveAll();
416 s_DepthRTs.Destroy();
417
418 s_AAz = s_AAw = 0; // This is code for x0, x2, x4, x8 and x16 anti-aliasing.
419
420 if (mode > 0)
421 {
422 s_AAz = (mode + 1) / 2; // ( 1, 0 ) ; ( 1, 1 ) -- it's used as binary shift, so x << s_AAz, y << s_AAw
423 s_AAw = mode / 2;
424 }
425
426 memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts));
427
428 s_nLastResolveReset = 0;
429
430 vb[0].prndr = NULL;
431 vb[0].pdepth = NULL;
432 vb[0].bNeedFrameCheck = 1;
433 vb[0].bNeedZCheck = 1;
434 vb[1].prndr = NULL;
435 vb[1].pdepth = NULL;
436 vb[1].bNeedFrameCheck = 1;
437 vb[1].bNeedZCheck = 1;
438 }
439
440 void ZeroGS::SetAA(int mode)
441 {
442 FUNCLOG
443 float f;
444
445 // need to flush all targets
446 s_RTs.ResolveAll();
447 s_RTs.Destroy();
448 s_DepthRTs.ResolveAll();
449 s_DepthRTs.Destroy();
450
451 s_AAx = s_AAy = 0; // This is code for x0, x2, x4, x8 and x16 anti-aliasing.
452
453 if (mode > 0)
454 {
455 s_AAx = (mode + 1) / 2; // ( 1, 0 ) ; ( 1, 1 ) ; ( 2, 1 ) ; ( 2, 2 ) -- it's used as binary shift, so x >> s_AAx, y >> s_AAy
456 s_AAy = mode / 2;
457 }
458
459 memset(s_nResolveCounts, 0, sizeof(s_nResolveCounts));
460
461 s_nLastResolveReset = 0;
462
463 vb[0].prndr = NULL;
464 vb[0].pdepth = NULL;
465 vb[0].bNeedFrameCheck = 1;
466 vb[0].bNeedZCheck = 1;
467 vb[1].prndr = NULL;
468 vb[1].pdepth = NULL;
469 vb[1].bNeedFrameCheck = 1;
470 vb[1].bNeedZCheck = 1;
471
472 f = mode > 0 ? 2.0f : 1.0f;
473 glPointSize(f);
474 }
475
476 void ZeroGS::Prim()
477 {
478 FUNCLOG
479
480 if (g_bIsLost) return;
481
482 VB& curvb = vb[prim->ctxt];
483
484 if (curvb.CheckPrim()) Flush(prim->ctxt);
485
486 curvb.curprim._val = prim->_val;
487
488 // flush the other pipe if sharing the same buffer
489 // if( vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp && vb[!prim->ctxt].nCount > 0 )
490 // {
491 // assert( vb[prim->ctxt].nCount == 0 );
492 // Flush(!prim->ctxt);
493 // }
494
495 curvb.curprim.prim = prim->prim;
496 }
497
498 void ZeroGS::ProcessMessages()
499 {
500 FUNCLOG
501
502 if (listMsgs.size() > 0)
503 {
504 int left = 25, top = 15;
505 list<MESSAGE>::iterator it = listMsgs.begin();
506
507 while (it != listMsgs.end())
508 {
509 DrawText(it->str, left + 1, top + 1, 0xff000000);
510 DrawText(it->str, left, top, 0xffffff30);
511 top += 15;
512
513 if ((int)(it->dwTimeStamp - timeGetTime()) < 0)
514 it = listMsgs.erase(it);
515 else ++it;
516 }
517 }
518 }
519
520 void ZeroGS::RenderCustom(float fAlpha)
521 {
522 FUNCLOG
523 GL_REPORT_ERROR();
524
525 fAlpha = 1;
526 glBindFramebufferEXT(GL_FRAMEBUFFER_EXT, 0); // switch to the backbuffer
527
528 DisableAllgl() ;
529 SetShaderCaller("RenderCustom");
530
531 glViewport(0, 0, nBackbufferWidth, nBackbufferHeight);
532
533 // play custom animation
534 glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT);
535
536 // tex coords
537 Vector v = Vector(1 / 32767.0f, 1 / 32767.0f, 0, 0);
538 ZZcgSetParameter4fv(pvsBitBlt.sBitBltPos, v, "g_fBitBltPos");
539 v.x = (float)nLogoWidth;
540 v.y = (float)nLogoHeight;
541 ZZcgSetParameter4fv(pvsBitBlt.sBitBltTex, v, "g_fBitBltTex");
542
543 v.x = v.y = v.z = v.w = fAlpha;
544 ZZcgSetParameter4fv(ppsBaseTexture.sOneColor, v, "g_fOneColor");
545
546 if (conf.options & GSOPTION_WIREFRAME) glPolygonMode(GL_FRONT_AND_BACK, GL_FILL);
547
548 // inside vhDCb[0]'s target area, so render that region only
549 cgGLSetTextureParameter(ppsBaseTexture.sFinal, ptexLogo);
550 cgGLEnableTextureParameter(ppsBaseTexture.sFinal);
551 glBindBuffer(GL_ARRAY_BUFFER, vboRect);
552
553 SET_STREAM();
554
555 SETVERTEXSHADER(pvsBitBlt.prog);
556 SETPIXELSHADER(ppsBaseTexture.prog);
557
558 glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
559
560 // restore
561 if (conf.options & GSOPTION_WIREFRAME) glPolygonMode(GL_FRONT_AND_BACK, GL_LINE);
562
563 ProcessMessages();
564
565 GLWin.SwapGLBuffers();
566
567 glEnable(GL_SCISSOR_TEST);
568 glEnable(GL_STENCIL_TEST);
569
570 vb[0].bSyncVars = 0;
571 vb[1].bSyncVars = 0;
572
573 GL_REPORT_ERROR();
574
575 GLint status = glCheckFramebufferStatusEXT(GL_FRAMEBUFFER_EXT);
576
577 assert(status == GL_FRAMEBUFFER_COMPLETE_EXT || status == GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT_EXT);
578 }
579
580 void ZeroGS::Restore()
581 {
582 FUNCLOG
583
584 if (!g_bIsLost) return;
585
586 //if( SUCCEEDED(pd3dDevice->Reset(&d3dpp)) ) {
587 g_bIsLost = 0;
588
589 // handle lost states
590 ZeroGS::ChangeDeviceSize(nBackbufferWidth, nBackbufferHeight);
591
592 //}
593 }
594
595 //////////////////////////
596 // Internal Definitions //
597 //////////////////////////
598
599
600 __forceinline void MOVZ(VertexGPU *p, u32 gsz, const VB& curvb)
601 {
602 p->z = (curvb.zprimmask == 0xffff) ? min((u32)0xffff, gsz) : gsz;
603 }
604
605 __forceinline void MOVFOG(VertexGPU *p, Vertex gsf)
606 {
607 p->f = ((s16)(gsf).f << 7) | 0x7f;
608 }
609
610
611 int Values[100] = {0, };
612 __forceinline void SET_VERTEX(VertexGPU *p, int Index, const VB& curvb)
613 {
614 int index = Index;
615 p->x = ((((int)gs.gsvertex[index].x - curvb.offset.x) >> 1) & 0xffff);
616 p->y = ((((int)gs.gsvertex[index].y - curvb.offset.y) >> 1) & 0xffff);
617
618 #ifdef LSD_MODE
619 int diffX = (int)gs.gsvertex[index].x - curvb.offset.x;
620 int diffY = (int)gs.gsvertex[index].y - curvb.offset.y;
621
622 if (diffX < 0) { p->x = - p->x; }
623 if (diffY < 0) { p->y = - p->y; }
624 #endif
625
626
627 p->f = ((s16)gs.gsvertex[index].f << 7) | 0x7f;
628
629 MOVZ(p, gs.gsvertex[index].z, curvb);
630
631 p->rgba = prim->iip ? gs.gsvertex[index].rgba : gs.rgba;
632
633 // This code is somehow incorrect
634 // if ((gs.texa.aem) && ((p->rgba & 0xffffff ) == 0))
635 // p->rgba = 0;
636
637 if (g_GameSettings & GAME_TEXAHACK)
638 {
639 u32 B = ((p->rgba & 0xfe000000) >> 1) +
640 (0x01000000 * curvb.fba.fba) ;
641 p->rgba = (p->rgba & 0xffffff) + B;
642 }
643
644 if (prim->tme)
645 {
646 if (prim->fst)
647 {
648 p->s = (float)gs.gsvertex[index].u * fiTexWidth[prim->ctxt];
649 p->t = (float)gs.gsvertex[index].v * fiTexHeight[prim->ctxt];
650 p->q = 1;
651 }
652 else
653 {
654 p->s = gs.gsvertex[index].s;
655 p->t = gs.gsvertex[index].t;
656 p->q = gs.gsvertex[index].q;
657 }
658 }
659 }
660
661 static __forceinline void OUTPUT_VERT(VertexGPU vert, u32 id)
662 {
663 #ifdef WRITE_PRIM_LOGS
664 ZZLog::Prim_Log("%c%d(%d): xyzf=(%4d,%4d,0x%x,%3d), rgba=0x%8.8x, stq = (%2.5f,%2.5f,%2.5f)\n",
665 id == 0 ? '*' : ' ', id, prim->prim, vert.x / 8, vert.y / 8, vert.z, vert.f / 128,
666 vert.rgba, Clamp(vert.s, -10, 10), Clamp(vert.t, -10, 10), Clamp(vert.q, -10, 10));
667 #endif
668 }
669
670 void ZeroGS::KickPoint()
671 {
672 FUNCLOG
673 assert(gs.primC >= 1);
674
675 VB& curvb = vb[prim->ctxt];
676
677 if (curvb.bNeedTexCheck) curvb.FlushTexData();
678
679 if ((vb[!prim->ctxt].nCount > 0) && (vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp))
680 {
681 assert(vb[prim->ctxt].nCount == 0);
682 Flush(!prim->ctxt);
683 }
684
685 curvb.NotifyWrite(1);
686
687 int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex);
688
689 VertexGPU* p = curvb.pBufferData + curvb.nCount;
690 SET_VERTEX(&p[0], last, curvb);
691 curvb.nCount++;
692
693 OUTPUT_VERT(p[0], 0);
694 }
695
696 void ZeroGS::KickLine()
697 {
698 FUNCLOG
699 assert(gs.primC >= 2);
700 VB& curvb = vb[prim->ctxt];
701
702 if (curvb.bNeedTexCheck) curvb.FlushTexData();
703
704 if ((vb[!prim->ctxt].nCount > 0) && (vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp))
705 {
706 assert(vb[prim->ctxt].nCount == 0);
707 Flush(!prim->ctxt);
708 }
709
710 curvb.NotifyWrite(2);
711
712 int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
713 int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex);
714
715 VertexGPU* p = curvb.pBufferData + curvb.nCount;
716 SET_VERTEX(&p[0], next, curvb);
717 SET_VERTEX(&p[1], last, curvb);
718
719 curvb.nCount += 2;
720
721 OUTPUT_VERT(p[0], 0);
722 OUTPUT_VERT(p[1], 1);
723 }
724
725 void ZeroGS::KickTriangle()
726 {
727 FUNCLOG
728 assert(gs.primC >= 3);
729 VB& curvb = vb[prim->ctxt];
730
731 if (curvb.bNeedTexCheck) curvb.FlushTexData();
732
733 if ((vb[!prim->ctxt].nCount > 0) && (vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp))
734 {
735 assert(vb[prim->ctxt].nCount == 0);
736 Flush(!prim->ctxt);
737 }
738
739 curvb.NotifyWrite(3);
740
741 VertexGPU* p = curvb.pBufferData + curvb.nCount;
742 SET_VERTEX(&p[0], 0, curvb);
743 SET_VERTEX(&p[1], 1, curvb);
744 SET_VERTEX(&p[2], 2, curvb);
745
746 curvb.nCount += 3;
747
748 OUTPUT_VERT(p[0], 0);
749 OUTPUT_VERT(p[1], 1);
750 OUTPUT_VERT(p[2], 2);
751 }
752
753 void ZeroGS::KickTriangleFan()
754 {
755 FUNCLOG
756 assert(gs.primC >= 3);
757 VB& curvb = vb[prim->ctxt];
758
759 if (curvb.bNeedTexCheck) curvb.FlushTexData();
760
761 if ((vb[!prim->ctxt].nCount > 0) && (vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp))
762 {
763 assert(vb[prim->ctxt].nCount == 0);
764 Flush(!prim->ctxt);
765 }
766
767 curvb.NotifyWrite(3);
768
769 VertexGPU* p = curvb.pBufferData + curvb.nCount;
770 SET_VERTEX(&p[0], 0, curvb);
771 SET_VERTEX(&p[1], 1, curvb);
772 SET_VERTEX(&p[2], 2, curvb);
773
774 curvb.nCount += 3;
775
776 // add 1 to skip the first vertex
777
778 if (gs.primIndex == gs.nTriFanVert) gs.primIndex = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
779
780 OUTPUT_VERT(p[0], 0);
781 OUTPUT_VERT(p[1], 1);
782 OUTPUT_VERT(p[2], 2);
783 }
784
785 __forceinline void SetKickVertex(VertexGPU *p, Vertex v, int next, const VB& curvb)
786 {
787 SET_VERTEX(p, next, curvb);
788 MOVZ(p, v.z, curvb);
789 MOVFOG(p, v);
790 }
791
792 void ZeroGS::KickSprite()
793 {
794 FUNCLOG
795 assert(gs.primC >= 2);
796 VB& curvb = vb[prim->ctxt];
797
798 if (curvb.bNeedTexCheck) curvb.FlushTexData();
799
800 if ((vb[!prim->ctxt].nCount > 0) && (vb[prim->ctxt].gsfb.fbp == vb[!prim->ctxt].gsfb.fbp))
801 {
802 assert(vb[prim->ctxt].nCount == 0);
803 Flush(!prim->ctxt);
804 }
805
806 curvb.NotifyWrite(6);
807
808 int next = (gs.primIndex + 1) % ARRAY_SIZE(gs.gsvertex);
809 int last = (gs.primIndex + 2) % ARRAY_SIZE(gs.gsvertex);
810
811 // sprite is too small and AA shows lines (tek4)
812
813 if (s_AAx)
814 {
815 gs.gsvertex[last].x += 4;
816
817 if (s_AAy) gs.gsvertex[last].y += 4;
818 }
819
820 // might be bad sprite (KH dialog text)
821 //if( gs.gsvertex[next].x == gs.gsvertex[last].x || gs.gsvertex[next].y == gs.gsvertex[last].y )
822 //return;
823
824 VertexGPU* p = curvb.pBufferData + curvb.nCount;
825
826 SetKickVertex(&p[0], gs.gsvertex[last], next, curvb);
827 SetKickVertex(&p[3], gs.gsvertex[last], next, curvb);
828 SetKickVertex(&p[1], gs.gsvertex[last], last, curvb);
829 SetKickVertex(&p[4], gs.gsvertex[last], last, curvb);
830 SetKickVertex(&p[2], gs.gsvertex[last], next, curvb);
831
832 p[2].s = p[1].s;
833 p[2].x = p[1].x;
834
835 SetKickVertex(&p[5], gs.gsvertex[last], last, curvb);
836
837 p[5].s = p[0].s;
838 p[5].x = p[0].x;
839
840 curvb.nCount += 6;
841
842 OUTPUT_VERT(p[0], 0);
843 OUTPUT_VERT(p[1], 1);
844 }
845
846 void ZeroGS::KickDummy()
847 {
848 FUNCLOG
849 //ZZLog::Greg_Log("Kicking bad primitive: %.8x\n", *(u32*)prim);
850 }
851
852 void ZeroGS::SetFogColor(u32 fog)
853 {
854 FUNCLOG
855
856 // Always set the fog color, even if it was already set.
857 // if (gs.fogcol != fog)
858 // {
859 gs.fogcol = fog;
860
861 ZeroGS::Flush(0);
862 ZeroGS::Flush(1);
863
864 if (!g_bIsLost)
865 {
866 SetShaderCaller("SetFogColor");
867 Vector v;
868
869 // set it immediately
870 // v.x = (gs.fogcol & 0xff) / 255.0f;
871 // v.y = ((gs.fogcol >> 8) & 0xff) / 255.0f;
872 // v.z = ((gs.fogcol >> 16) & 0xff) / 255.0f;
873 v.SetColor(gs.fogcol);
874 ZZcgSetParameter4fv(g_fparamFogColor, v, "g_fParamFogColor");
875 }
876
877 // }
878 }
879
880 void ZeroGS::ExtWrite()
881 {
882 FUNCLOG
883 ZZLog::Warn_Log("A hollow voice says 'EXTWRITE'! Nothing happens.");
884
885 // use local DISPFB, EXTDATA, EXTBUF, and PMODE
886 // int bpp, start, end;
887 // tex0Info texframe;
888
889 // bpp = 4;
890 // if( texframe.psm == PSMT16S ) bpp = 3;
891 // else if (PSMT_ISHALF(texframe.psm)) bpp = 2;
892 //
893 // // get the start and end addresses of the buffer
894 // GetRectMemAddress(start, end, texframe.psm, 0, 0, texframe.tw, texframe.th, texframe.tbp0, texframe.tbw);
895 }
896
897 ////////////
898 // Caches //
899 ////////////
900
901
902 // case 0: return false;
903 // case 1: break;
904 // case 2: m_CBP[0] = TEX0.CBP; break;
905 // case 3: m_CBP[1] = TEX0.CBP; break;
906 // case 4: if(m_CBP[0] == TEX0.CBP) return false; m_CBP[0] = TEX0.CBP; break;
907 // case 5: if(m_CBP[1] == TEX0.CBP) return false; m_CBP[1] = TEX0.CBP; break;
908 // case 6: ASSERT(0); return false; // ffx2 menu
909 // case 7: ASSERT(0); return false;
910 // default: __assume(0);
911
912 bool IsDirty(u32 highdword, u32 psm, int cld, int cbp)
913 {
914 int cpsm = ZZOglGet_cpsm_TexBits(highdword);
915 int csm = ZZOglGet_csm_TexBits(highdword);
916
917 if (cpsm > 1 || csm)
918 {
919 // Mana Khemia triggers this.
920 //ZZLog::Error_Log("16 bit clut not supported.");
921 return true;
922 }
923
924 int csa = ZZOglGet_csa_TexBits(highdword);
925
926 int entries = PSMT_IS8CLUT(psm) ? 256 : 16;
927
928 u64* src = (u64*)(g_pbyGSMemory + cbp * 256);
929 u64* dst = (u64*)(g_pbyGSClut + 64 * csa);
930
931 bool bRet = false;
932
933 // do a fast test with MMX
934 #ifdef _MSC_VER
935 int storeebx;
936 __asm
937 {
938 mov storeebx, ebx
939 mov edx, dst
940 mov ecx, src
941 mov ebx, entries
942
943 Start:
944 movq mm0, [edx]
945 movq mm1, [edx+8]
946 pcmpeqd mm0, [ecx]
947 pcmpeqd mm1, [ecx+16]
948
949 movq mm2, [edx+16]
950 movq mm3, [edx+24]
951 pcmpeqd mm2, [ecx+32]
952 pcmpeqd mm3, [ecx+48]
953
954 pand mm0, mm1
955 pand mm2, mm3
956 movq mm4, [edx+32]
957 movq mm5, [edx+40]
958 pcmpeqd mm4, [ecx+8]
959 pcmpeqd mm5, [ecx+24]
960
961 pand mm0, mm2
962 pand mm4, mm5
963 movq mm6, [edx+48]
964 movq mm7, [edx+56]
965 pcmpeqd mm6, [ecx+40]
966 pcmpeqd mm7, [ecx+56]
967
968 pand mm0, mm4
969 pand mm6, mm7
970 pand mm0, mm6
971
972 pmovmskb eax, mm0
973 cmp eax, 0xff
974 je Continue
975 mov bRet, 1
976 jmp Return
977
978 Continue:
979 cmp ebx, 16
980 jle Return
981
982 test ebx, 0x10
983 jz AddEcx
984 sub ecx, 448 // go back and down one column,
985
986 AddEcx:
987 add ecx, 256 // go to the right block
988
989
990 jne Continue1
991 add ecx, 256 // skip whole block
992
993 Continue1:
994 add edx, 64
995 sub ebx, 16
996 jmp Start
997
998 Return:
999 emms
1000 mov ebx, storeebx
1001 }
1002
1003 #else // linux
1004 // do a fast test with MMX
1005 __asm__(
1006 ".intel_syntax\n"
1007 "Start:\n"
1008 "movq %%mm0, [%%ecx]\n"
1009 "movq %%mm1, [%%ecx+8]\n"
1010 "pcmpeqd %%mm0, [%%edx]\n"
1011 "pcmpeqd %%mm1, [%%edx+16]\n"
1012 "movq %%mm2, [%%ecx+16]\n"
1013 "movq %%mm3, [%%ecx+24]\n"
1014 "pcmpeqd %%mm2, [%%edx+32]\n"
1015 "pcmpeqd %%mm3, [%%edx+48]\n"
1016 "pand %%mm0, %%mm1\n"
1017 "pand %%mm2, %%mm3\n"
1018 "movq %%mm4, [%%ecx+32]\n"
1019 "movq %%mm5, [%%ecx+40]\n"
1020 "pcmpeqd %%mm4, [%%edx+8]\n"
1021 "pcmpeqd %%mm5, [%%edx+24]\n"
1022 "pand %%mm0, %%mm2\n"
1023 "pand %%mm4, %%mm5\n"
1024 "movq %%mm6, [%%ecx+48]\n"
1025 "movq %%mm7, [%%ecx+56]\n"
1026 "pcmpeqd %%mm6, [%%edx+40]\n"
1027 "pcmpeqd %%mm7, [%%edx+56]\n"
1028 "pand %%mm0, %%mm4\n"
1029 "pand %%mm6, %%mm7\n"
1030 "pand %%mm0, %%mm6\n"
1031 "pmovmskb %%eax, %%mm0\n"
1032 "cmp %%eax, 0xff\n"
1033 "je Continue\n"
1034 ".att_syntax\n"
1035 "movb $1, %0\n"
1036 ".intel_syntax\n"
1037 "jmp Return\n"
1038 "Continue:\n"
1039 "cmp %%esi, 16\n"
1040 "jle Return\n"
1041 "test %%esi, 0x10\n"
1042 "jz AddEcx\n"
1043 "sub %%edx, 448\n" // go back and down one column
1044 "AddEcx:\n"
1045 "add %%edx, 256\n" // go to the right block
1046 "cmp %%esi, 0x90\n"
1047 "jne Continue1\n"
1048 "add %%edx, 256\n" // skip whole block
1049 "Continue1:\n"
1050 "add %%ecx, 64\n"
1051 "sub %%esi, 16\n"
1052 "jmp Start\n"
1053 "Return:\n"
1054 "emms\n"
1055
1056 ".att_syntax\n" : "=m"(bRet) : "c"(dst), "d"(src), "S"(entries) : "eax", "memory");
1057
1058 #endif // _WIN32
1059 return bRet;
1060 }
1061
1062 // cld state:
1063 // 000 - clut data is not loaded; data in the temp buffer is stored
1064 // 001 - clut data is always loaded.
1065 // 010 - clut data is always loaded; cbp0 = cbp.
1066 // 011 - clut data is always loaded; cbp1 = cbp.
1067 // 100 - cbp0 is compared with cbp. if different, clut data is loaded.
1068 // 101 - cbp1 is compared with cbp. if different, clut data is loaded.
1069
1070 // GSdx sets cbp0 & cbp1 when checking for clut changes. ZeroGS sets them in texClutWrite.
1071 bool ZeroGS::CheckChangeInClut(u32 highdword, u32 psm)
1072 {
1073 FUNCLOG
1074 int cld = ZZOglGet_cld_TexBits(highdword);
1075 int cbp = ZZOglGet_cbp_TexBits(highdword);
1076
1077 // processing the CLUT after tex0/2 are written
1078 //ZZLog::Error_Log("high == 0x%x; cld == %d", highdword, cld);
1079
1080 switch (cld)
1081 {
1082 case 0:
1083 return false;
1084
1085 case 1:
1086 break;
1087
1088 case 2:
1089 break;
1090
1091 case 3:
1092 break;
1093
1094 case 4:
1095 if (gs.cbp[0] == cbp) return false;
1096 break;
1097
1098 case 5:
1099 if (gs.cbp[1] == cbp) return false;
1100 break;
1101
1102 //case 4: return gs.cbp[0] != cbp;
1103 //case 5: return gs.cbp[1] != cbp;
1104
1105 // default: load
1106
1107 default:
1108 break;
1109 }
1110
1111 return IsDirty(highdword, psm, cld, cbp);
1112 }
1113
1114 void ZeroGS::texClutWrite(int ctx)
1115 {
1116 FUNCLOG
1117 s_bTexFlush = 0;
1118
1119 if (g_bIsLost) return;
1120
1121 tex0Info& tex0 = vb[ctx].tex0;
1122
1123 assert(PSMT_ISCLUT(tex0.psm));
1124
1125 // processing the CLUT after tex0/2 are written
1126 switch (tex0.cld)
1127 {
1128 case 0:
1129 return;
1130
1131 case 1:
1132 break; // tex0.cld is usually 1.
1133
1134 case 2:
1135 gs.cbp[0] = tex0.cbp;
1136 break;
1137
1138 case 3:
1139 gs.cbp[1] = tex0.cbp;
1140 break;
1141
1142 case 4:
1143 if (gs.cbp[0] == tex0.cbp) return;
1144 gs.cbp[0] = tex0.cbp;
1145 break;
1146
1147 case 5:
1148 if (gs.cbp[1] == tex0.cbp) return;
1149 gs.cbp[1] = tex0.cbp;
1150 break;
1151
1152 default: //ZZLog::Debug_Log("cld isn't 0-5!");
1153 break;
1154 }
1155
1156 Flush(!ctx);
1157
1158 int entries = PSMT_IS8CLUT(tex0.psm) ? 256 : 16;
1159
1160 if (tex0.csm)
1161 {
1162 switch (tex0.cpsm)
1163 {
1164 // 16bit psm
1165 // eggomania uses non16bit textures for csm2
1166
1167 case PSMCT16:
1168 {
1169 u16* src = (u16*)g_pbyGSMemory + tex0.cbp * 128;
1170 u16 *dst = (u16*)(g_pbyGSClut + 32 * (tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0));
1171
1172 for (int i = 0; i < entries; ++i)
1173 {
1174 *dst = src[getPixelAddress16_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
1175 dst += 2;
1176
1177 // check for wrapping
1178
1179 if (((u32)(uptr)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut + 2);
1180 }
1181 break;
1182 }
1183
1184 case PSMCT16S:
1185 {
1186 u16* src = (u16*)g_pbyGSMemory + tex0.cbp * 128;
1187 u16 *dst = (u16*)(g_pbyGSClut + 32 * (tex0.csa & 15) + (tex0.csa >= 16 ? 2 : 0));
1188
1189 for (int i = 0; i < entries; ++i)
1190 {
1191 *dst = src[getPixelAddress16S_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
1192 dst += 2;
1193
1194 // check for wrapping
1195
1196 if (((u32)(uptr)dst & 0x3ff) == 0) dst = (u16*)(g_pbyGSClut + 2);
1197 }
1198 break;
1199 }
1200
1201 case PSMCT32:
1202 case PSMCT24:
1203 {
1204 u32* src = (u32*)g_pbyGSMemory + tex0.cbp * 64;
1205 u32 *dst = (u32*)(g_pbyGSClut + 64 * tex0.csa);
1206
1207 // check if address exceeds src
1208
1209 if (src + getPixelAddress32_0(gs.clut.cou + entries - 1, gs.clut.cov, gs.clut.cbw) >= (u32*)g_pbyGSMemory + 0x00100000)
1210 ZZLog::Error_Log("texClutWrite out of bounds.");
1211 else
1212 for (int i = 0; i < entries; ++i)
1213 {
1214 *dst = src[getPixelAddress32_0(gs.clut.cou+i, gs.clut.cov, gs.clut.cbw)];
1215 dst++;
1216 }
1217 break;
1218 }
1219
1220 default:
1221 {
1222 //ZZLog::Debug_Log("Unknown cpsm: %x (%x).", tex0.cpsm, tex0.psm);
1223 break;
1224 }
1225 }
1226 }
1227 else
1228 {
1229 switch (tex0.cpsm)
1230 {
1231 case PSMCT24:
1232 case PSMCT32:
1233 if (entries == 16)
1234 WriteCLUT_T32_I4_CSM1((u32*)(g_pbyGSMemory + tex0.cbp*256), (u32*)(g_pbyGSClut + 64*tex0.csa));
1235 else
1236 WriteCLUT_T32_I8_CSM1((u32*)(g_pbyGSMemory + tex0.cbp*256), (u32*)(g_pbyGSClut + 64*tex0.csa));
1237 break;
1238
1239 default:
1240 if (entries == 16)
1241 WriteCLUT_T16_I4_CSM1((u32*)(g_pbyGSMemory + 256 * tex0.cbp), (u32*)(g_pbyGSClut + 32*(tex0.csa&15) + (tex0.csa >= 16 ? 2 : 0)));
1242 else // sse2 for 256 is more complicated, so use regular
1243 WriteCLUT_T16_I8_CSM1_c((u32*)(g_pbyGSMemory + 256 * tex0.cbp), (u32*)(g_pbyGSClut + 32*(tex0.csa&15) + (tex0.csa >= 16 ? 2 : 0)));
1244 break;
1245 }
1246 }
1247 }
1248
1249

  ViewVC Help
Powered by ViewVC 1.1.22