/[pcsx2_0.9.7]/trunk/pcsx2/x86/sVU_zerorec.cpp
ViewVC logotype

Contents of /trunk/pcsx2/x86/sVU_zerorec.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 280 - (show annotations) (download)
Thu Dec 23 12:02:12 2010 UTC (9 years, 2 months ago) by william
File size: 129606 byte(s)
re-commit (had local access denied errors when committing)
1 /* PCSX2 - PS2 Emulator for PCs
2 * Copyright (C) 2002-2010 PCSX2 Dev Team
3 *
4 * PCSX2 is free software: you can redistribute it and/or modify it under the terms
5 * of the GNU Lesser General Public License as published by the Free Software Found-
6 * ation, either version 3 of the License, or (at your option) any later version.
7 *
8 * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10 * PURPOSE. See the GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License along with PCSX2.
13 * If not, see <http://www.gnu.org/licenses/>.
14 */
15
16 // Super VU recompiler - author: zerofrog(@gmail.com)
17
18 #include "PrecompiledHeader.h"
19
20 #include <float.h>
21 #include <list>
22 #include <map>
23
24 #include "Utilities/AsciiFile.h"
25
26 #ifndef _WIN32
27 #include <sys/types.h>
28 #endif
29
30 #include "Common.h"
31
32 #include "GS.h"
33 #include "Gif.h"
34 #include "VU.h"
35
36 #include "R5900.h"
37 #include "iR5900.h"
38 #include "System/RecTypes.h"
39
40 #include "sVU_zerorec.h"
41 #include "NakedAsm.h"
42 #include "AppConfig.h"
43
44 // Needed in gcc for find.
45 #include <algorithm>
46
47 using namespace std;
48 using namespace x86Emitter;
49
50 // temporary externs
51 extern void iDumpVU0Registers();
52 extern void iDumpVU1Registers();
53
54 // SuperVURec optimization options, uncomment only for debugging purposes
55 #define SUPERVU_CACHING // vu programs are saved and queried via memcompare (should be no reason to disable this)
56 #define SUPERVU_WRITEBACKS // don't flush the writebacks after every block
57 #define SUPERVU_VIBRANCHDELAY // when integers are modified right before a branch that uses the integer,
58 // the old integer value is used in the branch, fixes kh2
59
60 #define SUPERVU_PROPAGATEFLAGS // the correct behavior of VUs, for some reason superman breaks gfx with it on...
61
62 // use x86reg caching (faster) (not really. rather lots slower :p (rama) )
63 // ... and buggy too since we disabled EBP. Causes GoW2 to hang. Let's get rid of it,
64 // sVU is only here to serve as a regression model for Nan/INF behavior anyway. (--air)
65 //#define SUPERVU_X86CACHING
66
67
68 // registers won't be flushed at block boundaries (faster) (nothing noticeable speed-wise, causes SPS in Ratchet and clank (Nneeve) )
69 #ifndef PCSX2_DEBUG
70 //#define SUPERVU_INTERCACHING
71 #endif
72
73 #define SUPERVU_CHECKCONDITION 0 // has to be 0!!
74
75 static const uint sVU_EXESIZE = _8mb;
76
77 #define _Imm11_ (s32)( (vucode & 0x400) ? (0xfffffc00 | (vucode & 0x3ff)) : (vucode & 0x3ff) )
78 #define _UImm11_ (s32)(vucode & 0x7ff)
79
80 #define _Ft_ ((VU->code >> 16) & 0x1F) // The rt part of the instruction register
81 #define _Fs_ ((VU->code >> 11) & 0x1F) // The rd part of the instruction register
82 #define _Fd_ ((VU->code >> 6) & 0x1F) // The sa part of the instruction register
83 #define _It_ (_Ft_ & 15)
84 #define _Is_ (_Fs_ & 15)
85 #define _Id_ (_Fd_ & 15)
86
87 static const u32 QWaitTimes[] = { 6, 12 };
88 static const u32 PWaitTimes[] = { 53, 43, 28, 23, 17, 11, 10 };
89
90 static u32 s_vuInfo; // info passed into rec insts
91
92 static const u32 s_MemSize[2] = {VU0_MEMSIZE, VU1_MEMSIZE};
93 //static u8* s_recVUMem = NULL, *s_recVUPtr = NULL;
94 static RecompiledCodeReserve* s_recVUMem[2] = { NULL, NULL };
95 static u8* s_recVUPtr[2] = { NULL, NULL };
96
97 // tables which are defined at the bottom of this massive file.
98 extern void (*recVU_UPPER_OPCODE[64])(VURegs* VU, s32 info);
99 extern void (*recVU_LOWER_OPCODE[128])(VURegs* VU, s32 info);
100
101 #define INST_Q_READ 0x0001 // flush Q
102 #define INST_P_READ 0x0002 // flush P
103 #define INST_BRANCH_DELAY 0x0004
104 #define INST_CLIP_WRITE 0x0040 // inst writes CLIP in the future
105 #define INST_STATUS_WRITE 0x0080
106 #define INST_MAC_WRITE 0x0100
107 #define INST_Q_WRITE 0x0200
108 #define INST_CACHE_VI 0x0400 // write old vi value to s_VIBranchDelay
109
110 // Let's tempt fate by defining two different constants with almost identical names
111 #define INST_DUMMY_ 0x8000
112 #define INST_DUMMY 0x83c0
113
114 #define VFFREE_INVALID0 0x80000000 // (vffree[i]&0xf) is invalid
115
116 //#define FORIT(it, v) for(it = (v).begin(); it != (v).end(); ++(it))
117
118 #ifdef PCSX2_DEBUG
119 u32 s_vucount = 0;
120
121 static u32 g_vu1lastrec = 0, skipparent = -1;
122 static u32 s_svulast = 0, s_vufnheader;
123 static u32 badaddrs[][2] = {0, 0xffff};
124 #endif
125
// Packed identifier for a recompiler register: a 16-bit register index plus a
// 16-bit type tag, overlaid with a single 32-bit id for cheap compares/hashing.
// (Anonymous struct inside a union is a compiler extension supported by the
// toolchains this project targets.)
union VURecRegs
{
	struct
	{
		u16 reg;  // register index
		u16 type; // register class/type tag
	};
	u32 id; // both fields viewed as one 32-bit key
};
135
136 #define SUPERVU_XGKICKDELAY 1 // yes this is needed as default (wipeout)
137
138 class VuBaseBlock;
139
// Cache entry for one recompiled VU program: the entry point of the generated
// x86 code plus snapshots of the VU micro-mem ranges it was compiled from,
// used to validate that a cached program still matches memory (see IsSame).
struct VuFunctionHeader
{
	// One contiguous span of VU micro memory covered by this program.
	struct RANGE
	{
		RANGE() : pmem(NULL) {}

		u16 start, size;
		void* pmem; // all the mem (malloc'd snapshot of the micro program bytes)
	};

	VuFunctionHeader() : pprogfunc(NULL), startpc(0xffffffff) {}
	~VuFunctionHeader()
	{
		// pmem snapshots are allocated with malloc() (in SuperVURecompileProgram),
		// so release them with free().
		for (vector<RANGE>::iterator it = ranges.begin(); it != ranges.end(); ++it)
		{
			free(it->pmem);
		}
	}

	// returns true if the checksum for the current mem is the same as this fn
	bool IsSame(void* pmem);

	u32 startpc;      // VU pc this program begins at
	void* pprogfunc;  // entry point of the generated x86 code

	vector<RANGE> ranges; // micro-mem spans this program was compiled from
};
167
// Per-slot (one per 8-byte instruction pair) lookup entry used while building
// the block graph during recompilation.
struct VuBlockHeader
{
	VuBaseBlock* pblock; // block starting at this pc, or NULL if none yet
	u32 delay;
};
173
// one vu inst (lower and upper)
// Holds the decoded register usage, liveness sets, and XMM allocation results
// for a single 64-bit VU instruction pair.
class VuInstruction
{
public:
	VuInstruction()
	{
		// Contents are POD (no virtuals), so zero everything wholesale, then
		// set the "no value" sentinels.
		memzero(*this);
		nParentPc = -1;
		vicached = -1;
	}

	int nParentPc; // used for syncing with flag writes, -1 for no parent

	_vuopinfo info;

	_VURegsNum regs[2]; // [0] - lower, [1] - upper
	u32 livevars[2]; // live variables right before this inst, [0] - inst, [1] - float
	u32 addvars[2]; // live variables to add
	u32 usedvars[2]; // set if var is used in the future including vars used in this inst
	u32 keepvars[2];
	u16 pqcycles; // the number of cycles to stall if function writes to the regs
	u16 type; // INST_ flags (see the INST_* defines above)

	u32 pClipWrite, pMACWrite, pStatusWrite; // addrs to write the flags
	u32 vffree[2];
	s8 vfwrite[2], vfread0[2], vfread1[2], vfacc[2]; // assigned XMM slots, -1 if unused
	s8 vfflush[2]; // extra flush regs
	s8 vicached; // if >= 0, then use the cached integer s_VIBranchDelay
	VuInstruction *pPrevInst; // previous inst in program order (branch-delay analysis)

	int SetCachedRegs(int upper, u32 vuxyz);
	void Recompile(list<VuInstruction>::iterator& itinst, u32 vuxyz);
};
207
// Flag bits describing a VuBaseBlock (stored OR'd together in VuBaseBlock::type).
enum BlockType
{
	BLOCKTYPE_EOP = 0x01, // at least one of the children of the block contains eop (or the block itself)
	BLOCKTYPE_FUNCTION = 0x02,
	BLOCKTYPE_HASEOP = 0x04, // last inst of block is an eop
	BLOCKTYPE_MACFLAGS = 0x08,
	BLOCKTYPE_ANALYZED = 0x40,
	BLOCKTYPE_IGNORE = 0x80, // special for recursive fns
	BLOCKTYPE_ANALYZEDPARENT = 0x100
};
218
// base block used when recompiling
// A basic block of the VU program's control-flow graph, annotated with
// instruction list, parent/child links, and register-allocation state.
class VuBaseBlock
{
public:
	typedef list<VuBaseBlock*> LISTBLOCKS;

	VuBaseBlock();

	// returns true if the leads to a EOP (ALL VU blocks must ret true)
	void AssignVFRegs();
	void AssignVIRegs(int parent);

	// Find the (non-dummy) instruction at vu address instpc within this block.
	list<VuInstruction>::iterator GetInstIterAtPc(int instpc);
	// Collect every instruction at instpc, searching other blocks if needed.
	void GetInstsAtPc(int instpc, list<VuInstruction*>& listinsts);

	void Recompile();

	u16 type; // BLOCKTYPE_
	u16 id;
	u16 startpc;
	u16 endpc; // first inst not in block
	void* pcode; // x86 code pointer
	void* pendcode; // end of the x86 code pointer
	int cycles;
	list<VuInstruction> insts;
	list<VuBaseBlock*> parents;
	LISTBLOCKS blocks; // blocks branches to
	u32* pChildJumps[4]; // addrs that need to be filled with the children's start addrs
	// if highest bit is set, addr needs to be relational
	u32 vuxyz; // corresponding bit is set if reg's xyz channels are used only
	u32 vuxy; // corresponding bit is set if reg's xy channels are used only

	_xmmregs startregs[iREGCNT_XMM], endregs[iREGCNT_XMM]; // XMM state at entry/exit
	int nStartx86, nEndx86; // indices into s_vecRegArray

	int allocX86Regs;
	int prevFlagsOutOfBlock;
};
257
258 struct WRITEBACK
259 {
260 WRITEBACK() : nParentPc(0), cycle(0) //, pStatusWrite(NULL), pMACWrite(NULL)
261 {
262 viwrite[0] = viwrite[1] = 0;
263 viread[0] = viread[1] = 0;
264 }
265
266 void InitInst(VuInstruction* pinst, int cycle) const
267 {
268 u32 write = viwrite[0] | viwrite[1];
269 pinst->type = ((write & (1 << REG_CLIP_FLAG)) ? INST_CLIP_WRITE : 0) |
270 ((write & (1 << REG_MAC_FLAG)) ? INST_MAC_WRITE : 0) |
271 ((write & (1 << REG_STATUS_FLAG)) ? INST_STATUS_WRITE : 0) |
272 ((write & (1 << REG_Q)) ? INST_Q_WRITE : 0);
273 pinst->nParentPc = nParentPc;
274 pinst->info.cycle = cycle;
275 for (int i = 0; i < 2; ++i)
276 {
277 pinst->regs[i].VIwrite = viwrite[i];
278 pinst->regs[i].VIread = viread[i];
279 }
280 }
281
282 static int SortWritebacks(const WRITEBACK& w1, const WRITEBACK& w2)
283 {
284 return w1.cycle < w2.cycle;
285 }
286
287 int nParentPc;
288 int cycle;
289 u32 viwrite[2];
290 u32 viread[2];
291 };
292
// Pipeline state carried across block boundaries while building the block
// graph, so stalls and pending writebacks propagate into successor blocks.
struct VUPIPELINES
{
	fmacPipe fmac[8];
	fdivPipe fdiv;
	efuPipe efu;
	ialuPipe ialu[8];
	list< WRITEBACK > listWritebacks; // flag writes still in flight at block entry
};
301
302 VuBaseBlock::VuBaseBlock()
303 {
304 type = 0;
305 endpc = 0;
306 cycles = 0;
307 pcode = NULL;
308 id = 0;
309 memzero(pChildJumps);
310 memzero(startregs);
311 memzero(endregs);
312 allocX86Regs = nStartx86 = nEndx86 = -1;
313 prevFlagsOutOfBlock = 0;
314 }
315
316 #define SUPERVU_STACKSIZE 0x1000
317
318 static list<VuFunctionHeader*> s_listVUHeaders[2];
319 static list<VuFunctionHeader*>* s_plistCachedHeaders[2] = {NULL, NULL};
320 static VuFunctionHeader** recVUHeaders[2] = { NULL, NULL };
321 static VuBlockHeader* recVUBlocks[2] = { NULL, NULL };
322 static u8* recVUStack[2] = { NULL, NULL };
323 static u8* recVUStackPtr[2] = { NULL, NULL };
324
325 static vector<_x86regs> s_vecRegArray(128);
326
327 static VURegs* VU = NULL;
328 static list<VuBaseBlock*> s_listBlocks;
329 static u32 s_vu = 0;
330 static u32 s_UnconditionalDelay = 0; // 1 if there are two sequential branches and the last is unconditional
331 static u32 g_nLastBlockExecuted = 0;
332
333 static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex);
334 static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const VUPIPELINES& pipes);
335 static void SuperVUInitLiveness(VuBaseBlock* pblock);
336 static void SuperVULivenessAnalysis();
337 static void SuperVUEliminateDeadCode();
338 static void SuperVUAssignRegs();
339
340 //void SuperVUFreeXMMreg(int xmmreg, int xmmtype, int reg);
341 #define SuperVUFreeXMMreg 0&&
342 void SuperVUFreeXMMregs(u32* livevars);
343
344 static u32* SuperVUStaticAlloc(u32 size);
345 static void SuperVURecompile();
346
347 // allocate VU resources
348 static void SuperVUAlloc(int vuindex)
349 {
350 if (s_recVUMem[vuindex]) return;
351
352 s_recVUMem[vuindex] = new RecompiledCodeReserve( pxsFmt("SuperVU%u Recompiler Cache", vuindex), 0 );
353 s_recVUMem[vuindex]->Reserve( sVU_EXESIZE, vuindex ? HostMemoryMap::sVU1rec : HostMemoryMap::sVU0rec, _256mb );
354 s_recVUMem[vuindex]->SetProfilerName(pxsFmt("sVU%urec",vuindex));
355
356 // upper 4 bits must be zero!
357 if (!s_recVUMem[vuindex]->IsOk())
358 {
359 safe_delete(s_recVUMem[vuindex]);
360 throw Exception::VirtualMemoryMapConflict( s_recVUMem[vuindex]->GetName() )
361 .SetDiagMsg(pxsFmt( L"SuperVU failed to allocate virtual memory below 256MB." ))
362 .SetUserMsg(pxE( "!Notice:superVU:VirtualMemoryAlloc",
363 L"Out of Memory (sorta): The SuperVU recompiler was unable to reserve the specific memory "
364 L"ranges required, and will not be available for use. This is not a critical error, since "
365 L"the sVU rec is obsolete, and you should use microVU instead anyway. :)"
366 ));
367 }
368 }
369
370 void DestroyCachedHeaders(int vuindex, int j)
371 {
372 list<VuFunctionHeader*>::iterator it = s_plistCachedHeaders[vuindex][j].begin();
373
374 while (it != s_plistCachedHeaders[vuindex][j].end())
375 {
376 delete *it;
377 it++;
378 }
379
380 s_plistCachedHeaders[vuindex][j].clear();
381 }
382
383 void DestroyVUHeaders(int vuindex)
384 {
385 list<VuFunctionHeader*>::iterator it = s_listVUHeaders[vuindex].begin();
386
387 while (it != s_listVUHeaders[vuindex].end())
388 {
389 delete *it;
390 it++;
391 }
392
393 s_listVUHeaders[vuindex].clear();
394 }
395
396 // destroy VU resources
397 void SuperVUDestroy(int vuindex)
398 {
399 pxAssumeDev(vuindex >= 0 && vuindex <= 2, "Invalid VU index parameter!");
400
401 safe_delete_array(recVUHeaders[vuindex]);
402 safe_delete_array(recVUBlocks[vuindex]);
403
404 if (s_plistCachedHeaders[vuindex] != NULL)
405 {
406 for (u32 j = 0; j < s_MemSize[vuindex] / 8; ++j)
407 {
408 DestroyCachedHeaders(vuindex, j);
409 }
410 safe_delete_array(s_plistCachedHeaders[vuindex]);
411 }
412 DestroyVUHeaders(vuindex);
413
414 safe_delete(s_recVUMem[vuindex]);
415 safe_delete_array(recVUStack[vuindex]);
416 }
417
// reset VU
// Clears all cached programs/blocks for the given VU and, if the code
// reserve exists, resets the emit pointer back to its start.
void SuperVUReset(int vuindex)
{
	pxAssumeDev(vuindex >= 0 && vuindex <= 2, "Invalid VU index parameter!");

#ifdef PCSX2_DEBUG
	s_vucount = 0;
#endif

	DevCon.WriteLn("SuperVU%d: Resetting function and block lists.", vuindex);

	// Lazily allocate the per-slot lookup tables (one entry per 8-byte inst pair).
	if (recVUHeaders[vuindex] == NULL)
		recVUHeaders[vuindex] = new VuFunctionHeader* [s_MemSize[vuindex] / 8];
	if (recVUBlocks[vuindex] == NULL)
		recVUBlocks[vuindex] = new VuBlockHeader[s_MemSize[vuindex] / 8];
	if (s_plistCachedHeaders[vuindex] == NULL)
		s_plistCachedHeaders[vuindex] = new std::list<VuFunctionHeader*>[s_MemSize[vuindex] / 8];

	if (recVUHeaders[vuindex]) memset(recVUHeaders[vuindex], 0, sizeof(VuFunctionHeader*) * (s_MemSize[vuindex] / 8));
	if (recVUBlocks[vuindex]) memset(recVUBlocks[vuindex], 0, sizeof(VuBlockHeader) * (s_MemSize[vuindex] / 8));

	// Drop both the revival cache and the active header list.
	if (s_plistCachedHeaders[vuindex] != NULL)
	{
		for (u32 j = 0; j < s_MemSize[vuindex] / 8; ++j)
		{
			DestroyCachedHeaders(vuindex, j);
		}
	}
	DestroyVUHeaders(vuindex);

	// No code reserve (allocation failed or never done): nothing more to reset.
	if (!s_recVUMem[vuindex] || !s_recVUMem[vuindex]->IsOk()) return;

	DevCon.WriteLn("SuperVU%u: Resetting recompiler cache.", vuindex);

	// NOTE(review): the stack allocation is SUPERVU_STACKSIZE*4 bytes but only
	// the first SUPERVU_STACKSIZE bytes are zeroed here -- confirm whether the
	// upper portion is expected to stay uninitialized.
	if (!recVUStack[vuindex]) recVUStack[vuindex] = new u8[SUPERVU_STACKSIZE * 4];
	memzero_ptr<SUPERVU_STACKSIZE>(recVUStack[vuindex]);

	s_recVUMem[vuindex]->Reset();
	s_recVUPtr[vuindex] = *s_recVUMem[vuindex];
}
458
// clear the block and any joining blocks
// Invalidates every compiled program whose source ranges overlap
// [startpc, startpc+size*4); with SUPERVU_CACHING the program is parked in a
// per-slot revival cache instead of being deleted outright.
static void __fastcall SuperVUClear(u32 startpc, u32 size, int vuindex)
{
	vector<VuFunctionHeader::RANGE>::iterator itrange;
	list<VuFunctionHeader*>::iterator it = s_listVUHeaders[vuindex].begin();
	u32 endpc = startpc + ((size * 4 + 7) & ~7); // Adding this code to ensure size is always a multiple of 8, it can be simplified to startpc+size if size is always a multiple of 8 (cottonvibes)
	while (it != s_listVUHeaders[vuindex].end())
	{

		// for every fn, check if it has code in the range
		for(itrange = (*it)->ranges.begin(); itrange != (*it)->ranges.end(); itrange++)
		{
			if (startpc < (u32)itrange->start + itrange->size && itrange->start < endpc)
				break;
		}

		if (itrange != (*it)->ranges.end())
		{
			// Overlap found: unhook from the dispatch table so it can't run again.
			recVUHeaders[vuindex][(*it)->startpc/8] = NULL;
#ifdef SUPERVU_CACHING
			// Park the compiled program in the revival cache; if the identical
			// micro program is uploaded again, SuperVUGetProgram revives it
			// without recompiling.  Cap the cache at 30 entries per slot.
			list<VuFunctionHeader*>* plist = &s_plistCachedHeaders[vuindex][(*it)->startpc/8];
			plist->push_back(*it);
			if (plist->size() > 30)
			{
				// list is too big, delete
				//Console.Warning("Performance warning: deleting cached VU program!");
				delete plist->front();
				plist->pop_front();
			}
			it = s_listVUHeaders[vuindex].erase(it);
#else
			delete *it;
			it = s_listVUHeaders[vuindex].erase(it);
#endif
		}
		else ++it;
	}
}
497
// --- per-recompilation state (valid only while a program is being compiled) ---
static VuFunctionHeader* s_pFnHeader = NULL; // function currently being recompiled
static VuBaseBlock* s_pCurBlock = NULL;      // block currently being recompiled
static VuInstruction* s_pCurInst = NULL;     // instruction currently being recompiled
static u32 s_StatusRead = 0, s_MACRead = 0, s_ClipRead = 0; // read addrs
static u32 s_PrevStatusWrite = 0, s_PrevMACWrite = 0, s_PrevClipWrite = 0, s_PrevIWrite = 0; // previous write addrs
static u32 s_WriteToReadQ = 0;

static u32 s_VIBranchDelay = 0; //Value of register to use in a vi branch delayed situation


u32 s_TotalVUCycles; // total cycles since start of program execution
509
510
// Returns the host address backing VI register 'reg' for the current inst.
// read: 0 = address the inst writes to, non-zero = address it reads from,
// 2 = the previous write's address.  Flag registers resolve to per-inst
// shadow locations so pipelined flag behavior is preserved.
u32 SuperVUGetVIAddr(int reg, int read)
{
	pxAssert(s_pCurInst != NULL);

	switch (reg)
	{
		case REG_STATUS_FLAG:
		{
			u32 addr = (read == 2) ? s_PrevStatusWrite : (read ? s_StatusRead : s_pCurInst->pStatusWrite);
			pxAssert(!read || addr != 0);
			return addr;
		}
		case REG_MAC_FLAG:
		{
			u32 addr = (read == 2) ? s_PrevMACWrite : (read ? s_MACRead : s_pCurInst->pMACWrite);
			return addr;
		}
		case REG_CLIP_FLAG:
		{
			u32 addr = (read == 2) ? s_PrevClipWrite : (read ? s_ClipRead : s_pCurInst->pClipWrite);
			pxAssert(!read || addr != 0);
			return addr;
		}
		case REG_Q:
			// writes go to the pipeline copy (VU->q) unless forced to the VI reg
			return (read || s_WriteToReadQ) ? (uptr)&VU->VI[REG_Q] : (uptr)&VU->q;
		case REG_P:
			return read ? (uptr)&VU->VI[REG_P] : (uptr)&VU->p;
		case REG_I:
			return s_PrevIWrite;
	}

#ifdef SUPERVU_VIBRANCHDELAY
	// A branch reading a VI that was modified inside the branch-delay window
	// must see the cached pre-modification value, not the live register.
	if ((read != 0) && (s_pCurInst->regs[0].pipe == VUPIPE_BRANCH) && (s_pCurInst->vicached >= 0) && (s_pCurInst->vicached == reg))
	{
		return (uptr)&s_VIBranchDelay; // test for branch delays
	}
#endif

	return (uptr)&VU->VI[reg];
}
551
552 void SuperVUDumpBlock(list<VuBaseBlock*>& blocks, int vuindex)
553 {
554 u32 *mem;
555 u32 i;
556
557 g_Conf->Folders.Logs.Mkdir();
558 AsciiFile eff(
559 Path::Combine( g_Conf->Folders.Logs, wxsFormat(L"svu%cdump%.4X.txt", s_vu?L'0':L'1', s_pFnHeader->startpc) ), L"w"
560 );
561
562 eff.Printf("Format: upper_inst lower_inst\ntype f:vf_live_vars vf_used_vars i:vi_live_vars vi_used_vars inst_cycle pq_inst\n");
563 eff.Printf("Type: %.2x - qread, %.2x - pread, %.2x - clip_write, %.2x - status_write\n"
564 "%.2x - mac_write, %.2x -qflush\n",
565 INST_Q_READ, INST_P_READ, INST_CLIP_WRITE, INST_STATUS_WRITE, INST_MAC_WRITE, INST_Q_WRITE);
566 eff.Printf("XMM: Upper: read0 read1 write acc temp; Lower: read0 read1 write acc temp\n\n");
567
568 list<VuBaseBlock*>::iterator itblock;
569 list<VuInstruction>::iterator itinst;
570 VuBaseBlock::LISTBLOCKS::iterator itchild;
571
572 for(itblock = blocks.begin(); itblock != blocks.end(); itblock++)
573 {
574 eff.Printf( "block:%c %x-%x; children: ", ((*itblock)->type&BLOCKTYPE_HASEOP) ? '*' : ' ',
575 (*itblock)->startpc, (*itblock)->endpc - 8);
576
577 for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++)
578 {
579 eff.Printf("%x ", (*itchild)->startpc);
580 }
581 eff.Printf("; vuxyz = %x, vuxy = %x\n", (*itblock)->vuxyz&(*itblock)->insts.front().usedvars[1],
582 (*itblock)->vuxy&(*itblock)->insts.front().usedvars[1]);
583
584 itinst = (*itblock)->insts.begin();
585 i = (*itblock)->startpc;
586 while (itinst != (*itblock)->insts.end())
587 {
588 pxAssert(i <= (*itblock)->endpc);
589 if (itinst->type & INST_DUMMY)
590 {
591 if (itinst->nParentPc >= 0 && !(itinst->type&INST_DUMMY_))
592 {
593 // search for the parent
594 eff.Printf("writeback 0x%x (%x)\n", itinst->type, itinst->nParentPc);
595 }
596 }
597 else
598 {
599 mem = (u32*) & VU->Micro[i];
600 char* pstr = disVU1MicroUF(mem[1], i + 4);
601 eff.Printf( "%.4x: %-40s", i, pstr);
602 if (mem[1] & 0x80000000) eff.Printf( " I=%f(%.8x)\n", *(float*)mem, mem[0]);
603 else eff.Printf( "%s\n", disVU1MicroLF(mem[0], i));
604 i += 8;
605 }
606
607 ++itinst;
608 }
609
610 eff.Printf("\n");
611
612 _x86regs* pregs;
613 if ((*itblock)->nStartx86 >= 0 || (*itblock)->nEndx86 >= 0)
614 {
615 eff.Printf( "X86: AX CX DX BX SP BP SI DI\n");
616 }
617
618 if ((*itblock)->nStartx86 >= 0)
619 {
620 pregs = &s_vecRegArray[(*itblock)->nStartx86];
621 eff.Printf( "STR: ");
622 for (i = 0; i < iREGCNT_GPR; ++i)
623 {
624 if (pregs[i].inuse)
625 eff.Printf( "%.2d ", pregs[i].reg);
626 else
627 eff.Printf( "-1 ");
628 }
629 eff.Printf( "\n");
630 }
631
632 if ((*itblock)->nEndx86 >= 0)
633 {
634 eff.Printf( "END: ");
635 pregs = &s_vecRegArray[(*itblock)->nEndx86];
636 for (i = 0; i < iREGCNT_GPR; ++i)
637 {
638 if (pregs[i].inuse)
639 eff.Printf( "%.2d ", pregs[i].reg);
640 else
641 eff.Printf( "-1 ");
642 }
643 eff.Printf( "\n");
644 }
645
646 itinst = (*itblock)->insts.begin();
647 for (i = (*itblock)->startpc; i < (*itblock)->endpc; ++itinst)
648 {
649
650 if (itinst->type & INST_DUMMY)
651 {
652 }
653 else
654 {
655 char str[256];
656 sprintf(str, "%.4x:%x f:%.8x_%.8x", i, itinst->type, itinst->livevars[1], itinst->usedvars[1]);
657 eff.Printf( "%-46s i:%.8x_%.8x c:%d pq:%d\n", str,
658 itinst->livevars[0], itinst->usedvars[0], (int)itinst->info.cycle, (int)itinst->pqcycles);
659
660 sprintf(str, "XMM r0:%d r1:%d w:%d a:%d t:%x;",
661 itinst->vfread0[1], itinst->vfread1[1], itinst->vfwrite[1], itinst->vfacc[1], itinst->vffree[1]);
662 eff.Printf( "%-46s r0:%d r1:%d w:%d a:%d t:%x\n", str,
663 itinst->vfread0[0], itinst->vfread1[0], itinst->vfwrite[0], itinst->vfacc[0], itinst->vffree[0]);
664 i += 8;
665 }
666 }
667
668 //
669 #if 0 // __LINUX__
670
671 // dump the asm
672 if ((*itblock)->pcode != NULL)
673 {
674 char command[255];
675 FILE* fasm = fopen("mydump1", "wb");
676 //Console.WriteLn("writing: %x, %x", (*itblock)->startpc, (uptr)(*itblock)->pendcode - (uptr)(*itblock)->pcode);
677 fwrite((*itblock)->pcode, 1, (uptr)(*itblock)->pendcode - (uptr)(*itblock)->pcode, fasm);
678 fclose(fasm);
679 sprintf(command, "objdump -D --target=binary --architecture=i386 -M intel mydump1 > tempdump");
680 system(command);
681 fasm = fopen("tempdump", "r");
682 // read all of it and write it to f
683 fseek(fasm, 0, SEEK_END);
684 vector<char> vbuffer(ftell(fasm));
685 fseek(fasm, 0, SEEK_SET);
686 fread(&vbuffer[0], vbuffer.size(), 1, fasm);
687
688 fprintf(f, "\n\n");
689 fwrite(&vbuffer[0], vbuffer.size(), 1, f);
690 fclose(fasm);
691 }
692 #endif
693
694 eff.Printf("\n---------------\n");
695 }
696 }
697
698 // uncomment to count svu exec time
699 //#define SUPERVU_COUNT
700
701 // Private methods
// Returns the x86 entry point for the VU program starting at startpc,
// reviving a cached compilation or recompiling if none is active.
void* SuperVUGetProgram(u32 startpc, int vuindex)
{
	pxAssert(startpc < s_MemSize[vuindex]);
	pxAssert((startpc % 8) == 0);
	pxAssert(recVUHeaders[vuindex] != NULL);
	VuFunctionHeader** pheader = &recVUHeaders[vuindex][startpc/8];

	if (*pheader == NULL)
	{

#ifdef SUPERVU_CACHING
		void* pmem = (vuindex & 1) ? VU1.Micro : VU0.Micro;
		// check if program exists in cache
		list<VuFunctionHeader*>::iterator it;
		for(it = s_plistCachedHeaders[vuindex][startpc/8].begin(); it != s_plistCachedHeaders[vuindex][startpc/8].end(); it++)
		{
			if ((*it)->IsSame(pmem))
			{
				// found, transfer to regular lists
				void* pfn = (*it)->pprogfunc;
				recVUHeaders[vuindex][startpc/8] = *it;
				s_listVUHeaders[vuindex].push_back(*it);
				s_plistCachedHeaders[vuindex][startpc/8].erase(it);
				return pfn;
			}
		}
#endif

		*pheader = SuperVURecompileProgram(startpc, vuindex);

		// Recompilation returns NULL when the cache had to be reset while a
		// program was already executing: stash the resume pc and exit.
		if (*pheader == NULL)
		{
			pxAssert(s_TotalVUCycles > 0);
			if (vuindex)
				VU1.VI[REG_TPC].UL = startpc;
			else
				VU0.VI[REG_TPC].UL = startpc;

			return (void*)SuperVUEndProgram;
		}

		pxAssert((*pheader)->pprogfunc != NULL);
	}
	//else pxAssert( (*pheader)->IsSame((vuindex&1) ? VU1.Micro : VU0.Micro) );

	pxAssert((*pheader)->startpc == startpc);

	return (*pheader)->pprogfunc;
}
751
752 bool VuFunctionHeader::IsSame(void* pmem)
753 {
754 #ifdef SUPERVU_CACHING
755 vector<RANGE>::iterator it;
756 for(it = ranges.begin(); it != ranges.end(); it++)
757 {
758 if (memcmp_mmx((u8*)pmem + it->start, it->pmem, it->size))
759 return false;
760 }
761 #endif
762 return true;
763 }
764
765 list<VuInstruction>::iterator VuBaseBlock::GetInstIterAtPc(int instpc)
766 {
767 pxAssert(instpc >= 0);
768
769 u32 curpc = startpc;
770 list<VuInstruction>::iterator it;
771 for (it = insts.begin(); it != insts.end(); ++it)
772 {
773 if (it->type & INST_DUMMY) continue;
774 if (curpc == instpc) break;
775 curpc += 8;
776 }
777
778 if (it != insts.end()) return it;
779
780 pxAssert(0);
781 return insts.begin();
782 }
783
// Fills listinsts with pointers to every instruction at vu pc instpc: the one
// in this block if present, otherwise the matching inst from every other
// block whose pc range contains instpc.
void VuBaseBlock::GetInstsAtPc(int instpc, list<VuInstruction*>& listinsts)
{
	pxAssert(instpc >= 0);

	listinsts.clear();

	// scan this block first, skipping dummy insts (they occupy no pc)
	u32 curpc = startpc;
	list<VuInstruction>::iterator it;
	for (it = insts.begin(); it != insts.end(); ++it)
	{
		if (it->type & INST_DUMMY) continue;
		if (curpc == instpc) break;
		curpc += 8;
	}

	if (it != insts.end())
	{
		listinsts.push_back(&(*it));
		return;
	}

	// look for the pc in other blocks
	for (list<VuBaseBlock*>::iterator itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); ++itblock)
	{
		if (*itblock == this) continue;

		if (instpc >= (*itblock)->startpc && instpc < (*itblock)->endpc)
		{
			listinsts.push_back(&(*(*itblock)->GetInstIterAtPc(instpc)));
		}
	}

	pxAssert(listinsts.size() > 0);
}
818
// Compiles the VU program at startpc: builds the block graph, runs liveness
// analysis / dead-code elimination / register assignment, then emits x86
// code.  Returns the new function header, or NULL if the code cache had to be
// reset while a program was already executing (caller must retry later).
static VuFunctionHeader* SuperVURecompileProgram(u32 startpc, int vuindex)
{
	pxAssert(vuindex < 2);
	pxAssert(s_recVUPtr[vuindex] != NULL);
	//Console.WriteLn("svu%c rec: %x", '0'+vuindex, startpc);

	// if recPtr reached the mem limit reset whole mem
	if ((s_recVUPtr[vuindex] < s_recVUMem[vuindex]->GetPtr()) || (s_recVUPtr[vuindex] >= s_recVUMem[vuindex]->GetPtrEnd() - _256kb))
	{
		Console.WriteLn("SuperVU%u: Recompiler cache reset...", vuindex);
		SuperVUReset(0);
		SuperVUReset(1);
		if (s_TotalVUCycles > 0)
		{
			// already executing, so return NULL
			return NULL;
		}
	}

	list<VuBaseBlock*>::iterator itblock;

	// set the globals the rest of the recompiler reads
	s_vu = vuindex;
	VU = s_vu ? &VU1 : &VU0;
	s_pFnHeader = new VuFunctionHeader();
	s_listVUHeaders[vuindex].push_back(s_pFnHeader);
	s_pFnHeader->startpc = startpc;

	memset(recVUBlocks[s_vu], 0, sizeof(VuBlockHeader) * (s_MemSize[s_vu] / 8));

	// analyze the global graph
	s_listBlocks.clear();
	VUPIPELINES pipes;
	memzero(pipes.fmac);
	memzero(pipes.fdiv);
	memzero(pipes.efu);
	memzero(pipes.ialu);
	SuperVUBuildBlocks(NULL, startpc, pipes);

	// fill parents
	VuBaseBlock::LISTBLOCKS::iterator itchild;
	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
	{
		for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++)
		{
			(*itchild)->parents.push_back(*itblock);
		}

		//(*itblock)->type &= ~(BLOCKTYPE_IGNORE|BLOCKTYPE_ANALYZED);
	}

	pxAssert(s_listBlocks.front()->startpc == startpc);
	s_listBlocks.front()->type |= BLOCKTYPE_FUNCTION;

	// analysis passes
	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
	{
		SuperVUInitLiveness(*itblock);
	}

	SuperVULivenessAnalysis();
	SuperVUEliminateDeadCode();
	SuperVUAssignRegs();

#ifdef PCSX2_DEBUG
	if ((s_vu && (vudump&1)) || (!s_vu && (vudump&16))) SuperVUDumpBlock(s_listBlocks, s_vu);
#endif

	// code generation
	xSetPtr(s_recVUPtr[vuindex]);
	branch = 0;

	SuperVURecompile();

	s_recVUPtr[vuindex] = xGetPtr();

	// set the function's range
	VuFunctionHeader::RANGE r;
	s_pFnHeader->ranges.reserve(s_listBlocks.size());

	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
	{
		r.start = (*itblock)->startpc;
		r.size = (*itblock)->endpc - (*itblock)->startpc;
#ifdef SUPERVU_CACHING
		//memxor_mmx(r.checksum, &VU->Micro[r.start], r.size);
		// snapshot the source micro-mem so IsSame() can validate cache revivals
		r.pmem = malloc(r.size);
		memcpy_fast(r.pmem, &VU->Micro[r.start], r.size);
#endif
		s_pFnHeader->ranges.push_back(r);
	}

#if defined(PCSX2_DEBUG) && defined(__LINUX__)
	// dump at the end to capture the actual code
	if ((s_vu && (vudump&1)) || (!s_vu && (vudump&16))) SuperVUDumpBlock(s_listBlocks, s_vu);
#endif

	// destroy (block objects are only needed during compilation)
	for (list<VuBaseBlock*>::iterator itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); ++itblock)
	{
		delete *itblock;
	}
	s_listBlocks.clear();

	pxAssertDev(s_recVUPtr[vuindex] < s_recVUMem[vuindex]->GetPtrEnd(), "SuperVU recompiler cache exceeded! (possible memory corruption)");

	return s_pFnHeader;
}
925
926 static int _recbranchAddr(u32 vucode)
927 {
928 s32 bpc = pc + (_Imm11_ << 3);
929 /*
930 if ( bpc < 0 ) {
931 Console.WriteLn("zerorec branch warning: bpc < 0 ( %x ); Using unsigned imm11", bpc);
932 bpc = pc + (_UImm11_ << 3);
933 }*/
934 bpc &= (s_MemSize[s_vu] - 1);
935
936 return bpc;
937 }
938
939 // return inst that flushes everything
940 static VuInstruction SuperVUFlushInst()
941 {
942 VuInstruction inst;
943 // don't need to read q/p
944 inst.type = INST_DUMMY_;//|INST_Q_READ|INST_P_READ;
945 return inst;
946 }
947
// Splices the pending flag writebacks into pblock's instruction list as dummy
// insts, positioned after the real insts that execute before each writeback's
// target cycle.
void SuperVUAddWritebacks(VuBaseBlock* pblock, const list<WRITEBACK>& listWritebacks)
{
#ifdef SUPERVU_WRITEBACKS
	// regardless of repetition, add the pipes (for selfloops)
	list<WRITEBACK>::const_iterator itwriteback = listWritebacks.begin();
	list<VuInstruction>::iterator itinst = pblock->insts.begin(), itinst2;

	while (itwriteback != listWritebacks.end())
	{
		// advance past real insts that execute before this writeback's cycle
		// (and past any previously inserted dummy insts)
		if (itinst != pblock->insts.end() && (itinst->info.cycle < itwriteback->cycle || (itinst->type&INST_DUMMY)))
		{
			++itinst;
			continue;
		}

		itinst2 = pblock->insts.insert(itinst, VuInstruction());
		// NOTE(review): the inserted inst is stamped with the global 'vucycle'
		// rather than itwriteback->cycle -- confirm this is intentional.
		itwriteback->InitInst(&(*itinst2), vucycle);
		++itwriteback;
	}
#endif
}
969
#ifdef SUPERVU_VIBRANCHDELAY
// Walks backward from branch inst pInst looking for the earliest instruction
// whose VI write falls inside the branch's delay window; the branch must read
// the value from *before* that write.  Returns NULL if no delay applies.
static VuInstruction* getDelayInst(VuInstruction* pInst)
{
	// check for the N cycle branch delay
	// example of 2 cycles delay (monster house) :
	// sqi vi05
	// sqi vi05
	// ibeq vi05, vi03
	// The ibeq should read the vi05 before the first sqi

	//more info:

	// iaddiu vi01, 0, 1
	// ibeq vi01, 0 <- reads vi01 before the iaddiu

	// iaddiu vi01, 0, 1
	// iaddiu vi01, vi01, 1
	// iaddiu vi01, vi01, 1
	// ibeq vi01, 0 <- reads vi01 before the last two iaddiu's (so the value read is 1)

	// ilw vi02, addr
	// iaddiu vi01, 0, 1
	// ibeq vi01, vi02 <- reads current values of both vi01 and vi02 because the branch instruction stalls

	int delay = 1;
	VuInstruction* pDelayInst = NULL;
	VuInstruction* pTargetInst = pInst->pPrevInst;
	while (1)
	{
		// candidate must be exactly 'delay' cycles before the branch, be an
		// IALU/FMAC op writing a VI the branch reads, keep writing the same
		// VI set as the chain so far, and not read any status/mac/clip flags
		if (pTargetInst != NULL
		        && pTargetInst->info.cycle + delay == pInst->info.cycle
		        && (pTargetInst->regs[0].pipe == VUPIPE_IALU || pTargetInst->regs[0].pipe == VUPIPE_FMAC)
		        && ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff)
		        && (delay == 1 || ((pTargetInst->regs[0].VIwrite & pInst->regs[0].VIread) & 0xffff) == ((pTargetInst->regs[0].VIwrite & pInst->pPrevInst->regs[0].VIread) & 0xffff))
		        && !(pTargetInst->regs[0].VIread&((1 << REG_STATUS_FLAG) | (1 << REG_MAC_FLAG) | (1 << REG_CLIP_FLAG))))
		{
			pDelayInst = pTargetInst;
			pTargetInst = pTargetInst->pPrevInst;
			delay++;
			if (delay == 5) //maximum delay is 4 (length of the pipeline)
			{
				DevCon.WriteLn("supervu: cycle branch delay maximum (4) is reached");
				break;
			}
		}
		else break;
	}
	if (delay > 1) DevCon.WriteLn("supervu: %d cycle branch delay detected: %x %x", delay - 1, pc, s_pFnHeader->startpc);
	return pDelayInst;
}
#endif
1021
// Recursively builds the control-flow graph of VuBaseBlocks starting at
// startpc (an offset into VU micro memory, masked to the VU's memory size).
//
//  - If a block already starts at startpc it is reused (after merging the
//    incoming writebacks).
//  - If startpc lands in the middle of an existing block, that block is
//    split in two and the new tail block returned.
//  - Otherwise the code is analyzed in two passes: a quick scan that only
//    detects branches and FSAND status-flag usage, then the full
//    SuperVUAnalyzeOp pass that records per-instruction register/pipeline
//    info and schedules flag/Q writeback pseudo-instructions.
//  Branch/jump targets found at the block end are built recursively and
//  linked as successor blocks. Uses/updates globals pc, branch, vucycle.
//
// Returns the block that begins at startpc.
static VuBaseBlock* SuperVUBuildBlocks(VuBaseBlock* parent, u32 startpc, const VUPIPELINES& pipes)
{
	// check if block already exists
	//Console.WriteLn("startpc %x", startpc);
	startpc &= (s_vu ? 0x3fff : 0xfff);
	VuBlockHeader* pbh = &recVUBlocks[s_vu][startpc/8];

	if (pbh->pblock != NULL)
	{

		VuBaseBlock* pblock = pbh->pblock;
		list<VuInstruction>::iterator itinst;

		if (pblock->startpc == startpc)
		{
			SuperVUAddWritebacks(pblock, pipes.listWritebacks);
			return pblock;
		}

		// have to divide the blocks, pnewblock is first block
		pxAssert(startpc > pblock->startpc);
		pxAssert(startpc < pblock->endpc);

		u32 dummyinst = (startpc - pblock->startpc) >> 3;

		// count inst non-dummy insts
		// (dummy writeback insts don't correspond to a pc, so skip them)
		itinst = pblock->insts.begin();
		int cycleoff = 0;

		while (dummyinst > 0)
		{
			if (itinst->type & INST_DUMMY)
				++itinst;
			else
			{
				cycleoff = itinst->info.cycle;
				++itinst;
				--dummyinst;
			}
		}

		// NOTE: still leaves insts with their writebacks in different blocks
		while (itinst->type & INST_DUMMY)
			++itinst;

		// the difference in cycles between dummy insts (naruto ultimate ninja)
		int cyclediff = 0;
		if (parent == pblock)
			cyclediff = itinst->info.cycle - cycleoff;
		cycleoff = itinst->info.cycle;

		// new block: takes the tail [startpc, old endpc) of the split block,
		// including its successor links and instructions
		VuBaseBlock* pnewblock = new VuBaseBlock();
		s_listBlocks.push_back(pnewblock);

		pnewblock->startpc = startpc;
		pnewblock->endpc = pblock->endpc;
		pnewblock->cycles = pblock->cycles - cycleoff + cyclediff;

		pnewblock->blocks.splice(pnewblock->blocks.end(), pblock->blocks);
		pnewblock->insts.splice(pnewblock->insts.end(), pblock->insts, itinst, pblock->insts.end());
		pnewblock->type = pblock->type;

		// any writebacks in the next 3 cycles also belong to original block
		// for(itinst = pnewblock->insts.begin(); itinst != pnewblock->insts.end(); ) {
		//   if( (itinst->type & INST_DUMMY) && itinst->nParentPc >= 0 && itinst->nParentPc < (int)startpc ) {
		//
		//     if( !(itinst->type & INST_Q_WRITE) )
		//       pblock->insts.push_back(*itinst);
		//     itinst = pnewblock->insts.erase(itinst);
		//     continue;
		//   }
		//
		//   ++itinst;
		// }

		// repoint the per-pc block headers of the tail range to the new block
		pbh = &recVUBlocks[s_vu][startpc/8];
		for (u32 inst = startpc; inst < pblock->endpc; inst += 8)
		{
			if (pbh->pblock == pblock)
				pbh->pblock = pnewblock;
			++pbh;
		}

		// rebase cycle counts so the new block starts at cycle 0
		for(itinst = pnewblock->insts.begin(); itinst != pnewblock->insts.end(); itinst++)
		{
			itinst->info.cycle -= cycleoff;
		}

		SuperVUAddWritebacks(pnewblock, pipes.listWritebacks);

		// old block: truncated to [old startpc, startpc), falls through into pnewblock
		pblock->blocks.push_back(pnewblock);
		pblock->endpc = startpc;
		pblock->cycles = cycleoff;
		pblock->type &= BLOCKTYPE_MACFLAGS;
		//pblock->insts.push_back(SuperVUFlushInst()); //don't need

		return pnewblock;
	}

	// no existing block: create and analyze a fresh one
	VuBaseBlock* pblock = new VuBaseBlock();
	s_listBlocks.push_back(pblock);

	int i = 0;
	branch = 0;
	pc = startpc;
	pblock->startpc = startpc;

	// clear stalls (might be a prob)
	memcpy(VU->fmac, pipes.fmac, sizeof(pipes.fmac));
	memcpy(&VU->fdiv, &pipes.fdiv, sizeof(pipes.fdiv));
	memcpy(&VU->efu, &pipes.efu, sizeof(pipes.efu));
	memcpy(VU->ialu, pipes.ialu, sizeof(pipes.ialu));
	// memset(VU->fmac, 0, sizeof(VU->fmac));
	// memset(&VU->fdiv, 0, sizeof(VU->fdiv));
	// memset(&VU->efu, 0, sizeof(VU->efu));

	vucycle = 0;

	u8 macflags = 0;

	list< WRITEBACK > listWritebacks;
	list< WRITEBACK >::iterator itwriteback;
	list<VuInstruction>::iterator itinst;
	u32 hasSecondBranch = 0;
	u32 needFullStatusFlag = 0;

#ifdef SUPERVU_WRITEBACKS
	listWritebacks = pipes.listWritebacks;
#endif

	// first analysis pass for status flags
	// (only decodes enough of each upper/lower pair to find the terminating
	// branch and whether FSAND needs full sticky status bits)
	while (1)
	{
		u32* ptr = (u32*) & VU->Micro[pc];
		pc += 8;
		int prevbranch = branch;

		if (ptr[1] & 0x40000000) // upper-instruction E bit
			branch = 1;

		if (!(ptr[1] & 0x80000000))   // not I
		{
			switch (ptr[0] >> 25)
			{
				case 0x24: // jr
				case 0x25: // jalr
				case 0x20: // B
				case 0x21: // BAL
				case 0x28: // IBEQ
				case 0x2f: // IBGEZ
				case 0x2d: // IBGTZ
				case 0x2e: // IBLEZ
				case 0x2c: // IBLTZ
				case 0x29: // IBNE
					branch = 1;
					break;

				case 0x14: // fseq
				case 0x17: // fsor
					//needFullStatusFlag = 2;
					break;

				case 0x16: // fsand
					if ((ptr[0]&0xc0))
					{
						// sometimes full sticky bits are needed (simple series 2000 - oane chapara)
						//Console.WriteLn("needSticky: %x-%x", s_pFnHeader->startpc, startpc);
						needFullStatusFlag = 2;
					}
					break;
			}
		}

		if (prevbranch) // branch delay slot processed, block ends here
			break;

		if (pc >= s_MemSize[s_vu])
		{
			Console.Error("inf vu0 prog %x", startpc);
			break;
		}
	}

	// second full pass
	pc = startpc;
	branch = 0;
	VuInstruction* pprevinst = NULL, *pinst = NULL;

	while (1)
	{

		if (pc == s_MemSize[s_vu])
		{
			branch |= 8; // ran off the end of micro memory: end of program
			break;
		}

		// fell into an already-built block: link it as successor and stop
		if (!branch && pbh->pblock != NULL)
		{
			pblock->blocks.push_back(pbh->pblock);
			break;
		}

		int prevbranch = branch;

		if (!prevbranch)
		{
			pbh->pblock = pblock;
		}
		else pxAssert(prevbranch || pbh->pblock == NULL);

		pblock->insts.push_back(VuInstruction());

		pprevinst = pinst;
		pinst = &pblock->insts.back();
		pinst->pPrevInst = pprevinst;
		SuperVUAnalyzeOp(VU, &pinst->info, pinst->regs);

#ifdef SUPERVU_VIBRANCHDELAY
		if (pinst->regs[0].pipe == VUPIPE_BRANCH && pblock->insts.size() > 1)
		{

			VuInstruction* pdelayinst = getDelayInst(pinst);
			if (pdelayinst)
			{
				pdelayinst->type |= INST_CACHE_VI;

				// find the correct register
				u32 mask = pdelayinst->regs[0].VIwrite & pinst->regs[0].VIread;
				for (int i = 0; i < 16; ++i)
				{
					if (mask & (1 << i))
					{
						pdelayinst->vicached = i;
						break;
					}
				}

				pinst->vicached = pdelayinst->vicached;
			}
		}
#endif

		if (prevbranch)
		{
			// this inst is in the branch delay slot
			if (pinst->regs[0].pipe == VUPIPE_BRANCH)
				hasSecondBranch = 1;
			pinst->type |= INST_BRANCH_DELAY;
		}

		// check write back: emit any pending flag writebacks whose cycle has arrived
		for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end();)
		{
			if (pinst->info.cycle >= itwriteback->cycle)
			{
				itinst = pblock->insts.insert(--pblock->insts.end(), VuInstruction());
				itwriteback->InitInst(&(*itinst), pinst->info.cycle);
				itwriteback = listWritebacks.erase(itwriteback);
			}
			else ++itwriteback;
		}

		// add new writebacks
		WRITEBACK w;
		const u32 allflags = (1 << REG_CLIP_FLAG) | (1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG);
		for (int j = 0; j < 2; ++j) w.viwrite[j] = pinst->regs[j].VIwrite & allflags;

		if (pinst->info.macflag & VUOP_WRITE) w.viwrite[1] |= (1 << REG_MAC_FLAG);
		if (pinst->info.statusflag & VUOP_WRITE) w.viwrite[1] |= (1 << REG_STATUS_FLAG);

		if ((pinst->info.macflag | pinst->info.statusflag) & VUOP_READ)
			macflags = 1;
		if (pinst->regs[0].VIread & ((1 << REG_MAC_FLAG) | (1 << REG_STATUS_FLAG)))
			macflags = 1;

		//  if( pinst->regs[1].pipe == VUPIPE_FMAC && (pinst->regs[1].VFwrite==0&&!(pinst->regs[1].VIwrite&(1<<REG_ACC_FLAG))) )
		//   pinst->regs[0].VIread |= (1<<REG_MAC_FLAG)|(1<<REG_STATUS_FLAG);
		// uregs->VIwrite |= lregs->VIwrite & (1<<REG_STATUS_FLAG);

		// instruction writes a flag: queue a writeback 4 cycles later and
		// strip the flag bits from the instruction itself
		if (w.viwrite[0] | w.viwrite[1])
		{

			// only if coming from fmac pipeline
			if (((pinst->info.statusflag&VUOP_WRITE) && !(pinst->regs[0].VIwrite&(1 << REG_STATUS_FLAG))) && needFullStatusFlag)
			{
				// don't read if first inst
				if (needFullStatusFlag == 1)
					w.viread[1] |= (1 << REG_STATUS_FLAG);
				else --needFullStatusFlag;
			}

			for (int j = 0; j < 2; ++j)
			{
				w.viread[j] |= pinst->regs[j].VIread & allflags;

				if ((pinst->regs[j].VIread&(1 << REG_STATUS_FLAG)) && (pinst->regs[j].VIwrite&(1 << REG_STATUS_FLAG)))
				{
					// don't need the read anymore
					pinst->regs[j].VIread &= ~(1 << REG_STATUS_FLAG);
				}
				if ((pinst->regs[j].VIread&(1 << REG_MAC_FLAG)) && (pinst->regs[j].VIwrite&(1 << REG_MAC_FLAG)))
				{
					// don't need the read anymore
					pinst->regs[j].VIread &= ~(1 << REG_MAC_FLAG);
				}

				pinst->regs[j].VIwrite &= ~allflags;
			}

			if (pinst->info.macflag & VUOP_READ) w.viread[1] |= 1 << REG_MAC_FLAG;
			if (pinst->info.statusflag & VUOP_READ) w.viread[1] |= 1 << REG_STATUS_FLAG;

			w.nParentPc = pc - 8;
			w.cycle = pinst->info.cycle + 4;
			listWritebacks.push_back(w);
		}

		if (pinst->info.q&VUOP_READ) pinst->type |= INST_Q_READ;
		if (pinst->info.p&VUOP_READ) pinst->type |= INST_P_READ;

		if (pinst->info.q&VUOP_WRITE)
		{
			pinst->pqcycles = QWaitTimes[pinst->info.pqinst] + 1;

			memset(&w, 0, sizeof(w));
			w.nParentPc = pc - 8;
			w.cycle = pinst->info.cycle + pinst->pqcycles;
			w.viwrite[0] = 1 << REG_Q;
			listWritebacks.push_back(w);
		}
		if (pinst->info.p&VUOP_WRITE)
			pinst->pqcycles = PWaitTimes[pinst->info.pqinst] + 1;

		if (prevbranch)
		{
			break;
		}

		// make sure there is always a branch
		// sensible soccer overflows on vu0, so increase the limit...
		if ((s_vu == 1 && i >= 0x799) || (s_vu == 0 && i >= 0x201))
		{
			Console.Error("VuRec base block doesn't terminate!");
			pxAssert(0);
			break;
		}

		i++;
		pbh++;
	}

	if (macflags)
		pblock->type |= BLOCKTYPE_MACFLAGS;

	pblock->endpc = pc;
	u32 lastpc = pc;

	pblock->cycles = vucycle;

#ifdef SUPERVU_WRITEBACKS
	if (!branch || (branch&8))
#endif
	{
		// flush writebacks
		if (listWritebacks.size() > 0)
		{
			listWritebacks.sort(WRITEBACK::SortWritebacks);
			for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ++itwriteback)
			{
				if (itwriteback->viwrite[0] & (1 << REG_Q))
				{
					// ignore all Q writebacks
					continue;
				}

				pblock->insts.push_back(VuInstruction());
				itwriteback->InitInst(&pblock->insts.back(), vucycle);
			}

			listWritebacks.clear();
		}
	}

	if (!branch) return pblock;

	if (branch & 8)
	{
		// what if also a jump?
		pblock->type |= BLOCKTYPE_EOP | BLOCKTYPE_HASEOP;

		// add an instruction to flush p and q (if written)
		pblock->insts.push_back(SuperVUFlushInst());
		return pblock;
	}

	// it is a (cond) branch or a jump
	u32 vucode = *(u32*)(VU->Micro + lastpc - 16);
	int bpc = _recbranchAddr(vucode) - 8;

	// snapshot the pipeline state, rebased to cycle 0, for the successor blocks
	VUPIPELINES newpipes;
	memcpy(newpipes.fmac, VU->fmac, sizeof(newpipes.fmac));
	memcpy(&newpipes.fdiv, &VU->fdiv, sizeof(newpipes.fdiv));
	memcpy(&newpipes.efu, &VU->efu, sizeof(newpipes.efu));
	memcpy(newpipes.ialu, VU->ialu, sizeof(newpipes.ialu));

	for (i = 0; i < 8; ++i) newpipes.fmac[i].sCycle -= vucycle;
	newpipes.fdiv.sCycle -= vucycle;
	newpipes.efu.sCycle -= vucycle;
	for (i = 0; i < 8; ++i) newpipes.ialu[i].sCycle -= vucycle;

	if (listWritebacks.size() > 0)
	{
		// flush all when jumping, send down the pipe when in branching
		bool bFlushWritebacks = (vucode >> 25) == 0x24 || (vucode >> 25) == 0x25; //||(vucode>>25)==0x20||(vucode>>25)==0x21;

		listWritebacks.sort(WRITEBACK::SortWritebacks);
		for (itwriteback = listWritebacks.begin(); itwriteback != listWritebacks.end(); ++itwriteback)
		{
			if (itwriteback->viwrite[0] & (1 << REG_Q))
			{
				// ignore all Q writebacks
				continue;
			}

			if (itwriteback->cycle < vucycle || bFlushWritebacks)
			{
				pblock->insts.push_back(VuInstruction());
				itwriteback->InitInst(&pblock->insts.back(), vucycle);
			}
			else
			{
				newpipes.listWritebacks.push_back(*itwriteback);
				newpipes.listWritebacks.back().cycle -= vucycle;
			}
		}
	}

	if (newpipes.listWritebacks.size() > 0)  // other blocks might read the mac flags
		pblock->type |= BLOCKTYPE_MACFLAGS;

	// build the successor blocks; note each recursive call can split the
	// current block, so pblock is re-fetched from recVUBlocks afterwards
	u32 firstbranch = vucode >> 25;
	switch (firstbranch)
	{
		case 0x24: // jr
			pblock->type |= BLOCKTYPE_EOP; // jump out of procedure, since not returning, set EOP
			pblock->insts.push_back(SuperVUFlushInst());
			firstbranch = 0xff; //Non-Conditional Jump
			break;

		case 0x25: // jalr
		{
			// linking, so will return to procedure
			pblock->insts.push_back(SuperVUFlushInst());

			VuBaseBlock* pjumpblock = SuperVUBuildBlocks(pblock, lastpc, newpipes);

			// update pblock since could have changed
			pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
			pxAssert(pblock != NULL);

			pblock->blocks.push_back(pjumpblock);
			firstbranch = 0xff; //Non-Conditional Jump
			break;
		}
		case 0x20: // B
		{
			VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);

			// update pblock since could have changed
			pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
			pxAssert(pblock != NULL);

			pblock->blocks.push_back(pbranchblock);
			firstbranch = 0xff; //Non-Conditional Jump
			break;
		}
		case 0x21: // BAL
		{
			VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);

			// update pblock since could have changed
			pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
			pxAssert(pblock != NULL);
			pblock->blocks.push_back(pbranchblock);
			firstbranch = 0xff; //Non-Conditional Jump
			break;
		}
		case 0x28: // IBEQ
		case 0x2f: // IBGEZ
		case 0x2d: // IBGTZ
		case 0x2e: // IBLEZ
		case 0x2c: // IBLTZ
		case 0x29: // IBNE
		{
			VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);

			// update pblock since could have changed
			pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
			pxAssert(pblock != NULL);
			pblock->blocks.push_back(pbranchblock);

			// if has a second branch that is B or BAL, skip this
			u32 secondbranch = (*(u32*)(VU->Micro + lastpc - 8)) >> 25;
			if (!hasSecondBranch || (secondbranch != 0x21 && secondbranch != 0x20))
			{
				// fall-through successor
				pbranchblock = SuperVUBuildBlocks(pblock, lastpc, newpipes);

				pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
				pblock->blocks.push_back(pbranchblock);
			}

			break;
		}
		default:
			pxAssert(pblock->blocks.size() == 1);
			break;
	}

	pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;

#ifdef SUPERVU_VIBRANCHDELAY
	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
	///// NOTE! This could still be a hack for KH2/GoW, but until we know how it properly works, this will do for now.///
	/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

	if (hasSecondBranch && firstbranch != 0xff)  //check the previous jump was conditional and there is a second branch
	{
#else
	if (hasSecondBranch)
	{
#endif

		// a second branch sits in the first branch's delay slot; build its targets too
		u32 vucode = *(u32*)(VU->Micro + lastpc - 8);
		pc = lastpc;
		int bpc = _recbranchAddr(vucode);

		switch (vucode >> 25)
		{
			case 0x24: // jr
				Console.Error("svurec bad jr jump!");
				pxAssert(0);
				break;

			case 0x25: // jalr
			{
				Console.Error("svurec bad jalr jump!");
				pxAssert(0);
				break;
			}
			case 0x20: // B
			{
				VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);

				// update pblock since could have changed
				pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;

				pblock->blocks.push_back(pbranchblock);
				break;
			}
			case 0x21: // BAL
			{
				VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);

				// replace instead of pushing a new block
				pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
				pblock->blocks.push_back(pbranchblock);
				break;
			}
			case 0x28: // IBEQ
			case 0x2f: // IBGEZ
			case 0x2d: // IBGTZ
			case 0x2e: // IBLEZ
			case 0x2c: // IBLTZ
			case 0x29: // IBNE
			{
				VuBaseBlock* pbranchblock = SuperVUBuildBlocks(pblock, bpc, newpipes);

				// update pblock since could have changed
				pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
				pblock->blocks.push_back(pbranchblock);

				// only add the block if the previous branch doesn't include the next instruction (ie, if a direct jump)
				if (firstbranch == 0x24 || firstbranch == 0x25 || firstbranch == 0x20 || firstbranch == 0x21)
				{
					pbranchblock = SuperVUBuildBlocks(pblock, lastpc, newpipes);

					pblock = recVUBlocks[s_vu][lastpc/8-2].pblock;
					pblock->blocks.push_back(pbranchblock);
				}

				break;
			}

			jNO_DEFAULT;
		}
	}

	return recVUBlocks[s_vu][startpc/8].pblock;
}
1623
// Per-block initialization for the liveness analysis. For each instruction
// (regs[0] = lower op, regs[1] = upper op) it computes:
//   addvars[0/1]  - VI(+flags) / VF registers read by the instruction
//   keepvars[0/1] - complement of fully-overwritten registers (not killed here)
//   usedvars[0/1] - registers read or written
// Dummy (writeback) instructions get all-ones add/live/keep masks and zero
// usedvars so they never kill liveness. A partial VF write (VFwxyzw != 0xf)
// counts as a read of the old value, not a kill. Finally usedvars is
// accumulated backwards so each instruction's usedvars covers everything
// used from it to the end of the block.
static void SuperVUInitLiveness(VuBaseBlock* pblock)
{
	list<VuInstruction>::iterator itinst, itnext;

	pxAssert(pblock->insts.size() > 0);

	for (itinst = pblock->insts.begin(); itinst != pblock->insts.end(); ++itinst)
	{

		if (itinst->type & INST_DUMMY_)
		{
			itinst->addvars[0] = itinst->addvars[1] = 0xffffffff;
			itinst->livevars[0] = itinst->livevars[1] = 0xffffffff;
			itinst->keepvars[0] = itinst->keepvars[1] = 0xffffffff;
			itinst->usedvars[0] = itinst->usedvars[1] = 0;
		}
		else
		{
			itinst->addvars[0] = itinst->regs[0].VIread | itinst->regs[1].VIread;
			itinst->addvars[1] = (itinst->regs[0].VFread0 ? (1 << itinst->regs[0].VFread0) : 0) |
			                     (itinst->regs[0].VFread1 ? (1 << itinst->regs[0].VFread1) : 0) |
			                     (itinst->regs[1].VFread0 ? (1 << itinst->regs[1].VFread0) : 0) |
			                     (itinst->regs[1].VFread1 ? (1 << itinst->regs[1].VFread1) : 0);

			// vf0 is not handled by VFread (a REG_VF0_FLAG bit in VIread marks it instead)
			if (!itinst->regs[0].VFread0 && (itinst->regs[0].VIread & (1 << REG_VF0_FLAG))) itinst->addvars[1] |= 1;
			if (!itinst->regs[1].VFread0 && (itinst->regs[1].VIread & (1 << REG_VF0_FLAG))) itinst->addvars[1] |= 1;
			if (!itinst->regs[0].VFread1 && (itinst->regs[0].VIread & (1 << REG_VF0_FLAG)) && itinst->regs[0].VFr1xyzw != 0xff) itinst->addvars[1] |= 1;
			if (!itinst->regs[1].VFread1 && (itinst->regs[1].VIread & (1 << REG_VF0_FLAG)) && itinst->regs[1].VFr1xyzw != 0xff) itinst->addvars[1] |= 1;


			// only a full xyzw write kills a VF register; partial writes read it
			u32 vfwrite = 0;
			if (itinst->regs[0].VFwrite != 0)
			{
				if (itinst->regs[0].VFwxyzw != 0xf) itinst->addvars[1] |= 1 << itinst->regs[0].VFwrite;
				else vfwrite |= 1 << itinst->regs[0].VFwrite;
			}
			if (itinst->regs[1].VFwrite != 0)
			{
				if (itinst->regs[1].VFwxyzw != 0xf) itinst->addvars[1] |= 1 << itinst->regs[1].VFwrite;
				else vfwrite |= 1 << itinst->regs[1].VFwrite;
			}
			// partial ACC write also reads the old ACC
			if ((itinst->regs[1].VIwrite & (1 << REG_ACC_FLAG)) && itinst->regs[1].VFwxyzw != 0xf)
				itinst->addvars[1] |= 1 << REG_ACC_FLAG;

			u32 viwrite = (itinst->regs[0].VIwrite | itinst->regs[1].VIwrite);

			itinst->usedvars[0] = itinst->addvars[0] | viwrite;
			itinst->usedvars[1] = itinst->addvars[1] | vfwrite;

			//  itinst->addvars[0] &= ~viwrite;
			//  itinst->addvars[1] &= ~vfwrite;
			itinst->keepvars[0] = ~viwrite;
			itinst->keepvars[1] = ~vfwrite;
		}
	}

	// accumulate usedvars backwards: each inst sees everything used after it
	itinst = --pblock->insts.end();
	while (itinst != pblock->insts.begin())
	{
		itnext = itinst;
		--itnext;

		itnext->usedvars[0] |= itinst->usedvars[0];
		itnext->usedvars[1] |= itinst->usedvars[1];

		itinst = itnext;
	}
}
1693
1694 u32 COMPUTE_LIVE(u32 R, u32 K, u32 L)
1695 {
1696 u32 live = R | ((L) & (K));
1697 // special process mac and status flags
1698 // only propagate liveness if doesn't write to the flag
1699 if (!(L&(1 << REG_STATUS_FLAG)) && !(K&(1 << REG_STATUS_FLAG)))
1700 live &= ~(1 << REG_STATUS_FLAG);
1701 if (!(L&(1 << REG_MAC_FLAG)) && !(K&(1 << REG_MAC_FLAG)))
1702 live &= ~(1 << REG_MAC_FLAG);
1703 return live;//|(1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG);
1704 }
1705
// Global liveness analysis over all blocks in s_listBlocks: a classic
// backwards dataflow fixed-point. Each block's last instruction takes the
// union of the livevars of its successors' first instructions, then
// COMPUTE_LIVE propagates liveness backwards through the block's
// instruction list. The whole sweep repeats until no livevars mask changes.
static void SuperVULivenessAnalysis()
{
	BOOL changed;
	list<VuBaseBlock*>::reverse_iterator itblock;
	list<VuInstruction>::iterator itinst, itnext;
	VuBaseBlock::LISTBLOCKS::iterator itchild;

	u32 livevars[2];

	do
	{
		changed = FALSE;
		// reverse block order converges faster for backwards dataflow
		for (itblock = s_listBlocks.rbegin(); itblock != s_listBlocks.rend(); ++itblock)
		{

			u32 newlive;
			VuBaseBlock* pb = *itblock;

			// the last inst relies on the neighbor's insts
			itinst = --pb->insts.end();

			if (pb->blocks.size() > 0)
			{
				// union of all successors' live-in sets
				livevars[0] = 0;
				livevars[1] = 0;
				for (itchild = pb->blocks.begin(); itchild != pb->blocks.end(); ++itchild)
				{
					VuInstruction& front = (*itchild)->insts.front();
					livevars[0] |= front.livevars[0];
					livevars[1] |= front.livevars[1];
				}

				newlive = COMPUTE_LIVE(itinst->addvars[0], itinst->keepvars[0], livevars[0]);

				// should propagate status flags whose parent insts are not in this block
				//  if( itinst->nParentPc >= 0 && (itinst->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) )
				//   newlive |= livevars[0]&((1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG));

				if (itinst->livevars[0] != newlive)
				{
					changed = TRUE;
					itinst->livevars[0] = newlive;
				}

				newlive = COMPUTE_LIVE(itinst->addvars[1], itinst->keepvars[1], livevars[1]);
				if (itinst->livevars[1] != newlive)
				{
					changed = TRUE;
					itinst->livevars[1] = newlive;
				}
			}

			// propagate backwards through the block
			while (itinst != pb->insts.begin())
			{

				itnext = itinst;
				--itnext;

				newlive = COMPUTE_LIVE(itnext->addvars[0], itnext->keepvars[0], itinst->livevars[0]);

				// should propagate status flags whose parent insts are not in this block
				//  if( itnext->nParentPc >= 0 && (itnext->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) && !(itinst->type & (INST_STATUS_WRITE|INST_MAC_WRITE)) )
				//   newlive |= itinst->livevars[0]&((1<<REG_STATUS_FLAG)|(1<<REG_MAC_FLAG));

				if (itnext->livevars[0] != newlive)
				{
					changed = TRUE;
					itnext->livevars[0] = newlive;
					itnext->livevars[1] = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]);
				}
				else
				{
					newlive = COMPUTE_LIVE(itnext->addvars[1], itnext->keepvars[1], itinst->livevars[1]);
					if (itnext->livevars[1] != newlive)
					{
						changed = TRUE;
						itnext->livevars[1] = newlive;
					}
				}

				itinst = itnext;
			}

			//  if( (livevars[0] | itinst->livevars[0]) != itinst->livevars[0] ) {
			//   changed = TRUE;
			//   itinst->livevars[0] |= livevars[0];
			//  }
			//  if( (livevars[1] | itinst->livevars[1]) != itinst->livevars[1] ) {
			//   changed = TRUE;
			//   itinst->livevars[1] |= livevars[1];
			//  }
			//
			//  while( itinst != pb->insts.begin() ) {
			//
			//   itnext = itinst; --itnext;
			//   if( (itnext->livevars[0] | (itinst->livevars[0] & itnext->keepvars[0])) != itnext->livevars[0] ) {
			//    changed = TRUE;
			//    itnext->livevars[0] |= itinst->livevars[0] & itnext->keepvars[0];
			//    itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1];
			//   }
			//   else if( (itnext->livevars[1] | (itinst->livevars[1] & itnext->keepvars[1])) != itnext->livevars[1] ) {
			//    changed = TRUE;
			//    itnext->livevars[1] |= itinst->livevars[1] & itnext->keepvars[1];
			//   }
			//
			//   itinst = itnext;
			//  }
		}

	}
	while (changed);
}
1818
// Dead-flag elimination pass. For every flag-writeback (dummy) instruction
// (INST_CLIP_WRITE / INST_MAC_WRITE / INST_STATUS_WRITE) the written flag
// bits are masked against the live variables of the following instruction
// (or, for the block's last dummy, against the union of successors'
// live-ins). Flag writes that remain live are pushed back onto the parent
// (real) instruction's VIwrite so the recompiler emits the flag update
// there; dead ones are stripped from the dummy's type, and dummies whose
// type drops to zero are erased from the block.
static void SuperVUEliminateDeadCode()
{
	list<VuBaseBlock*>::iterator itblock;
	VuBaseBlock::LISTBLOCKS::iterator itchild;
	list<VuInstruction>::iterator itinst, itnext;
	list<VuInstruction*> listParents;
	list<VuInstruction*>::iterator itparent;

	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
	{

#ifdef PCSX2_DEBUG
		u32 startpc = (*itblock)->startpc;
		u32 curpc = startpc;
#endif

		itnext = (*itblock)->insts.begin();
		itinst = itnext++;
		while (itnext != (*itblock)->insts.end())
		{
			if (itinst->type & (INST_CLIP_WRITE | INST_MAC_WRITE | INST_STATUS_WRITE))
			{
				u32 live0 = itnext->livevars[0];
				if (itinst->nParentPc >= 0 && itnext->nParentPc >= 0 && itinst->nParentPc != itnext->nParentPc)   // superman returns
				{
					// take the live vars from the next next inst
					list<VuInstruction>::iterator itnextnext = itnext;
					++itnextnext;
					if (itnextnext != (*itblock)->insts.end())
					{
						live0 = itnextnext->livevars[0];
					}
				}

				// keep only flag writes that are actually live afterwards
				itinst->regs[0].VIwrite &= live0;
				itinst->regs[1].VIwrite &= live0;

				u32 viwrite = itinst->regs[0].VIwrite | itinst->regs[1].VIwrite;

				(*itblock)->GetInstsAtPc(itinst->nParentPc, listParents);
				int removetype = 0;

				for(itparent = listParents.begin(); itparent != listParents.end(); itparent++)
				{
					VuInstruction* parent = *itparent;

					if (viwrite & (1 << REG_CLIP_FLAG))
					{
						parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_CLIP_FLAG));
						parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_CLIP_FLAG));
					}
					else
						removetype |= INST_CLIP_WRITE;

					if (parent->info.macflag && (itinst->type & INST_MAC_WRITE))
					{
						if (!(viwrite&(1 << REG_MAC_FLAG)))
						{
							//parent->info.macflag = 0;
							//   parent->regs[0].VIwrite &= ~(1<<REG_MAC_FLAG);
							//   parent->regs[1].VIwrite &= ~(1<<REG_MAC_FLAG);
							// can be nonzero when a writeback belongs to a different block and one branch uses
							// it and this one doesn't
#ifndef SUPERVU_WRITEBACKS
							pxAssert(!(parent->regs[0].VIwrite & (1 << REG_MAC_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_MAC_FLAG)));
#endif
							// if VUPIPE_FMAC and destination is vf00, probably need to keep the mac flag
							if (parent->regs[1].pipe == VUPIPE_FMAC && (parent->regs[1].VFwrite == 0 && !(parent->regs[1].VIwrite&(1 << REG_ACC_FLAG))))
							{
								parent->regs[0].VIwrite |= ((1 << REG_MAC_FLAG));
								parent->regs[1].VIwrite |= ((1 << REG_MAC_FLAG));
							}
							else
								removetype |= INST_MAC_WRITE;
						}
						else
						{
							parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_MAC_FLAG));
							parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_MAC_FLAG));
						}
					}
					else removetype |= INST_MAC_WRITE;

					if (parent->info.statusflag && (itinst->type & INST_STATUS_WRITE))
					{
						if (!(viwrite&(1 << REG_STATUS_FLAG)))
						{
							//parent->info.statusflag = 0;
							//   parent->regs[0].VIwrite &= ~(1<<REG_STATUS_FLAG);
							//   parent->regs[1].VIwrite &= ~(1<<REG_STATUS_FLAG);

							// can be nonzero when a writeback belongs to a different block and one branch uses
							// it and this one doesn't
#ifndef SUPERVU_WRITEBACKS
							pxAssert(!(parent->regs[0].VIwrite & (1 << REG_STATUS_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_STATUS_FLAG)));
#endif
							if (parent->regs[1].pipe == VUPIPE_FMAC && (parent->regs[1].VFwrite == 0 && !(parent->regs[1].VIwrite&(1 << REG_ACC_FLAG))))
							{
								parent->regs[0].VIwrite |= ((1 << REG_STATUS_FLAG));
								parent->regs[1].VIwrite |= ((1 << REG_STATUS_FLAG));
							}
							else
								removetype |= INST_STATUS_WRITE;
						}
						else
						{
							parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_STATUS_FLAG));
							parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_STATUS_FLAG));
						}
					}
					else removetype |= INST_STATUS_WRITE;
				}

				// erase the dummy entirely when none of its flag writes survived
				itinst->type &= ~removetype;
				if (itinst->type == 0)
				{
					itnext = (*itblock)->insts.erase(itinst);
					itinst = itnext++;
					continue;
				}
			}
#ifdef PCSX2_DEBUG
			else
			{
				curpc += 8;
			}
#endif
			itinst = itnext;
			++itnext;
		}

		// handle the block's final instruction, whose liveness comes from successors
		if (itinst->type & INST_DUMMY)
		{
			// last inst with the children
			u32 mask = 0;
			for (itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); ++itchild)
			{
				mask |= (*itchild)->insts.front().livevars[0];
			}
			itinst->regs[0].VIwrite &= mask;
			itinst->regs[1].VIwrite &= mask;
			u32 viwrite = itinst->regs[0].VIwrite | itinst->regs[1].VIwrite;

			if (itinst->nParentPc >= 0)
			{

				(*itblock)->GetInstsAtPc(itinst->nParentPc, listParents);
				int removetype = 0;

				for(itparent = listParents.begin(); itparent != listParents.end(); itparent++)
				{
					VuInstruction* parent = *itparent;

					if (viwrite & (1 << REG_CLIP_FLAG))
					{
						parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_CLIP_FLAG));
						parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_CLIP_FLAG));
					}
					else removetype |= INST_CLIP_WRITE;

					if (parent->info.macflag && (itinst->type & INST_MAC_WRITE))
					{
						if (!(viwrite&(1 << REG_MAC_FLAG)))
						{
							//parent->info.macflag = 0;
#ifndef SUPERVU_WRITEBACKS
							pxAssert(!(parent->regs[0].VIwrite & (1 << REG_MAC_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_MAC_FLAG)));
#endif
							removetype |= INST_MAC_WRITE;
						}
						else
						{
							parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_MAC_FLAG));
							parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_MAC_FLAG));
						}
					}
					else removetype |= INST_MAC_WRITE;

					if (parent->info.statusflag && (itinst->type & INST_STATUS_WRITE))
					{
						if (!(viwrite&(1 << REG_STATUS_FLAG)))
						{
							//parent->info.statusflag = 0;
#ifndef SUPERVU_WRITEBACKS
							pxAssert(!(parent->regs[0].VIwrite & (1 << REG_STATUS_FLAG)) && !(parent->regs[1].VIwrite & (1 << REG_STATUS_FLAG)));
#endif
							removetype |= INST_STATUS_WRITE;
						}
						else
						{
							parent->regs[0].VIwrite |= (itinst->regs[0].VIwrite & (1 << REG_STATUS_FLAG));
							parent->regs[1].VIwrite |= (itinst->regs[1].VIwrite & (1 << REG_STATUS_FLAG));
						}
					}
					else removetype |= INST_STATUS_WRITE;
				}

				itinst->type &= ~removetype;
				if (itinst->type == 0)
				{
					(*itblock)->insts.erase(itinst);
				}
			}
		}
	}
}
2025
// Assigns SSE (XMM) registers to the VF/ACC operands of every instruction in
// this block, then recurses into all child blocks.  This is an analysis-only
// pass: it drives the XMM allocator (_allocVFtoXMMreg & friends) purely for
// bookkeeping — the assert on x86Ptr at the end verifies no code was emitted.
// Results are recorded per instruction in vfread0/vfread1/vfwrite/vfacc,
// vfflush, and the packed temp-register triple vffree.
void VuBaseBlock::AssignVFRegs()
{
	int i;
	VuBaseBlock::LISTBLOCKS::iterator itchild;
	list<VuBaseBlock*>::iterator itblock;
	list<VuInstruction>::iterator itinst, itnext, itinst2;

	// init the start regs
	if (type & BLOCKTYPE_ANALYZED) return; // nothing changed
	memcpy(xmmregs, startregs, sizeof(xmmregs));

	// NOTE(review): unreachable — the early return above already filtered out
	// BLOCKTYPE_ANALYZED blocks, so this "did the start state change" check
	// can never run.  Left as-is; possibly a remnant of an older re-analysis path.
	if (type & BLOCKTYPE_ANALYZED)
	{
		// check if changed
		for (i = 0; i < iREGCNT_XMM; ++i)
		{
			if (xmmregs[i].inuse != startregs[i].inuse)
				break;
			if (xmmregs[i].inuse && (xmmregs[i].reg != startregs[i].reg || xmmregs[i].type != startregs[i].type))
				break;
		}

		if (i == iREGCNT_XMM) return; // nothing changed
	}

	// used to assert that the allocator calls below emitted no machine code
	u8* oldX86 = x86Ptr;

	for(itinst = insts.begin(); itinst != insts.end(); itinst++)
	{

		if (itinst->type & INST_DUMMY) continue;

		// reserve, go from upper to lower
		// i == 1 is the upper-pipeline slot, i == 0 the lower-pipeline slot
		int lastwrite = -1;

		for (i = 1; i >= 0; --i)
		{
			_VURegsNum* regs = itinst->regs + i;


			// redo the counters so that the proper regs are released
			// counter = 1000 - (distance in instructions to the next use),
			// so the allocator evicts the register with the farthest next use.
			for (int j = 0; j < iREGCNT_XMM; ++j)
			{
				if (xmmregs[j].inuse)
				{
					if (xmmregs[j].type == XMMTYPE_VFREG)
					{
						int count = 0;
						itinst2 = itinst;

						if (i)
						{
							// NOTE(review): this `break` exits the j-loop (not just the
							// scan), skipping counter updates for the remaining XMM regs
							// when the lower slot of the current inst touches this VF reg.
							// Presumably intentional (reg is used "now") — confirm.
							if (itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg)
							{
								itinst2 = insts.end();
								break;
							}
							else
							{
								++count;
								++itinst2;
							}
						}

						// scan forward for the next instruction touching this VF reg
						while (itinst2 != insts.end())
						{
							if (itinst2->regs[0].VFread0 == xmmregs[j].reg || itinst2->regs[0].VFread1 == xmmregs[j].reg || itinst2->regs[0].VFwrite == xmmregs[j].reg ||
							        itinst2->regs[1].VFread0 == xmmregs[j].reg || itinst2->regs[1].VFread1 == xmmregs[j].reg || itinst2->regs[1].VFwrite == xmmregs[j].reg)
								break;

							++count;
							++itinst2;
						}
						xmmregs[j].counter = 1000 - count;
					}
					else
					{
						pxAssert(xmmregs[j].type == XMMTYPE_ACC);

						int count = 0;
						itinst2 = itinst;

						if (i) ++itinst2; // acc isn't used in lower insts

						// scan forward for the next upper-pipeline ACC access
						while (itinst2 != insts.end())
						{
							pxAssert(!((itinst2->regs[0].VIread | itinst2->regs[0].VIwrite) & (1 << REG_ACC_FLAG)));

							if ((itinst2->regs[1].VIread | itinst2->regs[1].VIwrite) & (1 << REG_ACC_FLAG))
								break;

							++count;
							++itinst2;
						}

						xmmregs[j].counter = 1000 - count;
					}
				}
			}

			// mark the regs this slot needs so they aren't evicted mid-alloc
			if (regs->VFread0) _addNeededVFtoXMMreg(regs->VFread0);
			if (regs->VFread1) _addNeededVFtoXMMreg(regs->VFread1);
			if (regs->VFwrite) _addNeededVFtoXMMreg(regs->VFwrite);
			if (regs->VIread & (1 << REG_ACC_FLAG)) _addNeededACCtoXMMreg();
			if (regs->VIread & (1 << REG_VF0_FLAG)) _addNeededVFtoXMMreg(0);

			// alloc
			itinst->vfread0[i] = itinst->vfread1[i] = itinst->vfwrite[i] = itinst->vfacc[i] = -1;
			itinst->vfflush[i] = -1;

			if (regs->VFread0)
				itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, regs->VFread0, 0);
			else if (regs->VIread & (1 << REG_VF0_FLAG))
				itinst->vfread0[i] = _allocVFtoXMMreg(VU, -1, 0, 0);

			if (regs->VFread1)
				itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, regs->VFread1, 0);
			else if ((regs->VIread & (1 << REG_VF0_FLAG)) && regs->VFr1xyzw != 0xff)
				itinst->vfread1[i] = _allocVFtoXMMreg(VU, -1, 0, 0);

			if (regs->VIread & (1 << REG_ACC_FLAG)) itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);

			// When all four components are written (xyzw == 0xf) the old value is
			// dead, so try to reuse one of the operand registers instead of
			// allocating (and loading) a fresh one.
			int reusereg = -1; // 0 - VFwrite, 1 - VFAcc

			if (regs->VFwrite)
			{
				pxAssert(!(regs->VIwrite&(1 << REG_ACC_FLAG)));

				if (regs->VFwxyzw == 0xf)
				{
					itinst->vfwrite[i] = _checkXMMreg(XMMTYPE_VFREG, regs->VFwrite, 0);
					if (itinst->vfwrite[i] < 0) reusereg = 0;
				}
				else
				{
					itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0);
				}
			}
			else if (regs->VIwrite & (1 << REG_ACC_FLAG))
			{

				if (regs->VFwxyzw == 0xf)
				{
					itinst->vfacc[i] = _checkXMMreg(XMMTYPE_ACC, 0, 0);
					if (itinst->vfacc[i] < 0) reusereg = 1;
				}
				else
				{
					itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
				}
			}

			if (reusereg >= 0)
			{
				// reuse: steal an operand's XMM reg whose value dies after this inst
				itnext = itinst;
				itnext++;

				u8 type = reusereg ? XMMTYPE_ACC : XMMTYPE_VFREG;
				u8 reg = reusereg ? 0 : regs->VFwrite;

				// candidate 1: the ACC operand register
				if (itinst->vfacc[i] >= 0 && lastwrite != itinst->vfacc[i] &&
				        (itnext == insts.end() || ((regs->VIread&(1 << REG_ACC_FLAG)) && (!(itnext->usedvars[0]&(1 << REG_ACC_FLAG)) || !(itnext->livevars[0]&(1 << REG_ACC_FLAG))))))
				{

					pxAssert(reusereg == 0);
					if (itnext == insts.end() || (itnext->livevars[0]&(1 << REG_ACC_FLAG))) _freeXMMreg(itinst->vfacc[i]);
					xmmregs[itinst->vfacc[i]].inuse = 1;
					xmmregs[itinst->vfacc[i]].reg = reg;
					xmmregs[itinst->vfacc[i]].type = type;
					xmmregs[itinst->vfacc[i]].mode = 0;
					itinst->vfwrite[i] = itinst->vfacc[i];
				}
				// candidate 2: the first source operand's register
				else if (itinst->vfread0[i] >= 0 && lastwrite != itinst->vfread0[i] &&
				         (itnext == insts.end() || (regs->VFread0 > 0 && (!(itnext->usedvars[1]&(1 << regs->VFread0)) || !(itnext->livevars[1]&(1 << regs->VFread0))))))
				{

					// NOTE(review): `livevars[1] & regs->VFread0` lacks the `1 <<` shift
					// used everywhere else — looks like a latent bug; left untouched.
					if (itnext == insts.end() || (itnext->livevars[1]&regs->VFread0)) _freeXMMreg(itinst->vfread0[i]);

					xmmregs[itinst->vfread0[i]].inuse = 1;
					xmmregs[itinst->vfread0[i]].reg = reg;
					xmmregs[itinst->vfread0[i]].type = type;
					xmmregs[itinst->vfread0[i]].mode = 0;

					if (reusereg)
						itinst->vfacc[i] = itinst->vfread0[i];
					else
						itinst->vfwrite[i] = itinst->vfread0[i];
				}
				// candidate 3: the second source operand's register
				else if (itinst->vfread1[i] >= 0 && lastwrite != itinst->vfread1[i] &&
				         (itnext == insts.end() || (regs->VFread1 > 0 && (!(itnext->usedvars[1]&(1 << regs->VFread1)) || !(itnext->livevars[1]&(1 << regs->VFread1))))))
				{

					// NOTE(review): same missing `1 <<` as the VFread0 case above.
					if (itnext == insts.end() || (itnext->livevars[1]&regs->VFread1)) _freeXMMreg(itinst->vfread1[i]);

					xmmregs[itinst->vfread1[i]].inuse = 1;
					xmmregs[itinst->vfread1[i]].reg = reg;
					xmmregs[itinst->vfread1[i]].type = type;
					xmmregs[itinst->vfread1[i]].mode = 0;
					if (reusereg)
						itinst->vfacc[i] = itinst->vfread1[i];
					else
						itinst->vfwrite[i] = itinst->vfread1[i];
				}
				else
				{
					// no operand register could be stolen; allocate normally
					if (reusereg)
						itinst->vfacc[i] = _allocACCtoXMMreg(VU, -1, 0);
					else
						itinst->vfwrite[i] = _allocVFtoXMMreg(VU, -1, regs->VFwrite, 0);
				}
			}

			// remember the last destination so the other pipeline slot
			// doesn't immediately reuse it
			if (itinst->vfwrite[i] >= 0) lastwrite = itinst->vfwrite[i];
			else if (itinst->vfacc[i] >= 0) lastwrite = itinst->vfacc[i];

			// always alloc at least 1 temp reg
			int free0 = (i || regs->VFwrite || regs->VFread0 || regs->VFread1 || (regs->VIwrite & (1 << REG_ACC_FLAG)) || (regs->VIread & (1 << REG_VF0_FLAG)))
			            ? _allocTempXMMreg(XMMT_FPS, -1) : -1;
			int free1 = 0, free2 = 0;

			// lower slot reads a reg the upper slot writes in the same cycle:
			// reserve a flush temp so the old value can be preserved
			if (i == 0 && itinst->vfwrite[1] >= 0 && (itinst->vfread0[0] == itinst->vfwrite[1] || itinst->vfread1[0] == itinst->vfwrite[1]))
			{
				itinst->vfflush[i] = _allocTempXMMreg(XMMT_FPS, -1);
			}

			if (i == 1 && (regs->VIwrite & (1 << REG_CLIP_FLAG)))
			{
				// CLIP inst, need two extra regs
				if (free0 < 0) free0 = _allocTempXMMreg(XMMT_FPS, -1);

				free1 = _allocTempXMMreg(XMMT_FPS, -1);
				free2 = _allocTempXMMreg(XMMT_FPS, -1);
				_freeXMMreg(free1);
				_freeXMMreg(free2);
			}
			else if (regs->VIwrite & (1 << REG_P))
			{
				// EFU inst, need extra reg
				free1 = _allocTempXMMreg(XMMT_FPS, -1);
				if (free0 == -1) free0 = free1;
				_freeXMMreg(free1);
			}

			if (itinst->vfflush[i] >= 0) _freeXMMreg(itinst->vfflush[i]);
			if (free0 >= 0) _freeXMMreg(free0);

			// pack the three temp regs into one word: free0 in bits 0-3,
			// free1 in bits 8-15, free2 in bits 16-23
			itinst->vffree[i] = (free0 & 0xf) | (free1 << 8) | (free2 << 16);
			if (free0 == -1) itinst->vffree[i] |= VFFREE_INVALID0;

			_clearNeededXMMregs();
		}
	}

	// analysis must not have emitted any machine code
	pxAssert(x86Ptr == oldX86);
	u32 analyzechildren = !(type & BLOCKTYPE_ANALYZED);
	type |= BLOCKTYPE_ANALYZED;

	//memset(endregs, 0, sizeof(endregs));

	if (analyzechildren)
	{
		for(itchild = blocks.begin(); itchild != blocks.end(); itchild++)
		{
			(*itchild)->AssignVFRegs();
		}
	}
}
2294
// The "markov blanket" of a block during VI register assignment: the set of
// parent blocks and child blocks gathered by VuBaseBlock::AssignVIRegs so a
// common x86 GPR mapping can be chosen for the whole neighborhood.
struct MARKOVBLANKET
{
	list<VuBaseBlock*> parents;
	list<VuBaseBlock*> children;
};

// Scratch blanket reused for each analysis pass (cleared per block in
// SuperVUAssignRegs); not thread-safe, but recompilation is single-threaded here.
static MARKOVBLANKET s_markov;
2302
// Builds the markov blanket around this block and picks up to `maxregs` VI
// registers per child block to keep cached in x86 GPRs.
// parent != 0: this block acts as a parent — record it in s_markov.parents and
//              recurse into its children (as children).
// parent == 0: this block is a child — allocate a _x86regs table in
//              s_vecRegArray, fill it with the VI regs most worth caching
//              (used here AND live, preferring regs also used by all parents),
//              then recurse into its parents (as parents).
// Termination is guaranteed by the BLOCKTYPE_ANALYZED / ANALYZEDPARENT flags.
void VuBaseBlock::AssignVIRegs(int parent)
{
	const int maxregs = 6; // max VI regs cached in GPRs per block

	if (parent)
	{
		if ((type&BLOCKTYPE_ANALYZEDPARENT))
			return;

		type |= BLOCKTYPE_ANALYZEDPARENT;
		s_markov.parents.push_back(this);
		for (LISTBLOCKS::iterator it = blocks.begin(); it != blocks.end(); ++it)
		{
			(*it)->AssignVIRegs(0);
		}
		return;
	}

	if ((type&BLOCKTYPE_ANALYZED))
		return;

	// child
	pxAssert(allocX86Regs == -1);
	allocX86Regs = s_vecRegArray.size();
	s_vecRegArray.resize(allocX86Regs + iREGCNT_GPR);

	_x86regs* pregs = &s_vecRegArray[allocX86Regs];
	memset(pregs, 0, sizeof(_x86regs)*iREGCNT_GPR);

	pxAssert(parents.size() > 0);

	list<VuBaseBlock*>::iterator itparent;
	u32 usedvars = insts.front().usedvars[0];
	u32 livevars = insts.front().livevars[0];

	// also consider VI regs that every parent uses (they are likely already
	// in GPRs when control reaches this block)
	if (parents.size() > 0)
	{
		u32 usedvars2 = 0xffffffff;

		for(itparent = parents.begin(); itparent != parents.end(); itparent++)
		{
			usedvars2 &= (*itparent)->insts.front().usedvars[0];
		}

		usedvars |= usedvars2;
	}

	// only cache regs that are both used and live on entry
	usedvars &= livevars;

	// currently order doesn't matter
	int num = 0;

	// first pass: VI regs 1..15 that are used and live here
	if (usedvars)
	{
		for (int i = 1; i < 16; ++i)
		{
			if (usedvars & (1 << i))
			{
				pregs[num].inuse = 1;
				pregs[num].reg = i;

				livevars &= ~(1 << i);

				if (++num >= maxregs) break;
			}
		}
	}

	// second pass: fill remaining slots with regs merely live through the
	// block (restricted to those the last instruction still uses)
	if (num < maxregs)
	{
		livevars &= ~usedvars;
		livevars &= insts.back().usedvars[0];

		if (livevars)
		{
			for (int i = 1; i < 16; ++i)
			{
				if (livevars & (1 << i))
				{
					pregs[num].inuse = 1;
					pregs[num].reg = i;

					if (++num >= maxregs) break;
				}
			}
		}
	}

	s_markov.children.push_back(this);
	type |= BLOCKTYPE_ANALYZED;

	// expand the blanket upward through all parents
	for(itparent = parents.begin(); itparent != parents.end(); itparent++)
	{
		(*itparent)->AssignVIRegs(1);
	}
}
2399
2400 static void SuperVUAssignRegs()
2401 {
2402 list<VuBaseBlock*>::iterator itblock, itblock2;
2403
2404 for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
2405 {
2406 (*itblock)->type &= ~BLOCKTYPE_ANALYZED;
2407 }
2408 s_listBlocks.front()->AssignVFRegs();
2409
2410 // VI assignments, find markov blanket for each node in the graph
2411 // then allocate regs based on the commonly used ones
2412 #ifdef SUPERVU_X86CACHING
2413 for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
2414 {
2415 (*itblock)->type &= ~(BLOCKTYPE_ANALYZED | BLOCKTYPE_ANALYZEDPARENT);
2416 }
2417 s_vecRegArray.resize(0);
2418 u8 usedregs[16];
2419
2420 // note: first block always has to start with no alloc regs
2421 bool bfirst = true;
2422
2423 for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
2424 {
2425
2426 if (!((*itblock)->type & BLOCKTYPE_ANALYZED))
2427 {
2428
2429 if ((*itblock)->parents.size() == 0)
2430 {
2431 (*itblock)->type |= BLOCKTYPE_ANALYZED;
2432 bfirst = false;
2433 continue;
2434 }
2435
2436 s_markov.children.clear();
2437 s_markov.parents.clear();
2438 (*itblock)->AssignVIRegs(0);
2439
2440 // assign the regs
2441 int regid = s_vecRegArray.size();
2442 s_vecRegArray.resize(regid + iREGCNT_GPR);
2443
2444 _x86regs* mergedx86 = &s_vecRegArray[regid];
2445 memset(mergedx86, 0, sizeof(_x86regs)*iREGCNT_GPR);
2446
2447 if (!bfirst)
2448 {
2449 *(u32*)usedregs = *((u32*)usedregs + 1) = *((u32*)usedregs + 2) = *((u32*)usedregs + 3) = 0;
2450
2451 for(itblock2 = s_markov.children.begin(); itblock2 != s_markov.children.end(); itblock2++)
2452 {
2453 pxAssert((*itblock2)->allocX86Regs >= 0);
2454 _x86regs* pregs = &s_vecRegArray[(*itblock2)->allocX86Regs];
2455 for (int i = 0; i < iREGCNT_GPR; ++i)
2456 {
2457 if (pregs[i].inuse && pregs[i].reg < 16)
2458 {
2459 //pxAssert( pregs[i].reg < 16);
2460 usedregs[pregs[i].reg]++;
2461 }
2462 }
2463 }
2464
2465 int num = 1;
2466 for (int i = 0; i < 16; ++i)
2467 {
2468 if (usedregs[i] == s_markov.children.size())
2469 {
2470 // use
2471 mergedx86[num].inuse = 1;
2472 mergedx86[num].reg = i;
2473 mergedx86[num].type = (s_vu ? X86TYPE_VU1 : 0) | X86TYPE_VI;
2474 mergedx86[num].mode = MODE_READ;
2475 if (++num >= iREGCNT_GPR)
2476 break;
2477 if (num == ESP)
2478 ++num;
2479 }
2480 }
2481
2482 for(itblock2 = s_markov.children.begin(); itblock2 != s_markov.children.end(); itblock2++)
2483 {
2484 pxAssert((*itblock2)->nStartx86 == -1);
2485 (*itblock2)->nStartx86 = regid;
2486 }
2487
2488 for(itblock2 = s_markov.parents.begin(); itblock2 != s_markov.parents.end(); itblock2++)
2489 {
2490 pxAssert((*itblock2)->nEndx86 == -1);
2491 (*itblock2)->nEndx86 = regid;
2492 }
2493 }
2494
2495 bfirst = false;
2496 }
2497 }
2498 #endif
2499 }
2500
2501 //////////////////
2502 // Recompilation
2503 //////////////////
2504
// cycles in which the last Q,P regs were finished (written to VU->VI[])
// the write occurs before the instruction is executed at that cycle
// compare with s_TotalVUCycles
// if less than 0, already flushed
int s_writeQ, s_writeP;

// saved host registers: SuperVUExecuteProgram stashes esp/esi/edi/ebx (and
// the caller's return address in s_callstack) so SuperVUEndProgram can restore
// them when recompiled code exits.
uptr s_vu1esp, s_callstack;
uptr s_vuebx, s_vuedi, s_vu1esi;

static int s_recWriteQ, s_recWriteP; // wait times during recompilation
static int s_needFlush;	// first bit - Q, second bit - P, third bit - Q has been written, fourth bit - P has been written

// x86 reg currently holding the indirect-jump target (see VuBaseBlock::Recompile)
static int s_JumpX86;
// pending XGKICK: countdown until the deferred kick is emitted, and the x86
// reg holding its operand
static int s_ScheduleXGKICK = 0, s_XGKICKReg = -1;

void recVUMI_XGKICK_(VURegs *VU);
2522
2523 void SuperVUCleanupProgram(u32 startpc, int vuindex)
2524 {
2525 #ifdef SUPERVU_COUNT
2526 QueryPerformanceCounter(&svufinal);
2527 svutime += (u32)(svufinal.QuadPart - svubase.QuadPart);
2528 #endif
2529
2530 VU = vuindex ? &VU1 : &VU0;
2531 VU->cycle += s_TotalVUCycles;
2532
2533 //VU cycle stealing hack, 3000 cycle maximum so it doesn't get out of hand
2534 if (s_TotalVUCycles < 3000)
2535 cpuRegs.cycle += s_TotalVUCycles * EmuConfig.Speedhacks.VUCycleSteal;
2536 else
2537 cpuRegs.cycle += 3000 * EmuConfig.Speedhacks.VUCycleSteal;
2538
2539 if ((int)s_writeQ > 0) VU->VI[REG_Q] = VU->q;
2540 if ((int)s_writeP > 0)
2541 {
2542 pxAssert(VU == &VU1);
2543 VU1.VI[REG_P] = VU1.p; // only VU1
2544 }
2545
2546 //memset(recVUStack, 0, SUPERVU_STACKSIZE * 4);
2547
2548 // Could clear allocation info to prevent possibly bad data being used in other parts of pcsx2;
2549 // not doing this because it's slow and not needed (rama)
2550 // _initXMMregs();
2551 // _initMMXregs();
2552 // _initX86regs();
2553 }
2554
2555 #if defined(_MSC_VER)
2556
// entry point of all vu programs from emulator calls
// Naked: no prologue/epilogue — the recompiled program "returns" by jumping
// to SuperVUEndProgram, which restores the state saved here.
__declspec(naked) void SuperVUExecuteProgram(u32 startpc, int vuindex)
{
	// Stackframe setup for the recompiler:
	// We rewind the stack 4 bytes, which places the parameters of this function before
	// any calls we might make from recompiled code. The return address for this function
	// call is subsequently stored in s_callstack.

	__asm
	{
		mov eax, dword ptr [esp]	// grab our own return address
		mov s_TotalVUCycles, 0 // necessary to be here!
		add esp, 4					// pop it: startpc/vuindex now sit at [esp]
		mov s_callstack, eax		// SuperVUEndProgram jumps back through this
		call SuperVUGetProgram		// eax = entry point of the compiled program

		// save cpu state (callee-saved regs the recompiled code may clobber)
		//mov s_vu1ebp, ebp
		mov s_vu1esi, esi
		mov s_vuedi, edi
		mov s_vuebx, ebx

		mov s_vu1esp, esp
		and esp, -16 // align stack for GCC compilance

		//stmxcsr s_ssecsr
		ldmxcsr g_sseVUMXCSR		// switch to the VU rounding/flush mode

		// init vars (negative = "already flushed", see s_writeQ/s_writeP)
		mov s_writeQ, 0xffffffff
		mov s_writeP, 0xffffffff

		jmp eax						// enter the recompiled program
	}
}
2592
// exit point of all vu programs
// Recompiled code jumps here (never `ret`s); this undoes everything
// SuperVUExecuteProgram set up and returns to the original caller.
__declspec(naked) static void SuperVUEndProgram()
{
	__asm
	{
		// restore cpu state
		ldmxcsr g_sseMXCSR			// back to the EE's MXCSR

		//mov ebp, s_vu1ebp
		mov esi, s_vu1esi
		mov edi, s_vuedi
		mov ebx, s_vuebx

		mov esp, s_vu1esp // restore from aligned stack

		call SuperVUCleanupProgram	// startpc/vuindex are still at [esp]
		jmp s_callstack // so returns correctly
	}
}
2612
2613 #endif
2614
// Flushes P/Q regs
// Emits x86 code that commits the pending Q (p=0) or P (p=1) result to the
// VU's VI register file.  `wait` forces the writeback (advancing
// s_TotalVUCycles to the result's ready time if needed); otherwise the value
// is only written once enough cycles have elapsed.  Uses s_needFlush /
// s_recWriteQ / s_recWriteP recompile-time state to skip redundant flushes.
void SuperVUFlush(int p, int wait)
{
	u8* pjmp[3];
	if (!(s_needFlush&(1 << p))) return;

	int recwait = p ? s_recWriteP : s_recWriteQ;
	if (!wait && s_pCurInst->info.cycle < recwait) return;

	if (recwait == 0)
	{
		// write didn't happen this block
		MOV32MtoR(EAX, p ? (uptr)&s_writeP : (uptr)&s_writeQ);
		OR32RtoR(EAX, EAX);
		pjmp[0] = JS8(0);	// negative => already flushed, skip everything

		if (s_pCurInst->info.cycle) SUB32ItoR(EAX, s_pCurInst->info.cycle);

		// if writeQ <= total+offset
		if (!wait) // only write back if time is up
		{
			CMP32MtoR(EAX, (uptr)&s_TotalVUCycles);
			pjmp[1] = JG8(0);	// not ready yet, skip the writeback
		}
		else
		{
			// add (writeQ-total-offset) to s_TotalVUCycles
			// necessary?
			CMP32MtoR(EAX, (uptr)&s_TotalVUCycles);
			pjmp[2] = JLE8(0);
			MOV32RtoM((uptr)&s_TotalVUCycles, EAX);	// stall the clock forward
			x86SetJ8(pjmp[2]);
		}
	}
	else if (wait && s_pCurInst->info.cycle < recwait)
	{
		ADD32ItoM((uptr)&s_TotalVUCycles, recwait);
	}

	// writeback: VI[Q/P] = pipeline value, and mark as flushed (sign bit set)
	MOV32MtoR(EAX, SuperVUGetVIAddr(p ? REG_P : REG_Q, 0));
	MOV32ItoM(p ? (uptr)&s_writeP : (uptr)&s_writeQ, 0x80000000);
	MOV32RtoM(SuperVUGetVIAddr(p ? REG_P : REG_Q, 1), EAX);

	// patch the skip jumps emitted above to land here
	if (recwait == 0)
	{
		if (!wait) x86SetJ8(pjmp[1]);
		x86SetJ8(pjmp[0]);
	}

	// clear the pending-flush bit once the value is definitely committed
	if (wait || (!p && recwait == 0 && s_pCurInst->info.cycle >= 12) || (!p && recwait > 0 && s_pCurInst->info.cycle >= recwait))
		s_needFlush &= ~(1 << p);
}
2667
2668 // executed only once per program
2669 static u32* SuperVUStaticAlloc(u32 size)
2670 {
2671 pxAssert(recVUStackPtr[s_vu] + size <= recVUStack[s_vu] + SUPERVU_STACKSIZE);
2672 // always zero
2673 if (size == 4) *(u32*)recVUStackPtr[s_vu] = 0;
2674 else memset(recVUStackPtr[s_vu], 0, size);
2675 recVUStackPtr[s_vu] += size;
2676 return (u32*)(recVUStackPtr[s_vu] - size);
2677 }
2678
// Recompiles every analyzed block of the current program, then resolves the
// inter-block jumps recorded in pChildJumps (either as absolute targets or,
// when tagged with the high bit, as rel32 displacements to patch in place).
static void SuperVURecompile()
{
	// reset the per-program static allocation stack (see SuperVUStaticAlloc)
	recVUStackPtr[s_vu] = recVUStack[s_vu];

	_initXMMregs();

	list<VuBaseBlock*>::iterator itblock;

	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
	{
		(*itblock)->type &= ~BLOCKTYPE_ANALYZED;
	}

	// Recompile() recurses through the graph from the entry block
	s_listBlocks.front()->Recompile();

	// make sure everything compiled
	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
	{
		pxAssert(((*itblock)->type & BLOCKTYPE_ANALYZED) && (*itblock)->pcode != NULL);
	}

	// link all blocks
	for(itblock = s_listBlocks.begin(); itblock != s_listBlocks.end(); itblock++)
	{
		VuBaseBlock::LISTBLOCKS::iterator itchild;

		pxAssert((*itblock)->blocks.size() <= ArraySize((*itblock)->pChildJumps));

		int i = 0;
		for(itchild = (*itblock)->blocks.begin(); itchild != (*itblock)->blocks.end(); itchild++)
		{

			// 0xffffffff marks a jump slot deliberately left unlinked
			// NOTE(review): this `continue` skips the ++i at the loop bottom, so
			// the next child reuses the same slot index — confirm intentional.
			if ((u32)(uptr)(*itblock)->pChildJumps[i] == 0xffffffff)
				continue;

			if ((*itblock)->pChildJumps[i] == NULL)
			{
				VuBaseBlock* pchild = *itchild;

				// no jump recorded: either the block ends the program (EOP)...
				if (pchild->type & BLOCKTYPE_HASEOP)
				{
					pxAssert(pchild->blocks.size() == 0);

					AND32ItoM((uptr)&VU0.VI[ REG_VPU_STAT ].UL, s_vu ? ~0x100 : ~0x001); // E flag
					AND32ItoM((uptr)&VU->GetVifRegs().stat, ~VIF1_STAT_VEW);

					MOV32ItoM((uptr)&VU->VI[REG_TPC], pchild->endpc);
					JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5));
				}
				// only other case is when there are two branches
				else
				{
					pxAssert((*itblock)->insts.back().regs[0].pipe == VUPIPE_BRANCH);
				}

				continue;
			}

			if ((u32)(uptr)(*itblock)->pChildJumps[i] & 0x80000000)
			{
				// relative: high bit tags the slot; the rest is the address of
				// the rel32 field of the emitted jump, patched to target pcode
				pxAssert((uptr)(*itblock)->pChildJumps[i] <= 0xffffffff);
				(*itblock)->pChildJumps[i] = (u32*)((uptr)(*itblock)->pChildJumps[i] & 0x7fffffff);
				*(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode - ((uptr)(*itblock)->pChildJumps[i] + 4);
			}
			else
			{
				// absolute: store the child's entry point directly
				*(*itblock)->pChildJumps[i] = (uptr)(*itchild)->pcode;
			}

			++i;
		}
	}

	s_pFnHeader->pprogfunc = s_listBlocks.front()->pcode;
}
2756
// debug

// scratch slots for saving host GPRs from debug stubs
u32 s_saveecx, s_saveedx, s_saveebx, s_saveesi, s_saveedi, s_saveebp;
u32 g_curdebugvu;

//float vuDouble(u32 f);
2764
2765 #ifdef PCSX2_DEBUG
// Debug-only hook called at the top of every recompiled block: dumps the
// VU0/VU1 register state when the matching vudump bit is set (8 = VU1,
// 0x80 = VU0), skipping addresses listed in badaddrs and fall-through
// parents flagged via skipparent.  The parameter shadows the g_curdebugvu
// global on purpose (selects which VU is being traced).
static void __fastcall svudispfn( int g_curdebugvu )
{
	static u32 i;

	if (((vudump&8) && g_curdebugvu) || ((vudump&0x80) && !g_curdebugvu)) //&& g_vu1lastrec != g_vu1last ) {
	{

		if (skipparent != g_vu1lastrec)
		{
			// suppress dumps for known-noisy (parent, block) address pairs
			for (i = 0; i < ArraySize(badaddrs); ++i)
			{
				if (s_svulast == badaddrs[i][1] && g_vu1lastrec == badaddrs[i][0])
					break;
			}

			if (i == ArraySize(badaddrs))
			{
				//static int curesp;
				//__asm mov curesp, esp
				//Console.WriteLn("tVU: %x %x %x", s_svulast, s_vucount, s_vufnheader);
				if (g_curdebugvu) iDumpVU1Registers();
				else iDumpVU0Registers();
				s_vucount++;
			}
		}

		g_vu1lastrec = s_svulast;
	}
}
2795 #endif
2796
// frees all regs taking into account the livevars
// Emits writeback code for every dirty XMM register and releases it.
// livevars[0] holds the VI/flag liveness bits (REG_ACC_FLAG etc.),
// livevars[1] the VF register liveness bits; with SUPERVU_INTERCACHING a
// dead register is dropped without being written back.
void SuperVUFreeXMMregs(u32* livevars)
{
	for (int i = 0; i < iREGCNT_XMM; ++i)
	{
		if (xmmregs[i].inuse)
		{
			// same reg
			if ((xmmregs[i].mode & MODE_WRITE))
			{

#ifdef SUPERVU_INTERCACHING
				// skip the writeback entirely if the value is dead downstream
				if (xmmregs[i].type == XMMTYPE_VFREG)
				{
					if (!(livevars[1] & (1 << xmmregs[i].reg))) continue;
				}
				else if (xmmregs[i].type == XMMTYPE_ACC)
				{
					if (!(livevars[0] & (1 << REG_ACC_FLAG))) continue;
				}
#endif

				if (xmmregs[i].mode & MODE_VUXYZ)
				{
					// ALWAYS update
					// the register only holds valid xyz (or z in the high half);
					// merge with memory before _freeXMMreg stores the full vector
					u32 addr = xmmregs[i].type == XMMTYPE_VFREG ? (uptr) & VU->VF[xmmregs[i].reg] : (uptr) & VU->ACC;

					if (xmmregs[i].mode & MODE_VUZ)
					{
						// store the high half (z,w), then reload so the register
						// matches memory layout before the final writeback
						SSE_MOVHPS_XMM_to_M64(addr, (x86SSERegType)i);
						SSE_SHUFPS_M128_to_XMM((x86SSERegType)i, addr, 0xc4);
					}
					else
					{
						// pull the missing (z,w) pair in from memory
						SSE_MOVHPS_M64_to_XMM((x86SSERegType)i, addr + 8);
					}

					xmmregs[i].mode &= ~MODE_VUXYZ;
				}

				_freeXMMreg(i);
			}
		}
	}

	//_freeXMMregs();
}
2844
// Cycle budget compared against s_TotalVUCycles by SuperVUTestVU0Condition.
static u32 runCycles = 0; // Cycles to Compare to for early exit
// Scratch slot preserving EAX across the emitted cycle check below.
static u32 backupEAX = 0; // Backup EAX (not sure if this is needed)
// Emits an early-exit check for VU0 programs: compares s_TotalVUCycles with
// the runCycles budget and, if the budget is exhausted, unwinds `incstack`
// bytes of stack (when nonzero) and jumps to SuperVUEndProgram.  EAX is
// preserved around the comparison via backupEAX.
void SuperVUTestVU0Condition(u32 incstack)
{
	if (s_vu && !SUPERVU_CHECKCONDITION) return; // vu0 only

	// sometimes games spin on vu0, so be careful with
	// runCycles value... woody hangs if too high
	// Edit: Need to test this again, if anyone ever has a "Woody" game :p
	MOV32RtoM((uptr)&backupEAX, EAX);
	MOV32MtoR(EAX, (uptr)&s_TotalVUCycles);
	CMP32MtoR(EAX, (uptr)&runCycles);	// sets flags consumed by JB8/JAE32 below
	MOV32MtoR(EAX, (uptr)&backupEAX);

	if (incstack)
	{
		u8* ptr = JB8(0);	// still under budget: skip the bail-out
		ADD32ItoR(ESP, incstack);	// pop the frames pushed since program entry
		//CALLFunc((u32)timeout);
		JMP32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 5));

		x86SetJ8(ptr);
	}
	else JAE32((uptr)SuperVUEndProgram - ((uptr)x86Ptr + 6));
}
2870
2871 void VuBaseBlock::Recompile()
2872 {
2873 if (type & BLOCKTYPE_ANALYZED) return;
2874
2875 x86Align(16);
2876 pcode = x86Ptr;
2877
2878 #ifdef PCSX2_DEBUG
2879 MOV32ItoM((uptr)&s_vufnheader, s_pFnHeader->startpc);
2880 MOV32ItoM((uptr)&VU->VI[REG_TPC], startpc);
2881 MOV32ItoM((uptr)&s_svulast, startpc);
2882
2883 list<VuBaseBlock*>::iterator itparent;
2884 for (itparent = parents.begin(); itparent != parents.end(); ++itparent)
2885 {
2886 if ((*itparent)->blocks.size() == 1 && (*itparent)->blocks.front()->startpc == startpc &&
2887 ((*itparent)->insts.size() < 2 || (----(*itparent)->insts.end())->regs[0].pipe != VUPIPE_BRANCH))
2888 {
2889 MOV32ItoM((uptr)&skipparent, (*itparent)->startpc);
2890 break;
2891 }
2892 }
2893
2894 if (itparent == parents.end()) MOV32ItoM((uptr)&skipparent, -1);
2895
2896 xMOV( ecx, s_vu );
2897 xCALL( svudispfn );
2898 #endif
2899
2900 s_pCurBlock = this;
2901 s_needFlush = 3;
2902 pc = startpc;
2903 branch = 0;
2904 s_recWriteQ = s_recWriteP = 0;
2905 s_XGKICKReg = -1;
2906 s_ScheduleXGKICK = 0;
2907
2908 s_ClipRead = s_PrevClipWrite = (uptr) & VU->VI[REG_CLIP_FLAG];
2909 s_StatusRead = s_PrevStatusWrite = (uptr) & VU->VI[REG_STATUS_FLAG];
2910 s_MACRead = s_PrevMACWrite = (uptr) & VU->VI[REG_MAC_FLAG];
2911 s_PrevIWrite = (uptr) & VU->VI[REG_I];
2912 s_JumpX86 = 0;
2913 s_UnconditionalDelay = 0;
2914
2915 memcpy(xmmregs, startregs, sizeof(xmmregs));
2916 #ifdef SUPERVU_X86CACHING
2917 if (nStartx86 >= 0)
2918 memcpy(x86regs, &s_vecRegArray[nStartx86], sizeof(x86regs));
2919 else
2920 _initX86regs();
2921 #else
2922 _initX86regs();
2923 #endif
2924
2925 list<VuInstruction>::iterator itinst;
2926 for(itinst = insts.begin(); itinst != insts.end(); itinst++)
2927 {
2928 s_pCurInst = &(*itinst);
2929 if (s_JumpX86 > 0)
2930 {
2931 if (!x86regs[s_JumpX86].inuse)
2932 {
2933 // load
2934 s_JumpX86 = _allocX86reg(-1, X86TYPE_VUJUMP, 0, MODE_READ);
2935 }
2936 x86regs[s_JumpX86].needed = 1;
2937 }
2938
2939 if (s_ScheduleXGKICK && s_XGKICKReg > 0)
2940 {
2941 pxAssert(x86regs[s_XGKICKReg].inuse);
2942 x86regs[s_XGKICKReg].needed = 1;
2943 }
2944 itinst->Recompile(itinst, vuxyz);
2945
2946 if (s_ScheduleXGKICK > 0)
2947 {
2948 if (s_ScheduleXGKICK-- == 1)
2949 {
2950 recVUMI_XGKICK_(VU);
2951 }
2952 }
2953 }
2954 pxAssert(pc == endpc);
2955 pxAssert(s_ScheduleXGKICK == 0);
2956
2957 // flush flags
2958 if (s_PrevClipWrite != (uptr)&VU->VI[REG_CLIP_FLAG])
2959 {
2960 MOV32MtoR(EAX, s_PrevClipWrite);
2961 MOV32RtoM((uptr)&VU->VI[REG_CLIP_FLAG], EAX);
2962 }
2963 if (s_PrevStatusWrite != (uptr)&VU->VI[REG_STATUS_FLAG])
2964 {
2965 MOV32MtoR(EAX, s_PrevStatusWrite);
2966 MOV32RtoM((uptr)&VU->VI[REG_STATUS_FLAG], EAX);
2967 }
2968 if (s_PrevMACWrite != (uptr)&VU->VI[REG_MAC_FLAG])
2969 {
2970 MOV32MtoR(EAX, s_PrevMACWrite);
2971 MOV32RtoM((uptr)&VU->VI[REG_MAC_FLAG], EAX);