/[pcsx2_0.9.7]/trunk/pcsx2/x86/newVif_Unpack.cpp
ViewVC logotype

Contents of /trunk/pcsx2/x86/newVif_Unpack.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 148 - (show annotations) (download)
Fri Sep 10 17:22:32 2010 UTC (9 years, 4 months ago) by william
File size: 8092 byte(s)
Auto Commited Import of: pcsx2-0.9.7-DEBUG (upstream: v0.9.7.3746 local: v0.9.7.138) in ./trunk
1 /* PCSX2 - PS2 Emulator for PCs
2 * Copyright (C) 2002-2010 PCSX2 Dev Team
3 *
4 * PCSX2 is free software: you can redistribute it and/or modify it under the terms
5 * of the GNU Lesser General Public License as published by the Free Software Found-
6 * ation, either version 3 of the License, or (at your option) any later version.
7 *
8 * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10 * PURPOSE. See the GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License along with PCSX2.
13 * If not, see <http://www.gnu.org/licenses/>.
14 */
15
16 // newVif!
17 // authors: cottonvibes(@gmail.com)
18 // Jake.Stine (@gmail.com)
19
20 #include "PrecompiledHeader.h"
21 #include "Common.h"
22 #include "Vif_Dma.h"
23 #include "newVif.h"
24
25 __aligned16 nVifStruct nVif[2];
26 __aligned16 nVifCall nVifUpk[(2*2*16) *4]; // ([USN][Masking][Unpack Type]) [curCycle]
27 __aligned16 u32 nVifMask[3][4][4] = {0}; // [MaskNumber][CycleNumber][Vector]
28
29 __aligned16 const u8 nVifT[16] = {
30 4, // S-32
31 2, // S-16
32 1, // S-8
33 0, // ----
34 8, // V2-32
35 4, // V2-16
36 2, // V2-8
37 0, // ----
38 12,// V3-32
39 6, // V3-16
40 3, // V3-8
41 0, // ----
42 16,// V4-32
43 8, // V4-16
44 4, // V4-8
45 2, // V4-5
46 };
47
48 // ----------------------------------------------------------------------------
49 template< int idx, bool doMode, bool isFill >
50 __ri void __fastcall _nVifUnpackLoop(const u8 *data, u32 size);
51
52 typedef void __fastcall FnType_VifUnpackLoop(const u8 *data, u32 size);
53 typedef FnType_VifUnpackLoop* Fnptr_VifUnpackLoop;
54
55 // Unpacks Until 'Num' is 0
56 static const __aligned16 Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = {
57 {{ _nVifUnpackLoop<0,0,0>, _nVifUnpackLoop<0,0,1> },
58 { _nVifUnpackLoop<0,1,0>, _nVifUnpackLoop<0,1,1> },},
59 {{ _nVifUnpackLoop<1,0,0>, _nVifUnpackLoop<1,0,1> },
60 { _nVifUnpackLoop<1,1,0>, _nVifUnpackLoop<1,1,1> },},
61 };
62 // ----------------------------------------------------------------------------
63
64 void resetNewVif(int idx)
65 {
66 // Safety Reset : Reassign all VIF structure info, just in case the VU1 pointers have
67 // changed for some reason.
68
69 nVif[idx].idx = idx;
70 nVif[idx].VU = idx ? &VU1 : &VU0;
71 nVif[idx].vuMemLimit = idx ? 0x3ff0 : 0xff0;
72 nVif[idx].vif = &GetVifX;
73 nVif[idx].vifRegs = &vifXRegs;
74 nVif[idx].bSize = 0;
75 memzero(nVif[idx].buffer);
76
77 if (newVifDynaRec) dVifReset(idx);
78 }
79
80 void closeNewVif(int idx) {
81 if (newVifDynaRec) dVifClose(idx);
82 }
83
84 static __fi u8* setVUptr(int vuidx, const u8* vuMemBase, int offset) {
85 return (u8*)(vuMemBase + ( offset & (vuidx ? 0x3ff0 : 0xff0) ));
86 }
87
88 static __fi void incVUptr(int vuidx, u8* &ptr, const u8* vuMemBase, int amount) {
89 pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check
90 ptr += amount;
91 vif->tag.addr += amount;
92 int diff = ptr - (vuMemBase + (vuidx ? 0x4000 : 0x1000));
93 if (diff >= 0) {
94 ptr = (u8*)(vuMemBase + diff);
95 }
96 }
97
98 static __fi void incVUptrBy16(int vuidx, u8* &ptr, const u8* vuMemBase) {
99 pxAssume( ((uptr)ptr & 0xf) == 0 ); // alignment check
100 ptr += 16;
101 vif->tag.addr += 16;
102 if( ptr == (vuMemBase + (vuidx ? 0x4000 : 0x1000)) ) {
103 ptr -= (vuidx ? 0x4000 : 0x1000);
104 }
105 }
106
107 int nVifUnpack(int idx, const u8* data) {
108 nVifStruct& v = nVif[idx];
109 vif = v.vif;
110 vifRegs = v.vifRegs;
111
112 const int ret = aMin(vif->vifpacketsize, vif->tag.size);
113 const bool isFill = (vifRegs->cycle.cl < vifRegs->cycle.wl);
114 s32 size = ret << 2;
115
116 if (ret == v.vif->tag.size) { // Full Transfer
117 if (v.bSize) { // Last transfer was partial
118 memcpy_fast(&v.buffer[v.bSize], data, size);
119 v.bSize += size;
120 data = v.buffer;
121 size = v.bSize;
122 }
123 if (size > 0 || isFill) {
124 if (newVifDynaRec) dVifUnpack(idx, data, size, isFill);
125 else _nVifUnpack(idx, data, size, isFill);
126 }
127 vif->tag.size = 0;
128 vif->cmd = 0;
129 v.bSize = 0;
130 }
131 else { // Partial Transfer
132 memcpy_fast(&v.buffer[v.bSize], data, size);
133 v.bSize += size;
134 vif->tag.size -= ret;
135 }
136
137 return ret;
138 }
139
140 static void setMasks(int idx, const VIFregisters& v) {
141 u32* row = idx ? g_vifmask.Row1 : g_vifmask.Row0;
142 u32* col = idx ? g_vifmask.Col1 : g_vifmask.Col0;
143 for (int i = 0; i < 16; i++) {
144 int m = (v.mask >> (i*2)) & 3;
145 switch (m) {
146 case 0: // Data
147 nVifMask[0][i/4][i%4] = 0xffffffff;
148 nVifMask[1][i/4][i%4] = 0;
149 nVifMask[2][i/4][i%4] = 0;
150 break;
151 case 1: // Row
152 nVifMask[0][i/4][i%4] = 0;
153 nVifMask[1][i/4][i%4] = 0;
154 nVifMask[2][i/4][i%4] = newVifDynaRec ? row[i%4] : ((u32*)&v.r0)[(i%4)*4];
155 break;
156 case 2: // Col
157 nVifMask[0][i/4][i%4] = 0;
158 nVifMask[1][i/4][i%4] = 0;
159 nVifMask[2][i/4][i%4] = newVifDynaRec ? col[i/4] : ((u32*)&v.c0)[(i/4)*4];
160 break;
161 case 3: // Write Protect
162 nVifMask[0][i/4][i%4] = 0;
163 nVifMask[1][i/4][i%4] = 0xffffffff;
164 nVifMask[2][i/4][i%4] = 0;
165 break;
166 }
167 }
168 }
169
170 // ----------------------------------------------------------------------------
171 // Unpacking Optimization notes:
172 // ----------------------------------------------------------------------------
173 // Some games send a LOT of single-cycle packets (God of War, SotC, TriAce games, etc),
174 // so we always need to be wary of keeping loop setup code optimized. It's not always
175 // a "win" to move code outside the loop, like normally in most other loop scenarios.
176 //
177 // The biggest bottleneck of the current code is the call/ret needed to invoke the SSE
178 // unpackers. A better option is to generate the entire vifRegs->num loop code as part
179 // of the SSE template, and inline the SSE code into the heart of it. This both avoids
180 // the call/ret and opens the door for resolving some register dependency chains in the
181 // current emitted functions. (this is what zero's SSE does to get its final bit of
182 // speed advantage over the new vif). --air
183 //
184 // As a secondary optimization to above, special handlers could be generated for the
185 // cycleSize==1 case, which is used frequently enough, and results in enough code
186 // elimination that it would probably be a win in most cases (and for sure in many
187 // "slow" games that need it most). --air
188
189 template< int idx, bool doMode, bool isFill >
190 __ri void __fastcall _nVifUnpackLoop(const u8 *data, u32 size) {
191
192 const int cycleSize = isFill ? vifRegs->cycle.cl : vifRegs->cycle.wl;
193 const int blockSize = isFill ? vifRegs->cycle.wl : vifRegs->cycle.cl;
194 const int skipSize = blockSize - cycleSize;
195 //DevCon.WriteLn("[%d][%d][%d][num=%d][upk=%d][cl=%d][bl=%d][skip=%d]", isFill, doMask, doMode, vifRegs->num, upkNum, vif->cl, blockSize, skipSize);
196
197 if (vif->cmd & 0x10) setMasks(idx, *vifRegs);
198
199 const int usn = !!(vif->usn);
200 const int upkNum = vif->cmd & 0x1f;
201 //const s8& vift = nVifT[upkNum]; // might be useful later when other SSE paths are finished.
202
203 const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ];
204 const VIFUnpackFuncTable& ft = VIFfuncTable[upkNum];
205 UNPACKFUNCTYPE func = usn ? ft.funcU : ft.funcS;
206
207 const u8* vuMemBase = (idx ? VU1 : VU0).Mem;
208 u8* dest = setVUptr(idx, vuMemBase, vif->tag.addr);
209 if (vif->cl >= blockSize) vif->cl = 0;
210
211 while (vifRegs->num) {
212 if (vif->cl < cycleSize) {
213 // This should always be true as per the _1mb buffer used to merge partial transfers.
214 pxAssume (size >= ft.gsize);
215 if (doMode) {
216 //DevCon.WriteLn("Non SSE; unpackNum = %d", upkNum);
217 func((u32*)dest, (u32*)data);
218 }
219 else {
220 //DevCon.WriteLn("SSE Unpack!");
221 fnbase[aMin(vif->cl, 3)](dest, data);
222 }
223 data += ft.gsize;
224 size -= ft.gsize;
225 vifRegs->num--;
226 incVUptrBy16(idx, dest, vuMemBase);
227 if (++vif->cl == blockSize) vif->cl = 0;
228 }
229 else if (isFill) {
230 //DevCon.WriteLn("isFill!");
231 func((u32*)dest, (u32*)data);
232 vifRegs->num--;
233 incVUptrBy16(idx, dest, vuMemBase);
234 if (++vif->cl == blockSize) vif->cl = 0;
235 }
236 else {
237 incVUptr(idx, dest, vuMemBase, 16 * skipSize);
238 vif->cl = 0;
239 }
240 }
241 }
242
243 __fi void _nVifUnpack(int idx, const u8 *data, u32 size, bool isFill) {
244
245 const bool doMode = !!vifRegs->mode;
246 UnpackLoopTable[idx][doMode][isFill]( data, size );
247 }
248

  ViewVC Help
Powered by ViewVC 1.1.22