/[pcsx2_0.9.7]/trunk/pcsx2/x86/newVif_Unpack.cpp
ViewVC logotype

Contents of /trunk/pcsx2/x86/newVif_Unpack.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 280 - (show annotations) (download)
Thu Dec 23 12:02:12 2010 UTC (9 years, 2 months ago) by william
File size: 8516 byte(s)
re-commit (had local access denied errors when committing)
1 /* PCSX2 - PS2 Emulator for PCs
2 * Copyright (C) 2002-2010 PCSX2 Dev Team
3 *
4 * PCSX2 is free software: you can redistribute it and/or modify it under the terms
5 * of the GNU Lesser General Public License as published by the Free Software Found-
6 * ation, either version 3 of the License, or (at your option) any later version.
7 *
8 * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10 * PURPOSE. See the GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License along with PCSX2.
13 * If not, see <http://www.gnu.org/licenses/>.
14 */
15
16 // newVif!
17 // authors: cottonvibes(@gmail.com)
18 // Jake.Stine (@gmail.com)
19
20 #include "PrecompiledHeader.h"
21 #include "Common.h"
22 #include "Vif_Dma.h"
23 #include "newVif.h"
24
25 __aligned16 nVifStruct nVif[2]; // one entry per VIF unit; indexed by idx (0 or 1) throughout this file
26
27 // Interpreter-style SSE unpacks. Array layout matches the interpreter C unpacks.
28 // ([USN][Masking][Unpack Type]) [curCycle] -- i.e. entry = ((usn*2*16) + (cmd & 0x1f)) * 4 + cycle
29 __aligned16 nVifCall nVifUpk[(2*2*16) *4];
30
31 // This is used by the interpreted SSE unpacks only. Recompiled SSE unpacks
32 // and the interpreted C unpacks use the vif.MaskRow/MaskCol members directly.
33 // [MaskNumber][CycleNumber][Vector] -- rebuilt by setMasks() from the 2-bit fields of the mask register
34 __aligned16 u32 nVifMask[3][4][4] = {0};
35
36 // Number of bytes of data in the source stream needed for each vector, indexed by (cmd & 0x0f).
37 // [equivalent to ((32 >> VL) * (VN+1)) / 8 for the named formats, with VL = index & 3 and VN = index >> 2; the unused "----" slots hold 0]
38 __aligned16 const u8 nVifT[16] = {
39 4, // S-32
40 2, // S-16
41 1, // S-8
42 0, // ----
43 8, // V2-32
44 4, // V2-16
45 2, // V2-8
46 0, // ----
47 12,// V3-32
48 6, // V3-16
49 3, // V3-8
50 0, // ----
51 16,// V4-32
52 8, // V4-16
53 4, // V4-8
54 2, // V4-5
55 };
56
57 // ----------------------------------------------------------------------------
58 template< int idx, bool doMode, bool isFill >
59 __ri void __fastcall _nVifUnpackLoop(const u8* data); // forward declaration; the definition appears further down in this file
60
61 typedef void __fastcall FnType_VifUnpackLoop(const u8* data);
62 typedef FnType_VifUnpackLoop* Fnptr_VifUnpackLoop;
63
64 // Unpacks Until 'Num' is 0. Indexed as [idx][doMode][isFill]: one _nVifUnpackLoop instantiation per combination.
65 static const __aligned16 Fnptr_VifUnpackLoop UnpackLoopTable[2][2][2] = {
66 {{ _nVifUnpackLoop<0,0,0>, _nVifUnpackLoop<0,0,1> },
67 { _nVifUnpackLoop<0,1,0>, _nVifUnpackLoop<0,1,1> },},
68 {{ _nVifUnpackLoop<1,0,0>, _nVifUnpackLoop<1,0,1> },
69 { _nVifUnpackLoop<1,1,0>, _nVifUnpackLoop<1,1,1> },},
70 };
71 // ----------------------------------------------------------------------------
72
73 nVifStruct::nVifStruct() // Default state: vifBlocks is NULL, numBlocks is 0, recReserveSizeMB is 8.
74 {
75 vifBlocks = NULL;
76 numBlocks = 0;
77
78 recReserveSizeMB = 8; // presumably the recompiler reservation size in megabytes -- TODO confirm against dVifReserve
79 }
80
81 void reserveNewVif(int idx) // Forwards to dVifReserve(idx) when newVifDynaRec is set; otherwise does nothing.
82 {
83 if (newVifDynaRec) dVifReserve(idx);
84 }
85
86 void resetNewVif(int idx) // Re-initializes nVif[idx]: stores idx, empties the partial-transfer buffer, then calls dVifReset(idx) if newVifDynaRec is set.
87 {
88 // Safety Reset : Reassign all VIF structure info, just in case the VU1 pointers have
89 // changed for some reason.
90
91 nVif[idx].idx = idx;
92 nVif[idx].bSize = 0; // bSize is the number of bytes currently held in buffer (see nVifUnpack)
93 memzero(nVif[idx].buffer);
94
95 if (newVifDynaRec) dVifReset(idx);
96 }
97
98 void closeNewVif(int idx) { // Forwards to dVifClose(idx) when newVifDynaRec is set; otherwise does nothing.
99 if (newVifDynaRec) dVifClose(idx);
100 }
101
102 void releaseNewVif(int idx) { // Forwards to dVifRelease(idx) when newVifDynaRec is set; otherwise does nothing.
103 if (newVifDynaRec) dVifRelease(idx);
104 }
105
106 static __fi u8* getVUptr(uint idx, int offset) { // Returns a pointer into vuRegs[idx].Mem at 'offset', masked so it is 16-byte aligned and wraps inside the region.
107 return (u8*)(vuRegs[idx].Mem + ( offset & (idx ? 0x3ff0 : 0xff0) )); // mask keeps the offset below 0x4000 for idx != 0 and below 0x1000 for idx 0, and clears the low 4 bits
108 }
109
110
111 _vifT int nVifUnpack(const u8* data) { // Consumes UNPACK payload for VIF 'idx' from 'data'; returns the number of 32-bit words taken.
112 nVifStruct& v = nVif[idx];
113 vifStruct& vif = GetVifX;
114 VIFregisters& vifRegs = vifXRegs;
115
116 const uint ret = aMin(vif.vifpacketsize, vif.tag.size); // words taken by this call: what is available, capped at what the tag still expects
117 const bool isFill = (vifRegs.cycle.cl < vifRegs.cycle.wl); // "fill" when CL < WL
118 s32 size = ret << 2; // the same amount expressed in bytes
119
120 if (ret == vif.tag.size) { // Full Transfer
121 if (v.bSize) { // Last transfer was partial
122 memcpy_fast(&v.buffer[v.bSize], data, size); // NOTE(review): no bounds check against the capacity of v.buffer, which is not visible here -- confirm it can hold a whole UNPACK
123 v.bSize += size;
124 data = v.buffer; // unpack from the reassembled packet instead of the caller's fragment
125 // The partial path below advanced cl and num for read-back purposes only; rewind both before unpacking the whole buffer.
126 vif.cl = 0;
127 vifRegs.num = (vifXRegs.code >> 16) & 0xff; // grab NUM from the original VIFcode input.
128 if (!vifRegs.num) vifRegs.num = 256; // a NUM field of 0 is treated as 256
129 }
130
131 if (newVifDynaRec) dVifUnpack<idx>(data, isFill);
132 else _nVifUnpack(idx, data, vifRegs.mode, isFill);
133 // Transfer finished: clear the per-UNPACK state.
134 vif.tag.size = 0;
135 vif.cmd = 0;
136 vifRegs.num = 0;
137 v.bSize = 0;
138 }
139 else { // Partial Transfer
140 memcpy_fast(&v.buffer[v.bSize], data, size); // stash the fragment; nothing is unpacked until the rest arrives (same unchecked-append caveat as above)
141 v.bSize += size;
142 vif.tag.size -= ret;
143
144 const u8& vSize = nVifT[vif.cmd & 0x0f]; // source bytes per vector for this unpack format
145
146 // We need to provide accurate accounting of the NUM register, in case games decided
147 // to read back from it mid-transfer. Since so few games actually use partial transfers
148 // of VIF unpacks, this code should not be any bottleneck.
149 // vSize is 0 for the unused nVifT slots; 'size' would then never shrink, so skip the accounting rather than spin forever.
150 while (vSize && size >= vSize) {
151 --vifRegs.num;
152 ++vif.cl;
153
154 if (isFill) { // NOTE(review): if cycle.cl is 0 nothing in this branch reduces 'size' -- confirm this path cannot be reached in that case
155 if (vif.cl < vifRegs.cycle.cl) size -= vSize;
156 else if (vif.cl == vifRegs.cycle.wl) vif.cl = 0;
157 }
158 else
159 {
160 size -= vSize;
161 if (vif.cl >= vifRegs.cycle.wl) vif.cl = 0;
162 }
163 }
164 }
165
166 return ret;
167 }
168
169 template int nVifUnpack<0>(const u8* data); // explicit instantiation for idx 0
170 template int nVifUnpack<1>(const u8* data); // explicit instantiation for idx 1
171
172 // Rebuilds nVifMask from the mask register: 16 two-bit fields, field i landing at [plane][i/4][i%4].
173 // Used by the interpreted SSE unpacks only; the other unpackers use the vif.MaskRow/MaskCol members directly.
174 static void setMasks(const vifStruct& vif, const VIFregisters& v) { // note: 'v' here is the VIF register block, not an nVifStruct
175 for (int i = 0; i < 16; i++) {
176 int m = (v.mask >> (i*2)) & 3; // 2-bit selector for field i
177 switch (m) {
178 case 0: // Data
179 nVifMask[0][i/4][i%4] = 0xffffffff;
180 nVifMask[1][i/4][i%4] = 0;
181 nVifMask[2][i/4][i%4] = 0;
182 break;
183 case 1: // MaskRow
184 nVifMask[0][i/4][i%4] = 0;
185 nVifMask[1][i/4][i%4] = 0;
186 nVifMask[2][i/4][i%4] = vif.MaskRow._u32[i%4]; // row value chosen by the field's position within the vector
187 break;
188 case 2: // MaskCol
189 nVifMask[0][i/4][i%4] = 0;
190 nVifMask[1][i/4][i%4] = 0;
191 nVifMask[2][i/4][i%4] = vif.MaskCol._u32[i/4]; // col value chosen by the cycle number
192 break;
193 case 3: // Write Protect
194 nVifMask[0][i/4][i%4] = 0;
195 nVifMask[1][i/4][i%4] = 0xffffffff;
196 nVifMask[2][i/4][i%4] = 0;
197 break;
198 }
199 }
200 }
201
202 // ----------------------------------------------------------------------------
203 // Unpacking Optimization notes:
204 // ----------------------------------------------------------------------------
205 // Some games send a LOT of single-cycle packets (God of War, SotC, TriAce games, etc),
206 // so we always need to be wary of keeping loop setup code optimized. It's not always
207 // a "win" to move code outside the loop, like normally in most other loop scenarios.
208 //
209 // The biggest bottleneck of the current code is the call/ret needed to invoke the SSE
210 // unpackers. A better option is to generate the entire vifRegs.num loop code as part
211 // of the SSE template, and inline the SSE code into the heart of it. This both avoids
212 // the call/ret and opens the door for resolving some register dependency chains in the
213 // current emitted functions. (this is what zero's SSE does to get its final bit of
214 // speed advantage over the new vif). --air
215 //
216 // The BEST optimization strategy here is to use data available to us from the UNPACK dispatch
217 // -- namely the unpack type and mask flag -- in combination with mode and usn values -- to
218 // generate ~600 special versions of this function. But since it's an interpreter, who gives
219 // a crap? Really? :p
220 //
221
222 // data - source stream for the UNPACK. Writes one 16-byte vector per iteration until vifRegs.num reaches 0 (the body always runs at least once).
223 template< int idx, bool doMode, bool isFill >
224 __ri void __fastcall _nVifUnpackLoop(const u8* data) {
225
226 vifStruct& vif = GetVifX;
227 VIFregisters& vifRegs = vifXRegs;
228
229 // skipSize used for skipping writes only (bytes: 16 per vector skipped after each group of wl writes)
230 const int skipSize = (vifRegs.cycle.cl - vifRegs.cycle.wl) * 16;
231
232 //DevCon.WriteLn("[%d][%d][%d][num=%d][upk=%d][cl=%d][bl=%d][skip=%d]", isFill, doMask, doMode, vifRegs.num, upkNum, vif.cl, blockSize, skipSize);
233
234 if (!doMode && (vif.cmd & 0x10)) setMasks(vif, vifRegs); // cmd bit 4 is the "Masking" index; nVifMask is only rebuilt for the SSE (non-mode) path
235
236 const int usn = !!vif.usn;
237 const int upkNum = vif.cmd & 0x1f; // masking bit plus 4-bit unpack type
238 const u8& vSize = nVifT[upkNum & 0x0f]; // source bytes consumed per vector
239 //uint vl = vif.cmd & 0x03;
240 //uint vn = (vif.cmd >> 2) & 0x3;
241 //uint vSize = ((32 >> vl) * (vn+1)) / 8; // size of data (in bytes) used for each write cycle
242
243 const nVifCall* fnbase = &nVifUpk[ ((usn*2*16) + upkNum) * (4*1) ]; // first of the 4 per-cycle SSE unpackers for this usn/mask/type
244 const UNPACKFUNCTYPE ft = VIFfuncTable[idx][doMode ? vifRegs.mode : 0][ ((usn*2*16) + upkNum) ]; // C unpacker; only called when doMode is true
245
246 pxAssume (vif.cl == 0);
247 pxAssume (vifRegs.cycle.wl > 0);
248
249 do {
250 u8* dest = getVUptr(idx, vif.tag.addr);
251
252 if (doMode) {
253 //if (1) {
254 ft(dest, data);
255 }
256 else {
257 //DevCon.WriteLn("SSE Unpack!");
258 uint cl3 = aMin(vif.cl,3); // only 4 per-cycle entries exist; cycles past the 4th reuse the last one
259 fnbase[cl3](dest, data);
260 }
261
262 vif.tag.addr += 16;
263 --vifRegs.num;
264 ++vif.cl;
265
266 if (isFill) { // source advances only while cl is below cycle.cl; cl restarts after wl writes
267 //DevCon.WriteLn("isFill!");
268 if (vif.cl < vifRegs.cycle.cl) data += vSize;
269 else if (vif.cl == vifRegs.cycle.wl) vif.cl = 0;
270 }
271 else
272 {
273 data += vSize;
274
275 if (vif.cl >= vifRegs.cycle.wl) { // wl vectors written: skip ahead in the destination and start a new cycle
276 vif.tag.addr += skipSize;
277 vif.cl = 0;
278 }
279 }
280 } while (vifRegs.num);
281 }
282
283 __fi void _nVifUnpack(int idx, const u8* data, uint mode, bool isFill) { // Dispatches to the _nVifUnpackLoop instantiation selected by idx, (mode != 0) and isFill.
284
285 UnpackLoopTable[idx][!!mode][isFill]( data );
286 }
287

  ViewVC Help
Powered by ViewVC 1.1.22