/[pcsx2_0.9.7]/trunk/pcsx2/x86/newVif_UnpackSSE.cpp
ViewVC logotype

Contents of /trunk/pcsx2/x86/newVif_UnpackSSE.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 280 - (show annotations) (download)
Thu Dec 23 12:02:12 2010 UTC (9 years, 1 month ago) by william
File size: 8228 byte(s)
re-commit (had local access denied errors when committing)
1 /* PCSX2 - PS2 Emulator for PCs
2 * Copyright (C) 2002-2010 PCSX2 Dev Team
3 *
4 * PCSX2 is free software: you can redistribute it and/or modify it under the terms
5 * of the GNU Lesser General Public License as published by the Free Software Found-
6 * ation, either version 3 of the License, or (at your option) any later version.
7 *
8 * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10 * PURPOSE. See the GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License along with PCSX2.
13 * If not, see <http://www.gnu.org/licenses/>.
14 */
15
16 #include "PrecompiledHeader.h"
17 #include "newVif_UnpackSSE.h"
18
// Width-named aliases for the loads used by the unpack emitters in this file.
// The 8/16/32-bit names all expand to xMOVSSZX and the 64/128-bit names both
// expand to xMOVUPS, so the name documents the intended data width at the call
// site rather than selecting a distinct instruction.
#define xMOV8(regX, loc) xMOVSSZX(regX, loc)
#define xMOV16(regX, loc) xMOVSSZX(regX, loc)
#define xMOV32(regX, loc) xMOVSSZX(regX, loc)
#define xMOV64(regX, loc) xMOVUPS(regX, loc)
#define xMOV128(regX, loc) xMOVUPS(regX, loc)

// Page-aligned buffer of __pagesize*4 bytes. VifUnpackSSE_Init() fills it with
// 0xcc, points the emitter at it, and generates the unpack routines into it.
static __pagealigned u8 nVifUpkExec[__pagesize*4];
26
27 // Merges xmm vectors without modifying source reg
28 void mergeVectors(xRegisterSSE dest, xRegisterSSE src, xRegisterSSE temp, int xyzw) {
29 if (x86caps.hasStreamingSIMD4Extensions || (xyzw==15)
30 || (xyzw==12) || (xyzw==11) || (xyzw==8) || (xyzw==3)) {
31 mVUmergeRegs(dest, src, xyzw);
32 }
33 else {
34 xMOVAPS(temp, src);
35 mVUmergeRegs(dest, temp, xyzw);
36 }
37 }
38
39 // =====================================================================================================
40 // VifUnpackSSE_Base Section
41 // =====================================================================================================
42 VifUnpackSSE_Base::VifUnpackSSE_Base()
43 : dstIndirect(ecx) // parameter 1 of __fastcall
44 , srcIndirect(edx) // parameter 2 of __fastcall
45 , workReg( xmm1 )
46 , destReg( xmm0 )
47 {
48 }
49
50 void VifUnpackSSE_Base::xMovDest() const {
51 if (IsUnmaskedOp()) { xMOVAPS (ptr[dstIndirect], destReg); }
52 else { doMaskWrite(destReg); }
53 }
54
55 void VifUnpackSSE_Base::xShiftR(const xRegisterSSE& regX, int n) const {
56 if (usn) { xPSRL.D(regX, n); }
57 else { xPSRA.D(regX, n); }
58 }
59
60 void VifUnpackSSE_Base::xPMOVXX8(const xRegisterSSE& regX) const {
61 if (usn) xPMOVZX.BD(regX, ptr32[srcIndirect]);
62 else xPMOVSX.BD(regX, ptr32[srcIndirect]);
63 }
64
65 void VifUnpackSSE_Base::xPMOVXX16(const xRegisterSSE& regX) const {
66 if (usn) xPMOVZX.WD(regX, ptr64[srcIndirect]);
67 else xPMOVSX.WD(regX, ptr64[srcIndirect]);
68 }
69
70 void VifUnpackSSE_Base::xUPK_S_32() const {
71 xMOV32 (workReg, ptr32[srcIndirect]);
72 xPSHUF.D (destReg, workReg, _v0);
73 }
74
75 void VifUnpackSSE_Base::xUPK_S_16() const {
76 if (x86caps.hasStreamingSIMD4Extensions)
77 {
78 xPMOVXX16 (workReg);
79 }
80 else
81 {
82 xMOV16 (workReg, ptr32[srcIndirect]);
83 xPUNPCK.LWD(workReg, workReg);
84 xShiftR (workReg, 16);
85 }
86 xPSHUF.D (destReg, workReg, _v0);
87 }
88
89 void VifUnpackSSE_Base::xUPK_S_8() const {
90 if (x86caps.hasStreamingSIMD4Extensions)
91 {
92 xPMOVXX8 (workReg);
93 }
94 else
95 {
96 xMOV8 (workReg, ptr32[srcIndirect]);
97 xPUNPCK.LBW(workReg, workReg);
98 xPUNPCK.LWD(workReg, workReg);
99 xShiftR (workReg, 24);
100 }
101 xPSHUF.D (destReg, workReg, _v0);
102 }
103
104 // The V2 + V3 unpacks have freaky behaviour, the manual claims "indeterminate".
105 // After testing on the PS2, it's very much determinate in 99% of cases
106 // and games like Lemmings, And1 Streetball rely on this data to be like this!
107 // I have commented after each shuffle to show what data is going where - Ref
108
109 void VifUnpackSSE_Base::xUPK_V2_32() const {
110 xMOV64 (destReg, ptr32[srcIndirect]);
111 xMOVH.PS (destReg, ptr32[srcIndirect]); //v1v0v1v0
112 }
113
114 void VifUnpackSSE_Base::xUPK_V2_16() const {
115 if (x86caps.hasStreamingSIMD4Extensions)
116 {
117 xPMOVXX16 (destReg);
118 }
119 else
120 {
121 xMOV32 (destReg, ptr32[srcIndirect]);
122 xPUNPCK.LWD(destReg, destReg);
123 xShiftR (destReg, 16);
124 }
125 xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0
126 }
127
128 void VifUnpackSSE_Base::xUPK_V2_8() const {
129 if (x86caps.hasStreamingSIMD4Extensions)
130 {
131 xPMOVXX8 (destReg);
132 }
133 else
134 {
135 xMOV16 (destReg, ptr32[srcIndirect]);
136 xPUNPCK.LBW(destReg, destReg);
137 xPUNPCK.LWD(destReg, destReg);
138 xShiftR (destReg, 24);
139 }
140 xPSHUF.D (destReg, destReg, 0x44); //v1v0v1v0
141 }
142
143 void VifUnpackSSE_Base::xUPK_V3_32() const {
144 xMOV128 (destReg, ptr128[srcIndirect]);
145 }
146
147 void VifUnpackSSE_Base::xUPK_V3_16() const {
148 if (x86caps.hasStreamingSIMD4Extensions)
149 {
150 xPMOVXX16 (destReg);
151 }
152 else
153 {
154 xMOV64 (destReg, ptr32[srcIndirect]);
155 xPUNPCK.LWD(destReg, destReg);
156 xShiftR (destReg, 16);
157 }
158 }
159
160 void VifUnpackSSE_Base::xUPK_V3_8() const {
161 if (x86caps.hasStreamingSIMD4Extensions)
162 {
163 xPMOVXX8 (destReg);
164 }
165 else
166 {
167 xMOV32 (destReg, ptr32[srcIndirect]);
168 xPUNPCK.LBW(destReg, destReg);
169 xPUNPCK.LWD(destReg, destReg);
170 xShiftR (destReg, 24);
171 }
172 }
173
174 void VifUnpackSSE_Base::xUPK_V4_32() const {
175 xMOV128 (destReg, ptr32[srcIndirect]);
176 }
177
178 void VifUnpackSSE_Base::xUPK_V4_16() const {
179 if (x86caps.hasStreamingSIMD4Extensions)
180 {
181 xPMOVXX16 (destReg);
182 }
183 else
184 {
185 xMOV64 (destReg, ptr32[srcIndirect]);
186 xPUNPCK.LWD(destReg, destReg);
187 xShiftR (destReg, 16);
188 }
189 }
190
191 void VifUnpackSSE_Base::xUPK_V4_8() const {
192 if (x86caps.hasStreamingSIMD4Extensions)
193 {
194 xPMOVXX8 (destReg);
195 }
196 else
197 {
198 xMOV32 (destReg, ptr32[srcIndirect]);
199 xPUNPCK.LBW(destReg, destReg);
200 xPUNPCK.LWD(destReg, destReg);
201 xShiftR (destReg, 24);
202 }
203 }
204
// V4-5 unpack: expands one packed source value into four dword lanes (R, G, B, A).
//
// The value is loaded and shuffled across workReg, then peeled off one field at a
// time: each step shifts the next field down by 8 and back up by 3 (7 for the final
// A field), then merges one lane of workReg into destReg. The closing shift pair
// (left 24, right 24) keeps only the low 8 bits of every lane. The emission order
// matters: every step works on the register state left by the previous one.
void VifUnpackSSE_Base::xUPK_V4_5() const {
    xMOV16 (workReg, ptr32[srcIndirect]);
    xPSHUF.D (workReg, workReg, _v0);   // same value in every lane (presumably; _v0 is defined elsewhere)
    xPSLL.D (workReg, 3); // ABG|R5.000
    xMOVAPS (destReg, workReg); // x|x|x|R
    xPSRL.D (workReg, 8); // ABG
    xPSLL.D (workReg, 3); // AB|G5.000
    mVUmergeRegs(destReg, workReg, 0x4);// x|x|G|R
    xPSRL.D (workReg, 8); // AB
    xPSLL.D (workReg, 3); // A|B5.000
    mVUmergeRegs(destReg, workReg, 0x2);// x|B|G|R
    xPSRL.D (workReg, 8); // A
    xPSLL.D (workReg, 7); // A.0000000
    mVUmergeRegs(destReg, workReg, 0x1);// A|B|G|R
    // Clear everything above the low byte of each lane.
    xPSLL.D (destReg, 24); // can optimize to
    xPSRL.D (destReg, 24); // single AND...
}
222
223 void VifUnpackSSE_Base::xUnpack( int upknum ) const
224 {
225 switch( upknum )
226 {
227 case 0: xUPK_S_32(); break;
228 case 1: xUPK_S_16(); break;
229 case 2: xUPK_S_8(); break;
230
231 case 4: xUPK_V2_32(); break;
232 case 5: xUPK_V2_16(); break;
233 case 6: xUPK_V2_8(); break;
234
235 case 8: xUPK_V3_32(); break;
236 case 9: xUPK_V3_16(); break;
237 case 10: xUPK_V3_8(); break;
238
239 case 12: xUPK_V4_32(); break;
240 case 13: xUPK_V4_16(); break;
241 case 14: xUPK_V4_8(); break;
242 case 15: xUPK_V4_5(); break;
243
244 case 3:
245 case 7:
246 case 11:
247 pxFailRel( wxsFormat( L"Vpu/Vif - Invalid Unpack! [%d]", upknum ) );
248 break;
249 }
250 }
251
252 // =====================================================================================================
253 // VifUnpackSSE_Simple
254 // =====================================================================================================
255
256 VifUnpackSSE_Simple::VifUnpackSSE_Simple(bool usn_, bool domask_, int curCycle_)
257 {
258 curCycle = curCycle_;
259 usn = usn_;
260 doMask = domask_;
261 }
262
263 void VifUnpackSSE_Simple::doMaskWrite(const xRegisterSSE& regX) const {
264 xMOVAPS(xmm7, ptr[dstIndirect]);
265 int offX = aMin(curCycle, 3);
266 xPAND(regX, ptr32[nVifMask[0][offX]]);
267 xPAND(xmm7, ptr32[nVifMask[1][offX]]);
268 xPOR (regX, ptr32[nVifMask[2][offX]]);
269 xPOR (regX, xmm7);
270 xMOVAPS(ptr[dstIndirect], regX);
271 }
272
273 // ecx = dest, edx = src
274 static void nVifGen(int usn, int mask, int curCycle) {
275
276 int usnpart = usn*2*16;
277 int maskpart = mask*16;
278
279 VifUnpackSSE_Simple vpugen( !!usn, !!mask, curCycle );
280
281 for( int i=0; i<16; ++i )
282 {
283 nVifCall& ucall( nVifUpk[((usnpart+maskpart+i) * 4) + curCycle] );
284 ucall = NULL;
285 if( nVifT[i] == 0 ) continue;
286
287 ucall = (nVifCall)xGetAlignedCallTarget();
288 vpugen.xUnpack(i);
289 vpugen.xMovDest();
290 xRET();
291
292 pxAssert( ((uptr)xGetPtr() - (uptr)nVifUpkExec) < sizeof(nVifUpkExec) );
293 }
294 }
295
296 void VifUnpackSSE_Init()
297 {
298 HostSys::MemProtectStatic(nVifUpkExec, PageAccess_ReadWrite());
299 memset8<0xcc>( nVifUpkExec );
300
301 xSetPtr( nVifUpkExec );
302
303 for (int a = 0; a < 2; a++) {
304 for (int b = 0; b < 2; b++) {
305 for (int c = 0; c < 4; c++) {
306 nVifGen(a, b, c);
307 }}}
308
309 HostSys::MemProtectStatic(nVifUpkExec, PageAccess_ExecOnly());
310 }

  ViewVC Help
Powered by ViewVC 1.1.22