/[pcsx2_0.9.7]/trunk/common/src/x86emitter/cpudetect.cpp
ViewVC logotype

Contents of /trunk/common/src/x86emitter/cpudetect.cpp

Parent Directory Parent Directory | Revision Log Revision Log


Revision 280 - (show annotations) (download)
Thu Dec 23 12:02:12 2010 UTC (9 years, 1 month ago) by william
File size: 11273 byte(s)
re-commit (had local access denied errors when committing)
1 /* Cpudetection lib
2 * Copyright (C) 2002-2010 PCSX2 Dev Team
3 *
4 * PCSX2 is free software: you can redistribute it and/or modify it under the terms
5 * of the GNU Lesser General Public License as published by the Free Software Found-
6 * ation, either version 3 of the License, or (at your option) any later version.
7 *
8 * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10 * PURPOSE. See the GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License along with PCSX2.
13 * If not, see <http://www.gnu.org/licenses/>.
14 */
15
16 #include "PrecompiledHeader.h"
17 #include "cpudetect_internal.h"
18 #include "internal.h"
19
20 using namespace x86Emitter;
21
// Global CPU capability record. NOTE(review): presumably the instance on which the
// x86capabilities methods in this file are invoked -- confirm against callers.
__aligned16 x86capabilities x86caps;

// Recompiled code buffer for SSE and MXCSR feature testing.
static __pagealigned u8 recSSE[__pagesize];
// Destination buffer for the FXSAVE instruction emitted by SIMD_EstablishMXCSRmask().
static __pagealigned u8 targetFXSAVE[512];
27
28 #ifdef __LINUX__
29 # include <sys/time.h>
30 # include <errno.h>
31 #endif
32
// Returns the text "true" or "false" for the given condition (used when logging).
static const char* bool_to_char( bool testcond )
{
	if( testcond )
		return "true";

	return "false";
}
37
38 // Warning! We've had problems with the MXCSR detection code causing stack corruption in
39 // MSVC PGO builds. The problem was fixed when I moved the MXCSR code to this function, and
40 // moved the recSSE[] array to a global static (it was local to cpudetectInit). Commented
41 // here in case the nutty crash ever re-surfaces. >_<
42 void x86capabilities::SIMD_EstablishMXCSRmask()
43 {
44 if( !hasStreamingSIMDExtensions ) return;
45
46 MXCSR_Mask.bitmask = 0xFFBF; // MMX/SSE default
47
48 if( hasStreamingSIMD2Extensions )
49 {
50 // This is generally safe assumption, but FXSAVE is the "correct" way to
51 // detect MXCSR masking features of the cpu, so we use it's result below
52 // and override this.
53
54 MXCSR_Mask.bitmask = 0xFFFF; // SSE2 features added
55 }
56
57 if( !CanEmitShit() ) return;
58
59 // the fxsave buffer must be 16-byte aligned to avoid GPF. I just save it to an
60 // unused portion of recSSE, since it has plenty of room to spare.
61
62 HostSys::MemProtectStatic( recSSE, PageAccess_ReadWrite() );
63
64 xSetPtr( recSSE );
65 xFXSAVE( ptr[&targetFXSAVE] );
66 xRET();
67
68 HostSys::MemProtectStatic( recSSE, PageAccess_ExecOnly() );
69
70 CallAddress( recSSE );
71
72 u32 result = (u32&)targetFXSAVE[28]; // bytes 28->32 are the MXCSR_Mask.
73 if( result != 0 )
74 MXCSR_Mask.bitmask = result;
75 }
76
77 // Counts the number of cpu cycles executed over the requested number of PerformanceCounter
78 // ticks. Returns that exact count.
79 // For best results you should pick a period of time long enough to get a reading that won't
80 // be prone to rounding error; but short enough that it'll be highly unlikely to be interrupted
81 // by the operating system task switches.
82 s64 x86capabilities::_CPUSpeedHz( u64 time ) const
83 {
84 u64 timeStart, timeStop;
85 s64 startCycle, endCycle;
86
87 if( ! hasTimeStampCounter )
88 return 0;
89
90 SingleCoreAffinity affinity_lock;
91
92 // Align the cpu execution to a cpuTick boundary.
93
94 do {
95 timeStart = GetCPUTicks();
96 startCycle = __rdtsc();
97 } while( GetCPUTicks() == timeStart );
98
99 do {
100 timeStop = GetCPUTicks();
101 endCycle = __rdtsc();
102 } while( ( timeStop - timeStart ) < time );
103
104 s64 cycleCount = endCycle - startCycle;
105 s64 timeCount = timeStop - timeStart;
106 s64 overrun = timeCount - time;
107 if( !overrun ) return cycleCount;
108
109 // interference could cause us to overshoot the target time, compensate:
110
111 double cyclesPerTick = (double)cycleCount / (double)timeCount;
112 double newCycleCount = (double)cycleCount - (cyclesPerTick * overrun);
113
114 return (s64)newCycleCount;
115 }
116
117 wxString x86capabilities::GetTypeName() const
118 {
119 switch( TypeID )
120 {
121 case 0: return L"Standard OEM";
122 case 1: return L"Overdrive";
123 case 2: return L"Dual";
124 case 3: return L"Reserved";
125 default: return L"Unknown";
126 }
127 }
128
129 void x86capabilities::CountCores()
130 {
131 Identify();
132
133 s32 regs[ 4 ];
134 u32 cmds;
135
136 __cpuid( regs, 0x80000000 );
137 cmds = regs[ 0 ];
138
139 // detect multicore for AMD cpu
140
141 if ((cmds >= 0x80000008) && (VendorID == x86Vendor_AMD) )
142 {
143 // AMD note: they don't support hyperthreading, but they like to flag this true
144 // anyway. Let's force-unflag it until we come up with a better solution.
145 // (note: seems to affect some Phenom II's only? -- Athlon X2's and PhenomI's do
146 // not seem to do this) --air
147 hasMultiThreading = 0;
148 }
149
150 // This will assign values into LogicalCores and PhysicalCores
151 CountLogicalCores();
152 }
153
// CPUID vendor strings, compared against VendorName by x86capabilities::Identify().
// Each entry needs its own trailing comma: without one, adjacent string literals are
// concatenated by the compiler and the table silently loses an element.
static const char* tbl_x86vendors[] =
{
	"GenuineIntel",
	"AuthenticAMD",
	"Unknown ",
};
160
161 // Performs all _cpuid-related activity. This fills *most* of the x86caps structure, except for
162 // the cpuSpeed and the mxcsr masks. Those must be completed manually.
163 void x86capabilities::Identify()
164 {
165 if( isIdentified ) return;
166 isIdentified = true;
167
168 s32 regs[ 4 ];
169 u32 cmds;
170
171 //AMD 64 STUFF
172 u32 x86_64_8BITBRANDID;
173 u32 x86_64_12BITBRANDID;
174
175 memzero( VendorName );
176 __cpuid( regs, 0 );
177
178 cmds = regs[ 0 ];
179 ((u32*)VendorName)[ 0 ] = regs[ 1 ];
180 ((u32*)VendorName)[ 1 ] = regs[ 3 ];
181 ((u32*)VendorName)[ 2 ] = regs[ 2 ];
182
183 // Determine Vendor Specifics!
184 // It's really not recommended that we base much (if anything) on CPU vendor names,
185 // however it's currently necessary in order to gain a (pseudo)reliable count of cores
186 // and threads used by the CPU (AMD and Intel can't agree on how to make this info available).
187
188 int& vid = (int&)VendorID;
189 for( vid=0; vid<x86Vendor_Unknown; ++vid )
190 {
191 if( memcmp( VendorName, tbl_x86vendors[vid], 12 ) == 0 ) break;
192 }
193
194 if ( cmds >= 0x00000001 )
195 {
196 __cpuid( regs, 0x00000001 );
197
198 StepID = regs[ 0 ] & 0xf;
199 Model = (regs[ 0 ] >> 4) & 0xf;
200 FamilyID = (regs[ 0 ] >> 8) & 0xf;
201 TypeID = (regs[ 0 ] >> 12) & 0x3;
202 x86_64_8BITBRANDID = regs[ 1 ] & 0xff;
203 Flags = regs[ 3 ];
204 Flags2 = regs[ 2 ];
205 }
206
207 __cpuid( regs, 0x80000000 );
208 cmds = regs[ 0 ];
209 if ( cmds >= 0x80000001 )
210 {
211 __cpuid( regs, 0x80000001 );
212
213 x86_64_12BITBRANDID = regs[1] & 0xfff;
214 EFlags2 = regs[ 2 ];
215 EFlags = regs[ 3 ];
216 }
217
218 memzero( FamilyName );
219 __cpuid( (int*)FamilyName, 0x80000002);
220 __cpuid( (int*)(FamilyName+16), 0x80000003);
221 __cpuid( (int*)(FamilyName+32), 0x80000004);
222
223 hasFloatingPointUnit = ( Flags >> 0 ) & 1;
224 hasVirtual8086ModeEnhancements = ( Flags >> 1 ) & 1;
225 hasDebuggingExtensions = ( Flags >> 2 ) & 1;
226 hasPageSizeExtensions = ( Flags >> 3 ) & 1;
227 hasTimeStampCounter = ( Flags >> 4 ) & 1;
228 hasModelSpecificRegisters = ( Flags >> 5 ) & 1;
229 hasPhysicalAddressExtension = ( Flags >> 6 ) & 1;
230 hasMachineCheckArchitecture = ( Flags >> 7 ) & 1;
231 hasCOMPXCHG8BInstruction = ( Flags >> 8 ) & 1;
232 hasAdvancedProgrammableInterruptController = ( Flags >> 9 ) & 1;
233 hasSEPFastSystemCall = ( Flags >> 11 ) & 1;
234 hasMemoryTypeRangeRegisters = ( Flags >> 12 ) & 1;
235 hasPTEGlobalFlag = ( Flags >> 13 ) & 1;
236 hasMachineCheckArchitecture = ( Flags >> 14 ) & 1;
237 hasConditionalMoveAndCompareInstructions = ( Flags >> 15 ) & 1;
238 hasFGPageAttributeTable = ( Flags >> 16 ) & 1;
239 has36bitPageSizeExtension = ( Flags >> 17 ) & 1;
240 hasProcessorSerialNumber = ( Flags >> 18 ) & 1;
241 hasCFLUSHInstruction = ( Flags >> 19 ) & 1;
242 hasDebugStore = ( Flags >> 21 ) & 1;
243 hasACPIThermalMonitorAndClockControl = ( Flags >> 22 ) & 1;
244 hasMultimediaExtensions = ( Flags >> 23 ) & 1; //mmx
245 hasFastStreamingSIMDExtensionsSaveRestore = ( Flags >> 24 ) & 1;
246 hasStreamingSIMDExtensions = ( Flags >> 25 ) & 1; //sse
247 hasStreamingSIMD2Extensions = ( Flags >> 26 ) & 1; //sse2
248 hasSelfSnoop = ( Flags >> 27 ) & 1;
249 hasMultiThreading = ( Flags >> 28 ) & 1;
250 hasThermalMonitor = ( Flags >> 29 ) & 1;
251 hasIntel64BitArchitecture = ( Flags >> 30 ) & 1;
252
253 // -------------------------------------------------
254 // --> SSE3 / SSSE3 / SSE4.1 / SSE 4.2 detection <--
255 // -------------------------------------------------
256
257 hasStreamingSIMD3Extensions = ( Flags2 >> 0 ) & 1; //sse3
258 hasSupplementalStreamingSIMD3Extensions = ( Flags2 >> 9 ) & 1; //ssse3
259 hasStreamingSIMD4Extensions = ( Flags2 >> 19 ) & 1; //sse4.1
260 hasStreamingSIMD4Extensions2 = ( Flags2 >> 20 ) & 1; //sse4.2
261
262 // Ones only for AMDs:
263 hasMultimediaExtensionsExt = ( EFlags >> 22 ) & 1; //mmx2
264 hasAMD64BitArchitecture = ( EFlags >> 29 ) & 1; //64bit cpu
265 has3DNOWInstructionExtensionsExt = ( EFlags >> 30 ) & 1; //3dnow+
266 has3DNOWInstructionExtensions = ( EFlags >> 31 ) & 1; //3dnow
267 hasStreamingSIMD4ExtensionsA = ( EFlags2 >> 6 ) & 1; //INSERTQ / EXTRQ / MOVNT
268
269 isIdentified = true;
270 }
271
272 u32 x86capabilities::CalculateMHz() const
273 {
274 InitCPUTicks();
275 u64 span = GetTickFrequency();
276
277 if( (span % 1000) < 400 ) // helps minimize rounding errors
278 return (u32)( _CPUSpeedHz( span / 1000 ) / 1000 );
279 else
280 return (u32)( _CPUSpeedHz( span / 500 ) / 2000 );
281 }
282
283 // Special extended version of SIMD testning, which uses exceptions to double-check the presence
284 // of SSE2/3/4 instructions. Useful if you don't trust cpuid (at least one report of an invalid
285 // cpuid has been reported on a Core2 Quad -- the user fixed it by clearing his CMOS).
286 //
287 // Results of CPU
288 void x86capabilities::SIMD_ExceptionTest()
289 {
290 HostSys::MemProtectStatic( recSSE, PageAccess_ReadWrite() );
291
292 //////////////////////////////////////////////////////////////////////////////////////////
293 // SIMD Instruction Support Detection (Second Pass)
294 //
295
296 if( CanTestInstructionSets() )
297 {
298 xSetPtr( recSSE );
299 xMOVDQU( ptr[ecx], xmm1 );
300 xMOVSLDUP( xmm1, xmm0 );
301 xMOVDQU( xmm1, ptr[ecx] );
302 xRET();
303
304 u8* funcSSSE3 = xGetPtr();
305 xMOVDQU( ptr[ecx], xmm1 );
306 xPABS.W( xmm1, xmm0 );
307 xMOVDQU( xmm1, ptr[ecx] );
308 xRET();
309
310 u8* funcSSE41 = xGetPtr();
311 xMOVDQU( ptr[ecx], xmm1 );
312 xBLEND.VPD( xmm1, xmm0 );
313 xMOVDQU( xmm1, ptr[ecx] );
314 xRET();
315
316 HostSys::MemProtectStatic( recSSE, PageAccess_ExecOnly() );
317
318 bool sse3_result = _test_instruction( recSSE ); // sse3
319 bool ssse3_result = _test_instruction( funcSSSE3 );
320 bool sse41_result = _test_instruction( funcSSE41 );
321
322 // Test for and log any irregularities here.
323 // We take the instruction test result over cpuid since (in theory) it should be a
324 // more reliable gauge of the cpu's actual ability. But since a difference in bit
325 // and actual ability may represent a cmos/bios problem, we report it to the user.
326
327 if( sse3_result != !!hasStreamingSIMD3Extensions )
328 {
329 Console.Warning( "SSE3 Detection Inconsistency: cpuid=%s, test_result=%s",
330 bool_to_char( !!hasStreamingSIMD3Extensions ), bool_to_char( sse3_result ) );
331
332 hasStreamingSIMD3Extensions = sse3_result;
333 }
334
335 if( ssse3_result != !!hasSupplementalStreamingSIMD3Extensions )
336 {
337 Console.Warning( "SSSE3 Detection Inconsistency: cpuid=%s, test_result=%s",
338 bool_to_char( !!hasSupplementalStreamingSIMD3Extensions ), bool_to_char( ssse3_result ) );
339
340 hasSupplementalStreamingSIMD3Extensions = ssse3_result;
341 }
342
343 if( sse41_result != !!hasStreamingSIMD4Extensions )
344 {
345 Console.Warning( "SSE4 Detection Inconsistency: cpuid=%s, test_result=%s",
346 bool_to_char( !!hasStreamingSIMD4Extensions ), bool_to_char( sse41_result ) );
347
348 hasStreamingSIMD4Extensions = sse41_result;
349 }
350
351 }
352
353 SIMD_EstablishMXCSRmask();
354 }
355

  ViewVC Help
Powered by ViewVC 1.1.22