/[pcsx2_0.9.7]/trunk/common/include/Utilities/win_memzero.h
ViewVC logotype

Diff of /trunk/common/include/Utilities/win_memzero.h

Parent Directory | Revision Log | View Patch

--- trunk/common/include/Utilities/win_memzero.h	2010/09/07 06:28:05	61
+++ trunk/common/include/Utilities/win_memzero.h	2010/09/07 11:08:22	62
@@ -60,7 +60,7 @@
 
 // This is an implementation of the memzero_ptr fast memset routine (for zero-clears only).
 template< size_t _bytes >
-static __forceinline void memzero_ptr( void *dest )
+static __fi void memzero_ptr( void *dest )
 {
 	if( MZFbytes == 0 ) return;
 
@@ -73,6 +73,10 @@
 		return;
 	}
 
+#if 0
+	// SSE-based memory clear.  Currently disabled so as to avoid an unnecessary dependence on
+	// SSE CPU instruction sets.  (memzero typically isn't used in any performance-critical
+	// situations anyway.)
 	enum
 	{
 		remainder = MZFbytes & 127,
@@ -86,8 +90,6 @@
 
 	if( (MZFbytes & 0xf) == 0 )
 	{
-		u64 _xmm_backup[2];
-
 		if( ((uptr)dest & 0xf) != 0 )
 		{
 			// UNALIGNED COPY MODE.
@@ -97,24 +99,21 @@
 			{
 				__asm
 				{
-					movups _xmm_backup,xmm0;
 					mov ecx,dest
 					pxor xmm0,xmm0
 					mov eax,bytes128
 
-					align 16
-
 				_loop_6:
-					movups [ecx],xmm0;
-					movups [ecx+0x10],xmm0;
-					movups [ecx+0x20],xmm0;
-					movups [ecx+0x30],xmm0;
-					movups [ecx+0x40],xmm0;
-					movups [ecx+0x50],xmm0;
-					movups [ecx+0x60],xmm0;
-					movups [ecx+0x70],xmm0;
+					movups [ecx],xmm0
+					movups [ecx+0x10],xmm0
+					movups [ecx+0x20],xmm0
+					movups [ecx+0x30],xmm0
+					movups [ecx+0x40],xmm0
+					movups [ecx+0x50],xmm0
+					movups [ecx+0x60],xmm0
+					movups [ecx+0x70],xmm0
 					sub ecx,-128
-					dec eax;
+					sub eax,1
 					jnz _loop_6;
 				}
 				if( remainder != 0 )
@@ -130,10 +129,6 @@
 						jnz _loop_5;
 					}
 				}
-				__asm
-				{
-					movups xmm0,[_xmm_backup];
-				}
 				return;
 			}
 		}
@@ -145,24 +140,21 @@
 
 			__asm
 			{
-				movups _xmm_backup,xmm0;
 				mov ecx,dest
 				pxor xmm0,xmm0
 				mov eax,bytes128
 
-				align 16
-
 			_loop_8:
-				movaps [ecx],xmm0;
-				movaps [ecx+0x10],xmm0;
-				movaps [ecx+0x20],xmm0;
-				movaps [ecx+0x30],xmm0;
-				movaps [ecx+0x40],xmm0;
-				movaps [ecx+0x50],xmm0;
-				movaps [ecx+0x60],xmm0;
-				movaps [ecx+0x70],xmm0;
+				movaps [ecx],xmm0
+				movaps [ecx+0x10],xmm0
+				movaps [ecx+0x20],xmm0
+				movaps [ecx+0x30],xmm0
+				movaps [ecx+0x40],xmm0
+				movaps [ecx+0x50],xmm0
+				movaps [ecx+0x60],xmm0
+				movaps [ecx+0x70],xmm0
 				sub ecx,-128
-				dec eax;
+				sub eax,1
 				jnz _loop_8;
 			}
 			if( remainder != 0 )
@@ -173,22 +165,19 @@
 					mov eax, remainder
 
 				_loop_10:
-					movaps [ecx+eax],xmm0;
+					movaps [ecx+eax],xmm0
 					sub eax,16;
 					jnz _loop_10;
 				}
 			}
-			__asm
-			{
-				movups xmm0,[_xmm_backup];
-			}
 			return;
 		}
 	}
+	#endif
 
 	// This function only works on 32-bit alignments.
-	jASSUME( (MZFbytes & 0x3) == 0 );
-	jASSUME( ((uptr)dest & 0x3) == 0 );
+	pxAssume( (MZFbytes & 0x3) == 0 );
+	pxAssume( ((uptr)dest & 0x3) == 0 );
 
 	enum
 	{
@@ -258,7 +247,7 @@
 
 // An optimized memset for 8 bit destination data.
 template< u8 data, size_t _bytes >
-static __forceinline void memset_8( void *dest )
+static __fi void memset_8( void *dest )
 {
 	if( MZFbytes == 0 ) return;
 
@@ -271,19 +260,16 @@
 		return;
 	}
 
-	//u64 _xmm_backup[2];
-
 	/*static const size_t remainder = MZFbytes & 127;
 	static const size_t bytes128 = MZFbytes / 128;
 	if( bytes128 > 32 )
 	{
 		// This function only works on 128-bit alignments.
-		jASSUME( (MZFbytes & 0xf) == 0 );
-		jASSUME( ((uptr)dest & 0xf) == 0 );
+		pxAssume( (MZFbytes & 0xf) == 0 );
+		pxAssume( ((uptr)dest & 0xf) == 0 );
 
 		__asm
 		{
-			movups _xmm_backup,xmm0;
 			mov eax,bytes128
 			mov ecx,dest
 			movss xmm0,data
@@ -316,14 +302,10 @@
 				jnz _loop_10;
 			}
 		}
-		__asm
-		{
-			movups xmm0,[_xmm_backup];
-		}
 	}*/
 
 	// This function only works on 32-bit alignments of data copied.
-	jASSUME( (MZFbytes & 0x3) == 0 );
+	pxAssume( (MZFbytes & 0x3) == 0 );
 
 	enum
 	{
@@ -392,12 +374,12 @@
 }
 
 template< u16 data, size_t _bytes >
-static __forceinline void memset_16( void *dest )
+static __fi void memset_16( void *dest )
 {
 	if( MZFbytes == 0 ) return;
 
-	if( (MZFbytes & 0x1) != 0 )
-		throw Exception::LogicError( "Invalid parameter passed to memset_16 - data length is not a multiple of 16 or 32 bits." );
+	// Assertion: data length must be a multiple of 16 or 32 bits
+	pxAssume( (MZFbytes & 0x1) == 0 );
 
 	if( (MZFbytes & 0x3) != 0 )
 	{
@@ -411,7 +393,7 @@
 	//u64 _xmm_backup[2];
 
 	// This function only works on 32-bit alignments of data copied.
-	jASSUME( (MZFbytes & 0x3) == 0 );
+	pxAssume( (MZFbytes & 0x3) == 0 );
 
 	enum
 	{
@@ -480,13 +462,12 @@
 }
 
 template< u32 data, size_t MZFbytes >
-static __forceinline void memset_32( void *dest )
+static __fi void memset_32( void *dest )
 {
 	if( MZFbytes == 0 ) return;
 
-	if( (MZFbytes & 0x3) != 0 )
-		throw Exception::LogicError( "Invalid parameter passed to memset_32 - data length is not a multiple of 32 bits." );
-
+	// Assertion: data length must be a multiple of 32 bits
+	pxAssume( (MZFbytes & 0x3) == 0 );
 
 	//u64 _xmm_backup[2];
 
@@ -494,7 +475,7 @@
 	// If the data length is not a multiple of 32 bits, the C++ optimizing compiler will
 	// probably just generate mysteriously broken code in Release builds. ;)
 
-	jASSUME( (MZFbytes & 0x3) == 0 );
+	pxAssume( (MZFbytes & 0x3) == 0 );
 
 	enum
 	{
@@ -566,28 +547,28 @@
 // Structures, static arrays, etc.  No need to include sizeof() crap, this does it automatically
 // for you!
 template< typename T >
-static __forceinline void memzero( T& object )
+static __fi void memzero( T& object )
 {
 	memzero_ptr<sizeof(T)>( &object );
 }
 
 // This method clears an object with the given 8 bit value.
 template< u8 data, typename T >
-static __forceinline void memset8( T& object )
+static __fi void memset8( T& object )
 {
 	memset_8<data, sizeof(T)>( &object );
 }
 
 // This method clears an object with the given 16 bit value.
 template< u16 data, typename T >
-static __forceinline void memset16( T& object )
+static __fi void memset16( T& object )
 {
 	memset_16<data, sizeof(T)>( &object );
 }
 
 // This method clears an object with the given 32 bit value.
 template< u32 data, typename T >
-static __forceinline void memset32( T& object )
+static __fi void memset32( T& object )
 {
 	memset_32<data, sizeof(T)>( &object );
 }

 

  ViewVC Help
Powered by ViewVC 1.1.22