/[pcsx2_0.9.7]/trunk/common/include/x86emitter/implement/simd_shufflepack.h
ViewVC logotype

Annotation of /trunk/common/include/x86emitter/implement/simd_shufflepack.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 62 - (hide annotations) (download)
Tue Sep 7 11:08:22 2010 UTC (9 years, 11 months ago) by william
File MIME type: text/plain
File size: 11716 byte(s)
Auto Commited Import of: pcsx2-0.9.7-r3738-debug in ./trunk
1 william 31 /* PCSX2 - PS2 Emulator for PCs
2     * Copyright (C) 2002-2010 PCSX2 Dev Team
3     *
4     * PCSX2 is free software: you can redistribute it and/or modify it under the terms
5     * of the GNU Lesser General Public License as published by the Free Software Found-
6     * ation, either version 3 of the License, or (at your option) any later version.
7     *
8     * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9     * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10     * PURPOSE. See the GNU General Public License for more details.
11     *
12     * You should have received a copy of the GNU General Public License along with PCSX2.
13     * If not, see <http://www.gnu.org/licenses/>.
14     */
15    
16     #pragma once
17    
18     namespace x86Emitter {
19    
20     // --------------------------------------------------------------------------------------
21     // xImplSimd_Shuffle
22     // --------------------------------------------------------------------------------------
23     struct xImplSimd_Shuffle
24     {
25     inline void _selector_assertion_check( u8 selector ) const;
26    
27     void PS( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const;
28 william 62 void PS( const xRegisterSSE& to, const xIndirectVoid& from, u8 selector ) const;
29 william 31
30     void PD( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const;
31 william 62 void PD( const xRegisterSSE& to, const xIndirectVoid& from, u8 selector ) const;
32 william 31 };
33    
34     // --------------------------------------------------------------------------------------
35     // xImplSimd_PShuffle
36     // --------------------------------------------------------------------------------------
37     struct xImplSimd_PShuffle
38     {
39     // Copies words from src and inserts them into dest at word locations selected with
40     // the order operand (8 bit immediate).
41     const xImplSimd_DestRegImmMMX W;
42    
43     // Copies doublewords from src and inserts them into dest at dword locations selected
44     // with the order operand (8 bit immediate).
45     const xImplSimd_DestRegImmSSE D;
46    
47     // Copies words from the low quadword of src and inserts them into the low quadword
48     // of dest at word locations selected with the order operand (8 bit immediate).
49     // The high quadword of src is copied to the high quadword of dest.
50     const xImplSimd_DestRegImmSSE LW;
51    
52     // Copies words from the high quadword of src and inserts them into the high quadword
53     // of dest at word locations selected with the order operand (8 bit immediate).
54     // The low quadword of src is copied to the low quadword of dest.
55     const xImplSimd_DestRegImmSSE HW;
56    
57     // [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
58     // control mask in src. If the most significant bit (bit[7]) of each byte of the
59     // shuffle control mask is set, then constant zero is written in the result byte.
60     // Each byte in the shuffle control mask forms an index to permute the corresponding
61     // byte in dest. The value of each index is the least significant 4 bits (128-bit
62     // operation) or 3 bits (64-bit operation) of the shuffle control byte.
63     //
64     // Operands can be MMX or XMM registers.
65     const xImplSimd_DestRegEither B;
66 william 62
67     // below is my test bed for a new system, free of subclasses. Was supposed to improve intellisense
68     // but it doesn't (makes it worse). Will try again in MSVC 2010. --air
69    
70     #if 0
71     // Copies words from src and inserts them into dest at word locations selected with
72     // the order operand (8 bit immediate).
73     void W( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { xOpWrite0F( 0x70, to, from, imm ); }
74     void W( const xRegisterMMX& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0x70, to, from, imm ); }
75    
76     // Copies doublewords from src and inserts them into dest at dword locations selected
77     // with the order operand (8 bit immediate).
78     void D( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0x66, 0x70, to, from, imm ); }
79     void D( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0x66, 0x70, to, from, imm ); }
80    
81     // Copies words from the low quadword of src and inserts them into the low quadword
82     // of dest at word locations selected with the order operand (8 bit immediate).
83     // The high quadword of src is copied to the high quadword of dest.
84     void LW( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0xf2, 0x70, to, from, imm ); }
85     void LW( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0xf2, 0x70, to, from, imm ); }
86    
87     // Copies words from the high quadword of src and inserts them into the high quadword
88     // of dest at word locations selected with the order operand (8 bit immediate).
89     // The low quadword of src is copied to the low quadword of dest.
90     void HW( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0xf3, 0x70, to, from, imm ); }
91     void HW( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0xf3, 0x70, to, from, imm ); }
92    
93     // [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
94     // control mask in src. If the most significant bit (bit[7]) of each byte of the
95     // shuffle control mask is set, then constant zero is written in the result byte.
96     // Each byte in the shuffle control mask forms an index to permute the corresponding
97     // byte in dest. The value of each index is the least significant 4 bits (128-bit
98     // operation) or 3 bits (64-bit operation) of the shuffle control byte.
99     //
100     // Operands can be MMX or XMM registers.
101     void B( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, 0x0038 ); }
102     void B( const xRegisterSSE& to, const xIndirectVoid& from ) const { OpWriteSSE( 0x66, 0x0038 ); }
103     void B( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteSSE( 0x00, 0x0038 ); }
104     void B( const xRegisterMMX& to, const xIndirectVoid& from ) const { OpWriteSSE( 0x00, 0x0038 ); }
105     #endif
106 william 31 };
107    
108     // --------------------------------------------------------------------------------------
109     // SimdImpl_PUnpack
110     // --------------------------------------------------------------------------------------
111     struct SimdImpl_PUnpack
112     {
113     // Unpack and interleave low-order bytes from src and dest into dest.
114     const xImplSimd_DestRegEither LBW;
115     // Unpack and interleave low-order words from src and dest into dest.
116     const xImplSimd_DestRegEither LWD;
117     // Unpack and interleave low-order doublewords from src and dest into dest.
118     const xImplSimd_DestRegEither LDQ;
119     // Unpack and interleave low-order quadwords from src and dest into dest.
120     const xImplSimd_DestRegSSE LQDQ;
121    
122     // Unpack and interleave high-order bytes from src and dest into dest.
123     const xImplSimd_DestRegEither HBW;
124     // Unpack and interleave high-order words from src and dest into dest.
125     const xImplSimd_DestRegEither HWD;
126     // Unpack and interleave high-order doublewords from src and dest into dest.
127     const xImplSimd_DestRegEither HDQ;
128     // Unpack and interleave high-order quadwords from src and dest into dest.
129     const xImplSimd_DestRegSSE HQDQ;
130     };
131    
132     // --------------------------------------------------------------------------------------
133     // SimdImpl_Pack
134     // --------------------------------------------------------------------------------------
135     // Pack with Signed or Unsigned Saturation
136     //
137     struct SimdImpl_Pack
138     {
139     // Converts packed signed word integers from src and dest into packed signed
140     // byte integers in dest, using signed saturation.
141     const xImplSimd_DestRegEither SSWB;
142    
143     // Converts packed signed dword integers from src and dest into packed signed
144     // word integers in dest, using signed saturation.
145     const xImplSimd_DestRegEither SSDW;
146    
147     // Converts packed signed word integers from src and dest into packed unsigned
148     // byte integers in dest, using unsigned saturation.
149     const xImplSimd_DestRegEither USWB;
150    
151     // [SSE-4.1] Converts packed signed dword integers from src and dest into packed
152     // unsigned word integers in dest, using unsigned saturation.
153     const xImplSimd_DestRegSSE USDW;
154     };
155    
156     // --------------------------------------------------------------------------------------
157     // xImplSimd_Unpack
158     // --------------------------------------------------------------------------------------
159     struct xImplSimd_Unpack
160     {
161     // Unpacks the high doubleword [single-precision] values from src and dest into
162     // dest, such that the result of dest looks like this:
163     // dest[0] <- dest[2]
164     // dest[1] <- src[2]
165     // dest[2] <- dest[3]
166     // dest[3] <- src[3]
167     //
168     const xImplSimd_DestRegSSE HPS;
169    
170     // Unpacks the high quadword [double-precision] values from src and dest into
171     // dest, such that the result of dest looks like this:
172     // dest.lo <- dest.hi
173     // dest.hi <- src.hi
174     //
175     const xImplSimd_DestRegSSE HPD;
176    
177     // Unpacks the low doubleword [single-precision] values from src and dest into
178     // dest, such that the result of dest looks like this:
179     // dest[3] <- src[1]
180     // dest[2] <- dest[1]
181     // dest[1] <- src[0]
182     // dest[0] <- dest[0]
183     //
184     const xImplSimd_DestRegSSE LPS;
185    
186     // Unpacks the low quadword [double-precision] values from src and dest into
187     // dest, effectively moving the low portion of src into the upper portion of dest.
188     // The result of dest is loaded as such:
189     // dest.hi <- src.lo
190     // dest.lo <- dest.lo [remains unchanged!]
191     //
192     const xImplSimd_DestRegSSE LPD;
193     };
194    
195    
196     struct xImplSimd_InsertExtractHelper
197     {
198     u16 Opcode;
199    
200     // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
201     void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const;
202    
203     // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
204 william 62 void operator()( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8 ) const;
205 william 31 };
206    
207     // --------------------------------------------------------------------------------------
208     // xImplSimd_PInsert
209     // --------------------------------------------------------------------------------------
210     // PINSRW/B/D [all but Word form are SSE4.1 only!]
211     //
212     struct xImplSimd_PInsert
213     {
214     void W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const;
215 william 62 void W( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8 ) const;
216 william 31
217     void W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const;
218 william 62 void W( const xRegisterMMX& to, const xIndirectVoid& from, u8 imm8 ) const;
219 william 31
220     // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
221     xImplSimd_InsertExtractHelper B;
222    
223     // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
224     xImplSimd_InsertExtractHelper D;
225     };
226    
227    
228     //////////////////////////////////////////////////////////////////////////////////////////
229     // PEXTRW/B/D [all but Word form are SSE4.1 only!]
230     //
231     // Note: Word form's indirect memory form is only available in SSE4.1.
232     //
233     struct SimdImpl_PExtract
234     {
235     // Copies the word element specified by imm8 from src to dest. The upper bits
236     // of dest are zero-extended (cleared). This can be used to extract any single packed
237     // word value from src into an x86 32 bit register.
238     //
239     // [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
240     //
241     void W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const;
242     void W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const;
243 william 62 void W( const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8 ) const;
244 william 31
245     // [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
246     // of dest are zero-extended (cleared). This can be used to extract any single packed
247     // byte value from src into an x86 32 bit register.
248     const xImplSimd_InsertExtractHelper B;
249    
250     // [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
251     // used to extract any single packed dword value from src into an x86 32 bit register.
252     const xImplSimd_InsertExtractHelper D;
253     };
254    
255     }

  ViewVC Help
Powered by ViewVC 1.1.22