/[pcsx2_0.9.7]/trunk/common/include/x86emitter/implement/simd_shufflepack.h
ViewVC logotype

Contents of /trunk/common/include/x86emitter/implement/simd_shufflepack.h

Parent Directory | Revision Log


Revision 62 - (show annotations) (download)
Tue Sep 7 11:08:22 2010 UTC (9 years, 10 months ago) by william
File MIME type: text/plain
File size: 11716 byte(s)
Auto Commited Import of: pcsx2-0.9.7-r3738-debug in ./trunk
1 /* PCSX2 - PS2 Emulator for PCs
2 * Copyright (C) 2002-2010 PCSX2 Dev Team
3 *
4 * PCSX2 is free software: you can redistribute it and/or modify it under the terms
5 * of the GNU Lesser General Public License as published by the Free Software Found-
6 * ation, either version 3 of the License, or (at your option) any later version.
7 *
8 * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10 * PURPOSE. See the GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License along with PCSX2.
13 * If not, see <http://www.gnu.org/licenses/>.
14 */
15
16 #pragma once
17
18 namespace x86Emitter {
19
20 // --------------------------------------------------------------------------------------
21 // xImplSimd_Shuffle
22 // --------------------------------------------------------------------------------------
// Emitter interface for the selector-driven SSE shuffles. PS and PD each accept a
// destination SSE register, a register or indirect-memory source, and an 8-bit selector.
// NOTE(review): presumably these emit SHUFPS / SHUFPD -- the definitions are not in this
// header, so confirm against the implementation file.
23 struct xImplSimd_Shuffle
24 {
// Helper that receives the selector byte. Judging by the name it validates the selector,
// but its body is defined elsewhere -- confirm there.
25 inline void _selector_assertion_check( u8 selector ) const;
26
// Packed single-precision form: register source, then indirect-memory source.
27 void PS( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const;
28 void PS( const xRegisterSSE& to, const xIndirectVoid& from, u8 selector ) const;
29
// Packed double-precision form: register source, then indirect-memory source.
30 void PD( const xRegisterSSE& to, const xRegisterSSE& from, u8 selector ) const;
31 void PD( const xRegisterSSE& to, const xIndirectVoid& from, u8 selector ) const;
32 };
33
34 // --------------------------------------------------------------------------------------
35 // xImplSimd_PShuffle
36 // --------------------------------------------------------------------------------------
// Groups the packed-integer shuffle emitters. W, D, LW and HW take an 8-bit order
// immediate; B takes its shuffle control mask from the source operand instead.
// NOTE(review): presumably PSHUFW / PSHUFD / PSHUFLW / PSHUFHW / PSHUFB -- the opcodes in
// the disabled block below are consistent with that, but the live definitions are elsewhere.
37 struct xImplSimd_PShuffle
38 {
39 // Copies words from src and inserts them into dest at word locations selected with
40 // the order operand (8 bit immediate).
41 const xImplSimd_DestRegImmMMX W;
42
43 // Copies doublewords from src and inserts them into dest at dword locations selected
44 // with the order operand (8 bit immediate).
45 const xImplSimd_DestRegImmSSE D;
46
47 // Copies words from the low quadword of src and inserts them into the low quadword
48 // of dest at word locations selected with the order operand (8 bit immediate).
49 // The high quadword of src is copied to the high quadword of dest.
50 const xImplSimd_DestRegImmSSE LW;
51
52 // Copies words from the high quadword of src and inserts them into the high quadword
53 // of dest at word locations selected with the order operand (8 bit immediate).
54 // The low quadword of src is copied to the low quadword of dest.
55 const xImplSimd_DestRegImmSSE HW;
56
57 // [SSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
58 // control mask in src. If the most significant bit (bit[7]) of each byte of the
59 // shuffle control mask is set, then constant zero is written in the result byte.
60 // Each byte in the shuffle control mask forms an index to permute the corresponding
61 // byte in dest. The value of each index is the least significant 4 bits (128-bit
62 // operation) or 3 bits (64-bit operation) of the shuffle control byte.
63 //
64 // Operands can be MMX or XMM registers.
65 const xImplSimd_DestRegEither B;
66
67 // The disabled block below is my test bed for a new system, free of subclasses. It was
68 // supposed to improve intellisense but it doesn't (makes it worse). Will try again in MSVC 2010. --air
69
70 #if 0
71 // Copies words from src and inserts them into dest at word locations selected with
72 // the order operand (8 bit immediate).
73 void W( const xRegisterMMX& to, const xRegisterMMX& from, u8 imm ) const { xOpWrite0F( 0x70, to, from, imm ); }
74 void W( const xRegisterMMX& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0x70, to, from, imm ); }
75
76 // Copies doublewords from src and inserts them into dest at dword locations selected
77 // with the order operand (8 bit immediate).
78 void D( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0x66, 0x70, to, from, imm ); }
79 void D( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0x66, 0x70, to, from, imm ); }
80
81 // Copies words from the low quadword of src and inserts them into the low quadword
82 // of dest at word locations selected with the order operand (8 bit immediate).
83 // The high quadword of src is copied to the high quadword of dest.
84 void LW( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0xf2, 0x70, to, from, imm ); }
85 void LW( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0xf2, 0x70, to, from, imm ); }
86
87 // Copies words from the high quadword of src and inserts them into the high quadword
88 // of dest at word locations selected with the order operand (8 bit immediate).
89 // The low quadword of src is copied to the low quadword of dest.
90 void HW( const xRegisterSSE& to, const xRegisterSSE& from, u8 imm ) const { xOpWrite0F( 0xf3, 0x70, to, from, imm ); }
91 void HW( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm ) const { xOpWrite0F( 0xf3, 0x70, to, from, imm ); }
92
93 // [sSSE-3] Performs in-place shuffles of bytes in dest according to the shuffle
94 // control mask in src. If the most significant bit (bit[7]) of each byte of the
95 // shuffle control mask is set, then constant zero is written in the result byte.
96 // Each byte in the shuffle control mask forms an index to permute the corresponding
97 // byte in dest. The value of each index is the least significant 4 bits (128-bit
98 // operation) or 3 bits (64-bit operation) of the shuffle control byte.
99 //
100 // Operands can be MMX or XMM registers.
101 void B( const xRegisterSSE& to, const xRegisterSSE& from ) const { OpWriteSSE( 0x66, 0x0038 ); }
102 void B( const xRegisterSSE& to, const xIndirectVoid& from ) const { OpWriteSSE( 0x66, 0x0038 ); }
103 void B( const xRegisterMMX& to, const xRegisterMMX& from ) const { OpWriteSSE( 0x00, 0x0038 ); }
104 void B( const xRegisterMMX& to, const xIndirectVoid& from ) const { OpWriteSSE( 0x00, 0x0038 ); }
105 #endif
106 };
107
108 // --------------------------------------------------------------------------------------
109 // SimdImpl_PUnpack
110 // --------------------------------------------------------------------------------------
// Packed-integer unpack/interleave emitters. Members starting with L interleave the
// low-order elements of src and dest; members starting with H interleave the high-order
// elements. The quadword forms (LQDQ, HQDQ) are declared with the SSE-only operand type,
// while the byte/word/dword forms use the type that accepts either MMX or SSE operands
// (judging by the type names -- the operand types themselves are declared elsewhere).
111 struct SimdImpl_PUnpack
112 {
113 // Unpack and interleave low-order bytes from src and dest into dest.
114 const xImplSimd_DestRegEither LBW;
115 // Unpack and interleave low-order words from src and dest into dest.
116 const xImplSimd_DestRegEither LWD;
117 // Unpack and interleave low-order doublewords from src and dest into dest.
118 const xImplSimd_DestRegEither LDQ;
119 // Unpack and interleave low-order quadwords from src and dest into dest.
120 const xImplSimd_DestRegSSE LQDQ;
121
122 // Unpack and interleave high-order bytes from src and dest into dest.
123 const xImplSimd_DestRegEither HBW;
124 // Unpack and interleave high-order words from src and dest into dest.
125 const xImplSimd_DestRegEither HWD;
126 // Unpack and interleave high-order doublewords from src and dest into dest.
127 const xImplSimd_DestRegEither HDQ;
128 // Unpack and interleave high-order quadwords from src and dest into dest.
129 const xImplSimd_DestRegSSE HQDQ;
130 };
131
132 // --------------------------------------------------------------------------------------
133 // SimdImpl_Pack
134 // --------------------------------------------------------------------------------------
135 // Pack with Signed or Unsigned Saturation
136 //
// Narrowing pack emitters. Each member converts packed integers from src and dest into
// elements of half the width in dest, saturating values that do not fit. The SS* members
// saturate to the signed range, the US* members to the unsigned range; in every case the
// source elements are interpreted as signed integers.
137 struct SimdImpl_Pack
138 {
139 // Converts packed signed word integers from src and dest into packed signed
140 // byte integers in dest, using signed saturation.
141 const xImplSimd_DestRegEither SSWB;
142
143 // Converts packed signed dword integers from src and dest into packed signed
144 // word integers in dest, using signed saturation.
145 const xImplSimd_DestRegEither SSDW;
146
147 // Converts packed signed word integers from src and dest into packed unsigned
148 // byte integers in dest, using unsigned saturation.
149 const xImplSimd_DestRegEither USWB;
150
151 // [SSE-4.1] Converts packed signed dword integers from src and dest into packed
152 // unsigned word integers in dest, using unsigned saturation.
153 const xImplSimd_DestRegSSE USDW;
154 };
155
156 // --------------------------------------------------------------------------------------
157 // xImplSimd_Unpack
158 // --------------------------------------------------------------------------------------
// Floating-point unpack/interleave emitters (SSE registers only). HPS/HPD interleave the
// high halves of dest and src; LPS/LPD interleave the low halves. The element mappings
// below are listed from the lowest element upward.
159 struct xImplSimd_Unpack
160 {
161 // Unpacks the high doubleword [single-precision] values from src and dest into
162 // dest, such that the result of dest looks like this:
163 // dest[0] <- dest[2]
164 // dest[1] <- src[2]
165 // dest[2] <- dest[3]
166 // dest[3] <- src[3]
167 //
168 const xImplSimd_DestRegSSE HPS;
169
170 // Unpacks the high quadword [double-precision] values from src and dest into
171 // dest, such that the result of dest looks like this:
172 // dest.lo <- dest.hi
173 // dest.hi <- src.hi
174 //
175 const xImplSimd_DestRegSSE HPD;
176
177 // Unpacks the low doubleword [single-precision] values from src and dest into
178 // dest, such that the result of dest looks like this:
179 // dest[0] <- dest[0] [remains unchanged]
180 // dest[1] <- src[0]
181 // dest[2] <- dest[1]
182 // dest[3] <- src[1]
183 //
184 const xImplSimd_DestRegSSE LPS;
185
186 // Unpacks the low quadword [double-precision] values from src and dest into
187 // dest, effectively moving the low portion of src into the upper portion of dest.
188 // The result of dest is loaded as such:
189 // dest.hi <- src.lo
190 // dest.lo <- dest.lo [remains unchanged!]
191 //
192 const xImplSimd_DestRegSSE LPD;
193 };
194
195
// Callable helper used for the B and D members of the PInsert / PExtract emitter structs.
// It carries one opcode value and exposes two call forms: SSE register with a 32-bit
// general-purpose register, and SSE register with an indirect-memory operand, each with
// an 8-bit immediate.
196 struct xImplSimd_InsertExtractHelper
197 {
// Opcode value associated with this helper instance. How it is encoded into the emitted
// instruction is decided by the operator() definitions, which are not in this header.
198 u16 Opcode;
199
200 // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
201 void operator()( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const;
202
203 // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
204 void operator()( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8 ) const;
205 };
206
207 // --------------------------------------------------------------------------------------
208 // xImplSimd_PInsert
209 // --------------------------------------------------------------------------------------
210 // PINSRW/B/D [all but Word form are SSE4.1 only!]
211 //
// Insert emitters: W is declared directly as four overloads, while B and D are provided
// through xImplSimd_InsertExtractHelper instances.
// NOTE(review): B and D are non-const here, whereas SimdImpl_PExtract declares its B and D
// members const -- confirm whether the difference is intentional.
212 struct xImplSimd_PInsert
213 {
// Word insert into an SSE register, from a 32-bit register or from memory; imm8 is the
// 8-bit immediate operand.
214 void W( const xRegisterSSE& to, const xRegister32& from, u8 imm8 ) const;
215 void W( const xRegisterSSE& to, const xIndirectVoid& from, u8 imm8 ) const;
216
// Word insert into an MMX register, from a 32-bit register or from memory.
217 void W( const xRegisterMMX& to, const xRegister32& from, u8 imm8 ) const;
218 void W( const xRegisterMMX& to, const xIndirectVoid& from, u8 imm8 ) const;
219
220 // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
221 xImplSimd_InsertExtractHelper B;
222
223 // [SSE-4.1] Allowed with SSE registers only (MMX regs are invalid)
224 xImplSimd_InsertExtractHelper D;
225 };
226
227
228 //////////////////////////////////////////////////////////////////////////////////////////
229 // PEXTRW/B/D [all but Word form are SSE4.1 only!]
230 //
231 // Note: Word form's indirect memory form is only available in SSE4.1.
232 //
// Extract emitters: W is declared directly as three overloads, while B and D are provided
// through xImplSimd_InsertExtractHelper instances.
// NOTE(review): xImplSimd_InsertExtractHelper::operator() is declared with an SSE register
// as 'to' and a 32-bit register / memory operand as 'from', which reads as the insert
// direction -- confirm how the B and D extract forms are meant to be invoked.
233 struct SimdImpl_PExtract
234 {
235 // Copies the word element specified by imm8 from src to dest. The upper bits
236 // of dest are zero-extended (cleared). This can be used to extract any single packed
237 // word value from src into an x86 32 bit register.
238 //
239 // [SSE-4.1] Note: Indirect memory forms of this instruction are an SSE-4.1 extension!
240 //
241 void W( const xRegister32& to, const xRegisterSSE& from, u8 imm8 ) const;
242 void W( const xRegister32& to, const xRegisterMMX& from, u8 imm8 ) const;
243 void W( const xIndirectVoid& dest, const xRegisterSSE& from, u8 imm8 ) const;
244
245 // [SSE-4.1] Copies the byte element specified by imm8 from src to dest. The upper bits
246 // of dest are zero-extended (cleared). This can be used to extract any single packed
247 // byte value from src into an x86 32 bit register.
248 const xImplSimd_InsertExtractHelper B;
249
250 // [SSE-4.1] Copies the dword element specified by imm8 from src to dest. This can be
251 // used to extract any single packed dword value from src into an x86 32 bit register.
252 const xImplSimd_InsertExtractHelper D;
253 };
254
255 }

  ViewVC Help
Powered by ViewVC 1.1.22