/[pcsx2_0.9.7]/trunk/common/include/x86emitter/implement/simd_arithmetic.h
ViewVC logotype

Contents of /trunk/common/include/x86emitter/implement/simd_arithmetic.h

Parent Directory Parent Directory | Revision Log Revision Log


Revision 31 - (show annotations) (download)
Tue Sep 7 03:24:11 2010 UTC (10 years, 2 months ago) by william
File MIME type: text/plain
File size: 12986 byte(s)
committing r3113 initial commit again...
1 /* PCSX2 - PS2 Emulator for PCs
2 * Copyright (C) 2002-2010 PCSX2 Dev Team
3 *
4 * PCSX2 is free software: you can redistribute it and/or modify it under the terms
5 * of the GNU Lesser General Public License as published by the Free Software Found-
6 * ation, either version 3 of the License, or (at your option) any later version.
7 *
8 * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10 * PURPOSE. See the GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License along with PCSX2.
13 * If not, see <http://www.gnu.org/licenses/>.
14 */
15
16 #pragma once
17
18 namespace x86Emitter {
19
20 // --------------------------------------------------------------------------------------
21 // _SimdShiftHelper
22 // --------------------------------------------------------------------------------------
// Callable description of one packed-shift instruction form.  The operator() overloads
// cover three operand shapes for both XMM (xRegisterSSE) and MMX (xRegisterMMX)
// destinations: register/register, register/memory (ModSibBase) and register/imm8.
// Instances of this type are the W/D/Q members of xImplSimd_Shift and
// xImplSimd_ShiftWithoutQ.
//
// NOTE(review): only the declarations are visible here; how each field is consumed is
// defined elsewhere.  The per-field notes below are inferred from the names -- confirm
// against the implementation.
23 struct _SimdShiftHelper
24 {
// Presumably the instruction prefix byte -- TODO confirm.
25 u8 Prefix;
// Presumably the opcode used by the register and memory source overloads -- TODO confirm.
26 u16 Opcode;
// Presumably the opcode used by the imm8 overloads -- TODO confirm.
27 u16 OpcodeImm;
// Presumably the ModRM opcode-extension value that accompanies OpcodeImm -- TODO confirm.
28 u8 Modcode;
29
// XMM destination; second operand is an XMM register or a memory operand.
30 void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const;
31 void operator()( const xRegisterSSE& to, const ModSibBase& from ) const;
32
// MMX destination; second operand is an MMX register or a memory operand.
33 void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const;
34 void operator()( const xRegisterMMX& to, const ModSibBase& from ) const;
35
// XMM or MMX destination with an 8-bit immediate as the second operand.
36 void operator()( const xRegisterSSE& to, u8 imm8 ) const;
37 void operator()( const xRegisterMMX& to, u8 imm8 ) const;
38 };
39
40 // --------------------------------------------------------------------------------------
41 // xImplSimd_Shift / xImplSimd_ShiftWithoutQ
42 // --------------------------------------------------------------------------------------
43
44 // Used for PSRA, which lacks the Q form.
45 //
// Holds one _SimdShiftHelper per available variant of the shift: W and D only.
// (Presumably the word and doubleword element sizes, i.e. PSRAW / PSRAD -- confirm
// against the table that initializes this struct.)
46 struct xImplSimd_ShiftWithoutQ
47 {
48 const _SimdShiftHelper W;
49 const _SimdShiftHelper D;
50 };
51
52 // Implements PSRL and PSLL
53 //
// Holds one _SimdShiftHelper per variant (W, D and Q), plus a separate DQ method.
54 struct xImplSimd_Shift
55 {
56 const _SimdShiftHelper W;
57 const _SimdShiftHelper D;
58 const _SimdShiftHelper Q;
59
// Declared for an XMM destination with an imm8 operand only; there is no MMX,
// register-source or memory-source overload.
// NOTE(review): presumably the whole-register byte shift (PSRLDQ / PSLLDQ), where
// imm8 would be a byte count rather than a bit count -- confirm in the implementation.
60 void DQ( const xRegisterSSE& to, u8 imm8 ) const;
61 };
62
63 //////////////////////////////////////////////////////////////////////////////////////////
64 //
// Groups the variants of a packed integer add or subtract.  Every member is an
// xImplSimd_DestRegEither; one instance of this struct describes either the add
// family or the sub family.
65 struct xImplSimd_AddSub
66 {
// NOTE(review): B/W/D/Q are undocumented here; presumably the non-saturating forms on
// byte / word / doubleword / quadword elements -- confirm against the initializer table.
67 const xImplSimd_DestRegEither B;
68 const xImplSimd_DestRegEither W;
69 const xImplSimd_DestRegEither D;
70 const xImplSimd_DestRegEither Q;
71
72 // Add/Sub packed signed byte [8bit] integers from src into dest, and saturate the results.
73 const xImplSimd_DestRegEither SB;
74
75 // Add/Sub packed signed word [16bit] integers from src into dest, and saturate the results.
76 const xImplSimd_DestRegEither SW;
77
78 // Add/Sub packed unsigned byte [8bit] integers from src into dest, and saturate the results.
79 const xImplSimd_DestRegEither USB;
80
81 // Add/Sub packed unsigned word [16bit] integers from src into dest, and saturate the results.
82 const xImplSimd_DestRegEither USW;
83 };
84
85 //////////////////////////////////////////////////////////////////////////////////////////
86 //
// Groups the packed integer multiply variants, one member per instruction form.
87 struct xImplSimd_PMul
88 {
// NOTE(review): these four are undocumented here; presumably PMULLW (LW), PMULHW (HW),
// PMULHUW (HUW) and PMULUDQ (UDQ) -- inferred from the names, confirm against the
// initializer table.
89 const xImplSimd_DestRegEither LW;
90 const xImplSimd_DestRegEither HW;
91 const xImplSimd_DestRegEither HUW;
92 const xImplSimd_DestRegEither UDQ;
93
94 // [sSSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
95 // corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
96 // integers. Each intermediate 32-bit integer is truncated to the 18 most significant
97 // bits. Rounding is always performed by adding 1 to the least significant bit of the
98 // 18-bit intermediate result. The final result is obtained by selecting the 16 bits
99 // immediately to the right of the most significant bit of each 18-bit intermediate
100 // result and packed to the destination operand.
101 //
102 // Both operands can be MMX or XMM registers. Source can be register or memory.
103 //
104 const xImplSimd_DestRegEither HRSW;
105
106 // [SSE-4.1] Multiply the packed dword signed integers in dest with src, and store
107 // the low 32 bits of each product in dest.
108 const xImplSimd_DestRegSSE LD;
109
110 // [SSE-4.1] Multiply the packed signed dword integers in dest with src.
111 const xImplSimd_DestRegSSE DQ;
112 };
113
114 //////////////////////////////////////////////////////////////////////////////////////////
115 // For instructions that have PS/SS form only (most commonly reciprocal Sqrt functions)
116 //
// Both members are XMM-only (xImplSimd_DestRegSSE): PS is the packed single-precision
// form and SS the scalar single-precision form.
117 struct xImplSimd_rSqrt
118 {
119 const xImplSimd_DestRegSSE PS;
120 const xImplSimd_DestRegSSE SS;
121 };
122
123 //////////////////////////////////////////////////////////////////////////////////////////
124 // Exposes the PS/SS/SD forms of SQRT; no PD member is declared.
// NOTE(review): SSE2 does define SQRTPD, so the missing PD form is a limitation of this
// wrapper rather than of the instruction set -- confirm whether the omission is intended.
125 //
126 struct xImplSimd_Sqrt
127 {
128 const xImplSimd_DestRegSSE PS;
129 const xImplSimd_DestRegSSE SS;
130 const xImplSimd_DestRegSSE SD;
131 };
132
133 //////////////////////////////////////////////////////////////////////////////////////////
134 //
// Groups the PS and PD forms of an and-not operation; both members are XMM-only
// (xImplSimd_DestRegSSE).
// NOTE(review): presumably ANDNPS / ANDNPD -- confirm against the initializer table.
135 struct xImplSimd_AndNot
136 {
137 const xImplSimd_DestRegSSE PS;
138 const xImplSimd_DestRegSSE PD;
139 };
140
141 //////////////////////////////////////////////////////////////////////////////////////////
142 // Packed absolute value. [sSSE3 only]
143 //
// One member per element size; each accepts either register family
// (xImplSimd_DestRegEither).
144 struct xImplSimd_PAbsolute
145 {
146 // [sSSE-3] Computes the absolute value of bytes in the src, and stores the result
147 // in dest, as UNSIGNED.
148 const xImplSimd_DestRegEither B;
149
150 // [sSSE-3] Computes the absolute value of words in the src, and stores the result
151 // in dest, as UNSIGNED.
152 const xImplSimd_DestRegEither W;
153
154 // [sSSE-3] Computes the absolute value of doublewords in the src, and stores the
155 // result in dest, as UNSIGNED.
156 const xImplSimd_DestRegEither D;
157 };
158
159 //////////////////////////////////////////////////////////////////////////////////////////
160 // Packed Sign [sSSE3 only] - Negate/zero/preserve packed integers in dest depending on the
161 // corresponding sign in src.
162 //
// One member per element size (B, W, D).  The three differ only in element width; the
// negate / keep / zero rule described on each member is the same.
163 struct xImplSimd_PSign
164 {
165 // [sSSE-3] negates each byte element of dest if the signed integer value of the
166 // corresponding data element in src is less than zero. If the signed integer value
167 // of a data element in src is positive, the corresponding data element in dest is
168 // unchanged. If a data element in src is zero, the corresponding data element in
169 // dest is set to zero.
170 const xImplSimd_DestRegEither B;
171
172 // [sSSE-3] negates each word element of dest if the signed integer value of the
173 // corresponding data element in src is less than zero. If the signed integer value
174 // of a data element in src is positive, the corresponding data element in dest is
175 // unchanged. If a data element in src is zero, the corresponding data element in
176 // dest is set to zero.
177 const xImplSimd_DestRegEither W;
178
179 // [sSSE-3] negates each doubleword element of dest if the signed integer value
180 // of the corresponding data element in src is less than zero. If the signed integer
181 // value of a data element in src is positive, the corresponding data element in dest
182 // is unchanged. If a data element in src is zero, the corresponding data element in
183 // dest is set to zero.
184 const xImplSimd_DestRegEither D;
185
186 };
187
188 //////////////////////////////////////////////////////////////////////////////////////////
189 // Packed Multiply and Add!!
190 //
// Two multiply-then-add-adjacent-pairs forms: WD (words in, doublewords out) and
// UBSW (bytes in, saturated signed words out).
191 struct xImplSimd_PMultAdd
192 {
193 // Multiplies the individual signed words of dest by the corresponding signed words
194 // of src, producing temporary signed, doubleword results. The adjacent doubleword
195 // results are then summed and stored in the destination operand.
196 //
197 // DEST[31:0] = ( DEST[15:0] * SRC[15:0]) + (DEST[31:16] * SRC[31:16] );
198 // DEST[63:32] = ( DEST[47:32] * SRC[47:32]) + (DEST[63:48] * SRC[63:48] );
199 // [.. repeat in the case of XMM src/dest operands ..]
200 //
201 const xImplSimd_DestRegEither WD;
202
203 // [sSSE-3] multiplies vertically each unsigned byte of dest with the corresponding
204 // signed byte of src, producing intermediate signed 16-bit integers. Each adjacent
205 // pair of signed words is added and the saturated result is packed to dest.
206 // For example, the lowest-order bytes (bits 7-0) in src and dest are multiplied
207 // and the intermediate signed word result is added with the corresponding
208 // intermediate result from the 2nd lowest-order bytes (bits 15-8) of the operands;
209 // the sign-saturated result is stored in the lowest word of dest (bits 15-0).
210 // The same operation is performed on the other pairs of adjacent bytes.
211 //
212 // In Coder Speak:
213 // DEST[15-0] = SaturateToSignedWord( SRC[15-8] * DEST[15-8] + SRC[7-0] * DEST[7-0] );
214 // DEST[31-16] = SaturateToSignedWord( SRC[31-24] * DEST[31-24] + SRC[23-16] * DEST[23-16] );
215 // [.. repeat for each 16 bits up to 64 (mmx) or 128 (xmm) ..]
216 //
217 const xImplSimd_DestRegEither UBSW;
218 };
219
220 //////////////////////////////////////////////////////////////////////////////////////////
221 // Packed Horizontal Add [SSE3 only]
222 //
223 struct xImplSimd_HorizAdd
224 {
225 // [SSE-3] Horizontal Add of Packed Data. A four step process:
226 // * Adds the single-precision floating-point values in the first and second dwords of
227 // dest and stores the result in the first dword of dest.
228 // * Adds single-precision floating-point values in the third and fourth dword of dest
229 // and stores the result in the second dword of dest.
230 // * Adds single-precision floating-point values in the first and second dword of *src*
231 // and stores the result in the third dword of dest.
// * Adds single-precision floating-point values in the third and fourth dword of *src*
// and stores the result in the fourth dword of dest.
232 const xImplSimd_DestRegSSE PS;
233
234 // [SSE-3] Horizontal Add of Packed Data. A two step process:
235 // * Adds the double-precision floating-point values in the high and low quadwords of
236 // dest and stores the result in the low quadword of dest.
237 // * Adds the double-precision floating-point values in the high and low quadwords of
238 // *src* and stores the result in the high quadword of dest.
239 const xImplSimd_DestRegSSE PD;
240 };
241
242 //////////////////////////////////////////////////////////////////////////////////////////
243 // DotProduct calculation (SSE4.1 only!)
244 //
// Members are const to match every other xImplSimd_* struct in this header.
245 struct xImplSimd_DotProduct
246 {
247 // [SSE-4.1] Conditionally multiplies the packed single precision floating-point
248 // values in dest with the packed single-precision floats in src depending on a
249 // mask extracted from the high 4 bits of the immediate byte. If a condition mask
250 // bit in Imm8[7:4] is zero, the corresponding multiplication is replaced by a value
251 // of 0.0. The four resulting single-precision values are summed into an inter-
252 // mediate result.
253 //
254 // The intermediate result is conditionally broadcasted to the destination using a
255 // broadcast mask specified by bits [3:0] of the immediate byte. If a broadcast
256 // mask bit is 1, the intermediate result is copied to the corresponding dword
257 // element in dest. If a broadcast mask bit is zero, the corresponding element in
258 // the destination is set to zero.
259 //
260 const xImplSimd_DestRegImmSSE PS;
261
262 // [SSE-4.1] Double-precision counterpart of PS (DPPD in the Intel manual).  The same
// scheme applies to the two packed doubles: Imm8[5:4] is the condition mask for the
// two multiplications and Imm8[1:0] is the broadcast mask for the two qword elements
// of dest.
// NOTE(review): the opcode binding for this member is defined elsewhere -- confirm it
// emits DPPD.
263 const xImplSimd_DestRegImmSSE PD;
264 };
265
266 //////////////////////////////////////////////////////////////////////////////////////////
267 // Rounds floating point values (packed or single scalar) by an arbitrary rounding mode.
268 // (SSE4.1 only!)
// Four members: packed single (PS), packed double (PD), scalar single (SS) and scalar
// double (SD).  All four use the same Imm8 control-byte layout, repeated on each member.
269 struct xImplSimd_Round
270 {
271 // [SSE-4.1] Rounds the 4 packed single-precision src values and stores them in dest.
272 //
273 // Imm8 specifies control fields for the rounding operation:
274 // Bit 3 - precision (inexact) exception mask (0: signal normally, 1: suppress)
275 // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
276 // Bits 1:0 - Specifies a rounding mode for this instruction only.
277 //
278 // Rounding Mode Reference:
279 // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
280 //
281 const xImplSimd_DestRegImmSSE PS;
282
283 // [SSE-4.1] Rounds the 2 packed double-precision src values and stores them in dest.
284 //
285 // Imm8 specifies control fields for the rounding operation:
286 // Bit 3 - precision (inexact) exception mask (0: signal normally, 1: suppress)
287 // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
288 // Bits 1:0 - Specifies a rounding mode for this instruction only.
289 //
290 // Rounding Mode Reference:
291 // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
292 //
293 const xImplSimd_DestRegImmSSE PD;
294
295 // [SSE-4.1] Rounds the single-precision src value and stores in dest.
296 //
297 // Imm8 specifies control fields for the rounding operation:
298 // Bit 3 - precision (inexact) exception mask (0: signal normally, 1: suppress)
299 // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
300 // Bits 1:0 - Specifies a rounding mode for this instruction only.
301 //
302 // Rounding Mode Reference:
303 // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
304 //
305 const xImplSimd_DestRegImmSSE SS;
306
307 // [SSE-4.1] Rounds the double-precision src value and stores in dest.
308 //
309 // Imm8 specifies control fields for the rounding operation:
310 // Bit 3 - precision (inexact) exception mask (0: signal normally, 1: suppress)
311 // Bit 2 - If enabled, use MXCSR.RC, else use RC specified in bits 1:0 of this Imm8.
312 // Bits 1:0 - Specifies a rounding mode for this instruction only.
313 //
314 // Rounding Mode Reference:
315 // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
316 //
317 const xImplSimd_DestRegImmSSE SD;
318 };
319
320 } // End namespace x86Emitter
321

  ViewVC Help
Powered by ViewVC 1.1.22