/[pcsx2_0.9.7]/trunk/common/include/x86emitter/implement/simd_arithmetic.h
ViewVC logotype

Annotation of /trunk/common/include/x86emitter/implement/simd_arithmetic.h

Parent Directory | Revision Log


Revision 31 - (hide annotations) (download)
Tue Sep 7 03:24:11 2010 UTC (9 years, 10 months ago) by william
File MIME type: text/plain
File size: 12986 byte(s)
committing r3113 initial commit again...
1 william 31 /* PCSX2 - PS2 Emulator for PCs
2     * Copyright (C) 2002-2010 PCSX2 Dev Team
3     *
4     * PCSX2 is free software: you can redistribute it and/or modify it under the terms
5     * of the GNU Lesser General Public License as published by the Free Software Found-
6     * ation, either version 3 of the License, or (at your option) any later version.
7     *
8     * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
9     * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
10     * PURPOSE. See the GNU General Public License for more details.
11     *
12     * You should have received a copy of the GNU General Public License along with PCSX2.
13     * If not, see <http://www.gnu.org/licenses/>.
14     */
15    
16     #pragma once
17    
18     namespace x86Emitter {
19    
20     // --------------------------------------------------------------------------------------
21     // _SimdShiftHelper - call-operator helper used as the W/D/Q members of the shift structs
22     // --------------------------------------------------------------------------------------
23     struct _SimdShiftHelper
24     {
25     u8 Prefix; // NOTE(review): presumably the instruction's prefix byte -- confirm in the implementation
26     u16 Opcode; // NOTE(review): presumably the opcode for the register/memory operand forms -- confirm
27     u16 OpcodeImm; // NOTE(review): presumably the opcode for the imm8 forms -- confirm
28     u8 Modcode; // NOTE(review): presumably the ModRM extension paired with OpcodeImm -- confirm
29     // SSE register destination; the second operand is an SSE register or a memory operand (ModSibBase).
30     void operator()( const xRegisterSSE& to, const xRegisterSSE& from ) const;
31     void operator()( const xRegisterSSE& to, const ModSibBase& from ) const;
32     // MMX register destination; the second operand is an MMX register or a memory operand (ModSibBase).
33     void operator()( const xRegisterMMX& to, const xRegisterMMX& from ) const;
34     void operator()( const xRegisterMMX& to, const ModSibBase& from ) const;
35     // Immediate forms: SSE or MMX register destination with an 8-bit immediate operand.
36     void operator()( const xRegisterSSE& to, u8 imm8 ) const;
37     void operator()( const xRegisterMMX& to, u8 imm8 ) const;
38     };
39    
40     // --------------------------------------------------------------------------------------
41     // xImplSimd_Shift / xImplSimd_ShiftWithoutQ
42     // --------------------------------------------------------------------------------------
43    
44     // Used for PSRA, which lacks the Q form, so only the W and D helpers are exposed.
45     // NOTE(review): W/D presumably select 16-bit word / 32-bit dword elements -- confirm.
46     struct xImplSimd_ShiftWithoutQ
47     {
48     const _SimdShiftHelper W;
49     const _SimdShiftHelper D;
50     };
51    
52     // Implements PSRL and PSLL: W, D and Q helper forms plus a separate DQ form.
53     //
54     struct xImplSimd_Shift
55     {
56     const _SimdShiftHelper W;
57     const _SimdShiftHelper D;
58     const _SimdShiftHelper Q;
59     // DQ accepts only an SSE register destination and an imm8 (no MMX or register/memory overloads).
60     void DQ( const xRegisterSSE& to, u8 imm8 ) const;
61     };
62    
63     //////////////////////////////////////////////////////////////////////////////////////////
64     // Packed integer Add/Sub forms: B/W/D/Q plus the saturating variants documented below.
65     struct xImplSimd_AddSub
66     {
67     const xImplSimd_DestRegEither B;
68     const xImplSimd_DestRegEither W;
69     const xImplSimd_DestRegEither D;
70     const xImplSimd_DestRegEither Q;
71    
72     // Add/Sub packed signed byte [8-bit] integers from src into dest, saturating the results.
73     const xImplSimd_DestRegEither SB;
74    
75     // Add/Sub packed signed word [16-bit] integers from src into dest, saturating the results.
76     const xImplSimd_DestRegEither SW;
77    
78     // Add/Sub packed unsigned byte [8-bit] integers from src into dest, saturating the results.
79     const xImplSimd_DestRegEither USB;
80    
81     // Add/Sub packed unsigned word [16-bit] integers from src into dest, saturating the results.
82     const xImplSimd_DestRegEither USW;
83     };
84    
85     //////////////////////////////////////////////////////////////////////////////////////////
86     // Packed integer multiply forms.
87     struct xImplSimd_PMul
88     {
89     const xImplSimd_DestRegEither LW;
90     const xImplSimd_DestRegEither HW;
91     const xImplSimd_DestRegEither HUW;
92     const xImplSimd_DestRegEither UDQ;
93    
94     // [sSSE-3] PMULHRSW multiplies vertically each signed 16-bit integer from dest with the
95     // corresponding signed 16-bit integer of source, producing intermediate signed 32-bit
96     // integers. Each intermediate 32-bit integer is truncated to the 18 most significant
97     // bits. Rounding is always performed by adding 1 to the least significant bit of the
98     // 18-bit intermediate result. The final result is obtained by selecting the 16 bits
99     // immediately to the right of the most significant bit of each 18-bit intermediate
100     // result and packing them into the destination operand.
101     //
102     // Both operands can be MMX or XMM registers. Source can be register or memory.
103     //
104     const xImplSimd_DestRegEither HRSW;
105    
106     // [SSE-4.1] Multiply the packed signed dword integers in dest with src, and store
107     // the low 32 bits of each product in dest.
108     const xImplSimd_DestRegSSE LD;
109    
110     // [SSE-4.1] Multiply the packed signed dword integers in dest with src.
111     const xImplSimd_DestRegSSE DQ;
112     };
113    
114     //////////////////////////////////////////////////////////////////////////////////////////
115     // For instructions that come in PS/SS forms only (most commonly the reciprocal Sqrt functions).
116     //
117     struct xImplSimd_rSqrt
118     {
119     const xImplSimd_DestRegSSE PS;
120     const xImplSimd_DestRegSSE SS;
121     };
122    
123     //////////////////////////////////////////////////////////////////////////////////////////
124     // Exposes the PS/SS/SD forms of SQRT. (SQRTPD exists in SSE2, but no PD member is provided here.)
125     //
126     struct xImplSimd_Sqrt
127     {
128     const xImplSimd_DestRegSSE PS;
129     const xImplSimd_DestRegSSE SS;
130     const xImplSimd_DestRegSSE SD;
131     };
132    
133     //////////////////////////////////////////////////////////////////////////////////////////
134     // AndNot: exposes only the PS and PD forms.
135     struct xImplSimd_AndNot
136     {
137     const xImplSimd_DestRegSSE PS;
138     const xImplSimd_DestRegSSE PD;
139     };
140    
141     //////////////////////////////////////////////////////////////////////////////////////////
142     // Packed absolute value. [sSSE3 only]
143     //
144     struct xImplSimd_PAbsolute
145     {
146     // [sSSE-3] Computes the absolute value of the bytes in src, and stores the result
147     // in dest, as UNSIGNED.
148     const xImplSimd_DestRegEither B;
149    
150     // [sSSE-3] Computes the absolute value of the words in src, and stores the result
151     // in dest, as UNSIGNED.
152     const xImplSimd_DestRegEither W;
153    
154     // [sSSE-3] Computes the absolute value of the doublewords in src, and stores the
155     // result in dest, as UNSIGNED.
156     const xImplSimd_DestRegEither D;
157     };
158    
159     //////////////////////////////////////////////////////////////////////////////////////////
160     // Packed Sign [sSSE3 only] - Negate/zero/preserve packed integers in dest depending on the
161     // sign of the corresponding element in src.
162     //
163     struct xImplSimd_PSign
164     {
165     // [sSSE-3] Negates each byte element of dest if the signed integer value of the
166     // corresponding data element in src is less than zero. If the signed integer value
167     // of a data element in src is positive, the corresponding data element in dest is
168     // unchanged. If a data element in src is zero, the corresponding data element in
169     // dest is set to zero.
170     const xImplSimd_DestRegEither B;
171    
172     // [sSSE-3] Negates each word element of dest if the signed integer value of the
173     // corresponding data element in src is less than zero. If the signed integer value
174     // of a data element in src is positive, the corresponding data element in dest is
175     // unchanged. If a data element in src is zero, the corresponding data element in
176     // dest is set to zero.
177     const xImplSimd_DestRegEither W;
178    
179     // [sSSE-3] Negates each doubleword element of dest if the signed integer value
180     // of the corresponding data element in src is less than zero. If the signed integer
181     // value of a data element in src is positive, the corresponding data element in dest
182     // is unchanged. If a data element in src is zero, the corresponding data element in
183     // dest is set to zero.
184     const xImplSimd_DestRegEither D;
185    
186     };
187    
188     //////////////////////////////////////////////////////////////////////////////////////////
189     // Packed Multiply and Add.
190     //
191     struct xImplSimd_PMultAdd
192     {
193     // Multiplies the individual signed words of dest by the corresponding signed words
194     // of src, producing temporary signed, doubleword results. The adjacent doubleword
195     // results are then summed and stored in the destination operand.
196     //
197     // DEST[31:0] = ( DEST[15:0] * SRC[15:0] ) + ( DEST[31:16] * SRC[31:16] );
198     // DEST[63:32] = ( DEST[47:32] * SRC[47:32] ) + ( DEST[63:48] * SRC[63:48] );
199     // [.. repeat in the case of XMM src/dest operands ..]
200     //
201     const xImplSimd_DestRegEither WD;
202    
203     // [sSSE-3] Multiplies vertically each unsigned byte of dest with the corresponding
204     // signed byte of src, producing intermediate signed 16-bit integers. Each adjacent
205     // pair of signed words is added and the saturated result is packed to dest.
206     // For example, the lowest-order bytes (bits 7-0) in src and dest are multiplied
207     // and the intermediate signed word result is added with the corresponding
208     // intermediate result from the 2nd lowest-order bytes (bits 15-8) of the operands;
209     // the sign-saturated result is stored in the lowest word of dest (bits 15-0).
210     // The same operation is performed on the other pairs of adjacent bytes.
211     //
212     // In Coder Speak:
213     // DEST[15-0] = SaturateToSignedWord( SRC[15-8] * DEST[15-8] + SRC[7-0] * DEST[7-0] );
214     // DEST[31-16] = SaturateToSignedWord( SRC[31-24] * DEST[31-24] + SRC[23-16] * DEST[23-16] );
215     // [.. repeat for each 16 bits up to 64 (mmx) or 128 (xmm) ..]
216     //
217     const xImplSimd_DestRegEither UBSW;
218     };
219    
220     //////////////////////////////////////////////////////////////////////////////////////////
221     // Packed Horizontal Add [SSE3 only]
222     //
223     struct xImplSimd_HorizAdd
224     {
225     // [SSE-3] Horizontal Add of Packed Data. A four step process:
226     // * Adds the first and second single-precision dwords of dest and stores the result
227     // in the first dword of dest.
228     // * Adds the third and fourth dwords of dest and stores the result in the second dword of dest.
229     // * Adds the first and second dwords of *src* and stores the result in the third dword of dest.
230     // * Adds the third and fourth dwords of *src* and stores the result in the fourth dword of dest.
231     // (All four sums are computed from the original operand values.)
232     const xImplSimd_DestRegSSE PS;
233    
234     // [SSE-3] Horizontal Add of Packed Data. A two step process:
235     // * Adds the double-precision floating-point values in the high and low quadwords of
236     // dest and stores the result in the low quadword of dest.
237     // * Adds the double-precision floating-point values in the high and low quadwords of
238     // *src* and stores the result in the high quadword of dest.
239     const xImplSimd_DestRegSSE PD;
240     };
241    
242     //////////////////////////////////////////////////////////////////////////////////////////
243     // DotProduct calculation (SSE4.1 only!)
244     // NOTE(review): members here are non-const, unlike the other structs in this file -- confirm intent.
245     struct xImplSimd_DotProduct
246     {
247     // [SSE-4.1] Conditionally multiplies the packed single-precision floating-point
248     // values in dest with the packed single-precision floats in src depending on a
249     // mask extracted from the high 4 bits of the immediate byte. If a condition mask
250     // bit in Imm8[7:4] is zero, the corresponding multiplication is replaced by a value
251     // of 0.0. The four resulting single-precision values are summed into an inter-
252     // mediate result.
253     //
254     // The intermediate result is conditionally broadcast to the destination using a
255     // broadcast mask specified by bits [3:0] of the immediate byte. If a broadcast
256     // mask bit is 1, the intermediate result is copied to the corresponding dword
257     // element in dest. If a broadcast mask bit is zero, the corresponding element in
258     // the destination is set to zero.
259     //
260     xImplSimd_DestRegImmSSE PS;
261    
262     // [SSE-4.1] NOTE(review): presumably the double-precision counterpart of PS (DPPD) -- confirm.
263     xImplSimd_DestRegImmSSE PD;
264     };
265    
266     //////////////////////////////////////////////////////////////////////////////////////////
267     // Rounds floating point values (packed or single scalar) by an arbitrary rounding mode.
268     // (SSE4.1 only!)
269     struct xImplSimd_Round
270     {
271     // [SSE-4.1] Rounds the 4 packed single-precision src values and stores them in dest.
272     //
273     // Imm8 specifies control fields for the rounding operation:
274     // Bit 3 - processor behavior for a precision exception (0: normal, 1: suppress the inexact exception)
275     // Bit 2 - If set, use MXCSR.RC, else use the RC specified in bits 1:0 of this Imm8.
276     // Bits 1:0 - Specifies a rounding mode for this instruction only.
277     //
278     // Rounding Mode Reference:
279     // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
280     //
281     const xImplSimd_DestRegImmSSE PS;
282    
283     // [SSE-4.1] Rounds the 2 packed double-precision src values and stores them in dest.
284     //
285     // Imm8 specifies control fields for the rounding operation:
286     // Bit 3 - processor behavior for a precision exception (0: normal, 1: suppress the inexact exception)
287     // Bit 2 - If set, use MXCSR.RC, else use the RC specified in bits 1:0 of this Imm8.
288     // Bits 1:0 - Specifies a rounding mode for this instruction only.
289     //
290     // Rounding Mode Reference:
291     // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
292     //
293     const xImplSimd_DestRegImmSSE PD;
294    
295     // [SSE-4.1] Rounds the scalar single-precision src value and stores it in dest.
296     //
297     // Imm8 specifies control fields for the rounding operation:
298     // Bit 3 - processor behavior for a precision exception (0: normal, 1: suppress the inexact exception)
299     // Bit 2 - If set, use MXCSR.RC, else use the RC specified in bits 1:0 of this Imm8.
300     // Bits 1:0 - Specifies a rounding mode for this instruction only.
301     //
302     // Rounding Mode Reference:
303     // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
304     //
305     const xImplSimd_DestRegImmSSE SS;
306    
307     // [SSE-4.1] Rounds the scalar double-precision src value and stores it in dest.
308     //
309     // Imm8 specifies control fields for the rounding operation:
310     // Bit 3 - processor behavior for a precision exception (0: normal, 1: suppress the inexact exception)
311     // Bit 2 - If set, use MXCSR.RC, else use the RC specified in bits 1:0 of this Imm8.
312     // Bits 1:0 - Specifies a rounding mode for this instruction only.
313     //
314     // Rounding Mode Reference:
315     // 0 - Nearest, 1 - Negative Infinity, 2 - Positive infinity, 3 - Truncate.
316     //
317     const xImplSimd_DestRegImmSSE SD;
318     };
319    
320     } // End namespace x86Emitter
321    

  ViewVC Help
Powered by ViewVC 1.1.22