/[pcsx2_0.9.7]/trunk/pcsx2/IPU/yuv2rgb.asm
ViewVC logotype

Contents of /trunk/pcsx2/IPU/yuv2rgb.asm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 31 - (show annotations) (download)
Tue Sep 7 03:24:11 2010 UTC (9 years, 5 months ago) by william
File size: 5501 byte(s)
committing r3113 initial commit again...
1 ;/********************************************************
2 ; * Some code. Copyright (C) 2003 by Pascal Massimino. *
3 ; * All Rights Reserved. (http://skal.planet-d.net) *
4 ; * For Educational/Academic use ONLY. See 'LICENSE.TXT'.*
5 ; ********************************************************/
6 ;//////////////////////////////////////////////////////////
7 ;// NASM macros
8 ;//////////////////////////////////////////////////////////
9
%ifdef LINUX

;//////////////////////////////////////////////////////////
; LINUX / egcs flavour of the helper macros.
; Symbols are imported/exported under their plain names.
;//////////////////////////////////////////////////////////

; extrn <sym> : declare <sym> as an external symbol.
; The %define maps the name onto itself, so code can refer to <sym>
; the same way regardless of which platform block is active.
%macro extrn 1
        extern  %1
        %define %1 %1
%endmacro

; globl <sym> : export <sym> from this object file (same name mapping).
%macro globl 1
        global  %1
        %define %1 %1
%endmacro

; DATA : switch to the section named "data"
;        (16-byte aligned, writable, allocated, 32-bit).
%macro DATA 0
        [section data align=16 write alloc USE32]
%endmacro

; TEXT : switch to the section named "text"
;        (16-byte aligned, read-only, allocated, executable, 32-bit).
%macro TEXT 0
        [section text align=16 nowrite alloc exec USE32]
%endmacro

%endif  ; LINUX
33
34 ;//////////////////////////////////////////////////////////
35
%ifdef WIN32

; WIN32 flavour of the helper macros.
; Every imported/exported symbol gets a leading underscore, and the
; plain name is %define'd to the underscored one so the rest of the
; file can keep using the undecorated spelling.

; extrn <sym> : declare _<sym> as external; <sym> expands to _<sym>.
%macro extrn 1
        extern  _%1
        %define %1 _%1
%endmacro

; globl <sym> : export _<sym>; <sym> expands to _<sym>.
%macro globl 1
        global  _%1
        %define %1 _%1
%endmacro

; DATA : switch to ".data" (16-byte aligned, writable, allocated, 32-bit).
%macro DATA 0
        [section .data align=16 write alloc USE32]
%endmacro

; TEXT : switch to ".text"
;        (16-byte aligned, read-only, allocated, executable, 32-bit).
%macro TEXT 0
        [section .text align=16 nowrite alloc exec USE32]
%endmacro

%endif  ; WIN32
56
57 ;//////////////////////////////////////////////////////////
;
; Macros for timing (NASM).
; Total additional code size is 0xb0;
; this keeps code alignment right.
;
; Usage sketch: place SKL_RDTSC_IN at the start and SKL_RDTSC_OUT at
; the end of the code to be measured. Both must sit under the same
; non-local label, because they share the local label .Skl_RDTSC_Loop_.

; Externals used by SKL_RDTSC_OUT (defined outside this file).
        extrn   Skl_Cur_Count_
        extrn   Skl_Print_Tics

; SKL_USE_RDSTC : import the three RDTSC helper routines.
; (The macro name keeps the original "RDSTC" spelling.)
%macro SKL_USE_RDSTC 0
        extrn   SKL_RDTSC_0_ASM
        extrn   SKL_RDTSC_1_ASM
        extrn   SKL_RDTSC_2_ASM
%endmacro

; Original note: check value with skl_rdtsc.h...
%define SKL_RDTSC_OFFSET 15

; SKL_RDTSC_IN : call helper 0 once, then open the measurement loop,
; whose first action is a call to helper 1.
%macro SKL_RDTSC_IN 0
        SKL_USE_RDSTC
        call    SKL_RDTSC_0_ASM
.Skl_RDTSC_Loop_:
        call    SKL_RDTSC_1_ASM
%endmacro

; SKL_RDTSC_OUT : call helper 2, decrement the dword at Skl_Cur_Count_
; and loop back while the result is still >= 0; afterwards push the
; constant 53 and call Skl_Print_Tics.
; NOTE(review): the pushed dword is not popped here -- presumably the
; callee cleans it up or the call does not return; confirm against
; Skl_Print_Tics before using this macro inside a function that returns.
%macro SKL_RDTSC_OUT 0
        call    SKL_RDTSC_2_ASM
        dec     dword [Skl_Cur_Count_]
        jge     near .Skl_RDTSC_Loop_
        push    dword 53
        call    Skl_Print_Tics
%endmacro
87
88 ;//////////////////////////////////////////////////////////
89
globl Skl_YUV_To_RGB32_MMX

; Lookup table read by the conversion loop. It is not defined in this
; file, so it has to be imported (and, on WIN32, underscore-decorated)
; through extrn like every other external symbol.
extrn Skl_YUV_Tab32_MMX

;//////////////////////////////////////////////////////////////////////
;
; Skl_YUV_To_RGB32_MMX
;
; Converts planar Y/U/V to 32-bit pixels, two rows at a time, by summing
; 8-byte entries of Skl_YUV_Tab32_MMX (one per V, U and Y sample) and
; packing the result with unsigned saturation.
;
; Arguments, all dwords on the stack (offsets relative to esp on entry;
; exact C types are not visible from this file):
;    +4  RGB      destination pointer
;    +8  Dst_BpS  destination stride in bytes
;   +12  Y        luma pointer
;   +16  U        chroma pointer, indexes table 1
;   +20  V        chroma pointer, indexes table 0
;   +24  Src_BpS  source stride in bytes
;   +28  Width    in pixels
;   +32  Height   in rows
;
; Preserves ebx, esi, edi, ebp. Clobbers eax, ecx, edx, mm0-mm7, flags.
; MMX state is released with emms before returning.
;
; Notes derived from the code below:
;  * Each inner-loop pass consumes 2 chroma samples and 4 luma samples
;    on each of two rows; a Width that is not a multiple of 4 makes the
;    last pass run past the end of the row.
;  * Height/2 row pairs are processed; an odd last row is ignored.
;  * U and V advance by Src_BpS per row pair, i.e. the chroma rows are
;    stepped with the same stride value as the luma rows.
;  * Width < 2 or Height < 2 returns immediately without touching memory.
;
; Register roles inside .Loop_x:
;   edi: V (end of row)     esi: U (end of row), reused as RGB pointer
;   ebp: Y1 (end of row)    edx: Y2 = Y1 + Src_BpS
;   ecx: negative index, runs from -Width/2 up to 0
;   eax, ebx: table indices (zero-extended samples)
;
;//////////////////////////////////////////////////////////////////////

; Argument offsets (relative to esp at function entry).
%define ARG_RGB       4
%define ARG_DST_BPS   8
%define ARG_Y         12
%define ARG_U         16
%define ARG_V         20
%define ARG_SRC_BPS   24
%define ARG_WIDTH     28
%define ARG_HEIGHT    32

%define SAVED_REGS    16                        ; ebx, esi, edi, ebp
%define LOCALS_SIZE   24                        ; six dword temporaries
%define FRAME_SIZE    (SAVED_REGS + LOCALS_SIZE)

; Temporaries, valid once all six locals have been pushed.
%define RGBp    esp+20
%define Yp      esp+16
%define Up      esp+12
%define Vp      esp+8
%define xCnt    esp+4
%define yCnt    esp+0

Skl_YUV_To_RGB32_MMX:

        push    ebx
        push    esi
        push    edi
        push    ebp

        ; Nothing to convert: the loops below always execute at least
        ; one full pass, so bail out before any memory is touched.
        cmp     dword [esp + ARG_WIDTH  + SAVED_REGS], 2
        jl      near .Exit
        cmp     dword [esp + ARG_HEIGHT + SAVED_REGS], 2
        jl      near .Exit

        mov     edi, [esp + ARG_RGB     + SAVED_REGS]   ; RGB
        mov     ebp, [esp + ARG_Y       + SAVED_REGS]   ; Y
        mov     eax, [esp + ARG_U       + SAVED_REGS]   ; U
        mov     ebx, [esp + ARG_V       + SAVED_REGS]   ; V
        mov     edx, [esp + ARG_SRC_BPS + SAVED_REGS]   ; Src_BpS
        mov     ecx, [esp + ARG_WIDTH   + SAVED_REGS]   ; Width

        ; All row pointers are moved to the END of the row; the inner
        ; loop then indexes them with a negative counter.
        lea     edi, [edi+4*ecx]        ; RGB += Width*sizeof(32b)
        lea     ebp, [ebp+ecx]          ; ebp: Y1 = Y + Width
        add     edx, ebp                ; edx: Y2 = Y1 + Src_BpS
        push    edi                     ; [RGBp]
        push    ebp                     ; [Yp]
        shr     ecx, 1                  ; Width /= 2
        lea     eax, [eax+ecx]          ; U += Width/2
        lea     ebx, [ebx+ecx]          ; V += Width/2
        push    eax                     ; [Up]
        push    ebx                     ; [Vp]

        neg     ecx                     ; ecx = -Width/2
        push    ecx                     ; [xCnt]
        push    eax                     ; [yCnt] placeholder, set at .Go

        mov     ecx, [esp + ARG_HEIGHT + FRAME_SIZE]    ; Height
        shr     ecx, 1                  ; number of row pairs

        mov     esi, [Up]
        mov     edi, [Vp]

        jmp     .Go

align 16
.Loop_y:
        dec     ecx                     ; one row pair done
        jg      .Add

        add     esp, LOCALS_SIZE        ; drop the six temporaries
        emms                            ; leave the FPU/MMX state clean
.Exit:
        pop     ebp
        pop     edi
        pop     esi
        pop     ebx

        ret

align 16
.Add:                                   ; advance every pointer to the next row pair
        mov     edi, [esp + ARG_DST_BPS + FRAME_SIZE]   ; Dst_BpS
        mov     esi, [esp + ARG_SRC_BPS + FRAME_SIZE]   ; Src_BpS
        mov     edx, [RGBp]
        mov     ebp, [Yp]
        lea     edx, [edx+2*edi]        ; RGB += 2*Dst_BpS
        lea     ebp, [ebp+2*esi]        ; Y   += 2*Src_BpS
        mov     [RGBp], edx
        mov     edi, [Vp]
        mov     [Yp], ebp               ; Y1
        lea     edx, [ebp+esi]          ; Y2 = Y1 + Src_BpS

        lea     edi, [edi+esi]          ; V += Src_BpS
        add     esi, [Up]               ; U += Src_BpS (esi = new U)
        mov     [Vp], edi
        mov     [Up], esi

.Go:
        mov     [yCnt], ecx             ; park the row counter
        mov     ecx, [xCnt]             ; ecx = -Width/2

        ; 5210c@640x480  (original author's timing note)

.Loop_x:        ; edi: V, esi: U; ebp, edx: Y1, Y2; ecx: xCnt

        ; Original author's formula note:
        ;   R = Y + a.U
        ;   G = Y + c.V + b.U
        ;   B = Y + d.V
        ; What the code does: table 0 is indexed by the V sample,
        ; table 1 by the U sample, table 2 by the Y sample; the three
        ; 4x16-bit entries are added and packed to bytes.

        ; Chroma contribution for samples 0 (mm0) and 1 (mm4).
        movzx   eax, byte [edi+ecx+0]
        movzx   ebx, byte [esi+ecx+0]
        movq    mm0, [Skl_YUV_Tab32_MMX+0*2048 + eax*8]
        movzx   eax, byte [edi+ecx+1]
        paddw   mm0, [Skl_YUV_Tab32_MMX+1*2048 + ebx*8]
        movzx   ebx, byte [esi+ecx+1]
        movq    mm4, [Skl_YUV_Tab32_MMX+0*2048 + eax*8]
        movzx   eax, byte [ebp + 2*ecx+0]
        paddw   mm4, [Skl_YUV_Tab32_MMX+1*2048 + ebx*8]
        movzx   ebx, byte [ebp + 2*ecx+1]

        ; Each chroma sum is shared by a 2x2 block of luma samples.
        movq    mm1, mm0
        movq    mm2, mm0
        movq    mm3, mm0
        movq    mm5, mm4
        movq    mm6, mm4
        movq    mm7, mm4

        ; Row 1 (Y1): pixels 0..3.
        paddw   mm0, [Skl_YUV_Tab32_MMX+2*2048 + eax*8]
        movzx   eax, byte [ebp + 2*ecx+2]
        paddw   mm1, [Skl_YUV_Tab32_MMX+2*2048 + ebx*8]
        movzx   ebx, byte [ebp + 2*ecx+3]
        packuswb mm0, mm1
        paddw   mm4, [Skl_YUV_Tab32_MMX+2*2048 + eax*8]
        movzx   eax, byte [edx + 2*ecx+0]
        paddw   mm5, [Skl_YUV_Tab32_MMX+2*2048 + ebx*8]

        packuswb mm4, mm5
        mov     esi, [RGBp]             ; esi now addresses the output row
        movzx   ebx, byte [edx + 2*ecx+1]
        movq    [esi+8*ecx+0], mm0      ; 2x32b
        movq    [esi+8*ecx+8], mm4      ; 2x32b

        ; Row 2 (Y2): pixels 0..3, written Dst_BpS bytes further on.
        paddw   mm2, [Skl_YUV_Tab32_MMX+2*2048 + eax*8]
        movzx   eax, byte [edx + 2*ecx+2]
        paddw   mm3, [Skl_YUV_Tab32_MMX+2*2048 + ebx*8]
        movzx   ebx, byte [edx + 2*ecx+3]
        packuswb mm2, mm3
        paddw   mm6, [Skl_YUV_Tab32_MMX+2*2048 + eax*8]
        add     esi, [esp + ARG_DST_BPS + FRAME_SIZE]
        paddw   mm7, [Skl_YUV_Tab32_MMX+2*2048 + ebx*8]

        mov     edi, [Vp]
        packuswb mm6, mm7
        movq    [esi+8*ecx+0], mm2      ; 2x32b
        movq    [esi+8*ecx+8], mm6      ; 2x32b

        add     ecx, 2                  ; two chroma samples consumed
        mov     esi, [Up]               ; restore U (mov keeps the flags)

        jl      near .Loop_x            ; until the index reaches 0

        mov     ecx, [yCnt]
        jmp     .Loop_y

  ViewVC Help
Powered by ViewVC 1.1.22