Revision 2da0d70d libswscale/swscale_template.c
libswscale/swscale_template.c  

71  71 
#endif 
72  72  
73  73 
/*
 * YSCALEYUV2YV12X(x, offset, dest, width)
 *
 * Complete asm statement that filters 16-bit samples down to bytes,
 * 8 output bytes per outer iteration.
 *
 *   x      - constant byte displacement added to every source load
 *            (stringified with #x into the addressing expression)
 *   offset - string literal: displacement from &c->redDither to the filter
 *            list
 *   dest   - output pointer (operand %1)
 *   width  - loop bound for the output index (operand %2)
 *
 * Operand %0 is &c->redDither; VROUNDER_OFFSET and `offset` are both applied
 * relative to it.
 *
 * Filter list, as walked by the code: 16-byte entries, source pointer at +0,
 * four 16-bit coefficients at +8; the walk stops when the next entry's
 * pointer is zero.
 *
 * For each entry, two quadwords are loaded from x + src + 2*REG_a, multiplied
 * by the coefficient with pmulhw, and added to mm3/mm4, which start out as
 * the rounder value. The sums are shifted right by 3, packed with unsigned
 * saturation and stored to dest + REG_a with MOVNTQ. REG_a then advances by 8
 * and the loop repeats while REG_a < width (unsigned compare, jb).
 *
 * Both the inner and the outer loop branch back to the same label "1"; the
 * accumulators and the list pointer are re-initialised before the outer jb.
 * The cmp result survives until jb because the movq/lea/mov in between do not
 * write flags.
 *
 * Declared clobbers: REG_a, REG_d, REG_S. The MMX registers used (mm0,
 * mm2-mm5) are not listed as clobbers.
 */
#define YSCALEYUV2YV12X(x, offset, dest, width) \
    asm volatile(\
    "xor %%"REG_a", %%"REG_a" \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
    "movq %%mm3, %%mm4 \n\t"\
    "lea " offset "(%0), %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    ASMALIGN(4) /* FIXME Unroll? */\
    "1: \n\t"\
    "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
    "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
    "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* srcData */\
    "add $16, %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "test %%"REG_S", %%"REG_S" \n\t" /* flags are consumed by the jnz below */\
    "pmulhw %%mm0, %%mm2 \n\t"\
    "pmulhw %%mm0, %%mm5 \n\t"\
    "paddw %%mm2, %%mm3 \n\t"\
    "paddw %%mm5, %%mm4 \n\t"\
    " jnz 1b \n\t"\
    "psraw $3, %%mm3 \n\t"\
    "psraw $3, %%mm4 \n\t"\
    "packuswb %%mm4, %%mm3 \n\t"\
    MOVNTQ(%%mm3, (%1, %%REGa))\
    "add $8, %%"REG_a" \n\t"\
    "cmp %2, %%"REG_a" \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
    "movq %%mm3, %%mm4 \n\t"\
    "lea " offset "(%0), %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "jb 1b \n\t"\
    :: "r" (&c->redDither),\
    "r" (dest), "g" (width)\
    : "%"REG_a, "%"REG_d, "%"REG_S\
    );

108  108  
109  109 
/*
 * YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width)
 *
 * Same interface and operands as YSCALEYUV2YV12X (%0 = &c->redDither,
 * %1 = dest, %2 = width), but accumulates in 32 bits and consumes two source
 * lines per inner iteration.
 *
 * Per inner iteration the code reads one source pointer from (REG_d) and a
 * second from 4(REG_d), interleaves their 16-bit samples with
 * punpcklwd/punpckhwd, and multiplies the pairs with the quadword at
 * 8(REG_d) using pmaddwd. The 32-bit results are summed into mm4..mm7.
 * REG_d then advances by 16; the walk stops when the next first pointer is
 * zero.
 *
 * NOTE(review): the second pointer is read from offset 4, which presumes
 * 4-byte pointers inside a filter entry - confirm for 64-bit builds.
 *
 * After the inner loop the sums are shifted right by 16, packed to words with
 * signed saturation, offset by the rounder at VROUNDER_OFFSET(%0), shifted
 * right by 3, packed to unsigned bytes and stored to dest + REG_a with
 * MOVNTQ. REG_a advances by 8 and the loop repeats while REG_a < width
 * (unsigned compare, jb).
 *
 * Inner and outer loops share label "1"; accumulators and the list pointer
 * are reset before the outer jb (pxor/lea/mov leave the cmp flags intact).
 *
 * Declared clobbers: REG_a, REG_d, REG_S. MMX registers mm0-mm7 are used but
 * not listed as clobbers.
 */
#define YSCALEYUV2YV12X_ACCURATE(x, offset, dest, width) \
    asm volatile(\
    "lea " offset "(%0), %%"REG_d" \n\t"\
    "xor %%"REG_a", %%"REG_a" \n\t"\
    "pxor %%mm4, %%mm4 \n\t"\
    "pxor %%mm5, %%mm5 \n\t"\
    "pxor %%mm6, %%mm6 \n\t"\
    "pxor %%mm7, %%mm7 \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    ASMALIGN(4) \
    "1: \n\t"\
    "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* srcData */\
    "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* srcData */\
    "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
    "movq " #x "(%%"REG_S", %%"REG_a", 2), %%mm1 \n\t" /* srcData */\
    "movq %%mm0, %%mm3 \n\t"\
    "punpcklwd %%mm1, %%mm0 \n\t"\
    "punpckhwd %%mm1, %%mm3 \n\t"\
    "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
    "pmaddwd %%mm1, %%mm0 \n\t"\
    "pmaddwd %%mm1, %%mm3 \n\t"\
    "paddd %%mm0, %%mm4 \n\t"\
    "paddd %%mm3, %%mm5 \n\t"\
    "movq 8+" #x "(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* srcData */\
    "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
    "add $16, %%"REG_d" \n\t"\
    "test %%"REG_S", %%"REG_S" \n\t" /* flags are consumed by the jnz below */\
    "movq %%mm2, %%mm0 \n\t"\
    "punpcklwd %%mm3, %%mm2 \n\t"\
    "punpckhwd %%mm3, %%mm0 \n\t"\
    "pmaddwd %%mm1, %%mm2 \n\t"\
    "pmaddwd %%mm1, %%mm0 \n\t"\
    "paddd %%mm2, %%mm6 \n\t"\
    "paddd %%mm0, %%mm7 \n\t"\
    " jnz 1b \n\t"\
    "psrad $16, %%mm4 \n\t"\
    "psrad $16, %%mm5 \n\t"\
    "psrad $16, %%mm6 \n\t"\
    "psrad $16, %%mm7 \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
    "packssdw %%mm5, %%mm4 \n\t"\
    "packssdw %%mm7, %%mm6 \n\t"\
    "paddw %%mm0, %%mm4 \n\t"\
    "paddw %%mm0, %%mm6 \n\t"\
    "psraw $3, %%mm4 \n\t"\
    "psraw $3, %%mm6 \n\t"\
    "packuswb %%mm6, %%mm4 \n\t"\
    MOVNTQ(%%mm4, (%1, %%REGa))\
    "add $8, %%"REG_a" \n\t"\
    "cmp %2, %%"REG_a" \n\t"\
    "lea " offset "(%0), %%"REG_d" \n\t"\
    "pxor %%mm4, %%mm4 \n\t"\
    "pxor %%mm5, %%mm5 \n\t"\
    "pxor %%mm6, %%mm6 \n\t"\
    "pxor %%mm7, %%mm7 \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "jb 1b \n\t"\
    :: "r" (&c->redDither),\
    "r" (dest), "g" (width)\
    : "%"REG_a, "%"REG_d, "%"REG_S\
    );

170  170  
171  171 
/*
 * YSCALEYUV2YV121
 *
 * Asm template fragment (no asm() wrapper and no operand list; the user of
 * the macro supplies both). It refers to operands %0 (source base), %1
 * (destination base) and %2 (initial index value).
 *
 * REG_a is loaded from %2. Each iteration loads 16 bytes from %0 + 2*REG_a,
 * arithmetic-shifts the eight words right by 7, packs them to unsigned bytes
 * with saturation and stores 8 bytes to %1 + REG_a via MOVNTQ. REG_a is then
 * increased by 8 and the loop continues for as long as that add produces no
 * carry.
 *
 * NOTE(review): the carry-terminated loop suggests %2 is a negative index
 * counting up towards zero, with %0/%1 pointing past the end of the data -
 * confirm against the call sites.
 */
#define YSCALEYUV2YV121 \
    "mov %2, %%"REG_a" \n\t"\
    ASMALIGN(4) /* FIXME Unroll? */\
    "1: \n\t"\
    "movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
    "movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
    "psraw $7, %%mm0 \n\t"\
    "psraw $7, %%mm1 \n\t"\
    "packuswb %%mm1, %%mm0 \n\t"\
    MOVNTQ(%%mm0, (%1, %%REGa))\
    "add $8, %%"REG_a" \n\t"\
    "jnc 1b \n\t"


183  183  
184  184 
/*
    :: "m" (lumFilterSize), "m" (chrFilterSize),
       "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4),
       "r" (dest), "m" (dstW),
       "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize)
    : "%eax", "%ebx", "%ecx", "%edx", "%esi"
*/
191  191 
/*
 * YSCALEYUV2PACKEDX
 *
 * Opens an asm volatile( statement and leaves it open: the expansion ends in
 * the middle of the template, so the invoking code must append more template
 * text and then close the statement with YSCALEYUV2PACKEDX_END.
 *
 * Operand %0 is the base for CHR_MMX_FILTER_OFFSET, LUM_MMX_FILTER_OFFSET and
 * VROUNDER_OFFSET. REG_a is zeroed once and used as the sample index; this
 * macro never advances it and never branches back to label "1".
 * NOTE(review): presumably the code appended after the macro stores the
 * pixels, advances REG_a and jumps back to "1" - confirm at the call sites.
 *
 * At label "1" two filter lists are walked, each with its own local label
 * "2". Entries are 16 bytes: source pointer at +0, coefficients at +8, and
 * the walk ends when the next pointer is zero.
 *   - chroma list: loads from src + REG_a and src + 4096 + REG_a
 *     (commented UsrcData / VsrcData); pmulhw products are summed into
 *     mm3 and mm4, both initialised to the rounder.
 *   - luma list: loads from src + 2*REG_a and src + 8 + 2*REG_a
 *     (commented Y1srcData / Y2srcData); pmulhw products are summed into
 *     mm1 and mm7, both initialised to the rounder.
 *
 * On exit: mm3 = U sum, mm4 = V sum, mm1 = Y1 sum, mm7 = Y2 sum.
 * REG_d, REG_S, mm0, mm2 and mm5 are used as scratch.
 */
#define YSCALEYUV2PACKEDX \
    asm volatile(\
    "xor %%"REG_a", %%"REG_a" \n\t"\
    ASMALIGN(4)\
    "nop \n\t"\
    "1: \n\t"\
    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
    "movq %%mm3, %%mm4 \n\t"\
    ASMALIGN(4)\
    "2: \n\t"\
    "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
    "movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\
    "movq 4096(%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\
    "add $16, %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "pmulhw %%mm0, %%mm2 \n\t"\
    "pmulhw %%mm0, %%mm5 \n\t"\
    "paddw %%mm2, %%mm3 \n\t"\
    "paddw %%mm5, %%mm4 \n\t"\
    "test %%"REG_S", %%"REG_S" \n\t"\
    " jnz 2b \n\t"\
\
    "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm1 \n\t"\
    "movq %%mm1, %%mm7 \n\t"\
    ASMALIGN(4)\
    "2: \n\t"\
    "movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\
    "movq (%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y1srcData */\
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm5 \n\t" /* Y2srcData */\
    "add $16, %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "pmulhw %%mm0, %%mm2 \n\t"\
    "pmulhw %%mm0, %%mm5 \n\t"\
    "paddw %%mm2, %%mm1 \n\t"\
    "paddw %%mm5, %%mm7 \n\t"\
    "test %%"REG_S", %%"REG_S" \n\t"\
    " jnz 2b \n\t"\

/*
 * YSCALEYUV2PACKEDX_END
 *
 * Closes the asm statement opened by YSCALEYUV2PACKEDX or
 * YSCALEYUV2PACKEDX_ACCURATE. It supplies no outputs and six inputs:
 *   %0     = &c->redDither   ("r")
 *   %1..%3 = dummy           ("m", placeholders)
 *   %4     = dest            ("r")
 *   %5     = dstW            ("m")
 * and declares REG_a, REG_d and REG_S as clobbered. `c`, `dummy`, `dest` and
 * `dstW` must be in scope where the macro is expanded.
 */
#define YSCALEYUV2PACKEDX_END \
    :: "r" (&c->redDither), \
    "m" (dummy), "m" (dummy), "m" (dummy),\
    "r" (dest), "m" (dstW) \
    : "%"REG_a, "%"REG_d, "%"REG_S \
    );

239  239  
240  240 
/*
 * YSCALEYUV2PACKEDX_ACCURATE
 *
 * Higher-precision counterpart of YSCALEYUV2PACKEDX. Like that macro it opens
 * an asm volatile( statement and leaves it open; the invoking code appends
 * further template text and closes it with YSCALEYUV2PACKEDX_END. REG_a is
 * zeroed once and used as the sample index; it is neither advanced nor is
 * label "1" branched to from inside this macro.
 *
 * Both filter walks consume two source lines per step: one pointer from
 * (REG_d), a second from 4(REG_d), the coefficient quadword from 8(REG_d),
 * then REG_d += 16; the walk stops when the next first pointer is zero.
 * Samples of the two lines are interleaved (punpcklwd/punpckhwd) and
 * multiplied with pmaddwd into 32-bit sums.
 *
 * NOTE(review): reading the second pointer at offset 4 presumes 4-byte
 * pointers in the filter entry - confirm for 64-bit builds.
 *
 * Chroma pass: sums in mm4..mm7 are shifted right by 16, packed to words
 * with signed saturation, offset by the rounder and spilled to U_TEMP(%0)
 * and V_TEMP(%0), because the luma pass reuses the registers.
 * Luma pass: sums in mm1/mm5 and mm7/mm6 get the same shift, pack and
 * rounder treatment, leaving Y1 in mm1 and Y2 in mm7.
 * Finally U and V are reloaded from the temporaries into mm3 and mm4.
 *
 * On exit: mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V (16-bit lanes).
 */
#define YSCALEYUV2PACKEDX_ACCURATE \
    asm volatile(\
    "xor %%"REG_a", %%"REG_a" \n\t"\
    ASMALIGN(4)\
    "nop \n\t"\
    "1: \n\t"\
    "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "pxor %%mm4, %%mm4 \n\t"\
    "pxor %%mm5, %%mm5 \n\t"\
    "pxor %%mm6, %%mm6 \n\t"\
    "pxor %%mm7, %%mm7 \n\t"\
    ASMALIGN(4)\
    "2: \n\t"\
    "movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\
    "movq 4096(%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\
    "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
    "movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\
    "movq %%mm0, %%mm3 \n\t"\
    "punpcklwd %%mm1, %%mm0 \n\t"\
    "punpckhwd %%mm1, %%mm3 \n\t"\
    "movq 8(%%"REG_d"), %%mm1 \n\t" /* filterCoeff */\
    "pmaddwd %%mm1, %%mm0 \n\t"\
    "pmaddwd %%mm1, %%mm3 \n\t"\
    "paddd %%mm0, %%mm4 \n\t"\
    "paddd %%mm3, %%mm5 \n\t"\
    "movq 4096(%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\
    "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
    "add $16, %%"REG_d" \n\t"\
    "test %%"REG_S", %%"REG_S" \n\t" /* flags are consumed by the jnz below */\
    "movq %%mm2, %%mm0 \n\t"\
    "punpcklwd %%mm3, %%mm2 \n\t"\
    "punpckhwd %%mm3, %%mm0 \n\t"\
    "pmaddwd %%mm1, %%mm2 \n\t"\
    "pmaddwd %%mm1, %%mm0 \n\t"\
    "paddd %%mm2, %%mm6 \n\t"\
    "paddd %%mm0, %%mm7 \n\t"\
    " jnz 2b \n\t"\
    "psrad $16, %%mm4 \n\t"\
    "psrad $16, %%mm5 \n\t"\
    "psrad $16, %%mm6 \n\t"\
    "psrad $16, %%mm7 \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
    "packssdw %%mm5, %%mm4 \n\t"\
    "packssdw %%mm7, %%mm6 \n\t"\
    "paddw %%mm0, %%mm4 \n\t"\
    "paddw %%mm0, %%mm6 \n\t"\
    "movq %%mm4, "U_TEMP"(%0) \n\t"\
    "movq %%mm6, "V_TEMP"(%0) \n\t"\
\
    "lea "LUM_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\
    "mov (%%"REG_d"), %%"REG_S" \n\t"\
    "pxor %%mm1, %%mm1 \n\t"\
    "pxor %%mm5, %%mm5 \n\t"\
    "pxor %%mm7, %%mm7 \n\t"\
    "pxor %%mm6, %%mm6 \n\t"\
    ASMALIGN(4)\
    "2: \n\t"\
    "movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
    "mov 4(%%"REG_d"), %%"REG_S" \n\t"\
    "movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
    "movq %%mm0, %%mm3 \n\t"\
    "punpcklwd %%mm4, %%mm0 \n\t"\
    "punpckhwd %%mm4, %%mm3 \n\t"\
    "movq 8(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\
    "pmaddwd %%mm4, %%mm0 \n\t"\
    "pmaddwd %%mm4, %%mm3 \n\t"\
    "paddd %%mm0, %%mm1 \n\t"\
    "paddd %%mm3, %%mm5 \n\t"\
    "movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
    "mov 16(%%"REG_d"), %%"REG_S" \n\t"\
    "add $16, %%"REG_d" \n\t"\
    "test %%"REG_S", %%"REG_S" \n\t" /* flags are consumed by the jnz below */\
    "movq %%mm2, %%mm0 \n\t"\
    "punpcklwd %%mm3, %%mm2 \n\t"\
    "punpckhwd %%mm3, %%mm0 \n\t"\
    "pmaddwd %%mm4, %%mm2 \n\t"\
    "pmaddwd %%mm4, %%mm0 \n\t"\
    "paddd %%mm2, %%mm7 \n\t"\
    "paddd %%mm0, %%mm6 \n\t"\
    " jnz 2b \n\t"\
    "psrad $16, %%mm1 \n\t"\
    "psrad $16, %%mm5 \n\t"\
    "psrad $16, %%mm7 \n\t"\
    "psrad $16, %%mm6 \n\t"\
    "movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
    "packssdw %%mm5, %%mm1 \n\t"\
    "packssdw %%mm6, %%mm7 \n\t"\
    "paddw %%mm0, %%mm1 \n\t"\
    "paddw %%mm0, %%mm7 \n\t"\
    "movq "U_TEMP"(%0), %%mm3 \n\t"\
    "movq "V_TEMP"(%0), %%mm4 \n\t"\



333  333  
334  334 
/*
 * YSCALEYUV2RGBX
 *
 * Asm template fragment converting eight filtered YUV samples held in MMX
 * registers to packed 8-bit B, G and R. It contains no loop and no operand
 * list; it only reads constants relative to operand %0 (U_OFFSET, V_OFFSET,
 * Y_OFFSET, UG/VG/UB/VR_COEFF, Y_COEFF).
 *
 * Inputs (16-bit lanes): mm1 = Y1, mm7 = Y2, mm3 = U, mm4 = V - the register
 * assignment left behind by YSCALEYUV2PACKEDX / YSCALEYUV2PACKEDX_ACCURATE.
 *
 * Steps: subtract the U/V/Y offsets, multiply by the coefficients with
 * pmulhw, sum the two green contributions, duplicate each chroma term for
 * two neighbouring pixels (punpcklwd/punpckhwd of a register with itself),
 * add luma, and pack each channel to unsigned bytes with saturation.
 *
 * Outputs: mm2 = B, mm4 = G, mm5 = R (eight bytes each), mm7 = 0.
 * mm0, mm1, mm3 and mm6 are overwritten.
 */
#define YSCALEYUV2RGBX \
    "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
    "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
    "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
    "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
    "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
    "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
    "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
    "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
    "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
    "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
    "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4 \n\t"\
    "movq %%mm2, %%mm0 \n\t"\
    "movq %%mm5, %%mm6 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
    "punpcklwd %%mm2, %%mm2 \n\t"\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "punpcklwd %%mm4, %%mm4 \n\t"\
    "paddw %%mm1, %%mm2 \n\t"\
    "paddw %%mm1, %%mm5 \n\t"\
    "paddw %%mm1, %%mm4 \n\t"\
    "punpckhwd %%mm0, %%mm0 \n\t"\
    "punpckhwd %%mm6, %%mm6 \n\t"\
    "punpckhwd %%mm3, %%mm3 \n\t"\
    "paddw %%mm7, %%mm0 \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw %%mm7, %%mm3 \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2 \n\t"\
    "packuswb %%mm6, %%mm5 \n\t"\
    "packuswb %%mm3, %%mm4 \n\t"\
    "pxor %%mm7, %%mm7 \n\t"


370  370 
#if 0
/*
 * FULL_YSCALEYUV2RGB (compiled out by the surrounding #if 0)
 *
 * Asm template fragment: blends two luma lines and two chroma lines and
 * converts the result to B, G and R. No operand list is supplied here; the
 * code refers to operands %0/%1 (buf0/buf1), %2/%3 (uvbuf0/uvbuf1), %6
 * (yalpha1) and %7 (uvalpha1), and to the globals w80, w400, yCoeff,
 * ubCoeff, ugCoeff, vrCoeff and vgCoeff through MANGLE().
 *
 * The blend factors are broadcast to all four words of mm6 and mm5, REG_a is
 * zeroed, and label "1" marks the loop head. The macro ends after the final
 * pack, so index update, store and loop branch are left to the user of the
 * macro.
 *
 * On exit the low four bytes of mm3, mm1 and mm0 hold B, G and R (each is
 * packed with itself); mm7 is zero.
 */
#define FULL_YSCALEYUV2RGB \
    "pxor %%mm7, %%mm7 \n\t"\
    "movd %6, %%mm6 \n\t" /*yalpha1*/\
    "punpcklwd %%mm6, %%mm6 \n\t"\
    "punpcklwd %%mm6, %%mm6 \n\t"\
    "movd %7, %%mm5 \n\t" /*uvalpha1*/\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "xor %%"REG_a", %%"REG_a" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%0, %%"REG_a",2), %%mm0 \n\t" /*buf0[eax]*/\
    "movq (%1, %%"REG_a",2), %%mm1 \n\t" /*buf1[eax]*/\
    "movq (%2, %%"REG_a",2), %%mm2 \n\t" /* uvbuf0[eax]*/\
    "movq (%3, %%"REG_a",2), %%mm3 \n\t" /* uvbuf1[eax]*/\
    "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
    "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
    "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
    "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "movq 4096(%2, %%"REG_a",2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
    "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
    "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
    "movq 4096(%3, %%"REG_a",2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\
    "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
    "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
    "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\
    "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\
    "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\
\
\
    "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
    "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
    "pmulhw "MANGLE(ubCoeff)", %%mm3 \n\t"\
    "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
    "pmulhw "MANGLE(ugCoeff)", %%mm2 \n\t"\
    "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
    "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\
\
\
    "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\
    "pmulhw "MANGLE(vrCoeff)", %%mm0 \n\t"\
    "pmulhw "MANGLE(vgCoeff)", %%mm4 \n\t"\
    "paddw %%mm1, %%mm3 \n\t" /* B*/\
    "paddw %%mm1, %%mm0 \n\t" /* R*/\
    "packuswb %%mm3, %%mm3 \n\t"\
\
    "packuswb %%mm0, %%mm0 \n\t"\
    "paddw %%mm4, %%mm2 \n\t"\
    "paddw %%mm2, %%mm1 \n\t" /* G*/\
\
    "packuswb %%mm1, %%mm1 \n\t"
#endif
424  424  
425  425 
/*
 * REAL_YSCALEYUV2PACKED(index, c)
 *
 * Asm template fragment that blends two luma lines and two chroma lines into
 * 16-bit Y/U/V values, for a packed-output writer appended by the user of
 * the macro. It supplies no operand list, store, index update or loop
 * branch.
 *
 *   index - register used as the sample index (stringified; zeroed here)
 *   c     - base register for CHR_MMX_FILTER_OFFSET / LUM_MMX_FILTER_OFFSET
 *           (stringified)
 *
 * Operands used: %0/%1 = buf0/buf1 (luma), %2/%3 = uvbuf0/uvbuf1 (chroma),
 * as named in the inline comments.
 *
 * Before the loop the two blend factors stored at
 * CHR_MMX_FILTER_OFFSET+8(c) and LUM_MMX_FILTER_OFFSET+8(c) are shifted right
 * by 3 and written back IN PLACE, so each expansion permanently modifies
 * those memory slots.
 *
 * At label "1": the difference of the two lines is multiplied by the blend
 * factor (pmulhw) and added to the second line shifted right by 7. The
 * second chroma plane is read at a fixed +4096 byte displacement.
 *
 * On exit: mm3 = U, mm4 = V, mm1 = Y1, mm7 = Y2.
 * mm0, mm2, mm5 and mm6 are scratch.
 *
 * Note: the trailing ">>4" in several inline comments is inherited text; the
 * instructions next to them shift by 7.
 */
#define REAL_YSCALEYUV2PACKED(index, c) \
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
    "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
    "psraw $3, %%mm0 \n\t"\
    "psraw $3, %%mm1 \n\t"\
    "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c") \n\t"\
    "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c") \n\t"\
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
    "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
    "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
    "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
    "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
    "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
    "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\
    "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\
    "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
    "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
    "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\
    "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\
    "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\
    "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\
    "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\
    "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\
    "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\
    "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\
    "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\
    "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\
    "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\

/* Extra expansion level: macro arguments are expanded before they reach the
 * # stringification operators inside REAL_YSCALEYUV2PACKED. */
#define YSCALEYUV2PACKED(index, c) REAL_YSCALEYUV2PACKED(index, c)
462  462  
463  463 
/*
 * REAL_YSCALEYUV2RGB(index, c) -- inline-asm string fragment (AT&T syntax, MMX).
 *
 * Zeroes the loop counter, opens the local label "1:" and emits one loop
 * iteration that blends two chroma lines and two luma lines vertically and
 * converts the result to packed B, G and R bytes. The fragment does not
 * close the loop; the WRITEBGRxx macros in this file contain the matching
 * "jb 1b".
 *
 *   index  register used as loop counter (stringified with #)
 *   c      register used as base for the *_OFFSET / *_COEFF displacements
 *
 * Operand roles are taken from the original inline comments: %0 = buf0,
 * %1 = buf1, %2 = uvbuf0, %3 = uvbuf1. The actual bindings belong to the
 * enclosing asm statement and are not visible in this macro.
 * Chroma is read at byte offset index, luma at byte offset 2*index; the
 * second chroma plane is read 4096 bytes (2048 words) after the first.
 *
 * Register state when the fragment ends:
 *   mm2 = B, mm4 = G, mm5 = R (eight unsigned bytes each), mm7 = 0;
 *   mm0, mm1, mm3 and mm6 are overwritten.
 */
#define REAL_YSCALEYUV2RGB(index, c) \
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[index] */\
    "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[index] */\
    "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[index+2048] */\
    "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[index+2048] */\
    "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[index] - uvbuf1[index] */\
    "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[index+2048] - uvbuf1[index+2048] */\
    "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
    "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[index] - uvbuf1[index])*uvalpha1 >> 16 */\
    "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[index+2048] - uvbuf1[index+2048])*uvalpha1 >> 16 */\
    "psraw $4, %%mm3 \n\t" /* uvbuf1[index] >> 4 */\
    "psraw $4, %%mm4 \n\t" /* uvbuf1[index+2048] >> 4 */\
    "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[index]*uvalpha1 + uvbuf1[index]*(1-uvalpha1) */\
    "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[index+2048]*uvalpha1 + uvbuf1[index+2048]*(1-uvalpha1) */\
    "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8 */\
    "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8 */\
    "movq %%mm3, %%mm2 \n\t" /* (U-128)8 */\
    "movq %%mm4, %%mm5 \n\t" /* (V-128)8 */\
    "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "movq (%0, "#index", 2), %%mm0 \n\t" /* buf0[index] */\
    "movq (%1, "#index", 2), %%mm1 \n\t" /* buf1[index] */\
    "movq 8(%0, "#index", 2), %%mm6 \n\t" /* buf0[index+4] */\
    "movq 8(%1, "#index", 2), %%mm7 \n\t" /* buf1[index+4] */\
    "psubw %%mm1, %%mm0 \n\t" /* buf0[index] - buf1[index] */\
    "psubw %%mm7, %%mm6 \n\t" /* buf0[index+4] - buf1[index+4] */\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t" /* (buf0[index] - buf1[index])*yalpha1 >> 16 */\
    "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6 \n\t" /* (buf0[index+4] - buf1[index+4])*yalpha1 >> 16 */\
    "psraw $4, %%mm1 \n\t" /* buf1[index] >> 4 */\
    "psraw $4, %%mm7 \n\t" /* buf1[index+4] >> 4 */\
    "paddw %%mm0, %%mm1 \n\t" /* buf0[index]*yalpha1 + buf1[index]*(1-yalpha1) >> 16 */\
    "paddw %%mm6, %%mm7 \n\t" /* buf0[index+4]*yalpha1 + buf1[index+4]*(1-yalpha1) >> 16 */\
    "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
    "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16) */\
    "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16) */\
    "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
    "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4 \n\t" /* ug + vg: combined green chroma term */\
    "movq %%mm2, %%mm0 \n\t"\
    "movq %%mm5, %%mm6 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
    /* unpacking a register with itself doubles each chroma word so that */\
    /* one chroma sample is applied to two neighbouring luma samples     */\
    "punpcklwd %%mm2, %%mm2 \n\t"\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "punpcklwd %%mm4, %%mm4 \n\t"\
    "paddw %%mm1, %%mm2 \n\t"\
    "paddw %%mm1, %%mm5 \n\t"\
    "paddw %%mm1, %%mm4 \n\t"\
    "punpckhwd %%mm0, %%mm0 \n\t"\
    "punpckhwd %%mm6, %%mm6 \n\t"\
    "punpckhwd %%mm3, %%mm3 \n\t"\
    "paddw %%mm7, %%mm0 \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw %%mm7, %%mm3 \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2 \n\t" /* saturate to bytes: mm2 = B */\
    "packuswb %%mm6, %%mm5 \n\t" /* mm5 = R */\
    "packuswb %%mm3, %%mm4 \n\t" /* mm4 = G */\
    "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 for the store macro that follows */

/* Forwarding wrapper: the extra macro level lets the preprocessor fully expand
 * the arguments before REAL_YSCALEYUV2RGB stringifies them with '#'. */
#define YSCALEYUV2RGB(index, c) REAL_YSCALEYUV2RGB(index, c)
528  528  
529  529 
/*
 * REAL_YSCALEYUV2PACKED1(index, c) -- inline-asm string fragment (AT&T, MMX).
 *
 * Single-source-line variant: zeroes the loop counter, opens label "1:" and
 * loads one chroma line and one luma line, scaling every 16-bit sample down
 * with an arithmetic shift right by 7. No vertical blending is performed.
 * The loop is not closed here; the macro that stores the pixels is expected
 * to supply the backward branch to "1:".
 *
 *   index  register used as loop counter (stringified with #)
 *   c      accepted for signature parity with the other fragments; not
 *          referenced in this body
 *
 * Operand roles per the original comments: %0 = buf0, %2 = uvbuf0 (bindings
 * are defined by the enclosing asm statement, not visible here).
 *
 * Register state when the fragment ends:
 *   mm3 = first chroma plane >> 7, mm4 = second chroma plane >> 7,
 *   mm1 = luma words 0..3 >> 7,    mm7 = luma words 4..7 >> 7.
 *
 * The original ended its last line with a line-continuation backslash that
 * spliced the following blank line into the macro; it is dropped here so the
 * definition ends where the code ends.
 */
#define REAL_YSCALEYUV2PACKED1(index, c) \
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[index] */\
    "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[index+2048] */\
    "psraw $7, %%mm3 \n\t" \
    "psraw $7, %%mm4 \n\t" \
    "movq (%0, "#index", 2), %%mm1 \n\t" /* buf0[index] */\
    "movq 8(%0, "#index", 2), %%mm7 \n\t" /* buf0[index+4] */\
    "psraw $7, %%mm1 \n\t" \
    "psraw $7, %%mm7 \n\t"

/* Forwarding wrapper: the extra macro level lets the preprocessor fully expand
 * the arguments before REAL_YSCALEYUV2PACKED1 stringifies them with '#'. */
#define YSCALEYUV2PACKED1(index, c) REAL_YSCALEYUV2PACKED1(index, c)
543  543  
544  544 
/*
 * REAL_YSCALEYUV2RGB1(index, c) -- inline-asm string fragment (AT&T, MMX).
 *
 * Single-source-line YUV -> RGB fragment: zeroes the loop counter, opens
 * label "1:" and converts one chroma line and one luma line (no vertical
 * blending) into packed B, G and R bytes. The loop is closed by the
 * WRITEBGRxx macro that follows it in the asm statement ("jb 1b").
 *
 *   index  register used as loop counter (stringified with #)
 *   c      register used as base for the *_OFFSET / *_COEFF displacements
 *
 * Operand roles per the original comments: %0 = buf0, %2 = uvbuf0 (bindings
 * are defined by the enclosing asm statement, not visible here).
 *
 * Register state when the fragment ends:
 *   mm2 = B, mm4 = G, mm5 = R (eight unsigned bytes each), mm7 = 0;
 *   mm0, mm1, mm3 and mm6 are overwritten.
 *
 * Note: the shift comments in the original were copied from the two-line
 * variant and described a buffer subtraction that this code does not do;
 * they are corrected below.
 */
#define REAL_YSCALEYUV2RGB1(index, c) \
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[index] */\
    "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[index+2048] */\
    "psraw $4, %%mm3 \n\t" /* uvbuf0[index] >> 4 */\
    "psraw $4, %%mm4 \n\t" /* uvbuf0[index+2048] >> 4 */\
    "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8 */\
    "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8 */\
    "movq %%mm3, %%mm2 \n\t" /* (U-128)8 */\
    "movq %%mm4, %%mm5 \n\t" /* (V-128)8 */\
    "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "movq (%0, "#index", 2), %%mm1 \n\t" /* buf0[index] */\
    "movq 8(%0, "#index", 2), %%mm7 \n\t" /* buf0[index+4] */\
    "psraw $4, %%mm1 \n\t" /* buf0[index] >> 4 */\
    "psraw $4, %%mm7 \n\t" /* buf0[index+4] >> 4 */\
    "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
    "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16) */\
    "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16) */\
    "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
    "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4 \n\t" /* ug + vg: combined green chroma term */\
    "movq %%mm2, %%mm0 \n\t"\
    "movq %%mm5, %%mm6 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
    /* unpacking a register with itself doubles each chroma word so that */\
    /* one chroma sample is applied to two neighbouring luma samples     */\
    "punpcklwd %%mm2, %%mm2 \n\t"\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "punpcklwd %%mm4, %%mm4 \n\t"\
    "paddw %%mm1, %%mm2 \n\t"\
    "paddw %%mm1, %%mm5 \n\t"\
    "paddw %%mm1, %%mm4 \n\t"\
    "punpckhwd %%mm0, %%mm0 \n\t"\
    "punpckhwd %%mm6, %%mm6 \n\t"\
    "punpckhwd %%mm3, %%mm3 \n\t"\
    "paddw %%mm7, %%mm0 \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw %%mm7, %%mm3 \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2 \n\t" /* saturate to bytes: mm2 = B */\
    "packuswb %%mm6, %%mm5 \n\t" /* mm5 = R */\
    "packuswb %%mm3, %%mm4 \n\t" /* mm4 = G */\
    "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 for the store macro that follows */

/* Forwarding wrapper: the extra macro level lets the preprocessor fully expand
 * the arguments before REAL_YSCALEYUV2RGB1 stringifies them with '#'. */
#define YSCALEYUV2RGB1(index, c) REAL_YSCALEYUV2RGB1(index, c)
592  592  
593  593 
/*
 * REAL_YSCALEYUV2PACKED1b(index, c) -- inline-asm string fragment (AT&T, MMX).
 *
 * Like the single-line packed fragment for luma, but chroma is taken as the
 * sum of two chroma lines: zeroes the loop counter, opens label "1:", adds
 * the two chroma lines word-wise and shifts the sum right (logical) by 8,
 * then loads one luma line and shifts it right (arithmetic) by 7.
 * The loop is not closed here; the macro that stores the pixels is expected
 * to supply the backward branch to "1:".
 *
 *   index  register used as loop counter (stringified with #)
 *   c      accepted for signature parity with the other fragments; not
 *          referenced in this body
 *
 * Operand roles per the original comments: %0 = buf0, %2 = uvbuf0,
 * %3 = uvbuf1 (bindings are defined by the enclosing asm statement).
 *
 * Register state when the fragment ends:
 *   mm3 = (first chroma plane, line0 + line1) >> 8,
 *   mm4 = (second chroma plane, line0 + line1) >> 8,
 *   mm1 = luma words 0..3 >> 7, mm7 = luma words 4..7 >> 7;
 *   mm2 and mm5 hold the raw uvbuf0 loads.
 */
#define REAL_YSCALEYUV2PACKED1b(index, c) \
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[index] */\
    "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[index] */\
    "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[index+2048] */\
    "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[index+2048] */\
    "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[index] + uvbuf1[index] */\
    "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[index+2048] + uvbuf1[index+2048] */\
    "psrlw $8, %%mm3 \n\t" \
    "psrlw $8, %%mm4 \n\t" \
    "movq (%0, "#index", 2), %%mm1 \n\t" /* buf0[index] */\
    "movq 8(%0, "#index", 2), %%mm7 \n\t" /* buf0[index+4] */\
    "psraw $7, %%mm1 \n\t" \
    "psraw $7, %%mm7 \n\t"

/* Forwarding wrapper: the extra macro level lets the preprocessor fully expand
 * the arguments before REAL_YSCALEYUV2PACKED1b stringifies them with '#'. */
#define YSCALEYUV2PACKED1b(index, c) REAL_YSCALEYUV2PACKED1b(index, c)
610  610  
611  611 
// do vertical chrominance interpolation
/*
 * REAL_YSCALEYUV2RGB1b(index, c) -- inline-asm string fragment (AT&T, MMX).
 *
 * YUV -> RGB fragment that uses one luma line but the sum of two chroma
 * lines: zeroes the loop counter, opens label "1:", adds the two chroma
 * lines word-wise, shifts the sum right (logical) by 5 and then runs the
 * same colour conversion as the other YSCALEYUV2RGB fragments. The loop is
 * closed by the WRITEBGRxx macro that follows it ("jb 1b").
 *
 *   index  register used as loop counter (stringified with #)
 *   c      register used as base for the *_OFFSET / *_COEFF displacements
 *
 * Operand roles per the original comments: %0 = buf0, %2 = uvbuf0,
 * %3 = uvbuf1 (bindings are defined by the enclosing asm statement).
 *
 * Register state when the fragment ends:
 *   mm2 = B, mm4 = G, mm5 = R (eight unsigned bytes each), mm7 = 0;
 *   mm0, mm1, mm3 and mm6 are overwritten.
 *
 * Known limitation carried over from the original: the 16-bit chroma sum
 * is flagged "might overflow" before the shift.
 */
#define REAL_YSCALEYUV2RGB1b(index, c) \
    "xor "#index", "#index" \n\t"\
    ASMALIGN(4)\
    "1: \n\t"\
    "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[index] */\
    "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[index] */\
    "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[index+2048] */\
    "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[index+2048] */\
    "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[index] + uvbuf1[index] */\
    "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[index+2048] + uvbuf1[index+2048] */\
    "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
    "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\
    "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8 */\
    "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8 */\
    "movq %%mm3, %%mm2 \n\t" /* (U-128)8 */\
    "movq %%mm4, %%mm5 \n\t" /* (V-128)8 */\
    "pmulhw "UG_COEFF"("#c"), %%mm3 \n\t"\
    "pmulhw "VG_COEFF"("#c"), %%mm4 \n\t"\
    /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
    "movq (%0, "#index", 2), %%mm1 \n\t" /* buf0[index] */\
    "movq 8(%0, "#index", 2), %%mm7 \n\t" /* buf0[index+4] */\
    "psraw $4, %%mm1 \n\t" /* buf0[index] >> 4 */\
    "psraw $4, %%mm7 \n\t" /* buf0[index+4] >> 4 */\
    "pmulhw "UB_COEFF"("#c"), %%mm2 \n\t"\
    "pmulhw "VR_COEFF"("#c"), %%mm5 \n\t"\
    "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16) */\
    "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16) */\
    "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\
    "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\
    /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
    "paddw %%mm3, %%mm4 \n\t" /* ug + vg: combined green chroma term */\
    "movq %%mm2, %%mm0 \n\t"\
    "movq %%mm5, %%mm6 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
    /* unpacking a register with itself doubles each chroma word so that */\
    /* one chroma sample is applied to two neighbouring luma samples     */\
    "punpcklwd %%mm2, %%mm2 \n\t"\
    "punpcklwd %%mm5, %%mm5 \n\t"\
    "punpcklwd %%mm4, %%mm4 \n\t"\
    "paddw %%mm1, %%mm2 \n\t"\
    "paddw %%mm1, %%mm5 \n\t"\
    "paddw %%mm1, %%mm4 \n\t"\
    "punpckhwd %%mm0, %%mm0 \n\t"\
    "punpckhwd %%mm6, %%mm6 \n\t"\
    "punpckhwd %%mm3, %%mm3 \n\t"\
    "paddw %%mm7, %%mm0 \n\t"\
    "paddw %%mm7, %%mm6 \n\t"\
    "paddw %%mm7, %%mm3 \n\t"\
    /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
    "packuswb %%mm0, %%mm2 \n\t" /* saturate to bytes: mm2 = B */\
    "packuswb %%mm6, %%mm5 \n\t" /* mm5 = R */\
    "packuswb %%mm3, %%mm4 \n\t" /* mm4 = G */\
    "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0 for the store macro that follows */

/* Forwarding wrapper: the extra macro level lets the preprocessor fully expand
 * the arguments before REAL_YSCALEYUV2RGB1b stringifies them with '#'. */
#define YSCALEYUV2RGB1b(index, c) REAL_YSCALEYUV2RGB1b(index, c)
664  664  
665  665 
/*
 * REAL_WRITEBGR32(dst, dstw, index) -- inline-asm string fragment (AT&T, MMX).
 *
 * Interleaves eight B, G and R bytes with a zero byte into eight 32-bit
 * pixels, stores them with four MOVNTQ writes and closes the loop opened at
 * label "1:" by a preceding fragment.
 *
 *   dst    destination base register (passed through to MOVNTQ)
 *   dstw   loop bound compared against index (stringified with #)
 *   index  loop counter; advanced by 8 per iteration, scaled by 4 for the
 *          store address
 *
 * Expected on entry: mm2 = B, mm4 = G, mm5 = R, mm7 = 0.
 * Overwrites mm0..mm3, mm5 and mm6. Byte diagrams in the comments list the
 * most significant byte first.
 */
#define REAL_WRITEBGR32(dst, dstw, index) \
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
    "movq %%mm2, %%mm1 \n\t" /* B */\
    "movq %%mm5, %%mm6 \n\t" /* R */\
    "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
    "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
    "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
    "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
    "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
    "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
    "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
    "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
    "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
    "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
\
    MOVNTQ(%%mm0, (dst, index, 4))\
    MOVNTQ(%%mm2, 8(dst, index, 4))\
    MOVNTQ(%%mm1, 16(dst, index, 4))\
    MOVNTQ(%%mm3, 24(dst, index, 4))\
\
    "add $8, "#index" \n\t" /* eight pixels per iteration */\
    "cmp "#dstw", "#index" \n\t"\
    " jb 1b \n\t" /* unsigned compare: loop while index < dstw */

/* Forwarding wrapper: the extra macro level lets the preprocessor fully expand
 * the arguments before REAL_WRITEBGR32 stringifies them with '#'. */
#define WRITEBGR32(dst, dstw, index) REAL_WRITEBGR32(dst, dstw, index)
689  689  
690  690 
/*
 * REAL_WRITEBGR16(dst, dstw, index) -- inline-asm string fragment (AT&T, MMX).
 *
 * Packs eight B, G and R bytes into eight 16-bit pixels, stores them with
 * two MOVNTQ writes and closes the loop opened at label "1:" by a preceding
 * fragment.
 *
 *   dst    destination base register (passed through to MOVNTQ)
 *   dstw   loop bound compared against index (stringified with #)
 *   index  loop counter; advanced by 8 per iteration, scaled by 2 for the
 *          store address
 *
 * Expected on entry: mm2 = B, mm4 = G, mm5 = R, mm7 = 0.
 * Overwrites mm1..mm5.
 *
 * NOTE(review): bF8 and bFC are constants defined elsewhere; from their names
 * they are presumably per-byte masks 0xF8 and 0xFC, which would give a
 * 5-6-5 layout (B in bits 0-4, G in bits 5-10, R in bits 11-15) -- confirm
 * against their definitions.
 */
#define REAL_WRITEBGR16(dst, dstw, index) \
    "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
    "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\
    "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
    "psrlq $3, %%mm2 \n\t" /* move masked B to the low bits of each byte */\
\
    "movq %%mm2, %%mm1 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
\
    "punpcklbw %%mm7, %%mm3 \n\t" /* G, pixels 0..3, widened to words */\
    "punpcklbw %%mm5, %%mm2 \n\t" /* R in high byte, B in low byte, pixels 0..3 */\
    "punpckhbw %%mm7, %%mm4 \n\t" /* G, pixels 4..7, widened to words */\
    "punpckhbw %%mm5, %%mm1 \n\t" /* R in high byte, B in low byte, pixels 4..7 */\
\
    "psllq $3, %%mm3 \n\t" /* slide G between B and R */\
    "psllq $3, %%mm4 \n\t"\
\
    "por %%mm3, %%mm2 \n\t"\
    "por %%mm4, %%mm1 \n\t"\
\
    MOVNTQ(%%mm2, (dst, index, 2))\
    MOVNTQ(%%mm1, 8(dst, index, 2))\
\
    "add $8, "#index" \n\t" /* eight pixels per iteration */\
    "cmp "#dstw", "#index" \n\t"\
    " jb 1b \n\t" /* unsigned compare: loop while index < dstw */

/* Forwarding wrapper: the extra macro level lets the preprocessor fully expand
 * the arguments before REAL_WRITEBGR16 stringifies them with '#'. */
#define WRITEBGR16(dst, dstw, index) REAL_WRITEBGR16(dst, dstw, index)
717  717  
718  718 
/*
 * REAL_WRITEBGR15(dst, dstw, index) -- inline-asm string fragment (AT&T, MMX).
 *
 * Packs eight B, G and R bytes into eight 15-bit pixels held in 16-bit
 * words, stores them with two MOVNTQ writes and closes the loop opened at
 * label "1:" by a preceding fragment.
 *
 *   dst    destination base register (passed through to MOVNTQ)
 *   dstw   loop bound compared against index (stringified with #)
 *   index  loop counter; advanced by 8 per iteration, scaled by 2 for the
 *          store address
 *
 * Expected on entry: mm2 = B, mm4 = G, mm5 = R, mm7 = 0.
 * Overwrites mm1..mm5.
 *
 * NOTE(review): bF8 is a constant defined elsewhere; from its name it is
 * presumably a per-byte mask 0xF8, which would give a 5-5-5 layout (B in
 * bits 0-4, G in bits 5-9, R in bits 10-14) -- confirm against its
 * definition.
 */
#define REAL_WRITEBGR15(dst, dstw, index) \
    "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\
    "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\
    "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\
    "psrlq $3, %%mm2 \n\t" /* move masked B to the low bits of each byte */\
    "psrlq $1, %%mm5 \n\t" /* drop R by one bit so the top bit of each word stays clear */\
\
    "movq %%mm2, %%mm1 \n\t"\
    "movq %%mm4, %%mm3 \n\t"\
\
    "punpcklbw %%mm7, %%mm3 \n\t" /* G, pixels 0..3, widened to words */\
    "punpcklbw %%mm5, %%mm2 \n\t" /* R in high byte, B in low byte, pixels 0..3 */\
    "punpckhbw %%mm7, %%mm4 \n\t" /* G, pixels 4..7, widened to words */\
    "punpckhbw %%mm5, %%mm1 \n\t" /* R in high byte, B in low byte, pixels 4..7 */\
\
    "psllq $2, %%mm3 \n\t" /* slide G between B and R */\
    "psllq $2, %%mm4 \n\t"\
\
    "por %%mm3, %%mm2 \n\t"\
    "por %%mm4, %%mm1 \n\t"\
\
    MOVNTQ(%%mm2, (dst, index, 2))\
    MOVNTQ(%%mm1, 8(dst, index, 2))\
\
    "add $8, "#index" \n\t" /* eight pixels per iteration */\
    "cmp "#dstw", "#index" \n\t"\
    " jb 1b \n\t" /* unsigned compare: loop while index < dstw */

/* Forwarding wrapper: the extra macro level lets the preprocessor fully expand
 * the arguments before REAL_WRITEBGR15 stringifies them with '#'. */
#define WRITEBGR15(dst, dstw, index) REAL_WRITEBGR15(dst, dstw, index)
746  746  
747  747 
/*
 * WRITEBGR24OLD(dst, dstw, index) -- inline-asm string fragment (AT&T, MMX).
 *
 * Packs eight B, G and R bytes into 24 bytes of 3-byte pixels using
 * mask/shift/or steps, stores them with three MOVNTQ writes and closes the
 * loop opened at label "1:" by a preceding fragment.
 *
 *   dst    destination pointer register; used unindexed and advanced by 24
 *          bytes per iteration (so it is modified by this fragment)
 *   dstw   loop bound compared against index (stringified with #)
 *   index  loop counter; advanced by 8 per iteration
 *
 * Expected on entry: mm2 = B, mm4 = G, mm5 = R, mm7 = 0.
 * Overwrites mm0..mm6. Byte diagrams in the comments list the most
 * significant byte first; the trailing number is the source quad (a ".5"
 * marks an intermediate value).
 *
 * bm00000111, bm11111000 and bm00001111 are constants defined elsewhere;
 * the diagrams suggest their names spell which bytes they keep -- TODO
 * confirm against their definitions.
 */
#define WRITEBGR24OLD(dst, dstw, index) \
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
    "movq %%mm2, %%mm1 \n\t" /* B */\
    "movq %%mm5, %%mm6 \n\t" /* R */\
    "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
    "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
    "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
    "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
    "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
    "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
    "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
    "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
    "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
    "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
\
    "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
    "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\
    "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 0 */\
    "pand "MANGLE(bm11111000)", %%mm0 \n\t" /* 00RGB000 0.5 */\
    "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\
    "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\
    "psllq $48, %%mm2 \n\t" /* GB000000 1 */\
    "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
\
    "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\
    "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\
    "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\
    "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\
    "pand "MANGLE(bm00001111)", %%mm2 \n\t" /* 0000RGBR 1 */\
    "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\
    "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\
    "pand "MANGLE(bm00000111)", %%mm4 \n\t" /* 00000RGB 2 */\
    "pand "MANGLE(bm11111000)", %%mm1 \n\t" /* 00RGB000 2.5 */\
    "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\
    "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\
    "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\
    "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\
\
    "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\
    "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\
    "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\
    "pand "MANGLE(bm00000111)", %%mm5 \n\t" /* 00000RGB 3 */\
    "pand "MANGLE(bm11111000)", %%mm3 \n\t" /* 00RGB000 3.5 */\
    "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\
    "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\
    "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\
\
    MOVNTQ(%%mm0, (dst))\
    MOVNTQ(%%mm2, 8(dst))\
    MOVNTQ(%%mm3, 16(dst))\
    "add $24, "#dst" \n\t" /* 8 pixels * 3 bytes written */\
\
    "add $8, "#index" \n\t" /* eight pixels per iteration */\
    "cmp "#dstw", "#index" \n\t"\
    " jb 1b \n\t" /* unsigned compare: loop while index < dstw */


802  802  
803  803 
/*
 * WRITEBGR24MMX(dst, dstw, index) -- inline-asm string fragment (AT&T, MMX).
 *
 * Packs eight B, G and R bytes into 24 bytes of 3-byte pixels using only
 * shifts, unpacks and ORs (no memory masks), stores each output quad as
 * soon as it is complete and closes the loop opened at label "1:" by a
 * preceding fragment.
 *
 *   dst    destination pointer register; used unindexed and advanced by 24
 *          bytes per iteration (so it is modified by this fragment)
 *   dstw   loop bound compared against index (stringified with #)
 *   index  loop counter; advanced by 8 per iteration
 *
 * Expected on entry: mm2 = B, mm4 = G, mm5 = R, mm7 = 0.
 * Overwrites all of mm0..mm7 (mm7 is reused as scratch, so it is no longer
 * zero afterwards). Byte diagrams in the comments list the most significant
 * byte first; the trailing number is the source quad.
 */
#define WRITEBGR24MMX(dst, dstw, index) \
    /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
    "movq %%mm2, %%mm1 \n\t" /* B */\
    "movq %%mm5, %%mm6 \n\t" /* R */\
    "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\
    "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\
    "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\
    "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\
    "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\
    "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\
    "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\
    "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\
    "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\
    "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\
\
    "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\
    "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\
    "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\
    "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\
\
    "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\
    "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\
    "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\
    "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\
\
    "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\
    "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\
    "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\
    "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\
\
    "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\
    "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\
    "psllq $40, %%mm2 \n\t" /* GB000000 1 */\
    "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\
    MOVNTQ(%%mm0, (dst))\
\
    "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\
    "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\
    "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\
    "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\
    MOVNTQ(%%mm6, 8(dst))\
\
    "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\
    "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\
    "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\
    MOVNTQ(%%mm5, 16(dst))\
\
    "add $24, "#dst" \n\t" /* 8 pixels * 3 bytes written */\
\
    "add $8, "#index" \n\t" /* eight pixels per iteration */\
    "cmp "#dstw", "#index" \n\t"\
    " jb 1b \n\t" /* unsigned compare: loop while index < dstw */


855  855  
856  856 
#define WRITEBGR24MMX2(dst, dstw, index) \ 
857 
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ 

858 
"movq "MANGLE(M24A)", %%mm0 \n\t"\ 

859 
"movq "MANGLE(M24C)", %%mm7 \n\t"\ 

860 
"pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ 

861 
"pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ 
Also available in: Unified diff