Revision 8a322796 libswscale/yuv2rgb_template.c

View differences:

libswscale/yuv2rgb_template.c
1 1
/*
2
 * yuv2rgb_mmx.c, Software YUV to RGB converter with Intel MMX "technology"
2
 * yuv2rgb_mmx.c, software YUV to RGB converter with Intel MMX "technology"
3 3
 *
4 4
 * Copyright (C) 2000, Silicon Integrated System Corp.
5 5
 *
......
31 31
#undef SFENCE
32 32

  
33 33
#ifdef HAVE_3DNOW
34
/* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */
34
/* On K6 femms is faster than emms. On K7 femms is directly mapped on emms. */
35 35
#define EMMS     "femms"
36 36
#else
37 37
#define EMMS     "emms"
......
147 147
        g6Dither= ff_dither4[y&1];
148 148
        g5Dither= ff_dither8[y&1];
149 149
        r5Dither= ff_dither8[(y+1)&1];
150
        /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
151
           pixels in each iteration */
150
        /* This MMX assembly code deals with a SINGLE scan line at a time,
151
         * it converts 8 pixels in each iteration. */
152 152
        asm volatile (
153 153
        /* load data for start of next scan line */
154 154
        "movd    (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
......
156 156
        "movq (%5, %0, 2), %%mm6;" /* Load 8  Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
157 157
        //".balign 16     \n\t"
158 158
        "1:             \n\t"
159
        /* no speed diference on my p3@500 with prefetch,
160
         * if it is faster for anyone with -benchmark then tell me
159
        /* No speed difference on my p3@500 with prefetch,
160
         * if it is faster for anyone with -benchmark then tell me.
161 161
        PREFETCH" 64(%0) \n\t"
162 162
        PREFETCH" 64(%1) \n\t"
163 163
        PREFETCH" 64(%2) \n\t"
......
180 180
        "movq %%mm0, %%mm5;" /* Copy B7-B0 */
181 181
        "movq %%mm2, %%mm7;" /* Copy G7-G0 */
182 182

  
183
        /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
183
        /* convert RGB24 plane to RGB16 pack for pixel 0-3 */
184 184
        "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
185 185
        "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
186 186

  
......
190 190
        "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
191 191
        MOVNTQ "      %%mm0, (%1);" /* store pixel 0-3 */
192 192

  
193
        /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
193
        /* convert RGB24 plane to RGB16 pack for pixel 4-7 */
194 194
        "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0 */
195 195
        "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
196 196

  
......
242 242
        g6Dither= ff_dither4[y&1];
243 243
        g5Dither= ff_dither8[y&1];
244 244
        r5Dither= ff_dither8[(y+1)&1];
245
        /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
246
           pixels in each iteration */
245
        /* This MMX assembly code deals with a SINGLE scan line at a time,
246
         * it converts 8 pixels in each iteration. */
247 247
        asm volatile (
248 248
        /* load data for start of next scan line */
249 249
        "movd    (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
......
271 271
        "movq %%mm0, %%mm5;" /* Copy B7-B0 */
272 272
        "movq %%mm2, %%mm7;" /* Copy G7-G0 */
273 273

  
274
        /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
274
        /* convert RGB24 plane to RGB16 pack for pixel 0-3 */
275 275
        "punpcklbw %%mm4, %%mm2;" /* 0_0_0_0 0_0_0_0 g7g6g5g4 g3_0_0_0 */
276 276
        "punpcklbw %%mm1, %%mm0;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
277 277

  
......
281 281
        "movq 8 (%5, %0, 2), %%mm6;" /* Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0 */
282 282
        MOVNTQ "      %%mm0, (%1);"  /* store pixel 0-3 */
283 283

  
284
        /* convert rgb24 plane to rgb16 pack for pixel 0-3 */
284
        /* convert RGB24 plane to RGB16 pack for pixel 4-7 */
285 285
        "punpckhbw %%mm4, %%mm7;" /* 0_0_0_0 0_0_0_0 0_g7g6g5 g4g3_0_0 */
286 286
        "punpckhbw %%mm1, %%mm5;" /* r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3 */
287 287

  
......
326 326
        uint8_t *pv = src[2] + (y>>1)*srcStride[2];
327 327
        long index= -h_size/2;
328 328

  
329
        /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
330
           pixels in each iteration */
329
        /* This MMX assembly code deals with a SINGLE scan line at a time,
330
         * it converts 8 pixels in each iteration. */
331 331
        asm volatile (
332 332
        /* load data for start of next scan line */
333 333
        "movd    (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */
......
472 472
        uint8_t *pv = src[2] + (y>>1)*srcStride[2];
473 473
        long index= -h_size/2;
474 474

  
475
        /* this mmx assembly code deals with SINGLE scan line at a time, it convert 8
476
           pixels in each iteration */
475
        /* This MMX assembly code deals with a SINGLE scan line at a time,
476
         * it converts 8 pixels in each iteration. */
477 477
        asm volatile (
478 478
        /* load data for start of next scan line */
479 479
        "movd    (%2, %0), %%mm0;" /* Load 4 Cb 00 00 00 00 u3 u2 u1 u0 */

Also available in: Unified diff