/* Source: ffmpeg / libavfilter / libmpcodecs / vf_ilpack.c @ e4852fb3 */
/*
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
18

    
19
#include <stdio.h>
20
#include <stdlib.h>
21
#include <string.h>
22
#include <inttypes.h>
23

    
24
#include "config.h"
25
#include "mp_msg.h"
26
#include "cpudetect.h"
27

    
28
#include "img_format.h"
29
#include "mp_image.h"
30
#include "vf.h"
31

    
32
/**
 * Signature shared by the row packers.
 *
 * A packer interleaves one row of planar samples into dst in the byte
 * order Y U Y V.
 *
 * @param dst  output row (two bytes per luma sample)
 * @param y    luma row
 * @param u    chroma row the packer starts from
 * @param v    chroma row the packer starts from
 * @param w    row width in luma samples
 * @param us   signed offset the interpolating packers double to reach the
 *             second u row they blend with (0 for nearest neighbour)
 * @param vs   same as us, for the v plane
 */
typedef void (pack_func_t)(unsigned char *dst, unsigned char *y,
    unsigned char *u, unsigned char *v, int w, int us, int vs);
34

    
35
struct vf_priv_s {
36
    int mode;
37
    pack_func_t *pack[2];
38
};
39

    
40
/**
 * Nearest-neighbour row packer (plain C).
 *
 * Writes w/2 groups of four bytes to dst, each group being
 * y[2i], u[i], y[2i+1], v[i].  An odd trailing luma sample is ignored.
 *
 * @param dst  output row, at least 4*(w/2) bytes
 * @param y    luma row, at least 2*(w/2) samples
 * @param u    chroma row, at least w/2 samples
 * @param v    chroma row, at least w/2 samples
 * @param w    row width in luma samples; values below 2 write nothing
 */
static void pack_nn_C(unsigned char *dst, unsigned char *y,
    unsigned char *u, unsigned char *v, int w)
{
    int i;

    /* Counting up to w/2 (instead of down to zero) makes a negative width
     * a no-op rather than a runaway loop. */
    for (i = 0; i < w / 2; i++) {
        dst[4 * i + 0] = y[2 * i];
        dst[4 * i + 1] = u[i];
        dst[4 * i + 2] = y[2 * i + 1];
        dst[4 * i + 3] = v[i];
    }
}
51

    
52
/**
 * Linear-interpolating row packer with 7:1 weights (plain C).
 *
 * Luma is copied unchanged.  Each chroma output is
 * (far + 7 * near) >> 3, where near is u[i] (resp. v[i]) and far is the
 * sample 2*us (resp. 2*vs) bytes away from it.
 *
 * @param dst  output row, at least 4*(w/2) bytes
 * @param y    luma row
 * @param u    near chroma row; u[i + 2*us] must also be readable
 * @param v    near chroma row; v[i + 2*vs] must also be readable
 * @param w    row width in luma samples; values below 2 write nothing
 * @param us   signed half-distance to the far u row
 * @param vs   signed half-distance to the far v row
 */
static void pack_li_0_C(unsigned char *dst, unsigned char *y,
    unsigned char *u, unsigned char *v, int w, int us, int vs)
{
    int i;

    /* Bounded count-up loop: a negative width must not spin. */
    for (i = 0; i < w / 2; i++) {
        dst[4 * i + 0] = y[2 * i];
        dst[4 * i + 1] = (u[i + us + us] + 7 * u[i]) >> 3;
        dst[4 * i + 2] = y[2 * i + 1];
        dst[4 * i + 3] = (v[i + vs + vs] + 7 * v[i]) >> 3;
    }
}
64

    
65
/**
 * Linear-interpolating row packer with 5:3 weights (plain C).
 *
 * Luma is copied unchanged.  Each chroma output is
 * (3 * far + 5 * near) >> 3, where near is u[i] (resp. v[i]) and far is
 * the sample 2*us (resp. 2*vs) bytes away from it.
 *
 * @param dst  output row, at least 4*(w/2) bytes
 * @param y    luma row
 * @param u    near chroma row; u[i + 2*us] must also be readable
 * @param v    near chroma row; v[i + 2*vs] must also be readable
 * @param w    row width in luma samples; values below 2 write nothing
 * @param us   signed half-distance to the far u row
 * @param vs   signed half-distance to the far v row
 */
static void pack_li_1_C(unsigned char *dst, unsigned char *y,
    unsigned char *u, unsigned char *v, int w, int us, int vs)
{
    int i;

    /* Bounded count-up loop: a negative width must not spin. */
    for (i = 0; i < w / 2; i++) {
        dst[4 * i + 0] = y[2 * i];
        dst[4 * i + 1] = (3 * u[i + us + us] + 5 * u[i]) >> 3;
        dst[4 * i + 2] = y[2 * i + 1];
        dst[4 * i + 3] = (3 * v[i + vs + vs] + 5 * v[i]) >> 3;
    }
}
77

    
78
#if HAVE_MMX
79
static void pack_nn_MMX(unsigned char *dst, unsigned char *y,
80
    unsigned char *u, unsigned char *v, int w)
81
{
82
    __asm__ volatile (""
83
        ASMALIGN(4)
84
        "1: \n\t"
85
        "movq (%0), %%mm1 \n\t"
86
        "movq (%0), %%mm2 \n\t"
87
        "movq (%1), %%mm4 \n\t"
88
        "movq (%2), %%mm6 \n\t"
89
        "punpcklbw %%mm6, %%mm4 \n\t"
90
        "punpcklbw %%mm4, %%mm1 \n\t"
91
        "punpckhbw %%mm4, %%mm2 \n\t"
92

    
93
        "add $8, %0 \n\t"
94
        "add $4, %1 \n\t"
95
        "add $4, %2 \n\t"
96
        "movq %%mm1, (%3) \n\t"
97
        "movq %%mm2, 8(%3) \n\t"
98
        "add $16, %3 \n\t"
99
        "decl %4 \n\t"
100
        "jnz 1b \n\t"
101
        "emms \n\t"
102
        :
103
        : "r" (y), "r" (u), "r" (v), "r" (dst), "r" (w/8)
104
        : "memory"
105
        );
106
    pack_nn_C(dst, y, u, v, (w&7));
107
}
108

    
109
#if HAVE_EBX_AVAILABLE
110
static void pack_li_0_MMX(unsigned char *dst, unsigned char *y,
111
    unsigned char *u, unsigned char *v, int w, int us, int vs)
112
{
113
    __asm__ volatile (""
114
        "push %%"REG_BP" \n\t"
115
#if ARCH_X86_64
116
        "mov %6, %%"REG_BP" \n\t"
117
#else
118
        "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
119
        "movl (%%"REG_d"), %%"REG_d" \n\t"
120
#endif
121
        "pxor %%mm0, %%mm0 \n\t"
122

    
123
        ASMALIGN(4)
124
        ".Lli0: \n\t"
125
        "movq (%%"REG_S"), %%mm1 \n\t"
126
        "movq (%%"REG_S"), %%mm2 \n\t"
127

    
128
        "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
129
        "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
130
        "punpcklbw %%mm0, %%mm4 \n\t"
131
        "punpcklbw %%mm0, %%mm6 \n\t"
132
        "movq (%%"REG_a"), %%mm3 \n\t"
133
        "movq (%%"REG_b"), %%mm5 \n\t"
134
        "punpcklbw %%mm0, %%mm3 \n\t"
135
        "punpcklbw %%mm0, %%mm5 \n\t"
136
        "paddw %%mm3, %%mm4 \n\t"
137
        "paddw %%mm5, %%mm6 \n\t"
138
        "paddw %%mm3, %%mm4 \n\t"
139
        "paddw %%mm5, %%mm6 \n\t"
140
        "paddw %%mm3, %%mm4 \n\t"
141
        "paddw %%mm5, %%mm6 \n\t"
142
        "paddw %%mm3, %%mm4 \n\t"
143
        "paddw %%mm5, %%mm6 \n\t"
144
        "paddw %%mm3, %%mm4 \n\t"
145
        "paddw %%mm5, %%mm6 \n\t"
146
        "paddw %%mm3, %%mm4 \n\t"
147
        "paddw %%mm5, %%mm6 \n\t"
148
        "paddw %%mm3, %%mm4 \n\t"
149
        "paddw %%mm5, %%mm6 \n\t"
150
        "psrlw $3, %%mm4 \n\t"
151
        "psrlw $3, %%mm6 \n\t"
152
        "packuswb %%mm4, %%mm4 \n\t"
153
        "packuswb %%mm6, %%mm6 \n\t"
154
        "punpcklbw %%mm6, %%mm4 \n\t"
155
        "punpcklbw %%mm4, %%mm1 \n\t"
156
        "punpckhbw %%mm4, %%mm2 \n\t"
157

    
158
        "movq %%mm1, (%%"REG_D") \n\t"
159
        "movq %%mm2, 8(%%"REG_D") \n\t"
160

    
161
        "movq 8(%%"REG_S"), %%mm1 \n\t"
162
        "movq 8(%%"REG_S"), %%mm2 \n\t"
163

    
164
        "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
165
        "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
166
        "punpckhbw %%mm0, %%mm4 \n\t"
167
        "punpckhbw %%mm0, %%mm6 \n\t"
168
        "movq (%%"REG_a"), %%mm3 \n\t"
169
        "movq (%%"REG_b"), %%mm5 \n\t"
170
        "punpckhbw %%mm0, %%mm3 \n\t"
171
        "punpckhbw %%mm0, %%mm5 \n\t"
172
        "paddw %%mm3, %%mm4 \n\t"
173
        "paddw %%mm5, %%mm6 \n\t"
174
        "paddw %%mm3, %%mm4 \n\t"
175
        "paddw %%mm5, %%mm6 \n\t"
176
        "paddw %%mm3, %%mm4 \n\t"
177
        "paddw %%mm5, %%mm6 \n\t"
178
        "paddw %%mm3, %%mm4 \n\t"
179
        "paddw %%mm5, %%mm6 \n\t"
180
        "paddw %%mm3, %%mm4 \n\t"
181
        "paddw %%mm5, %%mm6 \n\t"
182
        "paddw %%mm3, %%mm4 \n\t"
183
        "paddw %%mm5, %%mm6 \n\t"
184
        "paddw %%mm3, %%mm4 \n\t"
185
        "paddw %%mm5, %%mm6 \n\t"
186
        "psrlw $3, %%mm4 \n\t"
187
        "psrlw $3, %%mm6 \n\t"
188
        "packuswb %%mm4, %%mm4 \n\t"
189
        "packuswb %%mm6, %%mm6 \n\t"
190
        "punpcklbw %%mm6, %%mm4 \n\t"
191
        "punpcklbw %%mm4, %%mm1 \n\t"
192
        "punpckhbw %%mm4, %%mm2 \n\t"
193

    
194
        "add $16, %%"REG_S" \n\t"
195
        "add $8, %%"REG_a" \n\t"
196
        "add $8, %%"REG_b" \n\t"
197

    
198
        "movq %%mm1, 16(%%"REG_D") \n\t"
199
        "movq %%mm2, 24(%%"REG_D") \n\t"
200
        "add $32, %%"REG_D" \n\t"
201

    
202
        "decl %%ecx \n\t"
203
        "jnz .Lli0 \n\t"
204
        "emms \n\t"
205
        "pop %%"REG_BP" \n\t"
206
        :
207
        : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
208
#if ARCH_X86_64
209
        "d" ((x86_reg)us), "r" ((x86_reg)vs)
210
#else
211
        "d" (&us)
212
#endif
213
        : "memory"
214
        );
215
    pack_li_0_C(dst, y, u, v, (w&15), us, vs);
216
}
217

    
218
static void pack_li_1_MMX(unsigned char *dst, unsigned char *y,
219
    unsigned char *u, unsigned char *v, int w, int us, int vs)
220
{
221
    __asm__ volatile (""
222
        "push %%"REG_BP" \n\t"
223
#if ARCH_X86_64
224
        "mov %6, %%"REG_BP" \n\t"
225
#else
226
        "movl 4(%%"REG_d"), %%"REG_BP" \n\t"
227
        "movl (%%"REG_d"), %%"REG_d" \n\t"
228
#endif
229
        "pxor %%mm0, %%mm0 \n\t"
230

    
231
        ASMALIGN(4)
232
        ".Lli1: \n\t"
233
        "movq (%%"REG_S"), %%mm1 \n\t"
234
        "movq (%%"REG_S"), %%mm2 \n\t"
235

    
236
        "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
237
        "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
238
        "punpcklbw %%mm0, %%mm4 \n\t"
239
        "punpcklbw %%mm0, %%mm6 \n\t"
240
        "movq (%%"REG_a"), %%mm3 \n\t"
241
        "movq (%%"REG_b"), %%mm5 \n\t"
242
        "punpcklbw %%mm0, %%mm3 \n\t"
243
        "punpcklbw %%mm0, %%mm5 \n\t"
244
        "movq %%mm4, %%mm7 \n\t"
245
        "paddw %%mm4, %%mm4 \n\t"
246
        "paddw %%mm7, %%mm4 \n\t"
247
        "movq %%mm6, %%mm7 \n\t"
248
        "paddw %%mm6, %%mm6 \n\t"
249
        "paddw %%mm7, %%mm6 \n\t"
250
        "paddw %%mm3, %%mm4 \n\t"
251
        "paddw %%mm5, %%mm6 \n\t"
252
        "paddw %%mm3, %%mm4 \n\t"
253
        "paddw %%mm5, %%mm6 \n\t"
254
        "paddw %%mm3, %%mm4 \n\t"
255
        "paddw %%mm5, %%mm6 \n\t"
256
        "paddw %%mm3, %%mm4 \n\t"
257
        "paddw %%mm5, %%mm6 \n\t"
258
        "paddw %%mm3, %%mm4 \n\t"
259
        "paddw %%mm5, %%mm6 \n\t"
260
        "psrlw $3, %%mm4 \n\t"
261
        "psrlw $3, %%mm6 \n\t"
262
        "packuswb %%mm4, %%mm4 \n\t"
263
        "packuswb %%mm6, %%mm6 \n\t"
264
        "punpcklbw %%mm6, %%mm4 \n\t"
265
        "punpcklbw %%mm4, %%mm1 \n\t"
266
        "punpckhbw %%mm4, %%mm2 \n\t"
267

    
268
        "movq %%mm1, (%%"REG_D") \n\t"
269
        "movq %%mm2, 8(%%"REG_D") \n\t"
270

    
271
        "movq 8(%%"REG_S"), %%mm1 \n\t"
272
        "movq 8(%%"REG_S"), %%mm2 \n\t"
273

    
274
        "movq (%%"REG_a",%%"REG_d",2), %%mm4 \n\t"
275
        "movq (%%"REG_b",%%"REG_BP",2), %%mm6 \n\t"
276
        "punpckhbw %%mm0, %%mm4 \n\t"
277
        "punpckhbw %%mm0, %%mm6 \n\t"
278
        "movq (%%"REG_a"), %%mm3 \n\t"
279
        "movq (%%"REG_b"), %%mm5 \n\t"
280
        "punpckhbw %%mm0, %%mm3 \n\t"
281
        "punpckhbw %%mm0, %%mm5 \n\t"
282
        "movq %%mm4, %%mm7 \n\t"
283
        "paddw %%mm4, %%mm4 \n\t"
284
        "paddw %%mm7, %%mm4 \n\t"
285
        "movq %%mm6, %%mm7 \n\t"
286
        "paddw %%mm6, %%mm6 \n\t"
287
        "paddw %%mm7, %%mm6 \n\t"
288
        "paddw %%mm3, %%mm4 \n\t"
289
        "paddw %%mm5, %%mm6 \n\t"
290
        "paddw %%mm3, %%mm4 \n\t"
291
        "paddw %%mm5, %%mm6 \n\t"
292
        "paddw %%mm3, %%mm4 \n\t"
293
        "paddw %%mm5, %%mm6 \n\t"
294
        "paddw %%mm3, %%mm4 \n\t"
295
        "paddw %%mm5, %%mm6 \n\t"
296
        "paddw %%mm3, %%mm4 \n\t"
297
        "paddw %%mm5, %%mm6 \n\t"
298
        "psrlw $3, %%mm4 \n\t"
299
        "psrlw $3, %%mm6 \n\t"
300
        "packuswb %%mm4, %%mm4 \n\t"
301
        "packuswb %%mm6, %%mm6 \n\t"
302
        "punpcklbw %%mm6, %%mm4 \n\t"
303
        "punpcklbw %%mm4, %%mm1 \n\t"
304
        "punpckhbw %%mm4, %%mm2 \n\t"
305

    
306
        "add $16, %%"REG_S" \n\t"
307
        "add $8, %%"REG_a" \n\t"
308
        "add $8, %%"REG_b" \n\t"
309

    
310
        "movq %%mm1, 16(%%"REG_D") \n\t"
311
        "movq %%mm2, 24(%%"REG_D") \n\t"
312
        "add $32, %%"REG_D" \n\t"
313

    
314
        "decl %%ecx \n\t"
315
        "jnz .Lli1 \n\t"
316
        "emms \n\t"
317
        "pop %%"REG_BP" \n\t"
318
        :
319
        : "S" (y), "D" (dst), "a" (u), "b" (v), "c" (w/16),
320
#if ARCH_X86_64
321
        "d" ((x86_reg)us), "r" ((x86_reg)vs)
322
#else
323
        "d" (&us)
324
#endif
325
        : "memory"
326
        );
327
    pack_li_1_C(dst, y, u, v, (w&15), us, vs);
328
}
329
#endif /* HAVE_EBX_AVAILABLE */
330
#endif
331

    
332
/* Row packers chosen in vf_open (plain C, or MMX when the CPU has it). */
static pack_func_t *pack_nn;    /* nearest neighbour */
static pack_func_t *pack_li_0;  /* linear, (far + 7*near) >> 3 */
static pack_func_t *pack_li_1;  /* linear, (3*far + 5*near) >> 3 */
335

    
336
/**
 * Convert one planar frame to packed YUYV, row by row.
 *
 * The first two and the last two rows are packed with pack_nn (no chroma
 * interpolation).  Every row in between is packed with pack[0] or pack[1],
 * which blend the current chroma row with the chroma row two rows above
 * or below it.
 *
 * @param dst        first byte of the packed output
 * @param src        the y, u and v planes
 * @param dststride  bytes between output rows
 * @param srcstride  bytes between rows of the y, u and v planes
 * @param w          width in luma samples
 * @param h          height in luma rows
 * @param pack       the two packers used for interior rows
 */
static void ilpack(unsigned char *dst, unsigned char *src[3],
    int dststride, int srcstride[3], int w, int h, pack_func_t *pack[2])
{
    unsigned char *y = src[0];
    unsigned char *u = src[1];
    unsigned char *v = src[2];
    const int ys = srcstride[0];
    const int us = srcstride[1];
    const int vs = srcstride[2];
    int i;

    if (h < 4) {
        /* The general path below always emits four border rows, which
         * would run past a frame shorter than that.  Pack only the rows
         * that exist, alternating between the first two chroma rows.
         * Assumes the chroma planes hold (h + 1) / 2 rows — TODO confirm. */
        const int chroma_rows = (h + 1) / 2;

        for (i = 0; i < h; i++) {
            const int c = (i & 1) < chroma_rows ? (i & 1) : 0;

            pack_nn(dst, y, u + c * us, v + c * vs, w, 0, 0);
            y += ys;
            dst += dststride;
        }
        return;
    }

    /* top border: rows 0 and 1 use chroma rows 0 and 1 unchanged */
    pack_nn(dst, y, u, v, w, 0, 0);
    y += ys;
    dst += dststride;
    pack_nn(dst, y, u + us, v + vs, w, 0, 0);
    y += ys;
    dst += dststride;

    for (i = 2; i < h - 2; i++) {
        /* direction of the second chroma row: forward when bit 1 of the
         * row index is set, backward otherwise */
        const int dir = (i & 2) ? 1 : -1;
        /* which of the two packers (weight sets) this row uses */
        const int sel = (i & 1) ^ ((i & 2) >> 1);

        pack[sel](dst, y, u, v, w, us * dir, vs * dir);

        y += ys;
        dst += dststride;
        /* chroma advances one row per luma row, except that it steps back
         * once in every group of four rows */
        if ((i & 3) == 1) {
            u -= us;
            v -= vs;
        } else {
            u += us;
            v += vs;
        }
    }

    /* bottom border: last two rows, again without interpolation */
    pack_nn(dst, y, u, v, w, 0, 0);
    y += ys;
    dst += dststride;
    u += us;
    v += vs;
    pack_nn(dst, y, u, v, w, 0, 0);
}
370

    
371

    
372
/**
 * Filter one frame: request a YUY2 output image from the next filter,
 * pack the planar input into it and pass it down the chain.
 *
 * @return the result of vf_next_put_image, or 0 when no output image
 *         could be obtained
 */
static int put_image(struct vf_instance *vf, mp_image_t *mpi, double pts)
{
    mp_image_t *dmpi;

    // hope we'll get DR buffer:
    dmpi = vf_get_image(vf->next, IMGFMT_YUY2,
                        MP_IMGTYPE_TEMP, MP_IMGFLAG_ACCEPT_STRIDE,
                        mpi->w, mpi->h);
    /* NOTE(review): 0 is assumed to mean "no frame produced" to the
     * caller — confirm against the put_image contract in vf.h. */
    if (!dmpi)
        return 0;

    ilpack(dmpi->planes[0], mpi->planes, dmpi->stride[0], mpi->stride,
           mpi->w, mpi->h, vf->priv->pack);

    return vf_next_put_image(vf, dmpi, pts);
}
385

    
386
static int config(struct vf_instance *vf,
387
          int width, int height, int d_width, int d_height,
388
          unsigned int flags, unsigned int outfmt)
389
{
390
    /* FIXME - also support UYVY output? */
391
    return vf_next_config(vf, width, height, d_width, d_height, flags, IMGFMT_YUY2);
392
}
393

    
394

    
395
static int query_format(struct vf_instance *vf, unsigned int fmt)
396
{
397
    /* FIXME - really any YUV 4:2:0 input format should work */
398
    switch (fmt) {
399
    case IMGFMT_YV12:
400
    case IMGFMT_IYUV:
401
    case IMGFMT_I420:
402
        return vf_next_query_format(vf,IMGFMT_YUY2);
403
    }
404
    return 0;
405
}
406

    
407
/**
 * Set up a filter instance.
 *
 * Installs the callbacks, allocates the private state, reads the optional
 * integer mode from args (default 1), picks C or MMX row packers and
 * stores the pair matching the mode in the private state.
 *
 * @param vf    instance to initialise
 * @param args  optional argument string; a leading integer selects the mode
 * @return 1 on success, 0 when the private state cannot be allocated
 *         (NOTE(review): 0 is assumed to be the failure value expected by
 *         the caller — confirm against vf.h)
 */
static int vf_open(vf_instance_t *vf, char *args)
{
    vf->config = config;
    vf->query_format = query_format;
    vf->put_image = put_image;

    vf->priv = calloc(1, sizeof(struct vf_priv_s));
    if (!vf->priv)
        return 0;

    vf->priv->mode = 1;
    /* a string that does not start with a number leaves the default */
    if (args)
        sscanf(args, "%d", &vf->priv->mode);

    /* pack_nn_C and pack_nn_MMX take five parameters but are called
     * through the seven-parameter pack_func_t, hence the casts.
     * NOTE(review): this relies on the calling convention tolerating the
     * two extra arguments. */
    pack_nn = (pack_func_t *)pack_nn_C;
    pack_li_0 = pack_li_0_C;
    pack_li_1 = pack_li_1_C;
#if HAVE_MMX
    if (gCpuCaps.hasMMX) {
        pack_nn = (pack_func_t *)pack_nn_MMX;
#if HAVE_EBX_AVAILABLE
        pack_li_0 = pack_li_0_MMX;
        pack_li_1 = pack_li_1_MMX;
#endif
    }
#endif

    switch (vf->priv->mode) {
    case 0:
        vf->priv->pack[0] = vf->priv->pack[1] = pack_nn;
        break;
    default:
        mp_msg(MSGT_VFILTER, MSGL_WARN,
            "ilpack: unknown mode %d (fallback to linear)\n",
            vf->priv->mode);
        /* fallthrough: unknown modes behave like mode 1 */
    case 1:
        vf->priv->pack[0] = pack_li_0;
        vf->priv->pack[1] = pack_li_1;
        break;
    }

    return 1;
}
445

    
446
const vf_info_t vf_info_ilpack = {
447
    "4:2:0 planar -> 4:2:2 packed reinterlacer",
448
    "ilpack",
449
    "Richard Felker",
450
    "",
451
    vf_open,
452
    NULL
453
};