Statistics
| Branch: | Revision:

ffmpeg / libavcodec / i386 / mpegvideo_mmx_template.c @ 2f349de2

History | View | Annotate | Download (5.53 KB)

/*
    Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

    
/*
 * Assembly-text helpers used by the inline asm in this file.  Each macro
 * expands to a string literal holding one or two instructions terminated by
 * " \n\t", so it can be dropped between the other template strings.
 *
 * The macros are #undef'd first; presumably this template is included more
 * than once with different HAVE_MMX2 settings (TODO confirm against the
 * including file).
 *
 * SPREADW(reg)  - replicate the low 16-bit word of MMX register `reg` into
 *                 all four words of `reg`.
 * PMAXW(a, b)   - b = per-word maximum of a and b.
 */
#undef SPREADW
#undef PMAXW
#ifdef HAVE_MMX2
/* MMX2 has dedicated instructions for both operations. */
#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
#else
/* Plain MMX: two self-interleaves turn w0,w1,.. into w0,w0,w0,w0. */
#define SPREADW(a) \
        "punpcklwd " #a ", " #a " \n\t"\
        "punpcklwd " #a ", " #a " \n\t"
/*
 * Plain MMX: b = (b -sat a) + a.  The unsigned saturating subtract yields 0
 * whenever b <= a, so the sum is max(a, b) for unsigned words.  (The MMX2
 * variant above is a signed maximum; the two agree for values 0..32767.)
 */
#define PMAXW(a,b) \
        "psubusw " #a ", " #b " \n\t"\
        "paddw " #a ", " #b " \n\t"
#endif
33

    
34
static int RENAME(dct_quantize)(MpegEncContext *s,
35
                            DCTELEM *block, int n,
36
                            int qscale)
37
{
38
    int i, level, last_non_zero_p1, q;
39
    const UINT16 *qmat;
40
    static __align8 INT16 temp_block[64];
41
    int minLevel, maxLevel;
42
    
43
    if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
44
        /* mpeg4 */
45
        minLevel= -2048;
46
        maxLevel= 2047;
47
    }else if(s->out_format==FMT_MPEG1){
48
        /* mpeg1 */
49
        minLevel= -255;
50
        maxLevel= 255;
51
    }else{
52
        /* h263 / msmpeg4 */
53
        minLevel= -128;
54
        maxLevel= 127;
55
    }
56

    
57
    av_fdct (block);
58
    
59
    if (s->mb_intra) {
60
        int dummy;
61
        if (n < 4)
62
            q = s->y_dc_scale;
63
        else
64
            q = s->c_dc_scale;
65
        
66
        /* note: block[0] is assumed to be positive */
67
#if 1
68
        asm volatile (
69
                "xorl %%edx, %%edx        \n\t"
70
                "mul %%ebx                \n\t"
71
                : "=d" (temp_block[0]), "=a"(dummy)
72
                : "a" (block[0] + (q >> 1)), "b" (inverse[q])
73
        );
74
#else
75
        asm volatile (
76
                "xorl %%edx, %%edx        \n\t"
77
                "divw %%bx                \n\t"
78
                "movzwl %%ax, %%eax        \n\t"
79
                : "=a" (temp_block[0])
80
                : "a" (block[0] + (q >> 1)), "b" (q)
81
                : "%edx"
82
        );
83
#endif
84
//        temp_block[0] = (block[0] + (q >> 1)) / q;
85
        i = 1;
86
        last_non_zero_p1 = 1;
87
        if (s->out_format == FMT_H263) {
88
            qmat = s->q_non_intra_matrix16;
89
        } else {
90
            qmat = s->q_intra_matrix16;
91
        }
92
        for(i=1;i<4;i++) {
93
            level = block[i] * qmat[i];
94
            level = level / (1 << (QMAT_SHIFT_MMX - 3));
95
            /* XXX: currently, this code is not optimal. the range should be:
96
               mpeg1: -255..255
97
               mpeg2: -2048..2047
98
               h263:  -128..127
99
               mpeg4: -2048..2047
100
            */
101
            if (level > maxLevel)
102
                level = maxLevel;
103
            else if (level < minLevel)
104
                level = minLevel;
105
            temp_block[i] = level;
106

    
107
            if(level) 
108
                if(last_non_zero_p1 < inv_zigzag_direct16[i]) last_non_zero_p1= inv_zigzag_direct16[i];
109
            block[i]=0;
110
        }
111
    } else {
112
        i = 0;
113
        last_non_zero_p1 = 0;
114
        qmat = s->q_non_intra_matrix16;
115
    }
116

    
117
    asm volatile( /* XXX: small rounding bug, but it shouldnt matter */
118
        "movd %3, %%mm3                        \n\t"
119
        SPREADW(%%mm3)
120
        "movd %4, %%mm4                        \n\t"
121
        SPREADW(%%mm4)
122
        "movd %5, %%mm5                        \n\t"
123
        SPREADW(%%mm5)
124
        "pxor %%mm7, %%mm7                \n\t"
125
        "movd %%eax, %%mm2                \n\t"
126
        SPREADW(%%mm2)
127
        "movl %6, %%eax                        \n\t"
128
        ".balign 16                        \n\t"
129
        "1:                                \n\t"
130
        "movq (%1, %%eax), %%mm0        \n\t"
131
        "movq (%2, %%eax), %%mm1        \n\t"
132
        "movq %%mm0, %%mm6                \n\t"
133
        "psraw $15, %%mm6                \n\t"
134
        "pmulhw %%mm0, %%mm1                \n\t"
135
        "psubsw %%mm6, %%mm1                \n\t"
136
#ifdef HAVE_MMX2
137
        "pminsw %%mm3, %%mm1                \n\t"
138
        "pmaxsw %%mm4, %%mm1                \n\t"
139
#else
140
        "paddsw %%mm3, %%mm1                \n\t"
141
        "psubusw %%mm4, %%mm1                \n\t"
142
        "paddsw %%mm5, %%mm1                \n\t"
143
#endif
144
        "movq %%mm1, (%8, %%eax)        \n\t"
145
        "pcmpeqw %%mm7, %%mm1                \n\t"
146
        "movq (%7, %%eax), %%mm0        \n\t"
147
        "movq %%mm7, (%1, %%eax)        \n\t"
148
        "pandn %%mm0, %%mm1                \n\t"
149
        PMAXW(%%mm1, %%mm2)
150
        "addl $8, %%eax                        \n\t"
151
        " js 1b                                \n\t"
152
        "movq %%mm2, %%mm0                \n\t"
153
        "psrlq $32, %%mm2                \n\t"
154
        PMAXW(%%mm0, %%mm2)
155
        "movq %%mm2, %%mm0                \n\t"
156
        "psrlq $16, %%mm2                \n\t"
157
        PMAXW(%%mm0, %%mm2)
158
        "movd %%mm2, %%eax                \n\t"
159
        "movzbl %%al, %%eax                \n\t"
160
        : "+a" (last_non_zero_p1)
161
        : "r" (block+64), "r" (qmat+64), 
162
#ifdef HAVE_MMX2
163
          "m" (maxLevel),          "m" (minLevel),                    "m" (0 /* dummy */), "g" (2*i - 128),
164
#else
165
          "m" (0x7FFF - maxLevel), "m" (0x7FFF -maxLevel + minLevel), "m" (minLevel),      "g" (2*i - 128),
166
#endif
167
          "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
168
    );
169
// last_non_zero_p1=64;       
170
    /* permute for IDCT */
171
    asm volatile(
172
        "movl %0, %%eax                        \n\t"
173
        "pushl %%ebp                        \n\t"
174
        "movl %%esp, " MANGLE(esp_temp) "\n\t"
175
        "1:                                \n\t"
176
        "movzbl (%1, %%eax), %%ebx        \n\t"
177
        "movzbl 1(%1, %%eax), %%ebp        \n\t"
178
        "movw (%2, %%ebx, 2), %%cx        \n\t"
179
        "movw (%2, %%ebp, 2), %%sp        \n\t"
180
        "movzbl " MANGLE(permutation) "(%%ebx), %%ebx\n\t"
181
        "movzbl " MANGLE(permutation) "(%%ebp), %%ebp\n\t"
182
        "movw %%cx, (%3, %%ebx, 2)        \n\t"
183
        "movw %%sp, (%3, %%ebp, 2)        \n\t"
184
        "addl $2, %%eax                        \n\t"
185
        " js 1b                                \n\t"
186
        "movl " MANGLE(esp_temp) ", %%esp\n\t"
187
        "popl %%ebp                        \n\t"
188
        : 
189
        : "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S" (temp_block), "D" (block)
190
        : "%eax", "%ebx", "%ecx"
191
        );
192
/*
193
    for(i=0; i<last_non_zero_p1; i++)
194
    {
195
       int j= zigzag_direct_noperm[i];
196
       block[block_permute_op(j)]= temp_block[j];
197
    }
198
*/
199
//block_permute(block);
200
    return last_non_zero_p1 - 1;
201
}