Revision badaf88e libavcodec/i386/mpegvideo_mmx.c

View differences:

libavcodec/i386/mpegvideo_mmx.c
17 17
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 18
 *
19 19
 * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
20
 * h263 dequantizer by Michael Niedermayer <michaelni@gmx.at>
20 21
 */
21 22

  
22 23
#include "../dsputil.h"
23 24
#include "../mpegvideo.h"
24 25

  
26
extern UINT8 zigzag_end[64];
27

  
25 28
#if 0
26 29

  
27 30
/* XXX: GL: I don't understand why this function needs optimization
......
69 72
static void dct_unquantize_h263_mmx(MpegEncContext *s,
70 73
                                  DCTELEM *block, int n, int qscale)
71 74
{
72
    int i, level, qmul, qadd;
73

  
75
    int i, level, qmul, qadd, nCoeffs;
76
    
74 77
    qmul = s->qscale << 1;
75 78
    qadd = (s->qscale - 1) | 1;
76 79

  
......
91 94
			block[i] = level;
92 95
		}
93 96
	}
97
	nCoeffs=64;
94 98
    } else {
95 99
        i = 0;
100
	nCoeffs= zigzag_end[ s->block_last_index[n] ];
96 101
    }
97

  
102
//printf("%d %d  ", qmul, qadd);
98 103
asm volatile(
99 104
		"movd %1, %%mm6			\n\t" //qmul
100 105
		"packssdw %%mm6, %%mm6		\n\t"
......
138 143
		"movq %%mm1, 8(%0, %3)		\n\t"
139 144

  
140 145
		"addl $16, %3			\n\t"
141
		"cmpl $128, %3			\n\t"
142
		"jb 1b				\n\t"
143
		::"r" (block), "g"(qmul), "g" (qadd), "r" (2*i)
146
		"js 1b				\n\t"
147
		::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(i-nCoeffs))
144 148
		: "memory"
145 149
	);
146 150
}
......
178 182
static void dct_unquantize_mpeg1_mmx(MpegEncContext *s,
179 183
                                     DCTELEM *block, int n, int qscale)
180 184
{
181
    int i, level;
185
    int i, level, nCoeffs;
182 186
    const UINT16 *quant_matrix;
187
    
188
    if(s->alternate_scan) nCoeffs= 64;
189
    else nCoeffs= nCoeffs= zigzag_end[ s->block_last_index[n] ];
190

  
183 191
    if (s->mb_intra) {
184 192
        if (n < 4) 
185 193
            block[0] = block[0] * s->y_dc_scale;
186 194
        else
187 195
            block[0] = block[0] * s->c_dc_scale;
188
        if (s->out_format == FMT_H263) {
196
        /* isn't used anymore (we have had an h263 unquantizer for some time)
197
	if (s->out_format == FMT_H263) {
189 198
            i = 1;
190 199
            goto unquant_even;
191
        }
200
        }*/
192 201
        /* XXX: only mpeg1 */
193 202
        quant_matrix = s->intra_matrix;
194 203
	i=1;
......
214 223
	"packssdw %%mm6, %%mm7\n\t" /* mm7 = qscale | qscale | qscale | qscale */
215 224
	"pxor	%%mm6, %%mm6\n\t"
216 225
	::"g"(qscale),"m"(mm_wone),"m"(mm_wabs):"memory");
217
        for(;i<64;i+=4) {
226
        for(;i<nCoeffs;i+=4) {
218 227
		__asm __volatile(
219 228
			"movq	%1, %%mm0\n\t"
220 229
			"movq	%%mm7, %%mm1\n\t"
......
258 267
	    }
259 268
	    i++;
260 269
	}
261

  
262 270
asm volatile(
263 271
		"pcmpeqw %%mm7, %%mm7		\n\t"
264 272
		"psrlw $15, %%mm7		\n\t"
......
307 315
		"movq %%mm5, 8(%0, %3)		\n\t"
308 316

  
309 317
		"addl $16, %3			\n\t"
310
		"cmpl $128, %3			\n\t"
311
		"jb 1b				\n\t"
312
		::"r" (block), "r"(quant_matrix), "g" (qscale), "r" (2*i)
318
		"js 1b				\n\t"
319
		::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (2*(i-nCoeffs))
313 320
		: "memory"
314 321
	);
315 322
    }

Also available in: Unified diff