Revision 2f349de2

View differences:

libavcodec/dsputil.c
49 49
    53, 60, 61, 54, 47, 55, 62, 63
50 50
};
51 51

  
52
/* non-permuted inverse of zigzag_direct, plus 1, for the MMX quantizer */
53
UINT16 __align8 inv_zigzag_direct16[64];
54

  
55
/* non-permuted zigzag_direct for the MMX quantizer */
56
UINT8 zigzag_direct_noperm[64];
57

  
52 58
UINT8 ff_alternate_horizontal_scan[64] = {
53 59
    0,  1,  2,  3,  8,  9, 16, 17, 
54 60
    10, 11,  4,  5,  6,  7, 15, 14,
......
83 89
	0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
84 90
};
85 91

  
92
/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
93
UINT32 inverse[256]={
94
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
95
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
96
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
97
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
98
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
99
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
100
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
101
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
102
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
103
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
104
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
105
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
106
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
107
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
108
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
109
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
110
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
111
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
112
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
113
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
114
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
115
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
116
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
117
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
118
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
119
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
120
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
121
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
122
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
123
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
124
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
125
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
126
};
127

  
86 128
/* used to skip zeros at the end */
87 129
UINT8 zigzag_end[64];
88 130

  
......
515 557
    else
516 558
        for(i=0; i<64; i++) permutation[i]=i;
517 559

  
560
    for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1;
561
    for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i];
562
    
518 563
    if (use_permuted_idct) {
519 564
        /* permute for IDCT */
520 565
        for(i=0;i<64;i++) {
libavcodec/i386/mpegvideo_mmx.c
22 22

  
23 23
#include "../dsputil.h"
24 24
#include "../mpegvideo.h"
25
#include "../avcodec.h"
26
#include "../mangle.h"
25 27

  
26 28
extern UINT8 zigzag_end[64];
27 29
extern void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w);
30
extern int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale);
31

  
32
extern UINT8 zigzag_direct_noperm[64];
33
extern UINT16 inv_zigzag_direct16[64];
34
extern UINT32 inverse[256];
28 35

  
29 36
#if 0
30 37

  
......
252 259
        }
253 260
    } else {
254 261
        i = 0;
255
    unquant_even:
262
//    unquant_even:
256 263
        quant_matrix = s->non_intra_matrix;
257 264
	/* Align on 4 elements boundary */
258 265
	while(i&7)
......
411 418
    }
412 419
}
413 420

  
421
static volatile int esp_temp;
422

  
423
/* Dummy function whose only action is to increment esp_temp from C code.
   Presumably this exists so the compiler does not warn that the static
   esp_temp is unused, since its other references are inside inline asm
   strings (via MANGLE(esp_temp)) -- NOTE(review): confirm. */
void unused_var_warning_killer(){
424
	esp_temp++;
425
}
426

  
427
#undef HAVE_MMX2
428
#define RENAME(a) a ## _MMX
429
#include "mpegvideo_mmx_template.c"
430

  
431
#define HAVE_MMX2
432
#undef RENAME
433
#define RENAME(a) a ## _MMX2
434
#include "mpegvideo_mmx_template.c"
414 435

  
415 436
void MPV_common_init_mmx(MpegEncContext *s)
416 437
{
......
421 442
        	s->dct_unquantize = dct_unquantize_mpeg1_mmx;
422 443
	
423 444
	draw_edges = draw_edges_mmx;
445

  
446
	if(mm_flags & MM_MMXEXT){
447
	        dct_quantize= dct_quantize_MMX2;
448
	}else{
449
		dct_quantize= dct_quantize_MMX;
450
	}
424 451
    }
425 452
}
libavcodec/i386/mpegvideo_mmx_template.c
1
/*
2
    Copyright (C) 2002 Michael Niedermayer <michaelni@gmx.at>
3

  
4
    This program is free software; you can redistribute it and/or modify
5
    it under the terms of the GNU General Public License as published by
6
    the Free Software Foundation; either version 2 of the License, or
7
    (at your option) any later version.
8

  
9
    This program is distributed in the hope that it will be useful,
10
    but WITHOUT ANY WARRANTY; without even the implied warranty of
11
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12
    GNU General Public License for more details.
13

  
14
    You should have received a copy of the GNU General Public License
15
    along with this program; if not, write to the Free Software
16
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
*/
18

  
19
#undef SPREADW
20
#undef PMAXW
21
#ifdef HAVE_MMX2
22
#define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
23
#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
24

  
25
#else
26
#define SPREADW(a) \
27
	"punpcklwd " #a ", " #a " \n\t"\
28
	"punpcklwd " #a ", " #a " \n\t"
29
#define PMAXW(a,b) \
30
	"psubusw " #a ", " #b " \n\t"\
31
	"paddw " #a ", " #b " \n\t"
32
#endif
33

  
34
static int RENAME(dct_quantize)(MpegEncContext *s,
35
                            DCTELEM *block, int n,
36
                            int qscale)
37
{
38
    int i, level, last_non_zero_p1, q;
39
    const UINT16 *qmat;
40
    static __align8 INT16 temp_block[64];
41
    int minLevel, maxLevel;
42
    
43
    if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
44
	/* mpeg4 */
45
        minLevel= -2048;
46
	maxLevel= 2047;
47
    }else if(s->out_format==FMT_MPEG1){
48
	/* mpeg1 */
49
        minLevel= -255;
50
	maxLevel= 255;
51
    }else{
52
	/* h263 / msmpeg4 */
53
        minLevel= -128;
54
	maxLevel= 127;
55
    }
56

  
57
    av_fdct (block);
58
    
59
    if (s->mb_intra) {
60
        int dummy;
61
        if (n < 4)
62
            q = s->y_dc_scale;
63
        else
64
            q = s->c_dc_scale;
65
        
66
        /* note: block[0] is assumed to be positive */
67
#if 1
68
	asm volatile (
69
		"xorl %%edx, %%edx	\n\t"
70
		"mul %%ebx		\n\t"
71
		: "=d" (temp_block[0]), "=a"(dummy)
72
		: "a" (block[0] + (q >> 1)), "b" (inverse[q])
73
	);
74
#else
75
	asm volatile (
76
		"xorl %%edx, %%edx	\n\t"
77
		"divw %%bx		\n\t"
78
		"movzwl %%ax, %%eax	\n\t"
79
		: "=a" (temp_block[0])
80
		: "a" (block[0] + (q >> 1)), "b" (q)
81
		: "%edx"
82
	);
83
#endif
84
//        temp_block[0] = (block[0] + (q >> 1)) / q;
85
        i = 1;
86
        last_non_zero_p1 = 1;
87
        if (s->out_format == FMT_H263) {
88
            qmat = s->q_non_intra_matrix16;
89
        } else {
90
            qmat = s->q_intra_matrix16;
91
        }
92
        for(i=1;i<4;i++) {
93
            level = block[i] * qmat[i];
94
            level = level / (1 << (QMAT_SHIFT_MMX - 3));
95
            /* XXX: currently, this code is not optimal. the range should be:
96
               mpeg1: -255..255
97
               mpeg2: -2048..2047
98
               h263:  -128..127
99
               mpeg4: -2048..2047
100
            */
101
            if (level > maxLevel)
102
                level = maxLevel;
103
            else if (level < minLevel)
104
                level = minLevel;
105
            temp_block[i] = level;
106

  
107
	    if(level) 
108
	        if(last_non_zero_p1 < inv_zigzag_direct16[i]) last_non_zero_p1= inv_zigzag_direct16[i];
109
	    block[i]=0;
110
        }
111
    } else {
112
        i = 0;
113
        last_non_zero_p1 = 0;
114
        qmat = s->q_non_intra_matrix16;
115
    }
116

  
117
    asm volatile( /* XXX: small rounding bug, but it shouldn't matter */
118
	"movd %3, %%mm3			\n\t"
119
	SPREADW(%%mm3)
120
	"movd %4, %%mm4			\n\t"
121
	SPREADW(%%mm4)
122
	"movd %5, %%mm5			\n\t"
123
	SPREADW(%%mm5)
124
	"pxor %%mm7, %%mm7		\n\t"
125
	"movd %%eax, %%mm2		\n\t"
126
	SPREADW(%%mm2)
127
	"movl %6, %%eax			\n\t"
128
	".balign 16			\n\t"
129
	"1:				\n\t"
130
	"movq (%1, %%eax), %%mm0	\n\t"
131
	"movq (%2, %%eax), %%mm1	\n\t"
132
	"movq %%mm0, %%mm6		\n\t"
133
	"psraw $15, %%mm6		\n\t"
134
	"pmulhw %%mm0, %%mm1		\n\t"
135
	"psubsw %%mm6, %%mm1		\n\t"
136
#ifdef HAVE_MMX2
137
	"pminsw %%mm3, %%mm1		\n\t"
138
	"pmaxsw %%mm4, %%mm1		\n\t"
139
#else
140
	"paddsw %%mm3, %%mm1		\n\t"
141
	"psubusw %%mm4, %%mm1		\n\t"
142
	"paddsw %%mm5, %%mm1		\n\t"
143
#endif
144
	"movq %%mm1, (%8, %%eax)	\n\t"
145
	"pcmpeqw %%mm7, %%mm1		\n\t"
146
	"movq (%7, %%eax), %%mm0	\n\t"
147
	"movq %%mm7, (%1, %%eax)	\n\t"
148
	"pandn %%mm0, %%mm1		\n\t"
149
	PMAXW(%%mm1, %%mm2)
150
	"addl $8, %%eax			\n\t"
151
	" js 1b				\n\t"
152
	"movq %%mm2, %%mm0		\n\t"
153
	"psrlq $32, %%mm2		\n\t"
154
	PMAXW(%%mm0, %%mm2)
155
	"movq %%mm2, %%mm0		\n\t"
156
	"psrlq $16, %%mm2		\n\t"
157
	PMAXW(%%mm0, %%mm2)
158
	"movd %%mm2, %%eax		\n\t"
159
	"movzbl %%al, %%eax		\n\t"
160
	: "+a" (last_non_zero_p1)
161
	: "r" (block+64), "r" (qmat+64), 
162
#ifdef HAVE_MMX2
163
	  "m" (maxLevel),          "m" (minLevel),                    "m" (0 /* dummy */), "g" (2*i - 128),
164
#else
165
	  "m" (0x7FFF - maxLevel), "m" (0x7FFF -maxLevel + minLevel), "m" (minLevel),      "g" (2*i - 128),
166
#endif
167
	  "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
168
    );
169
// last_non_zero_p1=64;       
170
    /* permute for IDCT */
171
    asm volatile(
172
	"movl %0, %%eax			\n\t"
173
	"pushl %%ebp			\n\t"
174
	"movl %%esp, " MANGLE(esp_temp) "\n\t"
175
	"1:				\n\t"
176
	"movzbl (%1, %%eax), %%ebx	\n\t"
177
	"movzbl 1(%1, %%eax), %%ebp	\n\t"
178
	"movw (%2, %%ebx, 2), %%cx	\n\t"
179
	"movw (%2, %%ebp, 2), %%sp	\n\t"
180
	"movzbl " MANGLE(permutation) "(%%ebx), %%ebx\n\t"
181
	"movzbl " MANGLE(permutation) "(%%ebp), %%ebp\n\t"
182
	"movw %%cx, (%3, %%ebx, 2)	\n\t"
183
	"movw %%sp, (%3, %%ebp, 2)	\n\t"
184
	"addl $2, %%eax			\n\t"
185
	" js 1b				\n\t"
186
	"movl " MANGLE(esp_temp) ", %%esp\n\t"
187
	"popl %%ebp			\n\t"
188
	: 
189
	: "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S" (temp_block), "D" (block)
190
	: "%eax", "%ebx", "%ecx"
191
	);
192
/*
193
    for(i=0; i<last_non_zero_p1; i++)
194
    {
195
       int j= zigzag_direct_noperm[i];
196
       block[block_permute_op(j)]= temp_block[j];
197
    }
198
*/
199
//block_permute(block);
200
    return last_non_zero_p1 - 1;
201
}
libavcodec/mpegvideo.c
35 35
                                   DCTELEM *block, int n, int qscale);
36 36
static void dct_unquantize_h263_c(MpegEncContext *s, 
37 37
                                  DCTELEM *block, int n, int qscale);
38
static int dct_quantize(MpegEncContext *s, DCTELEM *block, int n, int qscale);
39
static int dct_quantize_mmx(MpegEncContext *s, 
40
                            DCTELEM *block, int n,
41
                            int qscale);
42 38
static void draw_edges_c(UINT8 *buf, int wrap, int width, int height, int w);
39
static int dct_quantize_c(MpegEncContext *s, DCTELEM *block, int n, int qscale);
43 40

  
41
int (*dct_quantize)(MpegEncContext *s, DCTELEM *block, int n, int qscale)= dct_quantize_c;
44 42
void (*draw_edges)(UINT8 *buf, int wrap, int width, int height, int w)= draw_edges_c;
45 43

  
46 44
#define EDGE_WIDTH 16
......
74 72

  
75 73
extern UINT8 zigzag_end[64];
76 74

  
77
/* XXX: should use variable shift ? */
78
#define QMAT_SHIFT_MMX 19
79
#define QMAT_SHIFT 25
80

  
81
static void convert_matrix(int *qmat, const UINT16 *quant_matrix, int qscale)
75
static void convert_matrix(int *qmat, UINT16 *qmat16, const UINT16 *quant_matrix, int qscale)
82 76
{
83 77
    int i;
84 78

  
85 79
    if (av_fdct == jpeg_fdct_ifast) {
86 80
        for(i=0;i<64;i++) {
87 81
            /* 16 <= qscale * quant_matrix[i] <= 7905 */
88
            /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */
82
            /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
83
            /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
84
            /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
89 85
            
90
            qmat[i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / 
91
                            (aanscales[i] * qscale * quant_matrix[i]));
86
            qmat[block_permute_op(i)] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / 
87
                            (aanscales[i] * qscale * quant_matrix[block_permute_op(i)]));
92 88
        }
93 89
    } else {
94 90
        for(i=0;i<64;i++) {
95 91
            /* We can safely suppose that 16 <= quant_matrix[i] <= 255
96
               So 16 <= qscale * quant_matrix[i] <= 7905
97
               so (1 << QMAT_SHIFT) / 16 >= qmat[i] >= (1 << QMAT_SHIFT) / 7905
92
               So 16           <= qscale * quant_matrix[i]             <= 7905
93
               so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
94
               so 32768        >= (1<<19) / (qscale * quant_matrix[i]) >= 67
98 95
            */
99
            qmat[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
96
            qmat[i]   = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]);
97
            qmat16[i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]);
100 98
        }
101 99
    }
102 100
}
......
418 416
void MPV_frame_end(MpegEncContext *s)
419 417
{
420 418
    /* draw edge for correct motion prediction if outside */
421
    if (s->pict_type != B_TYPE) {
419
    if (s->pict_type != B_TYPE && !s->intra_only) {
422 420
      if(s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MPEG4){
423 421
        draw_edges(s->current_picture[0], s->linesize, s->mb_width*16, s->mb_height*16, EDGE_WIDTH);
424 422
        draw_edges(s->current_picture[1], s->linesize/2, s->mb_width*8, s->mb_height*8, EDGE_WIDTH/2);
......
457 455
    avctx->key_frame = (s->pict_type == I_TYPE);
458 456
    
459 457
    MPV_frame_start(s);
460

  
458
    
461 459
    for(i=0;i<3;i++) {
462 460
        UINT8 *src = pict->data[i];
463 461
        UINT8 *dest = s->current_picture[i];
......
472 470
            h >>= 1;
473 471
        }
474 472

  
475
        for(j=0;j<h;j++) {
476
            memcpy(dest, src, w);
477
            dest += dest_wrap;
478
            src += src_wrap;
479
        }
473
	if(s->intra_only && dest_wrap==src_wrap){
474
	    s->current_picture[i] = pict->data[i];
475
	}else {
476
            for(j=0;j<h;j++) {
477
                memcpy(dest, src, w);
478
                dest += dest_wrap;
479
                src += src_wrap;
480
            }
481
	}
480 482
        s->new_picture[i] = s->current_picture[i];
481 483
    }
482 484

  
......
873 875
        s->intra_matrix[0] = default_intra_matrix[0];
874 876
        for(i=1;i<64;i++)
875 877
            s->intra_matrix[i] = (default_intra_matrix[i] * s->qscale) >> 3;
876
        convert_matrix(s->q_intra_matrix, s->intra_matrix, 8);
878
        convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, 8);
877 879
    } else {
878
        convert_matrix(s->q_intra_matrix, s->intra_matrix, s->qscale);
879
        convert_matrix(s->q_non_intra_matrix, s->non_intra_matrix, s->qscale);
880
        convert_matrix(s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, s->qscale);
881
        convert_matrix(s->q_non_intra_matrix, s->q_non_intra_matrix16, s->non_intra_matrix, s->qscale);
880 882
    }
881 883

  
882 884
    switch(s->out_format) {
......
1011 1013
                s->y_dc_scale = 8;
1012 1014
                s->c_dc_scale = 8;
1013 1015
            }
1014

  
1015 1016
            for(i=0;i<6;i++) {
1016
                int last_index;
1017
                if (av_fdct == jpeg_fdct_ifast)
1018
                    last_index = dct_quantize(s, s->block[i], i, s->qscale);
1019
                else
1020
                    last_index = dct_quantize_mmx(s, s->block[i], i, s->qscale);
1021
                s->block_last_index[i] = last_index;
1017
                s->block_last_index[i] = dct_quantize(s, s->block[i], i, s->qscale);
1022 1018
            }
1023 1019

  
1024 1020
            /* huffman encode */
......
1060 1056
    //    fprintf(stderr,"\nNumber of GOB: %d", s->gob_number);
1061 1057
}
1062 1058

  
1063
static int dct_quantize(MpegEncContext *s, 
1059
static int dct_quantize_c(MpegEncContext *s, 
1064 1060
                        DCTELEM *block, int n,
1065 1061
                        int qscale)
1066 1062
{
......
1157 1153
                level = maxLevel;
1158 1154
            else if (level < minLevel)
1159 1155
                level = minLevel;
1160
            block[j] = level;
1161
            last_non_zero = i;
1162
        } else {
1163
            block[j] = 0;
1164
        }
1165
    }
1166
    return last_non_zero;
1167
}
1168

  
1169
static int dct_quantize_mmx(MpegEncContext *s, 
1170
                            DCTELEM *block, int n,
1171
                            int qscale)
1172
{
1173
    int i, j, level, last_non_zero, q;
1174
    const int *qmat;
1175
    int minLevel, maxLevel;
1176

  
1177
    if(s->avctx!=NULL && s->avctx->codec->id==CODEC_ID_MPEG4){
1178
	/* mpeg4 */
1179
        minLevel= -2048;
1180
	maxLevel= 2047;
1181
    }else if(s->out_format==FMT_MPEG1){
1182
	/* mpeg1 */
1183
        minLevel= -255;
1184
	maxLevel= 255;
1185
    }else{
1186
	/* h263 / msmpeg4 */
1187
        minLevel= -128;
1188
	maxLevel= 127;
1189
    }
1190 1156

  
1191
    av_fdct (block);
1192
    
1193
    /* we need this permutation so that we correct the IDCT
1194
       permutation. will be moved into DCT code */
1195
    block_permute(block);
1196

  
1197
    if (s->mb_intra) {
1198
        if (n < 4)
1199
            q = s->y_dc_scale;
1200
        else
1201
            q = s->c_dc_scale;
1202
        
1203
        /* note: block[0] is assumed to be positive */
1204
        block[0] = (block[0] + (q >> 1)) / q;
1205
        i = 1;
1206
        last_non_zero = 0;
1207
        if (s->out_format == FMT_H263) {
1208
            qmat = s->q_non_intra_matrix;
1209
        } else {
1210
            qmat = s->q_intra_matrix;
1211
        }
1212
    } else {
1213
        i = 0;
1214
        last_non_zero = -1;
1215
        qmat = s->q_non_intra_matrix;
1216
    }
1217

  
1218
    for(;i<64;i++) {
1219
        j = zigzag_direct[i];
1220
        level = block[j];
1221
        level = level * qmat[j];
1222
        /* XXX: slight error for the low range. Test should be equivalent to
1223
           (level <= -(1 << (QMAT_SHIFT_MMX - 3)) || level >= (1 <<
1224
           (QMAT_SHIFT_MMX - 3)))
1225
        */
1226
        if (((level << (31 - (QMAT_SHIFT_MMX - 3))) >> (31 - (QMAT_SHIFT_MMX - 3))) != 
1227
            level) {
1228
            level = level / (1 << (QMAT_SHIFT_MMX - 3));
1229
            /* XXX: currently, this code is not optimal. the range should be:
1230
               mpeg1: -255..255
1231
               mpeg2: -2048..2047
1232
               h263:  -128..127
1233
               mpeg4: -2048..2047
1234
            */
1235
            if (level > maxLevel)
1236
                level = maxLevel;
1237
            else if (level < minLevel)
1238
                level = minLevel;
1239 1157
            block[j] = level;
1240 1158
            last_non_zero = i;
1241 1159
        } else {
libavcodec/mpegvideo.h
30 30

  
31 31
#define MPEG_BUF_SIZE (16 * 1024)
32 32

  
33
#define QMAT_SHIFT_MMX 19
34
#define QMAT_SHIFT 25
35

  
33 36
typedef struct MpegEncContext {
34 37
    struct AVCodecContext *avctx;
35 38
    /* the following parameters must be initialized before encoding */
......
120 123
    /* precomputed matrix (combine qscale and DCT renorm) */
121 124
    int q_intra_matrix[64];
122 125
    int q_non_intra_matrix[64];
126
    /* same as the above, but 16-bit for MMX; these are not permuted */
127
    UINT16 __align8 q_intra_matrix16[64] ;
128
    UINT16 __align8 q_non_intra_matrix16[64];
123 129
    int block_last_index[6];  /* last non zero coefficient in block */
124 130

  
125 131
    void *opaque; /* private data for the user */

Also available in: Unified diff