Revision badaf88e

View differences:

libavcodec/dsputil.c
83 83
	0x32, 0x3A, 0x33, 0x3B, 0x36, 0x3E, 0x37, 0x3F,
84 84
};
85 85

  
86
/* used to skip zeros at the end */
87
UINT8 zigzag_end[64];
88

  
86 89
UINT8 permutation[64];
87 90
//UINT8 invPermutation[64];
88 91

  
92
static void build_zigzag_end() /* fills zigzag_end[k] with 1 + max(zigzag_direct[0..k]) for k = 0..63 */
93
{
94
    int lastIndex; /* position in zigzag_direct[] currently being processed */
95
    int lastIndexAfterPerm=0; /* running maximum of the zigzag_direct[] values seen so far */
96
    for(lastIndex=0; lastIndex<64; lastIndex++)
97
    {
98
        if(zigzag_direct[lastIndex] > lastIndexAfterPerm) 
99
            lastIndexAfterPerm= zigzag_direct[lastIndex];
100
        zigzag_end[lastIndex]= lastIndexAfterPerm + 1; /* one past the highest index reached up to this position */
101
    }
102
}
103

  
89 104
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size)
90 105
{
91 106
    DCTELEM *p;
......
509 524
        block_permute(default_intra_matrix);
510 525
        block_permute(default_non_intra_matrix);
511 526
    }
527
    
528
    build_zigzag_end();
512 529
}
libavcodec/i386/mpegvideo_mmx.c
17 17
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 18
 *
19 19
 * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
20
 * h263 dequantizer by Michael Niedermayer <michaelni@gmx.at>
20 21
 */
21 22

  
22 23
#include "../dsputil.h"
23 24
#include "../mpegvideo.h"
24 25

  
26
extern UINT8 zigzag_end[64];
27

  
25 28
#if 0
26 29

  
27 30
/* XXX: GL: I don't understand why this function needs optimization
......
69 72
static void dct_unquantize_h263_mmx(MpegEncContext *s,
70 73
                                  DCTELEM *block, int n, int qscale)
71 74
{
72
    int i, level, qmul, qadd;
73

  
75
    int i, level, qmul, qadd, nCoeffs;
76
    
74 77
    qmul = s->qscale << 1;
75 78
    qadd = (s->qscale - 1) | 1;
76 79

  
......
91 94
			block[i] = level;
92 95
		}
93 96
	}
97
	nCoeffs=64;
94 98
    } else {
95 99
        i = 0;
100
	nCoeffs= zigzag_end[ s->block_last_index[n] ];
96 101
    }
97

  
102
//printf("%d %d  ", qmul, qadd);
98 103
asm volatile(
99 104
		"movd %1, %%mm6			\n\t" //qmul
100 105
		"packssdw %%mm6, %%mm6		\n\t"
......
138 143
		"movq %%mm1, 8(%0, %3)		\n\t"
139 144

  
140 145
		"addl $16, %3			\n\t"
141
		"cmpl $128, %3			\n\t"
142
		"jb 1b				\n\t"
143
		::"r" (block), "g"(qmul), "g" (qadd), "r" (2*i)
146
		"js 1b				\n\t"
147
		::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(i-nCoeffs))
144 148
		: "memory"
145 149
	);
146 150
}
......
178 182
static void dct_unquantize_mpeg1_mmx(MpegEncContext *s,
179 183
                                     DCTELEM *block, int n, int qscale)
180 184
{
181
    int i, level;
185
    int i, level, nCoeffs;
182 186
    const UINT16 *quant_matrix;
187
    
188
    if(s->alternate_scan) nCoeffs= 64;
189
    else nCoeffs= nCoeffs= zigzag_end[ s->block_last_index[n] ];
190

  
183 191
    if (s->mb_intra) {
184 192
        if (n < 4) 
185 193
            block[0] = block[0] * s->y_dc_scale;
186 194
        else
187 195
            block[0] = block[0] * s->c_dc_scale;
188
        if (s->out_format == FMT_H263) {
196
        /* isn't used anymore (we have had an h263 unquantizer for some time)
197
	if (s->out_format == FMT_H263) {
189 198
            i = 1;
190 199
            goto unquant_even;
191
        }
200
        }*/
192 201
        /* XXX: only mpeg1 */
193 202
        quant_matrix = s->intra_matrix;
194 203
	i=1;
......
214 223
	"packssdw %%mm6, %%mm7\n\t" /* mm7 = qscale | qscale | qscale | qscale */
215 224
	"pxor	%%mm6, %%mm6\n\t"
216 225
	::"g"(qscale),"m"(mm_wone),"m"(mm_wabs):"memory");
217
        for(;i<64;i+=4) {
226
        for(;i<nCoeffs;i+=4) {
218 227
		__asm __volatile(
219 228
			"movq	%1, %%mm0\n\t"
220 229
			"movq	%%mm7, %%mm1\n\t"
......
258 267
	    }
259 268
	    i++;
260 269
	}
261

  
262 270
asm volatile(
263 271
		"pcmpeqw %%mm7, %%mm7		\n\t"
264 272
		"psrlw $15, %%mm7		\n\t"
......
307 315
		"movq %%mm5, 8(%0, %3)		\n\t"
308 316

  
309 317
		"addl $16, %3			\n\t"
310
		"cmpl $128, %3			\n\t"
311
		"jb 1b				\n\t"
312
		::"r" (block), "r"(quant_matrix), "g" (qscale), "r" (2*i)
318
		"js 1b				\n\t"
319
		::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (2*(i-nCoeffs))
313 320
		: "memory"
314 321
	);
315 322
    }
libavcodec/mpegvideo.c
68 68
/* default motion estimation */
69 69
int motion_estimation_method = ME_LOG;
70 70

  
71
extern UINT8 zigzag_end[64];
72

  
71 73
/* XXX: should use variable shift ? */
72 74
#define QMAT_SHIFT_MMX 19
73 75
#define QMAT_SHIFT 25
......
674 676
{
675 677
    if (s->block_last_index[i] >= 0) {
676 678
        if (!s->mpeg2)
677
            s->dct_unquantize(s, block, i, s->qscale);
679
            if(s->encoding || s->avctx==NULL || s->avctx->codec->id!=CODEC_ID_MSMPEG4)
680
                s->dct_unquantize(s, block, i, s->qscale);
678 681
        ff_idct (block);
679 682
        add_pixels_clamped(block, dest, line_size);
680 683
    }
......
1206 1209
static void dct_unquantize_mpeg1_c(MpegEncContext *s, 
1207 1210
                                   DCTELEM *block, int n, int qscale)
1208 1211
{
1209
    int i, level;
1212
    int i, level, nCoeffs;
1210 1213
    const UINT16 *quant_matrix;
1211 1214

  
1215
    if(s->alternate_scan) nCoeffs= 64;
1216
    else nCoeffs= s->block_last_index[n]+1;
1217
    
1212 1218
    if (s->mb_intra) {
1213 1219
        if (n < 4) 
1214 1220
            block[0] = block[0] * s->y_dc_scale;
......
1216 1222
            block[0] = block[0] * s->c_dc_scale;
1217 1223
        /* XXX: only mpeg1 */
1218 1224
        quant_matrix = s->intra_matrix;
1219
        for(i=1;i<64;i++) {
1220
            level = block[i];
1225
        for(i=1;i<nCoeffs;i++) {
1226
            int j= zigzag_direct[i];
1227
            level = block[j];
1221 1228
            if (level) {
1222 1229
                if (level < 0) {
1223 1230
                    level = -level;
1224
                    level = (int)(level * qscale * quant_matrix[i]) >> 3;
1231
                    level = (int)(level * qscale * quant_matrix[j]) >> 3;
1225 1232
                    level = (level - 1) | 1;
1226 1233
                    level = -level;
1227 1234
                } else {
1228
                    level = (int)(level * qscale * quant_matrix[i]) >> 3;
1235
                    level = (int)(level * qscale * quant_matrix[j]) >> 3;
1229 1236
                    level = (level - 1) | 1;
1230 1237
                }
1231 1238
#ifdef PARANOID
1232 1239
                if (level < -2048 || level > 2047)
1233 1240
                    fprintf(stderr, "unquant error %d %d\n", i, level);
1234 1241
#endif
1235
                block[i] = level;
1242
                block[j] = level;
1236 1243
            }
1237 1244
        }
1238 1245
    } else {
1239 1246
        i = 0;
1240 1247
        quant_matrix = s->non_intra_matrix;
1241
        for(;i<64;i++) {
1242
            level = block[i];
1248
        for(i=1;i<nCoeffs;i++) {
1249
            int j= zigzag_direct[i];
1250
            level = block[j];
1243 1251
            if (level) {
1244 1252
                if (level < 0) {
1245 1253
                    level = -level;
1246 1254
                    level = (((level << 1) + 1) * qscale *
1247
                             ((int) (quant_matrix[i]))) >> 4;
1255
                             ((int) (quant_matrix[j]))) >> 4;
1248 1256
                    level = (level - 1) | 1;
1249 1257
                    level = -level;
1250 1258
                } else {
1251 1259
                    level = (((level << 1) + 1) * qscale *
1252
                             ((int) (quant_matrix[i]))) >> 4;
1260
                             ((int) (quant_matrix[j]))) >> 4;
1253 1261
                    level = (level - 1) | 1;
1254 1262
                }
1255 1263
#ifdef PARANOID
1256 1264
                if (level < -2048 || level > 2047)
1257 1265
                    fprintf(stderr, "unquant error %d %d\n", i, level);
1258 1266
#endif
1259
                block[i] = level;
1267
                block[j] = level;
1260 1268
            }
1261 1269
        }
1262 1270
    }
......
1266 1274
                                  DCTELEM *block, int n, int qscale)
1267 1275
{
1268 1276
    int i, level, qmul, qadd;
1277
    int nCoeffs;
1269 1278

  
1270 1279
    if (s->mb_intra) {
1271 1280
        if (n < 4) 
......
1273 1282
        else
1274 1283
            block[0] = block[0] * s->c_dc_scale;
1275 1284
        i = 1;
1285
        nCoeffs= 64; //does not always use zigzag table 
1276 1286
    } else {
1277 1287
        i = 0;
1288
        nCoeffs= zigzag_end[ s->block_last_index[n] ];
1278 1289
    }
1279 1290

  
1280 1291
    qmul = s->qscale << 1;
1281 1292
    qadd = (s->qscale - 1) | 1;
1282 1293

  
1283
    for(;i<64;i++) {
1294
    for(;i<nCoeffs;i++) {
1284 1295
        level = block[i];
1285 1296
        if (level) {
1286 1297
            if (level < 0) {
libavcodec/msmpeg4.c
630 630
int msmpeg4_decode_picture_header(MpegEncContext * s)
631 631
{
632 632
    int code;
633
static int weirdAl=0;
633 634

  
634 635
    s->pict_type = get_bits(&s->gb, 2) + 1;
635 636
    if (s->pict_type != I_TYPE &&
......
642 643
        code = get_bits(&s->gb, 5); 
643 644
        /* 0x17: one slice, 0x18: three slices */
644 645
        /* XXX: implement it */
646
	//printf("%d %d %d\n", code, s->slice_height, s->first_slice_line);
645 647
        if (code < 0x17)
646 648
            return -1;
647 649
        s->slice_height = s->mb_height / (code - 0x16);
......
650 652

  
651 653
        s->dc_table_index = get_bits1(&s->gb);
652 654
        s->no_rounding = 1;
655
/*	printf(" %d %d %d %d     \n", 
656
		s->qscale,
657
		s->rl_chroma_table_index,
658
		s->rl_table_index, 
659
		s->dc_table_index);*/
653 660
    } else {
654 661
        s->use_skip_mb_code = get_bits1(&s->gb);
655 662
        
......
659 666
        s->dc_table_index = get_bits1(&s->gb);
660 667

  
661 668
        s->mv_table_index = get_bits1(&s->gb);
662
        s->no_rounding ^= 1;
669
/*	printf(" %d %d %d %d %d     \n", 
670
		s->use_skip_mb_code, 
671
		s->rl_table_index, 
672
		s->rl_chroma_table_index, 
673
		s->dc_table_index,
674
		s->mv_table_index);*/
675
  if(weirdAl)
676
	s->no_rounding = 0;
677
  else
678
	s->no_rounding ^= 1;
663 679
    }
664 680
#ifdef DEBUG
665 681
    printf("*****frame %d:\n", frame_count++);
......
785 801
    int dc_pred_dir;
786 802
    RLTable *rl;
787 803
    const UINT8 *scan_table;
804
    int qmul, qadd;
788 805

  
789 806
    if (s->mb_intra) {
807
        qmul=1;
808
        qadd=0;
809

  
790 810
	/* DC coef */
791 811
        set_stat(ST_DC);
792 812
        level = msmpeg4_decode_dc(s, n, &dc_pred_dir);
......
798 818
        } else {
799 819
            rl = &rl_table[3 + s->rl_chroma_table_index];
800 820
        }
821

  
801 822
        run_diff = 0;
802 823
	i = 1;
803 824
        if (!coded) {
......
813 834
        }
814 835
        set_stat(ST_INTRA_AC);
815 836
    } else {
837
        qmul = s->qscale << 1;
838
        qadd = (s->qscale - 1) | 1;
816 839
	i = 0;
817 840
        rl = &rl_table[3 + s->rl_table_index];
818 841
        run_diff = 1;
......
837 860
                    run = get_bits(&s->gb, 6);
838 861
                    level = get_bits(&s->gb, 8);
839 862
                    level = (level << 24) >> 24; /* sign extend */
863
                    if(level>0) level= level * qmul + qadd;
864
                    else        level= level * qmul - qadd;
840 865
                } else {
841 866
                    /* second escape */
842 867
                    code = get_vlc(&s->gb, &rl->vlc);
843 868
                    if (code < 0 || code >= rl->n)
844 869
                        return -1;
845 870
                    run = rl->table_run[code];
846
                    level = rl->table_level[code];
871
                    level = rl->table_level[code] * qmul + qadd;
847 872
                    last = code >= rl->last;
848 873
                    run += rl->max_run[last][level] + run_diff;
849 874
                    if (get_bits1(&s->gb))
......
858 883
                level = rl->table_level[code];
859 884
                last = code >= rl->last;
860 885
                level += rl->max_level[last][run];
886
                level= level * qmul + qadd;
861 887
                if (get_bits1(&s->gb))
862 888
                    level = -level;
863 889
            }
864 890
        } else {
865 891
            run = rl->table_run[code];
866
            level = rl->table_level[code];
892
            level = rl->table_level[code] * qmul + qadd;
867 893
            last = code >= rl->last;
868 894
            if (get_bits1(&s->gb))
869 895
                level = -level;

Also available in: Unified diff