Revision 827d43bb

View differences:

libavcodec/vp8.c
117 117
     */
118 118
    DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4];
119 119
    DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16];
120
    DECLARE_ALIGNED(16, DCTELEM, block_dc)[16];
120 121
    uint8_t intra4x4_pred_mode_mb[16];
121 122

  
122 123
    int chroma_pred_mode;    ///< 8x8c pred mode of the current macroblock
......
864 865
void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
865 866
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
866 867
{
867
    LOCAL_ALIGNED_16(DCTELEM, dc,[16]);
868 868
    int i, x, y, luma_start = 0, luma_ctx = 3;
869 869
    int nnz_pred, nnz, nnz_total = 0;
870 870
    int segment = s->segment;
871 871

  
872 872
    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
873
        AV_ZERO128(dc);
874
        AV_ZERO128(dc+8);
875 873
        nnz_pred = t_nnz[8] + l_nnz[8];
876 874

  
877 875
        // decode DC values and do hadamard
878
        nnz = decode_block_coeffs(c, dc, s->prob->token[1], 0, nnz_pred,
876
        nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred,
879 877
                                  s->qmat[segment].luma_dc_qmul);
880 878
        l_nnz[8] = t_nnz[8] = !!nnz;
881 879
        nnz_total += nnz;
882
        s->vp8dsp.vp8_luma_dc_wht(s->block, dc);
880
        s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc);
883 881
        luma_start = 1;
884 882
        luma_ctx = 0;
885 883
    }
libavcodec/vp8dsp.c
46 46
        t1 = dc[i*4+1] + dc[i*4+2];
47 47
        t2 = dc[i*4+1] - dc[i*4+2];
48 48
        t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding
49
        dc[i*4+0] = 0;
50
        dc[i*4+1] = 0;
51
        dc[i*4+2] = 0;
52
        dc[i*4+3] = 0;
49 53

  
50 54
        *block[i][0] = (t0 + t1) >> 3;
51 55
        *block[i][1] = (t3 + t2) >> 3;
libavcodec/x86/vp8dsp-init.c
224 224
extern void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, DCTELEM block[4][16], int stride);
225 225
extern void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, DCTELEM block[2][16], int stride);
226 226
extern void ff_vp8_luma_dc_wht_mmx(DCTELEM block[4][4][16], DCTELEM dc[16]);
227
extern void ff_vp8_luma_dc_wht_sse(DCTELEM block[4][4][16], DCTELEM dc[16]);
227 228
extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
228 229
extern void ff_vp8_idct_add_sse(uint8_t *dst, DCTELEM block[16], int stride);
229 230

  
......
335 336

  
336 337
    if (mm_flags & FF_MM_SSE) {
337 338
        c->vp8_idct_add                         = ff_vp8_idct_add_sse;
339
        c->vp8_luma_dc_wht                      = ff_vp8_luma_dc_wht_sse;
338 340
        c->put_vp8_epel_pixels_tab[0][0][0]     =
339 341
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
340 342
    }
libavcodec/x86/vp8dsp.asm
1186 1186
    SWAP %1, %4, %3
1187 1187
%endmacro
1188 1188

  
1189
INIT_MMX
1190
cglobal vp8_luma_dc_wht_mmx, 2,3
1189
%macro VP8_DC_WHT 1
1190
cglobal vp8_luma_dc_wht_%1, 2,3
1191 1191
    movq          m0, [r1]
1192 1192
    movq          m1, [r1+8]
1193 1193
    movq          m2, [r1+16]
1194 1194
    movq          m3, [r1+24]
1195
%ifidn %1, sse
1196
    xorps      xmm0, xmm0
1197
    movaps  [r1+ 0], xmm0
1198
    movaps  [r1+16], xmm0
1199
%else
1200
    pxor         m4, m4
1201
    movq    [r1+ 0], m4
1202
    movq    [r1+ 8], m4
1203
    movq    [r1+16], m4
1204
    movq    [r1+24], m4
1205
%endif
1195 1206
    HADAMARD4_1D  0, 1, 2, 3
1196 1207
    TRANSPOSE4x4W 0, 1, 2, 3, 4
1197 1208
    paddw         m0, [pw_3]
......
1203 1214
    SCATTER_WHT   0, 1, 0
1204 1215
    SCATTER_WHT   2, 3, 2
1205 1216
    RET
1217
%endmacro
1218

  
1219
INIT_MMX
1220
VP8_DC_WHT mmx
1221
VP8_DC_WHT sse
1206 1222

  
1207 1223
;-----------------------------------------------------------------------------
1208 1224
; void vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim);

Also available in: Unified diff