Revision 7d7f57d9 libavcodec/snow.c

View differences:

libavcodec/snow.c
394 394
#define LOG2_MB_SIZE 4
395 395
#define MB_SIZE (1<<LOG2_MB_SIZE)
396 396
#define ENCODER_EXTRA_BITS 4
397
#define HTAPS 6
397
#define HTAPS 8
398 398

  
399 399
typedef struct x_and_coeff{
400 400
    int16_t x;
......
421 421
    int width;
422 422
    int height;
423 423
    SubBand band[MAX_DECOMPOSITIONS][4];
424

  
425
    int htaps;
426
    int8_t hcoeff[HTAPS/2];
427
    int diag_mc;
428
    int fast_mc;
429

  
430
    int last_htaps;
431
    int8_t last_hcoeff[HTAPS/2];
432
    int last_diag_mc;
424 433
}Plane;
425 434

  
426 435
typedef struct SnowContext{
......
2143 2152
    }
2144 2153
}
2145 2154

  
2146
static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2155
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2147 2156
    const static uint8_t weight[64]={
2148 2157
    8,7,6,5,4,3,2,1,
2149 2158
    7,7,0,0,0,0,0,1,
......
2193 2202
    l= brane[dx + 16*dy]>>4;
2194 2203

  
2195 2204
    b= needs[l] | needs[r];
2205
    if(p && !p->diag_mc)
2206
        b= 15;
2196 2207

  
2197 2208
    if(b&5){
2198 2209
        for(y=0; y < b_h+HTAPS-1; y++){
2199 2210
            for(x=0; x < b_w; x++){
2200
                int a_2=src[x + HTAPS/2-5];
2201 2211
                int a_1=src[x + HTAPS/2-4];
2202 2212
                int a0= src[x + HTAPS/2-3];
2203 2213
                int a1= src[x + HTAPS/2-2];
......
2206 2216
                int a4= src[x + HTAPS/2+1];
2207 2217
                int a5= src[x + HTAPS/2+2];
2208 2218
                int a6= src[x + HTAPS/2+3];
2209
                int a7= src[x + HTAPS/2+4];
2210
#if HTAPS==6
2211
                int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2212
#else
2213
                int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
2214
#endif
2219
                int am=0;
2220
                if(!p || p->fast_mc){
2221
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2222
                    tmpI[x]= am;
2223
                    am= (am+16)>>5;
2224
                }else{
2225
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2226
                    tmpI[x]= am;
2227
                    am= (am+32)>>6;
2228
                }
2215 2229

  
2216
                tmpI[x]= am;
2217
                am= (am+16)>>5;
2218 2230
                if(am&(~255)) am= ~(am>>31);
2219 2231
                tmp2[x]= am;
2220 2232
            }
......
2230 2242
    if(b&2){
2231 2243
        for(y=0; y < b_h; y++){
2232 2244
            for(x=0; x < b_w+1; x++){
2233
                int a_2=src[x + (HTAPS/2-5)*stride];
2234 2245
                int a_1=src[x + (HTAPS/2-4)*stride];
2235 2246
                int a0= src[x + (HTAPS/2-3)*stride];
2236 2247
                int a1= src[x + (HTAPS/2-2)*stride];
......
2239 2250
                int a4= src[x + (HTAPS/2+1)*stride];
2240 2251
                int a5= src[x + (HTAPS/2+2)*stride];
2241 2252
                int a6= src[x + (HTAPS/2+3)*stride];
2242
                int a7= src[x + (HTAPS/2+4)*stride];
2243
#if HTAPS==6
2244
                int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2245
#else
2246
                int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
2247
#endif
2253
                int am=0;
2254
                if(!p || p->fast_mc)
2255
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2256
                else
2257
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2248 2258

  
2249
                am= (am + 16)>>5;
2250 2259
                if(am&(~255)) am= ~(am>>31);
2251 2260
                tmp2[x]= am;
2252 2261
            }
......
2261 2270
    if(b&4){
2262 2271
        for(y=0; y < b_h; y++){
2263 2272
            for(x=0; x < b_w; x++){
2264
                int a_2=tmpI[x + (HTAPS/2-5)*64];
2265 2273
                int a_1=tmpI[x + (HTAPS/2-4)*64];
2266 2274
                int a0= tmpI[x + (HTAPS/2-3)*64];
2267 2275
                int a1= tmpI[x + (HTAPS/2-2)*64];
......
2270 2278
                int a4= tmpI[x + (HTAPS/2+1)*64];
2271 2279
                int a5= tmpI[x + (HTAPS/2+2)*64];
2272 2280
                int a6= tmpI[x + (HTAPS/2+3)*64];
2273
                int a7= tmpI[x + (HTAPS/2+4)*64];
2274
#if HTAPS==6
2275
                int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2276
#else
2277
                int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
2278
#endif
2279
                am= (am + 512)>>10;
2281
                int am=0;
2282
                if(!p || p->fast_mc)
2283
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2284
                else
2285
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2280 2286
                if(am&(~255)) am= ~(am>>31);
2281 2287
                tmp2[x]= am;
2282 2288
            }
......
2336 2342
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2337 2343
    uint8_t tmp[stride*(b_w+HTAPS-1)];\
2338 2344
    assert(h==b_w);\
2339
    mc_block(dst, src-(HTAPS/2-1)-(HTAPS/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2345
    mc_block(NULL, dst, src-(HTAPS/2-1)-(HTAPS/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2340 2346
}
2341 2347

  
2342 2348
mca( 0, 0,16)
......
2407 2413
//        assert(!(b_w&(b_w-1)));
2408 2414
        assert(b_w>1 && b_h>1);
2409 2415
        assert(tab_index>=0 && tab_index<4 || b_w==32);
2410
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || HTAPS != 6)
2411
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2416
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2417
            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2412 2418
        else if(b_w==32){
2413 2419
            int y;
2414 2420
            for(y=0; y<b_h; y+=16){
2415
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2416
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2421
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2422
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2417 2423
            }
2418 2424
        }else if(b_w==b_h)
2419
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2425
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2420 2426
        else if(b_w==2*b_h){
2421
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 2       + 2*stride,stride);
2422
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2427
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
2428
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2423 2429
        }else{
2424 2430
            assert(2*b_w==b_h);
2425
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 2 + 2*stride           ,stride);
2426
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2431
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
2432
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2427 2433
        }
2428 2434
    }
2429 2435
}
......
3514 3520
}
3515 3521

  
3516 3522
static void encode_header(SnowContext *s){
3517
    int plane_index, level, orientation;
3523
    int plane_index, level, orientation, i;
3518 3524
    uint8_t kstate[32];
3519 3525

  
3520 3526
    memset(kstate, MID_STATE, sizeof(kstate));
......
3527 3533
        s->last_qbias=
3528 3534
        s->last_mv_scale=
3529 3535
        s->last_block_max_depth= 0;
3536
        for(plane_index=0; plane_index<2; plane_index++){
3537
            Plane *p= &s->plane[plane_index];
3538
            p->last_htaps=0;
3539
            p->last_diag_mc=0;
3540
            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3541
        }
3530 3542
    }
3531 3543
    if(s->keyframe){
3532 3544
        put_symbol(&s->c, s->header_state, s->version, 0);
......
3550 3562
            }
3551 3563
        }
3552 3564
    }
3565

  
3566
    if(!s->keyframe){
3567
        int update_mc=0;
3568
        for(plane_index=0; plane_index<2; plane_index++){
3569
            Plane *p= &s->plane[plane_index];
3570
            update_mc |= p->last_htaps   != p->htaps;
3571
            update_mc |= p->last_diag_mc != p->diag_mc;
3572
            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3573
        }
3574
        if(!s->always_reset)
3575
            put_rac(&s->c, s->header_state, update_mc);
3576
        if(update_mc){
3577
            for(plane_index=0; plane_index<2; plane_index++){
3578
                Plane *p= &s->plane[plane_index];
3579
                put_rac(&s->c, s->header_state, p->diag_mc);
3580
                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3581
                for(i= p->htaps/2; i; i--)
3582
                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3583

  
3584
                p->last_diag_mc= p->diag_mc;
3585
                p->last_htaps= p->htaps;
3586
                memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3587
            }
3588
        }
3589
    }
3590

  
3553 3591
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3554 3592
    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3555 3593
    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
......
3608 3646
        }
3609 3647
    }
3610 3648

  
3649
    if(!s->keyframe){
3650
        if(s->always_reset || get_rac(&s->c, s->header_state)){
3651
            for(plane_index=0; plane_index<2; plane_index++){
3652
                int htaps, i, sum=0, absum=0;
3653
                Plane *p= &s->plane[plane_index];
3654
                p->diag_mc= get_rac(&s->c, s->header_state);
3655
                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3656
                if((unsigned)htaps > HTAPS || htaps==0)
3657
                    return -1;
3658
                p->htaps= htaps;
3659
                for(i= htaps/2; i; i--){
3660
                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3661
                    sum += p->hcoeff[i];
3662
                }
3663
                p->hcoeff[0]= 32-sum;
3664
            }
3665
            s->plane[2].diag_mc= s->plane[1].diag_mc;
3666
            s->plane[2].htaps  = s->plane[1].htaps;
3667
            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3668
        }
3669
    }
3670

  
3611 3671
    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3612 3672
    if(s->spatial_decomposition_type > 1){
3613 3673
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
......
3715 3775
        }
3716 3776
        s->plane[plane_index].width = w;
3717 3777
        s->plane[plane_index].height= h;
3778

  
3779
        s->plane[plane_index].diag_mc= 1;
3780
        s->plane[plane_index].htaps= 6;
3781
        s->plane[plane_index].hcoeff[0]=  40;
3782
        s->plane[plane_index].hcoeff[1]= -10;
3783
        s->plane[plane_index].hcoeff[2]=   2;
3784
        s->plane[plane_index].fast_mc= 1;
3785

  
3718 3786
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3719 3787
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3720 3788
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
......
4354 4422

  
4355 4423
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4356 4424
    decode_header(s);
4425

  
4426
    for(plane_index=0; plane_index<3; plane_index++){
4427
        Plane *p= &s->plane[plane_index];
4428
        p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4429
                                              && p->hcoeff[1]==-10
4430
                                              && p->hcoeff[2]==2;
4431
    }
4432

  
4357 4433
    if(!s->block) alloc_blocks(s);
4358 4434

  
4359 4435
    frame_start(s);

Also available in: Unified diff