Revision 2caf19e9

View differences:

libavcodec/h264pred_template.c

 static void FUNCC(pred4x4_vertical)(uint8_t *_src, const uint8_t *topright, int _stride){
     pixel *src = (pixel*)_src;
     int stride = _stride/sizeof(pixel);
-    const pixel4 a= ((pixel4*)(src-stride))[0];
-    ((pixel4*)(src+0*stride))[0]= a;
-    ((pixel4*)(src+1*stride))[0]= a;
-    ((pixel4*)(src+2*stride))[0]= a;
-    ((pixel4*)(src+3*stride))[0]= a;
+    const pixel4 a= AV_RN4PA(src-stride);
+
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
 }

 static void FUNCC(pred4x4_horizontal)(uint8_t *_src, const uint8_t *topright, int _stride){
     pixel *src = (pixel*)_src;
     int stride = _stride/sizeof(pixel);
-    ((pixel4*)(src+0*stride))[0]= PIXEL_SPLAT_X4(src[-1+0*stride]);
-    ((pixel4*)(src+1*stride))[0]= PIXEL_SPLAT_X4(src[-1+1*stride]);
-    ((pixel4*)(src+2*stride))[0]= PIXEL_SPLAT_X4(src[-1+2*stride]);
-    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(src[-1+3*stride]);
+    AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4(src[-1+0*stride]));
+    AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4(src[-1+1*stride]));
+    AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4(src[-1+2*stride]));
+    AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4(src[-1+3*stride]));
 }

 static void FUNCC(pred4x4_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
......
     int stride = _stride/sizeof(pixel);
     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                    + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
+    const pixel4 a = PIXEL_SPLAT_X4(dc);

-    ((pixel4*)(src+0*stride))[0]=
-    ((pixel4*)(src+1*stride))[0]=
-    ((pixel4*)(src+2*stride))[0]=
-    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
 }

 static void FUNCC(pred4x4_left_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
     pixel *src = (pixel*)_src;
     int stride = _stride/sizeof(pixel);
     const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
+    const pixel4 a = PIXEL_SPLAT_X4(dc);

-    ((pixel4*)(src+0*stride))[0]=
-    ((pixel4*)(src+1*stride))[0]=
-    ((pixel4*)(src+2*stride))[0]=
-    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
 }

 static void FUNCC(pred4x4_top_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
     pixel *src = (pixel*)_src;
     int stride = _stride/sizeof(pixel);
     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
+    const pixel4 a = PIXEL_SPLAT_X4(dc);

-    ((pixel4*)(src+0*stride))[0]=
-    ((pixel4*)(src+1*stride))[0]=
-    ((pixel4*)(src+2*stride))[0]=
-    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
 }

 static void FUNCC(pred4x4_128_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
     pixel *src = (pixel*)_src;
     int stride = _stride/sizeof(pixel);
-    ((pixel4*)(src+0*stride))[0]=
-    ((pixel4*)(src+1*stride))[0]=
-    ((pixel4*)(src+2*stride))[0]=
-    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
+    const pixel4 a = PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
+
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
 }

 static void FUNCC(pred4x4_127_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
     pixel *src = (pixel*)_src;
     int stride = _stride/sizeof(pixel);
-    ((pixel4*)(src+0*stride))[0]=
-    ((pixel4*)(src+1*stride))[0]=
-    ((pixel4*)(src+2*stride))[0]=
-    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
+    const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
+
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
 }

 static void FUNCC(pred4x4_129_dc)(uint8_t *_src, const uint8_t *topright, int _stride){
     pixel *src = (pixel*)_src;
     int stride = _stride/sizeof(pixel);
-    ((pixel4*)(src+0*stride))[0]=
-    ((pixel4*)(src+1*stride))[0]=
-    ((pixel4*)(src+2*stride))[0]=
-    ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
+    const pixel4 a = PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
+
+    AV_WN4PA(src+0*stride, a);
+    AV_WN4PA(src+1*stride, a);
+    AV_WN4PA(src+2*stride, a);
+    AV_WN4PA(src+3*stride, a);
 }


......
     int i;
     pixel *src = (pixel*)_src;
     int stride = _stride/sizeof(pixel);
-    const pixel4 a = ((pixel4*)(src-stride))[0];
-    const pixel4 b = ((pixel4*)(src-stride))[1];
-    const pixel4 c = ((pixel4*)(src-stride))[2];
-    const pixel4 d = ((pixel4*)(src-stride))[3];
+    const pixel4 a = AV_RN4PA(((pixel4*)(src-stride))+0);
+    const pixel4 b = AV_RN4PA(((pixel4*)(src-stride))+1);
+    const pixel4 c = AV_RN4PA(((pixel4*)(src-stride))+2);
+    const pixel4 d = AV_RN4PA(((pixel4*)(src-stride))+3);

     for(i=0; i<16; i++){
-        ((pixel4*)(src+i*stride))[0] = a;
-        ((pixel4*)(src+i*stride))[1] = b;
-        ((pixel4*)(src+i*stride))[2] = c;
-        ((pixel4*)(src+i*stride))[3] = d;
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
+        AV_WN4PA(((pixel4*)(src+i*stride))+2, c);
+        AV_WN4PA(((pixel4*)(src+i*stride))+3, d);
     }
 }

......
     stride /= sizeof(pixel);

     for(i=0; i<16; i++){
-        ((pixel4*)(src+i*stride))[0] =
-        ((pixel4*)(src+i*stride))[1] =
-        ((pixel4*)(src+i*stride))[2] =
-        ((pixel4*)(src+i*stride))[3] = PIXEL_SPLAT_X4(src[-1+i*stride]);
+        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
+
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+2, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+3, a);
     }
 }

 #define PREDICT_16x16_DC(v)\
     for(i=0; i<16; i++){\
-        AV_WN4P(src+ 0, v);\
-        AV_WN4P(src+ 4, v);\
-        AV_WN4P(src+ 8, v);\
-        AV_WN4P(src+12, v);\
+        AV_WN4PA(src+ 0, v);\
+        AV_WN4PA(src+ 4, v);\
+        AV_WN4PA(src+ 8, v);\
+        AV_WN4PA(src+12, v);\
         src += stride;\
     }

......
     int i;
     pixel *src = (pixel*)_src;
     int stride = _stride/sizeof(pixel);
-    const pixel4 a= ((pixel4*)(src-stride))[0];
-    const pixel4 b= ((pixel4*)(src-stride))[1];
+    const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0);
+    const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1);

     for(i=0; i<8; i++){
-        ((pixel4*)(src+i*stride))[0]= a;
-        ((pixel4*)(src+i*stride))[1]= b;
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, b);
     }
 }

......
     stride /= sizeof(pixel);

     for(i=0; i<8; i++){
-        ((pixel4*)(src+i*stride))[0]=
-        ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(src[-1+i*stride]);
+        const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]);
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);
     }
 }

 #define PRED8x8_X(n, v)\
 static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\
     int i;\
+    const pixel4 a = PIXEL_SPLAT_X4(v);\
     pixel *src = (pixel*)_src;\
     stride /= sizeof(pixel);\
     for(i=0; i<8; i++){\
-        ((pixel4*)(src+i*stride))[0]=\
-        ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(v);\
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, a);\
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, a);\
     }\
 }

......
     dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);

     for(i=0; i<4; i++){
-        ((pixel4*)(src+i*stride))[0]=
-        ((pixel4*)(src+i*stride))[1]= dc0splat;
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc0splat);
     }
     for(i=4; i<8; i++){
-        ((pixel4*)(src+i*stride))[0]=
-        ((pixel4*)(src+i*stride))[1]= dc2splat;
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc2splat);
     }
 }

......
     dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);

     for(i=0; i<4; i++){
-        ((pixel4*)(src+i*stride))[0]= dc0splat;
-        ((pixel4*)(src+i*stride))[1]= dc1splat;
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
     }
     for(i=4; i<8; i++){
-        ((pixel4*)(src+i*stride))[0]= dc0splat;
-        ((pixel4*)(src+i*stride))[1]= dc1splat;
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
     }
 }

......
     dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);

     for(i=0; i<4; i++){
-        ((pixel4*)(src+i*stride))[0]= dc0splat;
-        ((pixel4*)(src+i*stride))[1]= dc1splat;
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat);
     }
     for(i=4; i<8; i++){
-        ((pixel4*)(src+i*stride))[0]= dc2splat;
-        ((pixel4*)(src+i*stride))[1]= dc3splat;
+        AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat);
+        AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat);
     }
 }

......
 #define PREDICT_8x8_DC(v) \
     int y; \
     for( y = 0; y < 8; y++ ) { \
-        ((pixel4*)src)[0] = \
-        ((pixel4*)src)[1] = v; \
+        AV_WN4PA(((pixel4*)src)+0, v); \
+        AV_WN4PA(((pixel4*)src)+1, v); \
         src += stride; \
     }

......
     int y;
     pixel *src = (pixel*)_src;
     int stride = _stride/sizeof(pixel);
+    pixel4 a, b;

     PREDICT_8x8_LOAD_TOP;
     src[0] = t0;
......
     src[5] = t5;
     src[6] = t6;
     src[7] = t7;
+    a = AV_RN4PA(((pixel4*)src)+0);
+    b = AV_RN4PA(((pixel4*)src)+1);
     for( y = 1; y < 8; y++ ) {
-        ((pixel4*)(src+y*stride))[0] = ((pixel4*)src)[0];
-        ((pixel4*)(src+y*stride))[1] = ((pixel4*)src)[1];
+        AV_WN4PA(((pixel4*)(src+y*stride))+0, a);
+        AV_WN4PA(((pixel4*)(src+y*stride))+1, b);
     }
 }
 static void FUNCC(pred8x8l_down_left)(uint8_t *_src, int has_topleft, int has_topright, int _stride)
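The h264pred_template.c changes are purely a switch from direct pixel4 pointer stores (and AV_WN4P in PREDICT_16x16_DC) to the aligned AV_RN4PA/AV_WN4PA accessors, with the PIXEL_SPLAT_X4 value hoisted into a local where it was previously written through a chained assignment. As a rough, illustrative sketch (not code from this revision) of what the rewritten pred4x4_vertical amounts to in the 8-bit instantiation, assuming pixel is uint8_t, pixel4 is uint32_t, and the 8-bit mappings AV_RN4PA -> AV_RN32A / AV_WN4PA -> AV_WN32A shown in the high_bit_depth.h hunks below (the function name here is hypothetical):

#include <stdint.h>
#include "libavutil/intreadwrite.h"

/* Illustrative 8-bit expansion of the new pred4x4_vertical. */
static void pred4x4_vertical_8_sketch(uint8_t *src, int stride)
{
    /* aligned 32-bit load of the 4 pixels in the row above the block */
    const uint32_t a = AV_RN32A(src - stride);

    /* replicate that row into each of the 4 rows of the block */
    AV_WN32A(src + 0 * stride, a);
    AV_WN32A(src + 1 * stride, a);
    AV_WN32A(src + 2 * stride, a);
    AV_WN32A(src + 3 * stride, a);
}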
libavcodec/high_bit_depth.h

 #   undef rnd_avg_pixel4
 #   undef AV_RN2P
 #   undef AV_RN4P
+#   undef AV_RN4PA
 #   undef AV_WN2P
 #   undef AV_WN4P
 #   undef AV_WN4PA
......
 #   define    rnd_avg_pixel4    rnd_avg64
 #   define AV_RN2P  AV_RN32
 #   define AV_RN4P  AV_RN64
+#   define AV_RN4PA AV_RN64A
 #   define AV_WN2P  AV_WN32
 #   define AV_WN4P  AV_WN64
 #   define AV_WN4PA AV_WN64A
......
 #   define    rnd_avg_pixel4    rnd_avg32
 #   define AV_RN2P  AV_RN16
 #   define AV_RN4P  AV_RN32
+#   define AV_RN4PA AV_RN32A
 #   define AV_WN2P  AV_WN16
 #   define AV_WN4P  AV_WN32
 #   define AV_WN4PA AV_WN32A
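The high_bit_depth.h hunks add the missing AV_RN4PA mapping alongside the existing AV_WN4PA one, so the new aligned reads resolve per build: AV_RN64A/AV_WN64A for the high-bit-depth template, AV_RN32A/AV_WN32A for the 8-bit template. A minimal sketch of the two resulting per-row stores, assuming pixel4 is uint64_t in the former case and uint32_t in the latter (helper names are hypothetical):

#include <stdint.h>
#include "libavutil/intreadwrite.h"

/* 8-bit build: pixel == uint8_t, pixel4 == uint32_t, AV_WN4PA == AV_WN32A,
 * so one aligned 32-bit store covers a row of 4 pixels. */
static void store_row4_8bit(uint8_t *row, uint32_t splat)
{
    AV_WN32A(row, splat);
}

/* High-bit-depth build: pixel == uint16_t, pixel4 == uint64_t,
 * AV_WN4PA == AV_WN64A, so one aligned 64-bit store covers 4 pixels. */
static void store_row4_hbd(uint16_t *row, uint64_t splat)
{
    AV_WN64A(row, splat);
}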
