Revision 8dffcca5

View differences:

libavcodec/dsputil.c
43 43
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
44 44
uint32_t ff_squareTbl[512] = {0, };
45 45

  
46
#include "dsputil_internal.h"
47

  
46 48
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
47 49
#define pb_7f (~0UL/255 * 0x7f)
48 50
#define pb_80 (~0UL/255 * 0x80)
......
296 298
    return s;
297 299
}
298 300

  
299
/* draw the edges of width 'w' of an image of size width, height */
300
//FIXME check that this is ok for mpeg4 interlaced
301
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides)
302
{
303
    uint8_t *ptr, *last_line;
304
    int i;
305

  
306
    /* left and right */
307
    ptr = buf;
308
    for(i=0;i<height;i++) {
309
        memset(ptr - w, ptr[0], w);
310
        memset(ptr + width, ptr[width-1], w);
311
        ptr += wrap;
312
    }
313

  
314
    /* top and bottom + corners */
315
    buf -= w;
316
    last_line = buf + (height - 1) * wrap;
317
    if (sides & EDGE_TOP)
318
        for(i = 0; i < w; i++)
319
            memcpy(buf - (i + 1) * wrap, buf, width + w + w); // top
320
    if (sides & EDGE_BOTTOM)
321
        for (i = 0; i < w; i++)
322
            memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); // bottom
323
}
324

  
325
/**
326
 * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
327
 * @param buf destination buffer
328
 * @param src source buffer
329
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
330
 * @param block_w width of block
331
 * @param block_h height of block
332
 * @param src_x x coordinate of the top left sample of the block in the source buffer
333
 * @param src_y y coordinate of the top left sample of the block in the source buffer
334
 * @param w width of the source buffer
335
 * @param h height of the source buffer
336
 */
337
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
338
                                    int src_x, int src_y, int w, int h){
339
    int x, y;
340
    int start_y, start_x, end_y, end_x;
341

  
342
    if(src_y>= h){
343
        src+= (h-1-src_y)*linesize;
344
        src_y=h-1;
345
    }else if(src_y<=-block_h){
346
        src+= (1-block_h-src_y)*linesize;
347
        src_y=1-block_h;
348
    }
349
    if(src_x>= w){
350
        src+= (w-1-src_x);
351
        src_x=w-1;
352
    }else if(src_x<=-block_w){
353
        src+= (1-block_w-src_x);
354
        src_x=1-block_w;
355
    }
356

  
357
    start_y= FFMAX(0, -src_y);
358
    start_x= FFMAX(0, -src_x);
359
    end_y= FFMIN(block_h, h-src_y);
360
    end_x= FFMIN(block_w, w-src_x);
361
    assert(start_y < end_y && block_h);
362
    assert(start_x < end_x && block_w);
363

  
364
    w    = end_x - start_x;
365
    src += start_y*linesize + start_x;
366
    buf += start_x;
367

  
368
    //top
369
    for(y=0; y<start_y; y++){
370
        memcpy(buf, src, w);
371
        buf += linesize;
372
    }
373

  
374
    // copy existing part
375
    for(; y<end_y; y++){
376
        memcpy(buf, src, w);
377
        src += linesize;
378
        buf += linesize;
379
    }
380

  
381
    //bottom
382
    src -= linesize;
383
    for(; y<block_h; y++){
384
        memcpy(buf, src, w);
385
        buf += linesize;
386
    }
387

  
388
    buf -= block_h * linesize + start_x;
389
    while (block_h--){
390
       //left
391
        for(x=0; x<start_x; x++){
392
            buf[x] = buf[start_x];
393
        }
394

  
395
       //right
396
        for(x=end_x; x<block_w; x++){
397
            buf[x] = buf[end_x - 1];
398
        }
399
        buf += linesize;
400
    }
401
}
402

  
403 301
static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size)
404 302
{
405 303
    int i;
......
591 489
    }
592 490
}
593 491

  
594
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
595
{
596
    int i;
597
    for(i=0;i<8;i++) {
598
        pixels[0] += block[0];
599
        pixels[1] += block[1];
600
        pixels[2] += block[2];
601
        pixels[3] += block[3];
602
        pixels[4] += block[4];
603
        pixels[5] += block[5];
604
        pixels[6] += block[6];
605
        pixels[7] += block[7];
606
        pixels += line_size;
607
        block += 8;
608
    }
609
}
610

  
611
static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
612
{
613
    int i;
614
    for(i=0;i<4;i++) {
615
        pixels[0] += block[0];
616
        pixels[1] += block[1];
617
        pixels[2] += block[2];
618
        pixels[3] += block[3];
619
        pixels += line_size;
620
        block += 4;
621
    }
622
}
623

  
624 492
static int sum_abs_dctelem_c(DCTELEM *block)
625 493
{
626 494
    int sum=0, i;
......
665 533
    }
666 534
}
667 535

  
668
#if 0
669

  
670
#define PIXOP2(OPNAME, OP) \
671
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
672
{\
673
    int i;\
674
    for(i=0; i<h; i++){\
675
        OP(*((uint64_t*)block), AV_RN64(pixels));\
676
        pixels+=line_size;\
677
        block +=line_size;\
678
    }\
679
}\
680
\
681
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
682
{\
683
    int i;\
684
    for(i=0; i<h; i++){\
685
        const uint64_t a= AV_RN64(pixels  );\
686
        const uint64_t b= AV_RN64(pixels+1);\
687
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
688
        pixels+=line_size;\
689
        block +=line_size;\
690
    }\
691
}\
692
\
693
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
694
{\
695
    int i;\
696
    for(i=0; i<h; i++){\
697
        const uint64_t a= AV_RN64(pixels  );\
698
        const uint64_t b= AV_RN64(pixels+1);\
699
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
700
        pixels+=line_size;\
701
        block +=line_size;\
702
    }\
703
}\
704
\
705
static void OPNAME ## _no_rnd_pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
706
{\
707
    int i;\
708
    for(i=0; i<h; i++){\
709
        const uint64_t a= AV_RN64(pixels          );\
710
        const uint64_t b= AV_RN64(pixels+line_size);\
711
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
712
        pixels+=line_size;\
713
        block +=line_size;\
714
    }\
715
}\
716
\
717
static void OPNAME ## _pixels_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
718
{\
719
    int i;\
720
    for(i=0; i<h; i++){\
721
        const uint64_t a= AV_RN64(pixels          );\
722
        const uint64_t b= AV_RN64(pixels+line_size);\
723
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
724
        pixels+=line_size;\
725
        block +=line_size;\
726
    }\
727
}\
728
\
729
static void OPNAME ## _pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
730
{\
731
        int i;\
732
        const uint64_t a= AV_RN64(pixels  );\
733
        const uint64_t b= AV_RN64(pixels+1);\
734
        uint64_t l0=  (a&0x0303030303030303ULL)\
735
                    + (b&0x0303030303030303ULL)\
736
                    + 0x0202020202020202ULL;\
737
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
738
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
739
        uint64_t l1,h1;\
740
\
741
        pixels+=line_size;\
742
        for(i=0; i<h; i+=2){\
743
            uint64_t a= AV_RN64(pixels  );\
744
            uint64_t b= AV_RN64(pixels+1);\
745
            l1=  (a&0x0303030303030303ULL)\
746
               + (b&0x0303030303030303ULL);\
747
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
748
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
749
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
750
            pixels+=line_size;\
751
            block +=line_size;\
752
            a= AV_RN64(pixels  );\
753
            b= AV_RN64(pixels+1);\
754
            l0=  (a&0x0303030303030303ULL)\
755
               + (b&0x0303030303030303ULL)\
756
               + 0x0202020202020202ULL;\
757
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
758
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
759
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
760
            pixels+=line_size;\
761
            block +=line_size;\
762
        }\
763
}\
764
\
765
static void OPNAME ## _no_rnd_pixels_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
766
{\
767
        int i;\
768
        const uint64_t a= AV_RN64(pixels  );\
769
        const uint64_t b= AV_RN64(pixels+1);\
770
        uint64_t l0=  (a&0x0303030303030303ULL)\
771
                    + (b&0x0303030303030303ULL)\
772
                    + 0x0101010101010101ULL;\
773
        uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
774
                   + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
775
        uint64_t l1,h1;\
776
\
777
        pixels+=line_size;\
778
        for(i=0; i<h; i+=2){\
779
            uint64_t a= AV_RN64(pixels  );\
780
            uint64_t b= AV_RN64(pixels+1);\
781
            l1=  (a&0x0303030303030303ULL)\
782
               + (b&0x0303030303030303ULL);\
783
            h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
784
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
785
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
786
            pixels+=line_size;\
787
            block +=line_size;\
788
            a= AV_RN64(pixels  );\
789
            b= AV_RN64(pixels+1);\
790
            l0=  (a&0x0303030303030303ULL)\
791
               + (b&0x0303030303030303ULL)\
792
               + 0x0101010101010101ULL;\
793
            h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
794
              + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
795
            OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
796
            pixels+=line_size;\
797
            block +=line_size;\
798
        }\
799
}\
800
\
801
CALL_2X_PIXELS(OPNAME ## _pixels16_c    , OPNAME ## _pixels_c    , 8)\
802
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels_x2_c , 8)\
803
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels_y2_c , 8)\
804
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels_xy2_c, 8)\
805
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels_x2_c , 8)\
806
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels_y2_c , 8)\
807
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels_xy2_c, 8)
808

  
809
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
810
#else // 64 bit variant
811

  
812
#define PIXOP2(OPNAME, OP) \
813
static void OPNAME ## _pixels2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
814
    int i;\
815
    for(i=0; i<h; i++){\
816
        OP(*((uint16_t*)(block  )), AV_RN16(pixels  ));\
817
        pixels+=line_size;\
818
        block +=line_size;\
819
    }\
820
}\
821
static void OPNAME ## _pixels4_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
822
    int i;\
823
    for(i=0; i<h; i++){\
824
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
825
        pixels+=line_size;\
826
        block +=line_size;\
827
    }\
828
}\
829
static void OPNAME ## _pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
830
    int i;\
831
    for(i=0; i<h; i++){\
832
        OP(*((uint32_t*)(block  )), AV_RN32(pixels  ));\
833
        OP(*((uint32_t*)(block+4)), AV_RN32(pixels+4));\
834
        pixels+=line_size;\
835
        block +=line_size;\
836
    }\
837
}\
838
static inline void OPNAME ## _no_rnd_pixels8_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
839
    OPNAME ## _pixels8_c(block, pixels, line_size, h);\
840
}\
841
\
842
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
843
                                                int src_stride1, int src_stride2, int h){\
844
    int i;\
845
    for(i=0; i<h; i++){\
846
        uint32_t a,b;\
847
        a= AV_RN32(&src1[i*src_stride1  ]);\
848
        b= AV_RN32(&src2[i*src_stride2  ]);\
849
        OP(*((uint32_t*)&dst[i*dst_stride  ]), no_rnd_avg32(a, b));\
850
        a= AV_RN32(&src1[i*src_stride1+4]);\
851
        b= AV_RN32(&src2[i*src_stride2+4]);\
852
        OP(*((uint32_t*)&dst[i*dst_stride+4]), no_rnd_avg32(a, b));\
853
    }\
854
}\
855
\
856
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
857
                                                int src_stride1, int src_stride2, int h){\
858
    int i;\
859
    for(i=0; i<h; i++){\
860
        uint32_t a,b;\
861
        a= AV_RN32(&src1[i*src_stride1  ]);\
862
        b= AV_RN32(&src2[i*src_stride2  ]);\
863
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
864
        a= AV_RN32(&src1[i*src_stride1+4]);\
865
        b= AV_RN32(&src2[i*src_stride2+4]);\
866
        OP(*((uint32_t*)&dst[i*dst_stride+4]), rnd_avg32(a, b));\
867
    }\
868
}\
869
\
870
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
871
                                                int src_stride1, int src_stride2, int h){\
872
    int i;\
873
    for(i=0; i<h; i++){\
874
        uint32_t a,b;\
875
        a= AV_RN32(&src1[i*src_stride1  ]);\
876
        b= AV_RN32(&src2[i*src_stride2  ]);\
877
        OP(*((uint32_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
878
    }\
879
}\
880
\
881
static inline void OPNAME ## _pixels2_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
882
                                                int src_stride1, int src_stride2, int h){\
883
    int i;\
884
    for(i=0; i<h; i++){\
885
        uint32_t a,b;\
886
        a= AV_RN16(&src1[i*src_stride1  ]);\
887
        b= AV_RN16(&src2[i*src_stride2  ]);\
888
        OP(*((uint16_t*)&dst[i*dst_stride  ]), rnd_avg32(a, b));\
889
    }\
890
}\
891
\
892
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
893
                                                int src_stride1, int src_stride2, int h){\
894
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
895
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
896
}\
897
\
898
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
899
                                                int src_stride1, int src_stride2, int h){\
900
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
901
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
902
}\
903
\
904
static inline void OPNAME ## _no_rnd_pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
905
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
906
}\
907
\
908
static inline void OPNAME ## _pixels8_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
909
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
910
}\
911
\
912
static inline void OPNAME ## _no_rnd_pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
913
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
914
}\
915
\
916
static inline void OPNAME ## _pixels8_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
917
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
918
}\
919
\
920
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
921
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
922
    int i;\
923
    for(i=0; i<h; i++){\
924
        uint32_t a, b, c, d, l0, l1, h0, h1;\
925
        a= AV_RN32(&src1[i*src_stride1]);\
926
        b= AV_RN32(&src2[i*src_stride2]);\
927
        c= AV_RN32(&src3[i*src_stride3]);\
928
        d= AV_RN32(&src4[i*src_stride4]);\
929
        l0=  (a&0x03030303UL)\
930
           + (b&0x03030303UL)\
931
           + 0x02020202UL;\
932
        h0= ((a&0xFCFCFCFCUL)>>2)\
933
          + ((b&0xFCFCFCFCUL)>>2);\
934
        l1=  (c&0x03030303UL)\
935
           + (d&0x03030303UL);\
936
        h1= ((c&0xFCFCFCFCUL)>>2)\
937
          + ((d&0xFCFCFCFCUL)>>2);\
938
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
939
        a= AV_RN32(&src1[i*src_stride1+4]);\
940
        b= AV_RN32(&src2[i*src_stride2+4]);\
941
        c= AV_RN32(&src3[i*src_stride3+4]);\
942
        d= AV_RN32(&src4[i*src_stride4+4]);\
943
        l0=  (a&0x03030303UL)\
944
           + (b&0x03030303UL)\
945
           + 0x02020202UL;\
946
        h0= ((a&0xFCFCFCFCUL)>>2)\
947
          + ((b&0xFCFCFCFCUL)>>2);\
948
        l1=  (c&0x03030303UL)\
949
           + (d&0x03030303UL);\
950
        h1= ((c&0xFCFCFCFCUL)>>2)\
951
          + ((d&0xFCFCFCFCUL)>>2);\
952
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
953
    }\
954
}\
955
\
956
static inline void OPNAME ## _pixels4_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
957
    OPNAME ## _pixels4_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
958
}\
959
\
960
static inline void OPNAME ## _pixels4_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
961
    OPNAME ## _pixels4_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
962
}\
963
\
964
static inline void OPNAME ## _pixels2_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
965
    OPNAME ## _pixels2_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
966
}\
967
\
968
static inline void OPNAME ## _pixels2_y2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
969
    OPNAME ## _pixels2_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
970
}\
971
\
972
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
973
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
974
    int i;\
975
    for(i=0; i<h; i++){\
976
        uint32_t a, b, c, d, l0, l1, h0, h1;\
977
        a= AV_RN32(&src1[i*src_stride1]);\
978
        b= AV_RN32(&src2[i*src_stride2]);\
979
        c= AV_RN32(&src3[i*src_stride3]);\
980
        d= AV_RN32(&src4[i*src_stride4]);\
981
        l0=  (a&0x03030303UL)\
982
           + (b&0x03030303UL)\
983
           + 0x01010101UL;\
984
        h0= ((a&0xFCFCFCFCUL)>>2)\
985
          + ((b&0xFCFCFCFCUL)>>2);\
986
        l1=  (c&0x03030303UL)\
987
           + (d&0x03030303UL);\
988
        h1= ((c&0xFCFCFCFCUL)>>2)\
989
          + ((d&0xFCFCFCFCUL)>>2);\
990
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
991
        a= AV_RN32(&src1[i*src_stride1+4]);\
992
        b= AV_RN32(&src2[i*src_stride2+4]);\
993
        c= AV_RN32(&src3[i*src_stride3+4]);\
994
        d= AV_RN32(&src4[i*src_stride4+4]);\
995
        l0=  (a&0x03030303UL)\
996
           + (b&0x03030303UL)\
997
           + 0x01010101UL;\
998
        h0= ((a&0xFCFCFCFCUL)>>2)\
999
          + ((b&0xFCFCFCFCUL)>>2);\
1000
        l1=  (c&0x03030303UL)\
1001
           + (d&0x03030303UL);\
1002
        h1= ((c&0xFCFCFCFCUL)>>2)\
1003
          + ((d&0xFCFCFCFCUL)>>2);\
1004
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1005
    }\
1006
}\
1007
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
1008
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1009
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1010
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1011
}\
1012
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, const uint8_t *src3, const uint8_t *src4,\
1013
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
1014
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1015
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
1016
}\
1017
\
1018
static inline void OPNAME ## _pixels2_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1019
{\
1020
        int i, a0, b0, a1, b1;\
1021
        a0= pixels[0];\
1022
        b0= pixels[1] + 2;\
1023
        a0 += b0;\
1024
        b0 += pixels[2];\
1025
\
1026
        pixels+=line_size;\
1027
        for(i=0; i<h; i+=2){\
1028
            a1= pixels[0];\
1029
            b1= pixels[1];\
1030
            a1 += b1;\
1031
            b1 += pixels[2];\
1032
\
1033
            block[0]= (a1+a0)>>2; /* FIXME non put */\
1034
            block[1]= (b1+b0)>>2;\
1035
\
1036
            pixels+=line_size;\
1037
            block +=line_size;\
1038
\
1039
            a0= pixels[0];\
1040
            b0= pixels[1] + 2;\
1041
            a0 += b0;\
1042
            b0 += pixels[2];\
1043
\
1044
            block[0]= (a1+a0)>>2;\
1045
            block[1]= (b1+b0)>>2;\
1046
            pixels+=line_size;\
1047
            block +=line_size;\
1048
        }\
1049
}\
1050
\
1051
static inline void OPNAME ## _pixels4_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1052
{\
1053
        int i;\
1054
        const uint32_t a= AV_RN32(pixels  );\
1055
        const uint32_t b= AV_RN32(pixels+1);\
1056
        uint32_t l0=  (a&0x03030303UL)\
1057
                    + (b&0x03030303UL)\
1058
                    + 0x02020202UL;\
1059
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1060
                   + ((b&0xFCFCFCFCUL)>>2);\
1061
        uint32_t l1,h1;\
1062
\
1063
        pixels+=line_size;\
1064
        for(i=0; i<h; i+=2){\
1065
            uint32_t a= AV_RN32(pixels  );\
1066
            uint32_t b= AV_RN32(pixels+1);\
1067
            l1=  (a&0x03030303UL)\
1068
               + (b&0x03030303UL);\
1069
            h1= ((a&0xFCFCFCFCUL)>>2)\
1070
              + ((b&0xFCFCFCFCUL)>>2);\
1071
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1072
            pixels+=line_size;\
1073
            block +=line_size;\
1074
            a= AV_RN32(pixels  );\
1075
            b= AV_RN32(pixels+1);\
1076
            l0=  (a&0x03030303UL)\
1077
               + (b&0x03030303UL)\
1078
               + 0x02020202UL;\
1079
            h0= ((a&0xFCFCFCFCUL)>>2)\
1080
              + ((b&0xFCFCFCFCUL)>>2);\
1081
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1082
            pixels+=line_size;\
1083
            block +=line_size;\
1084
        }\
1085
}\
1086
\
1087
static inline void OPNAME ## _pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1088
{\
1089
    int j;\
1090
    for(j=0; j<2; j++){\
1091
        int i;\
1092
        const uint32_t a= AV_RN32(pixels  );\
1093
        const uint32_t b= AV_RN32(pixels+1);\
1094
        uint32_t l0=  (a&0x03030303UL)\
1095
                    + (b&0x03030303UL)\
1096
                    + 0x02020202UL;\
1097
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1098
                   + ((b&0xFCFCFCFCUL)>>2);\
1099
        uint32_t l1,h1;\
1100
\
1101
        pixels+=line_size;\
1102
        for(i=0; i<h; i+=2){\
1103
            uint32_t a= AV_RN32(pixels  );\
1104
            uint32_t b= AV_RN32(pixels+1);\
1105
            l1=  (a&0x03030303UL)\
1106
               + (b&0x03030303UL);\
1107
            h1= ((a&0xFCFCFCFCUL)>>2)\
1108
              + ((b&0xFCFCFCFCUL)>>2);\
1109
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1110
            pixels+=line_size;\
1111
            block +=line_size;\
1112
            a= AV_RN32(pixels  );\
1113
            b= AV_RN32(pixels+1);\
1114
            l0=  (a&0x03030303UL)\
1115
               + (b&0x03030303UL)\
1116
               + 0x02020202UL;\
1117
            h0= ((a&0xFCFCFCFCUL)>>2)\
1118
              + ((b&0xFCFCFCFCUL)>>2);\
1119
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1120
            pixels+=line_size;\
1121
            block +=line_size;\
1122
        }\
1123
        pixels+=4-line_size*(h+1);\
1124
        block +=4-line_size*h;\
1125
    }\
1126
}\
1127
\
1128
static inline void OPNAME ## _no_rnd_pixels8_xy2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
1129
{\
1130
    int j;\
1131
    for(j=0; j<2; j++){\
1132
        int i;\
1133
        const uint32_t a= AV_RN32(pixels  );\
1134
        const uint32_t b= AV_RN32(pixels+1);\
1135
        uint32_t l0=  (a&0x03030303UL)\
1136
                    + (b&0x03030303UL)\
1137
                    + 0x01010101UL;\
1138
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
1139
                   + ((b&0xFCFCFCFCUL)>>2);\
1140
        uint32_t l1,h1;\
1141
\
1142
        pixels+=line_size;\
1143
        for(i=0; i<h; i+=2){\
1144
            uint32_t a= AV_RN32(pixels  );\
1145
            uint32_t b= AV_RN32(pixels+1);\
1146
            l1=  (a&0x03030303UL)\
1147
               + (b&0x03030303UL);\
1148
            h1= ((a&0xFCFCFCFCUL)>>2)\
1149
              + ((b&0xFCFCFCFCUL)>>2);\
1150
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1151
            pixels+=line_size;\
1152
            block +=line_size;\
1153
            a= AV_RN32(pixels  );\
1154
            b= AV_RN32(pixels+1);\
1155
            l0=  (a&0x03030303UL)\
1156
               + (b&0x03030303UL)\
1157
               + 0x01010101UL;\
1158
            h0= ((a&0xFCFCFCFCUL)>>2)\
1159
              + ((b&0xFCFCFCFCUL)>>2);\
1160
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
1161
            pixels+=line_size;\
1162
            block +=line_size;\
1163
        }\
1164
        pixels+=4-line_size*(h+1);\
1165
        block +=4-line_size*h;\
1166
    }\
1167
}\
1168
\
1169
CALL_2X_PIXELS(OPNAME ## _pixels16_c  , OPNAME ## _pixels8_c  , 8)\
1170
CALL_2X_PIXELS(OPNAME ## _pixels16_x2_c , OPNAME ## _pixels8_x2_c , 8)\
1171
CALL_2X_PIXELS(OPNAME ## _pixels16_y2_c , OPNAME ## _pixels8_y2_c , 8)\
1172
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_c, OPNAME ## _pixels8_xy2_c, 8)\
1173
av_unused CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_c  , OPNAME ## _pixels8_c         , 8)\
1174
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2_c , OPNAME ## _no_rnd_pixels8_x2_c , 8)\
1175
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2_c , OPNAME ## _no_rnd_pixels8_y2_c , 8)\
1176
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2_c, OPNAME ## _no_rnd_pixels8_xy2_c, 8)\
1177

  
1178
#define op_avg(a, b) a = rnd_avg32(a, b)
1179
#endif
1180
#define op_put(a, b) a = b
1181

  
1182
PIXOP2(avg, op_avg)
1183
PIXOP2(put, op_put)
1184
#undef op_avg
1185
#undef op_put
1186

  
1187
#define put_no_rnd_pixels8_c  put_pixels8_c
1188
#define put_no_rnd_pixels16_c put_pixels16_c
1189

  
1190 536
#define avg2(a,b) ((a+b+1)>>1)
1191 537
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
1192 538

  
1193
static void put_no_rnd_pixels16_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
1194
    put_no_rnd_pixels16_l2(dst, a, b, stride, stride, stride, h);
1195
}
1196

  
1197
static void put_no_rnd_pixels8_l2_c(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h){
1198
    put_no_rnd_pixels8_l2(dst, a, b, stride, stride, stride, h);
1199
}
1200

  
1201 539
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
1202 540
{
1203 541
    const int A=(16-x16)*(16-y16);
......
1494 832
    void put_tpel_pixels_mc22_c(dst, src, stride, width, height);}
1495 833
#endif
1496 834

  
1497
#define H264_CHROMA_MC(OPNAME, OP)\
1498
static void OPNAME ## h264_chroma_mc2_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1499
    const int A=(8-x)*(8-y);\
1500
    const int B=(  x)*(8-y);\
1501
    const int C=(8-x)*(  y);\
1502
    const int D=(  x)*(  y);\
1503
    int i;\
1504
    \
1505
    assert(x<8 && y<8 && x>=0 && y>=0);\
1506
\
1507
    if(D){\
1508
        for(i=0; i<h; i++){\
1509
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1510
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1511
            dst+= stride;\
1512
            src+= stride;\
1513
        }\
1514
    }else{\
1515
        const int E= B+C;\
1516
        const int step= C ? stride : 1;\
1517
        for(i=0; i<h; i++){\
1518
            OP(dst[0], (A*src[0] + E*src[step+0]));\
1519
            OP(dst[1], (A*src[1] + E*src[step+1]));\
1520
            dst+= stride;\
1521
            src+= stride;\
1522
        }\
1523
    }\
1524
}\
1525
\
1526
static void OPNAME ## h264_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1527
    const int A=(8-x)*(8-y);\
1528
    const int B=(  x)*(8-y);\
1529
    const int C=(8-x)*(  y);\
1530
    const int D=(  x)*(  y);\
1531
    int i;\
1532
    \
1533
    assert(x<8 && y<8 && x>=0 && y>=0);\
1534
\
1535
    if(D){\
1536
        for(i=0; i<h; i++){\
1537
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1538
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1539
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
1540
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
1541
            dst+= stride;\
1542
            src+= stride;\
1543
        }\
1544
    }else{\
1545
        const int E= B+C;\
1546
        const int step= C ? stride : 1;\
1547
        for(i=0; i<h; i++){\
1548
            OP(dst[0], (A*src[0] + E*src[step+0]));\
1549
            OP(dst[1], (A*src[1] + E*src[step+1]));\
1550
            OP(dst[2], (A*src[2] + E*src[step+2]));\
1551
            OP(dst[3], (A*src[3] + E*src[step+3]));\
1552
            dst+= stride;\
1553
            src+= stride;\
1554
        }\
1555
    }\
1556
}\
1557
\
1558
static void OPNAME ## h264_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
1559
    const int A=(8-x)*(8-y);\
1560
    const int B=(  x)*(8-y);\
1561
    const int C=(8-x)*(  y);\
1562
    const int D=(  x)*(  y);\
1563
    int i;\
1564
    \
1565
    assert(x<8 && y<8 && x>=0 && y>=0);\
1566
\
1567
    if(D){\
1568
        for(i=0; i<h; i++){\
1569
            OP(dst[0], (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1]));\
1570
            OP(dst[1], (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2]));\
1571
            OP(dst[2], (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3]));\
1572
            OP(dst[3], (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4]));\
1573
            OP(dst[4], (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5]));\
1574
            OP(dst[5], (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6]));\
1575
            OP(dst[6], (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7]));\
1576
            OP(dst[7], (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8]));\
1577
            dst+= stride;\
1578
            src+= stride;\
1579
        }\
1580
    }else{\
1581
        const int E= B+C;\
1582
        const int step= C ? stride : 1;\
1583
        for(i=0; i<h; i++){\
1584
            OP(dst[0], (A*src[0] + E*src[step+0]));\
1585
            OP(dst[1], (A*src[1] + E*src[step+1]));\
1586
            OP(dst[2], (A*src[2] + E*src[step+2]));\
1587
            OP(dst[3], (A*src[3] + E*src[step+3]));\
1588
            OP(dst[4], (A*src[4] + E*src[step+4]));\
1589
            OP(dst[5], (A*src[5] + E*src[step+5]));\
1590
            OP(dst[6], (A*src[6] + E*src[step+6]));\
1591
            OP(dst[7], (A*src[7] + E*src[step+7]));\
1592
            dst+= stride;\
1593
            src+= stride;\
1594
        }\
1595
    }\
1596
}
1597

  
1598
/*
 * Store/average ops for the chroma MC template above.  The four bilinear
 * weights A+B+C+D always sum to 64 (see the (8-x)/(x)/(8-y)/(y) products in
 * H264_CHROMA_MC), so "+32 >> 6" is a round-to-nearest division by 64.
 */
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) /* round, then average with existing dst pixel */
#define op_put(a, b) a = (((b) + 32)>>6)              /* round and store */

H264_CHROMA_MC(put_       , op_put)
H264_CHROMA_MC(avg_       , op_avg)
#undef op_avg
#undef op_put
1605

  
1606 835
#define QPEL_MC(r, OPNAME, RND, OP) \
1607 836
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
1608 837
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
......
2100 1329
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
2101 1330
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
2102 1331

  
2103
#if 1
2104
/*
 * H264_LOWPASS(OPNAME, OP, OP2) emits the C implementations of the H.264
 * six-tap (1,-5,20,20,-5,1) half-pel interpolation filter for block sizes
 * 2, 4, 8 and 16:
 *   - *_h_lowpass:  horizontal filtering only (reads src[-2..size+2]);
 *   - *_v_lowpass:  vertical filtering only (reads rows -2..size+2);
 *   - *_hv_lowpass: horizontal pass into a 16-bit 'tmp' buffer (size+5 rows,
 *                   intermediates exceed 8 bits, hence int16_t), then a
 *                   vertical pass over 'tmp'.
 * OP writes/averages a single-pass result and OP2 a double-filtered result;
 * the instantiations below use rounding shifts of 5 resp. 10 bits and clamp
 * through 'cm' (ff_cropTbl + MAX_NEG_CROP), which each generated function
 * declares locally for the OP/OP2 expansions to use.  The 16-wide/-tall
 * variants are composed from four 8x8 calls.
 */
#define H264_LOWPASS(OPNAME, OP, OP2) \
static av_unused void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        dst++;\
        src++;\
    }\
}\
\
static av_unused void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=2;\
    const int w=2;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        dst++;\
        tmp++;\
    }\
}\
static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=4;\
    const int w=4;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int h=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int srcB= src[-2*srcStride];\
        const int srcA= src[-1*srcStride];\
        const int src0= src[0 *srcStride];\
        const int src1= src[1 *srcStride];\
        const int src2= src[2 *srcStride];\
        const int src3= src[3 *srcStride];\
        const int src4= src[4 *srcStride];\
        const int src5= src[5 *srcStride];\
        const int src6= src[6 *srcStride];\
        const int src7= src[7 *srcStride];\
        const int src8= src[8 *srcStride];\
        const int src9= src[9 *srcStride];\
        const int src10=src[10*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
        dst++;\
        src++;\
    }\
}\
\
static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    const int h=8;\
    const int w=8;\
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
    int i;\
    src -= 2*srcStride;\
    for(i=0; i<h+5; i++)\
    {\
        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3 ]);\
        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4 ]);\
        tmp[2]= (src[2]+src[3])*20 - (src[1 ]+src[4])*5 + (src[0 ]+src[5 ]);\
        tmp[3]= (src[3]+src[4])*20 - (src[2 ]+src[5])*5 + (src[1 ]+src[6 ]);\
        tmp[4]= (src[4]+src[5])*20 - (src[3 ]+src[6])*5 + (src[2 ]+src[7 ]);\
        tmp[5]= (src[5]+src[6])*20 - (src[4 ]+src[7])*5 + (src[3 ]+src[8 ]);\
        tmp[6]= (src[6]+src[7])*20 - (src[5 ]+src[8])*5 + (src[4 ]+src[9 ]);\
        tmp[7]= (src[7]+src[8])*20 - (src[6 ]+src[9])*5 + (src[5 ]+src[10]);\
        tmp+=tmpStride;\
        src+=srcStride;\
    }\
    tmp -= tmpStride*(h+5-2);\
    for(i=0; i<w; i++)\
    {\
        const int tmpB= tmp[-2*tmpStride];\
        const int tmpA= tmp[-1*tmpStride];\
        const int tmp0= tmp[0 *tmpStride];\
        const int tmp1= tmp[1 *tmpStride];\
        const int tmp2= tmp[2 *tmpStride];\
        const int tmp3= tmp[3 *tmpStride];\
        const int tmp4= tmp[4 *tmpStride];\
        const int tmp5= tmp[5 *tmpStride];\
        const int tmp6= tmp[6 *tmpStride];\
        const int tmp7= tmp[7 *tmpStride];\
        const int tmp8= tmp[8 *tmpStride];\
        const int tmp9= tmp[9 *tmpStride];\
        const int tmp10=tmp[10*tmpStride];\
        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));\
        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
        OP2(dst[2*dstStride], (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));\
        OP2(dst[3*dstStride], (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));\
        OP2(dst[4*dstStride], (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));\
        OP2(dst[5*dstStride], (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));\
        OP2(dst[6*dstStride], (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));\
        OP2(dst[7*dstStride], (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));\
        dst++;\
        tmp++;\
    }\
}\
\
static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_v_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_v_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_h_lowpass(dst  , src  , dstStride, srcStride);\
    OPNAME ## h264_qpel8_h_lowpass(dst+8, src+8, dstStride, srcStride);\
}\
\
static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
    src += 8*srcStride;\
    dst += 8*dstStride;\
    OPNAME ## h264_qpel8_hv_lowpass(dst  , tmp  , src  , dstStride, tmpStride, srcStride);\
    OPNAME ## h264_qpel8_hv_lowpass(dst+8, tmp+8, src+8, dstStride, tmpStride, srcStride);\
}\

  
2368
/*
 * H264_MC(OPNAME, SIZE) emits the 16 quarter-pel motion-compensation
 * entry points OPNAME##h264_qpel##SIZE##_mcXY_c, where X and Y (0..3) are
 * the quarter-pel fractional parts of the motion vector.  mc00 is a plain
 * full-pel copy/average; even half-pel positions (mc20/mc02/mc22) apply the
 * six-tap lowpass filters directly; all other positions compute one or two
 * half-pel intermediates on the stack and combine them (or the source) via
 * *_pixels##SIZE##_l2 (two-source op, defined elsewhere).  The vertical
 * filter needs 2 rows above and 3 below the block, so copy_block##SIZE
 * first gathers SIZE+5 rows starting at src - 2*stride into 'full'
 * ('full_mid' points at the block's own first row within it).
 */
#define H264_MC(OPNAME, SIZE) \
static av_unused void OPNAME ## h264_qpel ## SIZE ## _mc00_c (uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_c(uint8_t *dst, uint8_t *src, int stride){\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t half[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, src+1, half, stride, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t half[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_c(uint8_t *dst, uint8_t *src, int stride){\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfH[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, int stride){\
    uint8_t full[SIZE*(SIZE+5)];\
    uint8_t * const full_mid= full + SIZE*2;\
    int16_t tmp[SIZE*(SIZE+5)];\
    uint8_t halfV[SIZE*SIZE];\
    uint8_t halfHV[SIZE*SIZE];\
    copy_block ## SIZE (full, src - stride*2 + 1, SIZE,  stride, SIZE + 5);\
    put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
    put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
}\

  
2505
/*
 * Rounding/store ops used to instantiate the H.264 qpel templates.  'put'
 * stores the clamped result, 'avg' averages it (rounding up) with the value
 * already in the destination.  Single-pass filter outputs carry a factor of
 * 32 (bias 16, >>5); h+v double-filtered outputs a factor of 1024 (bias 512,
 * >>10).  'cm' is the local ff_cropTbl pointer declared inside each generated
 * function, clamping results to the 8-bit sample range.
 */
#define op_avg(a, b)  a = (((a)+cm[((b) + 16)>>5]+1)>>1)
//#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
#define op_put(a, b)  a = cm[((b) + 16)>>5]
#define op2_avg(a, b)  a = (((a)+cm[((b) + 512)>>10]+1)>>1)
#define op2_put(a, b)  a = cm[((b) + 512)>>10]

H264_LOWPASS(put_       , op_put, op2_put)
H264_LOWPASS(avg_       , op_avg, op2_avg)
H264_MC(put_, 2)
H264_MC(put_, 4)
H264_MC(put_, 8)
H264_MC(put_, 16)
/* note: no avg_ 2x2 variant is instantiated */
H264_MC(avg_, 4)
H264_MC(avg_, 8)
H264_MC(avg_, 16)

#undef op_avg
#undef op_put
#undef op2_avg
#undef op2_put
#endif

/* full-pel (mc00) positions need no filtering: alias them to plain copies */
#define put_h264_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_h264_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_h264_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_h264_qpel16_mc00_c ff_avg_pixels16x16_c
2531

  
2532 1332
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
2533 1333
    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
2534 1334
    int i;
......
2547 1347
    }
2548 1348
}
2549 1349

  
2550
/* Exported fixed-size entry points: thin wrappers binding the height
 * parameter of the static put/avg pixel primitives to 8 resp. 16. */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels8_c(dst, src, stride, 8);
}
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels8_c(dst, src, stride, 8);
}
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
    put_pixels16_c(dst, src, stride, 16);
}
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_c(dst, src, stride, 16);
}
2562

  
2563 1350
#if CONFIG_RV40_DECODER
2564 1351
static void put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, int stride){
2565 1352
    put_pixels16_xy2_c(dst, src, stride, 16);
......
3117 1904
    }
3118 1905
}
3119 1906

  
3120
/* Zero one 64-coefficient DCT block. */
static void clear_block_c(DCTELEM *block)
{
    memset(block, 0, 64 * sizeof(*block));
}
3124

  
3125
/**
3126
 * memset(blocks, 0, sizeof(DCTELEM)*6*64)
3127
 */
3128
static void clear_blocks_c(DCTELEM *blocks)
{
    /* six contiguous 64-coefficient blocks, cleared in a single call */
    memset(blocks, 0, 6 * 64 * sizeof(*blocks));
}
3132

  
3133 1907
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
3134 1908
    long i;
3135 1909
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
libavcodec/dsputil_internal.h
1
/*
2
 * DSP utils
3
 * Copyright (c) 2000, 2001 Fabrice Bellard
4
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
7
 *
8
 * This file is part of FFmpeg.
9
 *
10
 * FFmpeg is free software; you can redistribute it and/or
11
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13
 * version 2.1 of the License, or (at your option) any later version.
14
 *
15
 * FFmpeg is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Lesser General Public
21
 * License along with FFmpeg; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
 */
24

  
25
/**
26
 * @file
27
 * DSP utils
28
 */
29

  
30
#include "dsputil.h"
31

  
32
/* draw the edges of width 'w' of an image of size width, height */
33
//FIXME check that this is ok for mpeg4 interlaced
34
static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w, int sides)
35
{
36
    uint8_t *ptr, *last_line;
37
    int i;
38

  
39
    /* left and right */
40
    ptr = buf;
41
    for(i=0;i<height;i++) {
42
        memset(ptr - w, ptr[0], w);
43
        memset(ptr + width, ptr[width-1], w);
44
        ptr += wrap;
45
    }
46

  
47
    /* top and bottom + corners */
48
    buf -= w;
49
    last_line = buf + (height - 1) * wrap;
50
    if (sides & EDGE_TOP)
51
        for(i = 0; i < w; i++)
52
            memcpy(buf - (i + 1) * wrap, buf, width + w + w); // top
53
    if (sides & EDGE_BOTTOM)
54
        for (i = 0; i < w; i++)
55
            memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); // bottom
56
}
57

  
58
/**
59
 * Copy a rectangular area of samples to a temporary buffer and replicate the border samples.
60
 * @param buf destination buffer
61
 * @param src source buffer
62
 * @param linesize number of bytes between 2 vertically adjacent samples in both the source and destination buffers
63
 * @param block_w width of block
64
 * @param block_h height of block
65
 * @param src_x x coordinate of the top left sample of the block in the source buffer
66
 * @param src_y y coordinate of the top left sample of the block in the source buffer
67
 * @param w width of the source buffer
68
 * @param h height of the source buffer
69
 */
70
void ff_emulated_edge_mc(uint8_t *buf, const uint8_t *src, int linesize, int block_w, int block_h,
                                    int src_x, int src_y, int w, int h){
    int x, y;
    int start_y, start_x, end_y, end_x;

    /* If the requested block lies entirely outside the picture, shift the
     * read position so that it still overlaps the picture by exactly one
     * row/column; the replication below then fills the whole block. */
    if(src_y>= h){
        src+= (h-1-src_y)*linesize;
        src_y=h-1;
    }else if(src_y<=-block_h){
        src+= (1-block_h-src_y)*linesize;
        src_y=1-block_h;
    }
    if(src_x>= w){
        src+= (w-1-src_x);
        src_x=w-1;
    }else if(src_x<=-block_w){
        src+= (1-block_w-src_x);
        src_x=1-block_w;
    }

    /* [start_*, end_*) is the sub-rectangle of the block that is covered by
     * real picture data; non-empty thanks to the clamping above. */
    start_y= FFMAX(0, -src_y);
    start_x= FFMAX(0, -src_x);
    end_y= FFMIN(block_h, h-src_y);
    end_x= FFMIN(block_w, w-src_x);
    assert(start_y < end_y && block_h);
    assert(start_x < end_x && block_w);

    w    = end_x - start_x;  /* 'w' is reused: width of the valid part in bytes */
    src += start_y*linesize + start_x;
    buf += start_x;

    //top: replicate the first valid source row upwards
    for(y=0; y<start_y; y++){
        memcpy(buf, src, w);
        buf += linesize;
    }

    // copy existing part
    for(; y<end_y; y++){
        memcpy(buf, src, w);
        src += linesize;
        buf += linesize;
    }

    //bottom: src now points one row past the valid data, so step back first
    src -= linesize;
    for(; y<block_h; y++){
        memcpy(buf, src, w);
        buf += linesize;
    }

    /* second pass: extend every row of 'buf' horizontally from its
     * outermost valid samples (done after the vertical pass so the
     * replicated top/bottom rows get corners too) */
    buf -= block_h * linesize + start_x;
    while (block_h--){
       //left
        for(x=0; x<start_x; x++){
            buf[x] = buf[start_x];
        }

       //right
        for(x=end_x; x<block_w; x++){
            buf[x] = buf[end_x - 1];
        }
        buf += linesize;
    }
}
135

  
136
/* Add an 8x8 block of coefficients to the destination pixels, row by row.
 * The 8-bit destination additions wrap exactly as in the unrolled original. */
static void add_pixels8_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int y, x;
    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x++)
            pixels[x] += block[x];
        pixels += line_size;
        block  += 8;
    }
}
152

  
153
/* Add a 4x4 block of coefficients to the destination pixels, row by row. */
static void add_pixels4_c(uint8_t *restrict pixels, DCTELEM *block, int line_size)
{
    int y, x;
    for (y = 0; y < 4; y++) {
        for (x = 0; x < 4; x++)
            pixels[x] += block[x];
        pixels += line_size;
        block  += 4;
    }
}
165

  
166
#if 0
167

  
168
#define PIXOP2(OPNAME, OP) \
169
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
170
{\
171
    int i;\
172
    for(i=0; i<h; i++){\
173
        OP(*((uint64_t*)block), AV_RN64(pixels));\
174
        pixels+=line_size;\
175
        block +=line_size;\
176
    }\
177
}\
178
\
179
static void OPNAME ## _no_rnd_pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
180
{\
181
    int i;\
182
    for(i=0; i<h; i++){\
183
        const uint64_t a= AV_RN64(pixels  );\
184
        const uint64_t b= AV_RN64(pixels+1);\
185
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
186
        pixels+=line_size;\
187
        block +=line_size;\
188
    }\
189
}\
190
\
191
static void OPNAME ## _pixels_x2_c(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff