Statistics
| Branch: | Revision:

ffmpeg / postproc / rgb2rgb.c @ a3aece93

History | View | Annotate | Download (4.06 KB)

1
/* 
2
 *
3
 *  rgb2rgb.c, Software RGB to RGB converter
4
 *  Written by Nick Kurshev.
5
 */
6
#include <inttypes.h>
7
#include "../config.h"
8
#include "rgb2rgb.h"
9
#include "../mmx_defs.h"
10

    
11
#ifdef HAVE_MMX
12
static const uint64_t mask32   __attribute__((aligned(8))) = 0x00FFFFFF00FFFFFFULL;
13
static const uint64_t mask24l  __attribute__((aligned(8))) = 0x0000000000FFFFFFULL;
14
static const uint64_t mask24h  __attribute__((aligned(8))) = 0x0000FFFFFF000000ULL;
15
static const uint64_t mask15b  __attribute__((aligned(8))) = 0x001F001F001F001FULL; /* 00000000 00011111  xxB */
16
static const uint64_t mask15rg __attribute__((aligned(8))) = 0x7FE07FE07FE07FE0ULL; /* 01111111 11100000  RGx */
17
#endif
18

    
19
/*
 * Convert packed 24bpp RGB to 32bpp: every 3-byte source pixel is
 * widened to 4 bytes with the high (pad/alpha) byte set to 0.
 *
 * src      - src_size bytes of packed 24bpp pixel data
 *            (src_size is expected to be a multiple of 3)
 * dst      - destination buffer, must hold src_size/3*4 bytes
 * src_size - number of source bytes to convert
 */
void rgb24to32(uint8_t *src,uint8_t *dst,uint32_t src_size)
{
  uint8_t *dest = dst;
  uint8_t *s = src;
  uint8_t *end;
#ifdef HAVE_MMX
  uint8_t *mm_end;
#endif
  end = s + src_size;
#ifdef HAVE_MMX
  __asm __volatile(PREFETCH"	%0"::"m"(*s):"memory");
  /*
   * Each MMX iteration loads 13 source bytes (the movd at offset 9 reads
   * 4 bytes) while consuming only 12, and stores 16 destination bytes.
   * The previous bound rounded the *address* of end down to a 16-byte
   * multiple -- unrelated to the number of bytes left (and meaningless
   * unless src is aligned) -- so the loop could read past the end of
   * src.  Stopping 23 bytes short of end keeps every load and store in
   * bounds; the scalar loop below converts the remainder.
   */
  if(src_size >= 24)
  {
    mm_end = end - 23;
    __asm __volatile("movq	%0, %%mm7"::"m"(mask32):"memory");
    while(s < mm_end)
    {
      __asm __volatile(
	PREFETCH"	32%1\n\t"
	"movd	%1, %%mm0\n\t"
	"movd	3%1, %%mm1\n\t"
	"movd	6%1, %%mm2\n\t"
	"movd	9%1, %%mm3\n\t"
	"punpckldq %%mm1, %%mm0\n\t"
	"punpckldq %%mm3, %%mm2\n\t"
	"pand	%%mm7, %%mm0\n\t"
	"pand	%%mm7, %%mm2\n\t"
	MOVNTQ"	%%mm0, %0\n\t"
	MOVNTQ"	%%mm2, 8%0"
	:"=m"(*dest)
	:"m"(*s)
	:"memory");
      dest += 16;
      s += 12;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
  }
#endif
  /* Scalar tail: copy R, G, B and append a zero pad byte per pixel. */
  while(s < end)
  {
    *dest++ = *s++;
    *dest++ = *s++;
    *dest++ = *s++;
    *dest++ = 0;
  }
}
64

    
65
/*
 * Convert packed 32bpp RGB to 24bpp: every 4-byte source pixel is
 * narrowed to 3 bytes; the high (pad/alpha) byte is dropped.
 *
 * src      - src_size bytes of packed 32bpp pixel data
 *            (src_size is expected to be a multiple of 4)
 * dst      - destination buffer, must hold src_size/4*3 bytes
 * src_size - number of source bytes to convert
 */
void rgb32to24(uint8_t *src,uint8_t *dst,uint32_t src_size)
{
  uint8_t *dest = dst;
  uint8_t *s = src;
  uint8_t *end;
#ifdef HAVE_MMX
  uint8_t *mm_end;
#endif
  end = s + src_size;
#ifdef HAVE_MMX
  __asm __volatile(PREFETCH"	%0"::"m"(*s):"memory");
  /*
   * Each MMX iteration reads 16 source bytes and stores 14 destination
   * bytes (MOVNTQ at %0 and at 6%0) while advancing dest by only 12, so
   * the last 2 stored bytes spill past the 12 "owned" bytes and must
   * still land inside dst.  The previous bound rounded the *address* of
   * end down to a 16-byte multiple -- unrelated to the bytes remaining
   * -- and the final iteration could write past the end of dst.
   * Stopping 19 bytes short of end guarantees both the 16-byte read and
   * the 14-byte write stay in bounds; the scalar loop finishes the rest.
   */
  if(src_size >= 20)
  {
    mm_end = end - 19;
    __asm __volatile(
	"movq	%0, %%mm7\n\t"
	"movq	%1, %%mm6"
	::"m"(mask24l),"m"(mask24h):"memory");
    while(s < mm_end)
    {
      __asm __volatile(
	PREFETCH"	32%1\n\t"
	"movq	%1, %%mm0\n\t"
	"movq	8%1, %%mm1\n\t"
	"movq	%%mm0, %%mm2\n\t"
	"movq	%%mm1, %%mm3\n\t"
	"psrlq	$8, %%mm2\n\t"
	"psrlq	$8, %%mm3\n\t"
	"pand	%%mm7, %%mm0\n\t"
	"pand	%%mm7, %%mm1\n\t"
	"pand	%%mm6, %%mm2\n\t"
	"pand	%%mm6, %%mm3\n\t"
	"por	%%mm2, %%mm0\n\t"
	"por	%%mm3, %%mm1\n\t"
	MOVNTQ"	%%mm0, %0\n\t"
	MOVNTQ"	%%mm1, 6%0"
	:"=m"(*dest)
	:"m"(*s)
	:"memory");
      dest += 12;
      s += 16;
    }
    __asm __volatile(SFENCE:::"memory");
    __asm __volatile(EMMS:::"memory");
  }
#endif
  /* Scalar tail: copy R, G, B and skip the source pad byte. */
  while(s < end)
  {
    *dest++ = *s++;
    *dest++ = *s++;
    *dest++ = *s++;
    s++;
  }
}
117

    
118
/*
119
 Original by Strepto/Astral
120
 ported to gcc & bugfixed : A'rpi
121
 MMX,  3DNOW optimization by Nick Kurshev
122
*/
123
/*
 * Convert 15bpp (0RRRRRGG GGGBBBBB) to 16bpp (RRRRRGGG GGGBBBBB):
 * blue (low 5 bits) is kept in place, red and green are shifted left
 * by one bit; the new green LSB is left as 0.
 *
 * src      - src_size bytes of 15bpp pixel data
 *            (src_size is expected to be a multiple of 2)
 * dst      - destination buffer, must hold src_size bytes
 * src_size - number of bytes to convert
 */
void rgb15to16(uint8_t *src,uint8_t *dst,uint32_t src_size)
{
#ifdef HAVE_MMX
  /* Walk both buffers with one negative offset that counts up to 0.
     Use uint8_t* (not char*) to match the parameter type, and negate
     src_size through an explicit int cast. */
  register uint8_t* s=src+src_size;
  register uint8_t* d=dst+src_size;
  register int offs=-(int)src_size;
  __asm __volatile(PREFETCH"	%0"::"m"(*(s+offs)):"memory");
  __asm __volatile(
	"movq	%0, %%mm4\n\t"
	"movq	%1, %%mm5"
	::"m"(mask15b), "m"(mask15rg):"memory");
  while(offs<0)
  {
	__asm __volatile(
		PREFETCH"	32%1\n\t"
		"movq	%1, %%mm0\n\t"
		"movq	8%1, %%mm2\n\t"
		"movq	%%mm0, %%mm1\n\t"
		"movq	%%mm2, %%mm3\n\t"
		"pand	%%mm4, %%mm0\n\t"
		"pand	%%mm5, %%mm1\n\t"
		"pand	%%mm4, %%mm2\n\t"
		"pand	%%mm5, %%mm3\n\t"
		"psllq	$1, %%mm1\n\t"
		"psllq	$1, %%mm3\n\t"
		"por	%%mm1, %%mm0\n\t"
		"por	%%mm3, %%mm2\n\t"
		MOVNTQ"	%%mm0, %0\n\t"
		MOVNTQ"	%%mm2, 8%0"
		:"=m"(*(d+offs))
		:"m"(*(s+offs))
		:"memory");
	offs+=16;
  }
  __asm __volatile(SFENCE:::"memory");
  __asm __volatile(EMMS:::"memory");
#else
   uint16_t *s1=( uint16_t * )src;
   uint16_t *d1=( uint16_t * )dst;
   /* Add the byte count through a uint8_t* and cast back; the old code
      assigned a uint8_t* to a uint16_t* directly, which is a constraint
      violation (an error on modern compilers). */
   uint16_t *e=( uint16_t * )( ( (uint8_t *)s1 )+src_size );
   while( s1<e ){
     register int x=*( s1++ );
     /* rrrrrggggggbbbbb
        0rrrrrgggggbbbbb
        0111 1111 1110 0000=0x7FE0
        00000000000001 1111=0x001F */
     *( d1++ )=( x&0x001F )|( ( x&0x7FE0 )<<1 );
   }
#endif
}