ffmpeg / libavcodec / cabac.h @ 849f1035
History | View | Annotate | Download (28.3 KB)
1 |
/*
|
---|---|
2 |
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
|
3 |
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
|
4 |
*
|
5 |
* This file is part of FFmpeg.
|
6 |
*
|
7 |
* FFmpeg is free software; you can redistribute it and/or
|
8 |
* modify it under the terms of the GNU Lesser General Public
|
9 |
* License as published by the Free Software Foundation; either
|
10 |
* version 2.1 of the License, or (at your option) any later version.
|
11 |
*
|
12 |
* FFmpeg is distributed in the hope that it will be useful,
|
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15 |
* Lesser General Public License for more details.
|
16 |
*
|
17 |
* You should have received a copy of the GNU Lesser General Public
|
18 |
* License along with FFmpeg; if not, write to the Free Software
|
19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
20 |
*
|
21 |
*/
|
22 |
|
23 |
/**
|
24 |
* @file cabac.h
|
25 |
* Context Adaptive Binary Arithmetic Coder.
|
26 |
*/
|
27 |
|
28 |
|
29 |
//#undef NDEBUG
|
30 |
#include <assert.h> |
31 |
#ifdef ARCH_X86
|
32 |
#include "x86_cpu.h" |
33 |
#endif
|
34 |
|
35 |
/**
 * Number of bitstream bits consumed by one decoder refill.
 * refill()/refill2() special-case 16 and otherwise read a single byte.
 */
#define CABAC_BITS 16
/** Mask selecting the low CABAC_BITS bits of CABACContext.low. */
#define CABAC_MASK ((1<<CABAC_BITS)-1)
/** When defined, get_cabac_inline() uses its branchless code paths. */
#define BRANCHLESS_CABAC_DECODER 1
/* Uncomment to enable the experimental asm in renorm_cabac_decoder_once(). */
//#define ARCH_X86_DISABLED 1
|
39 |
|
40 |
typedef struct CABACContext{ |
41 |
int low;
|
42 |
int range;
|
43 |
int outstanding_count;
|
44 |
#ifdef STRICT_LIMITS
|
45 |
int symCount;
|
46 |
#endif
|
47 |
const uint8_t *bytestream_start;
|
48 |
const uint8_t *bytestream;
|
49 |
const uint8_t *bytestream_end;
|
50 |
PutBitContext pb; |
51 |
}CABACContext; |
52 |
|
53 |
/** Combined state transition table; the decoder indexes it as (table+128)[s]. */
extern uint8_t ff_h264_mlps_state[4*64];
extern uint8_t ff_h264_lps_range[4*2*64];  ///< rangeTabLPS
extern uint8_t ff_h264_mps_state[2*64];     ///< transIdxMPS
extern uint8_t ff_h264_lps_state[2*64];     ///< transIdxLPS
/** Renormalization shift counts, indexed by a range (or low-derived) value. */
extern const uint8_t ff_h264_norm_shift[512];
58 |
|
59 |
|
60 |
/*
 * Initialization entry points; the definitions are not part of this header.
 * NOTE(review): presumably buf/buf_size describe the output (encoder) or
 * input (decoder) byte buffer - confirm against the implementation.
 */
void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
void ff_init_cabac_states(CABACContext *c);
|
63 |
|
64 |
|
65 |
/**
 * Write one resolved bit followed by every outstanding bit.
 * Each outstanding bit is written as the inverse of b (1-b), and
 * outstanding_count is zero on return.
 * @param b bit value to write (0 or 1)
 */
static inline void put_cabac_bit(CABACContext *c, int b){
    put_bits(&c->pb, 1, b);
    for(;c->outstanding_count; c->outstanding_count--){
        put_bits(&c->pb, 1, 1-b);
    }
}
71 |
|
72 |
/**
 * Renormalize the encoder: while range is below 0x100, emit or defer one bit
 * depending on which quarter low lies in, then double both range and low.
 */
static inline void renorm_cabac_encoder(CABACContext *c){
    while(c->range < 0x100){
        //FIXME optimize
        if(c->low<0x100){
            put_cabac_bit(c, 0);
        }else if(c->low<0x200){
            /* undecided: remember the bit and resolve it with the next one */
            c->outstanding_count++;
            c->low -= 0x100;
        }else{
            put_cabac_bit(c, 1);
            c->low -= 0x200;
        }

        c->range+= c->range;
        c->low += c->low;
    }
}
89 |
|
90 |
/**
 * Encode one binary symbol using an adaptive context.
 * The low bit of *state is the most probable symbol; *state is updated
 * through ff_h264_mps_state / ff_h264_lps_state depending on whether bit
 * matched it.
 * @param state context state, read and updated
 * @param bit   symbol to encode; compared against (*state)&1
 */
static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
    int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + *state];

    if(bit == ((*state)&1)){
        c->range -= RangeLPS;
        *state= ff_h264_mps_state[*state];
    }else{
        c->low += c->range - RangeLPS;
        c->range = RangeLPS;
        *state= ff_h264_lps_state[*state];
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
108 |
|
109 |
/**
 * Encode one binary symbol with a caller-supplied, non-adaptive RangeLPS.
 * A zero bit takes the "range - RangeLPS" sub-interval, a non-zero bit the
 * RangeLPS sub-interval.
 * @param RangeLPS sub-interval size; must be smaller than the current range
 * @param bit      0 or non-zero
 */
static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
    assert(c->range > RangeLPS);

    if(!bit){
        c->range -= RangeLPS;
    }else{
        c->low += c->range - RangeLPS;
        c->range = RangeLPS;
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
125 |
|
126 |
/**
|
127 |
* @param bit 0 -> write zero bit, !=0 write one bit
|
128 |
*/
|
129 |
static void put_cabac_bypass(CABACContext *c, int bit){ |
130 |
c->low += c->low; |
131 |
|
132 |
if(bit){
|
133 |
c->low += c->range; |
134 |
} |
135 |
//FIXME optimize
|
136 |
if(c->low<0x200){ |
137 |
put_cabac_bit(c, 0);
|
138 |
}else if(c->low<0x400){ |
139 |
c->outstanding_count++; |
140 |
c->low -= 0x200;
|
141 |
}else{
|
142 |
put_cabac_bit(c, 1);
|
143 |
c->low -= 0x400;
|
144 |
} |
145 |
|
146 |
#ifdef STRICT_LIMITS
|
147 |
c->symCount++; |
148 |
#endif
|
149 |
} |
150 |
|
151 |
/**
|
152 |
*
|
153 |
* @return the number of bytes written
|
154 |
*/
|
155 |
static int put_cabac_terminate(CABACContext *c, int bit){ |
156 |
c->range -= 2;
|
157 |
|
158 |
if(!bit){
|
159 |
renorm_cabac_encoder(c); |
160 |
}else{
|
161 |
c->low += c->range; |
162 |
c->range= 2;
|
163 |
|
164 |
renorm_cabac_encoder(c); |
165 |
|
166 |
assert(c->low <= 0x1FF);
|
167 |
put_cabac_bit(c, c->low>>9);
|
168 |
put_bits(&c->pb, 2, ((c->low>>7)&3)|1); |
169 |
|
170 |
flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
|
171 |
} |
172 |
|
173 |
#ifdef STRICT_LIMITS
|
174 |
c->symCount++; |
175 |
#endif
|
176 |
|
177 |
return (put_bits_count(&c->pb)+7)>>3; |
178 |
} |
179 |
|
180 |
/**
|
181 |
* put (truncated) unary binarization.
|
182 |
*/
|
183 |
static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){ |
184 |
int i;
|
185 |
|
186 |
assert(v <= max); |
187 |
|
188 |
#if 1 |
189 |
for(i=0; i<v; i++){ |
190 |
put_cabac(c, state, 1);
|
191 |
if(i < max_index) state++;
|
192 |
} |
193 |
if(truncated==0 || v<max) |
194 |
put_cabac(c, state, 0);
|
195 |
#else
|
196 |
if(v <= max_index){
|
197 |
for(i=0; i<v; i++){ |
198 |
put_cabac(c, state+i, 1);
|
199 |
} |
200 |
if(truncated==0 || v<max) |
201 |
put_cabac(c, state+i, 0);
|
202 |
}else{
|
203 |
for(i=0; i<=max_index; i++){ |
204 |
put_cabac(c, state+i, 1);
|
205 |
} |
206 |
for(; i<v; i++){
|
207 |
put_cabac(c, state+max_index, 1);
|
208 |
} |
209 |
if(truncated==0 || v<max) |
210 |
put_cabac(c, state+max_index, 0);
|
211 |
} |
212 |
#endif
|
213 |
} |
214 |
|
215 |
/**
|
216 |
* put unary exp golomb k-th order binarization.
|
217 |
*/
|
218 |
static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){ |
219 |
int i;
|
220 |
|
221 |
if(v==0) |
222 |
put_cabac(c, state, 0);
|
223 |
else{
|
224 |
const int sign= v < 0; |
225 |
|
226 |
if(is_signed) v= FFABS(v);
|
227 |
|
228 |
if(v<max){
|
229 |
for(i=0; i<v; i++){ |
230 |
put_cabac(c, state, 1);
|
231 |
if(i < max_index) state++;
|
232 |
} |
233 |
|
234 |
put_cabac(c, state, 0);
|
235 |
}else{
|
236 |
int m= 1<<k; |
237 |
|
238 |
for(i=0; i<max; i++){ |
239 |
put_cabac(c, state, 1);
|
240 |
if(i < max_index) state++;
|
241 |
} |
242 |
|
243 |
v -= max; |
244 |
while(v >= m){ //FIXME optimize |
245 |
put_cabac_bypass(c, 1);
|
246 |
v-= m; |
247 |
m+= m; |
248 |
} |
249 |
put_cabac_bypass(c, 0);
|
250 |
while(m>>=1){ |
251 |
put_cabac_bypass(c, v&m); |
252 |
} |
253 |
} |
254 |
|
255 |
if(is_signed)
|
256 |
put_cabac_bypass(c, sign); |
257 |
} |
258 |
} |
259 |
|
260 |
/**
 * Fetch the next CABAC_BITS bits of input into low.
 * The new bytes are added shifted left by one, CABAC_MASK is subtracted,
 * and the read pointer advances by CABAC_BITS/8 bytes.
 * No check against bytestream_end is made here.
 */
static void refill(CABACContext *c){
#if CABAC_BITS == 16
    c->low+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
    c->low+= c->bytestream[0]<<1;
#endif
    c->low -= CABAC_MASK;
    c->bytestream+= CABAC_BITS/8;
}
269 |
|
270 |
/**
 * Fetch the next CABAC_BITS bits of input into low after a multi-bit
 * renormalization shift.
 * The amount by which the new bits must be scaled is derived from the
 * lowest set bit of low via ff_h264_norm_shift. The read pointer advances
 * by CABAC_BITS/8 bytes; no check against bytestream_end is made here.
 */
static void refill2(CABACContext *c){
    int i, x;

    x= c->low ^ (c->low-1);
    i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];

    x= -CABAC_MASK;

#if CABAC_BITS == 16
    x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
    x+= c->bytestream[0]<<1;
#endif

    /* x may be negative here; multiply instead of shifting, because a left
     * shift of a negative value is undefined behaviour in C. */
    c->low += x * (1<<i);
    c->bytestream+= CABAC_BITS/8;
}
287 |
|
288 |
/**
 * Renormalize the decoder: double range and low until range reaches 0x100,
 * refilling whenever the low CABAC_BITS bits of low become zero.
 */
static inline void renorm_cabac_decoder(CABACContext *c){
    while(c->range < 0x100){
        c->range+= c->range;
        c->low+= c->low;
        if(!(c->low & CABAC_MASK))
            refill(c);
    }
}
296 |
|
297 |
/**
 * Perform at most one renormalization step: range and low are doubled when
 * range is below 0x100, then low is refilled if its low CABAC_BITS bits are
 * zero.
 *
 * The asm variants are only compiled when ARCH_X86_DISABLED is defined; the
 * "P3:/athlon:" comments are the author's benchmark notes for each variant.
 */
static inline void renorm_cabac_decoder_once(CABACContext *c){
#ifdef ARCH_X86_DISABLED
    int temp;
#if 0
    //P3:683 athlon:475
    asm(
        "lea -0x100(%0), %2 \n\t"
        "shr $31, %2 \n\t" //FIXME 31->63 for x86-64
        "shl %%cl, %0 \n\t"
        "shl %%cl, %1 \n\t"
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
    );
#elif 0
    //P3:680 athlon:474
    asm(
        "cmp $0x100, %0 \n\t"
        "setb %%cl \n\t" //FIXME 31->63 for x86-64
        "shl %%cl, %0 \n\t"
        "shl %%cl, %1 \n\t"
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
    );
#elif 1
    int temp2;
    //P3:665 athlon:517
    asm(
        "lea -0x100(%0), %%eax \n\t"
        "cdq \n\t"
        "mov %0, %%eax \n\t"
        "and %%edx, %0 \n\t"
        "and %1, %%edx \n\t"
        "add %%eax, %0 \n\t"
        "add %%edx, %1 \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#elif 0
    int temp2;
    //P3:673 athlon:509
    asm(
        "cmp $0x100, %0 \n\t"
        "sbb %%edx, %%edx \n\t"
        "mov %0, %%eax \n\t"
        "and %%edx, %0 \n\t"
        "and %1, %%edx \n\t"
        "add %%eax, %0 \n\t"
        "add %%edx, %1 \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#else
    int temp2;
    //P3:677 athlon:511
    asm(
        "cmp $0x100, %0 \n\t"
        "lea (%0, %0), %%eax \n\t"
        "lea (%1, %1), %%edx \n\t"
        "cmovb %%eax, %0 \n\t"
        "cmovb %%edx, %1 \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#endif
#else
    //P3:675 athlon:476
    /* shift is 1 exactly when range - 0x100 is negative, i.e. range < 0x100 */
    int shift= (uint32_t)(c->range - 0x100)>>31;
    c->range<<= shift;
    c->low <<= shift;
#endif
    if(!(c->low & CABAC_MASK))
        refill(c);
}
365 |
|
366 |
/**
 * Decode one context-coded binary symbol.
 *
 * Three implementations are selected at compile time: x86 inline asm with
 * branches, x86 inline asm without branches (BRANCHLESS_CABAC_DECODER), and
 * portable C in both flavours when the asm cannot be used (non-x86, or PIC
 * builds with GCC).
 *
 * The LOW, RANGE, BYTESTART, BYTE and BYTEEND string macros defined here are
 * byte offsets into CABACContext used by the asm; they stay defined after
 * this function and are reused by the asm further down in this file.
 * NOTE(review): the offsets appear to assume STRICT_LIMITS is not defined -
 * confirm.
 *
 * @param c     decoder context, low/range/bytestream are updated
 * @param state context state, read and replaced by its successor state
 * @return the decoded bit (0 or 1)
 */
static int av_always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
    //FIXME gcc generates duplicate load/stores for c->low and c->range
#define LOW "0"
#define RANGE "4"
#ifdef ARCH_X86_64
#define BYTESTART "16"
#define BYTE "24"
#define BYTEEND "32"
#else
#define BYTESTART "12"
#define BYTE "16"
#define BYTEEND "20"
#endif
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
    int bit;

#ifndef BRANCHLESS_CABAC_DECODER
    asm volatile(
        "movzbl (%1), %0 \n\t"
        "movl "RANGE "(%2), %%ebx \n\t"
        "movl "RANGE "(%2), %%edx \n\t"
        "andl $0xC0, %%ebx \n\t"
        "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
        "movl "LOW "(%2), %%ebx \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "subl %%esi, %%edx \n\t"
        "movl %%edx, %%ecx \n\t"
        "shll $17, %%ecx \n\t"
        "cmpl %%ecx, %%ebx \n\t"
        " ja 1f \n\t"

#if 1
        //athlon:4067 P3:4110
        "lea -0x100(%%edx), %%ecx \n\t"
        "shr $31, %%ecx \n\t"
        "shl %%cl, %%edx \n\t"
        "shl %%cl, %%ebx \n\t"
#else
        //athlon:4057 P3:4130
        "cmp $0x100, %%edx \n\t" //FIXME avoidable
        "setb %%cl \n\t"
        "shl %%cl, %%edx \n\t"
        "shl %%cl, %%ebx \n\t"
#endif
        "movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx \n\t"
        "movb %%cl, (%1) \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "test %%bx, %%bx \n\t"
        " jnz 2f \n\t"
        "mov "BYTE "(%2), %%"REG_S" \n\t"
        "subl $0xFFFF, %%ebx \n\t"
        "movzwl (%%"REG_S"), %%ecx \n\t"
        "bswap %%ecx \n\t"
        "shrl $15, %%ecx \n\t"
        "add $2, %%"REG_S" \n\t"
        "addl %%ecx, %%ebx \n\t"
        "mov %%"REG_S", "BYTE "(%2) \n\t"
        "jmp 2f \n\t"
        "1: \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "subl %%ecx, %%ebx \n\t"
        "movl %%esi, %%edx \n\t"
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
        "shll %%cl, %%ebx \n\t"
        "shll %%cl, %%edx \n\t"
        "movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx \n\t"
        "movb %%cl, (%1) \n\t"
        "add $1, %0 \n\t"
        "test %%bx, %%bx \n\t"
        " jnz 2f \n\t"

        "mov "BYTE "(%2), %%"REG_c" \n\t"
        "movzwl (%%"REG_c"), %%esi \n\t"
        "bswap %%esi \n\t"
        "shrl $15, %%esi \n\t"
        "subl $0xFFFF, %%esi \n\t"
        "add $2, %%"REG_c" \n\t"
        "mov %%"REG_c", "BYTE "(%2) \n\t"

        "leal -1(%%ebx), %%ecx \n\t"
        "xorl %%ebx, %%ecx \n\t"
        "shrl $15, %%ecx \n\t"
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
        "neg %%ecx \n\t"
        "add $7, %%ecx \n\t"

        "shll %%cl , %%esi \n\t"
        "addl %%esi, %%ebx \n\t"
        "2: \n\t"
        "movl %%edx, "RANGE "(%2) \n\t"
        "movl %%ebx, "LOW "(%2) \n\t"
        :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or miscompiles it (for example if "+a"(bit) or "+m"(*state) is used
        :"r"(state), "r"(c)
        : "%"REG_c, "%ebx", "%edx", "%"REG_S, "memory"
    );
    bit&=1;
#else /* BRANCHLESS_CABAC_DECODER */


#if defined CMOV_IS_FAST
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "mov "tmp" , %%ecx \n\t"\
        "shl $17 , "tmp" \n\t"\
        "cmp "low" , "tmp" \n\t"\
        "cmova %%ecx , "range" \n\t"\
        "sbb %%ecx , %%ecx \n\t"\
        "and %%ecx , "tmp" \n\t"\
        "sub "tmp" , "low" \n\t"\
        "xor %%ecx , "ret" \n\t"
#else /* CMOV_IS_FAST */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "mov "tmp" , %%ecx \n\t"\
        "shl $17 , "tmp" \n\t"\
        "sub "low" , "tmp" \n\t"\
        "sar $31 , "tmp" \n\t" /*lps_mask*/\
        "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
        "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
        "add %%ecx , "range" \n\t" /*new range*/\
        "shl $17 , %%ecx \n\t"\
        "and "tmp" , %%ecx \n\t"\
        "sub %%ecx , "low" \n\t"\
        "xor "tmp" , "ret" \n\t"
#endif /* CMOV_IS_FAST */


#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "movzbl "statep" , "ret" \n\t"\
        "mov "range" , "tmp" \n\t"\
        "and $0xC0 , "range" \n\t"\
        "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
        "sub "range" , "tmp" \n\t"\
        BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
        "shl %%cl , "range" \n\t"\
        "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
        "mov "tmpbyte" , "statep" \n\t"\
        "shl %%cl , "low" \n\t"\
        "test "lowword" , "lowword" \n\t"\
        " jnz 1f \n\t"\
        "mov "BYTE"("cabac"), %%"REG_c" \n\t"\
        "movzwl (%%"REG_c") , "tmp" \n\t"\
        "bswap "tmp" \n\t"\
        "shr $15 , "tmp" \n\t"\
        "sub $0xFFFF , "tmp" \n\t"\
        "add $2 , %%"REG_c" \n\t"\
        "mov %%"REG_c" , "BYTE "("cabac") \n\t"\
        "lea -1("low") , %%ecx \n\t"\
        "xor "low" , %%ecx \n\t"\
        "shr $15 , %%ecx \n\t"\
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
        "neg %%ecx \n\t"\
        "add $7 , %%ecx \n\t"\
        "shl %%cl , "tmp" \n\t"\
        "add "tmp" , "low" \n\t"\
        "1: \n\t"

    asm volatile(
        "movl "RANGE "(%2), %%esi \n\t"
        "movl "LOW "(%2), %%ebx \n\t"
        BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
        "movl %%esi, "RANGE "(%2) \n\t"
        "movl %%ebx, "LOW "(%2) \n\t"

        :"=&a"(bit)
        :"r"(state), "r"(c)
        : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
    );
    bit&=1;
#endif /* BRANCHLESS_CABAC_DECODER */
#else /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
    int s = *state;
    int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
    int bit, lps_mask attribute_unused;

    c->range -= RangeLPS;
#ifndef BRANCHLESS_CABAC_DECODER
    if(c->low < (c->range<<17)){
        /* most probable symbol */
        bit= s&1;
        *state= ff_h264_mps_state[s];
        renorm_cabac_decoder_once(c);
    }else{
        /* least probable symbol; bit temporarily holds the shift count */
        bit= ff_h264_norm_shift[RangeLPS];
        c->low -= (c->range<<17);
        *state= ff_h264_lps_state[s];
        c->range = RangeLPS<<bit;
        c->low <<= bit;
        bit= (s&1)^1;

        if(!(c->low & 0xFFFF)){
            refill2(c);
        }
    }
#else /* BRANCHLESS_CABAC_DECODER */
    /* lps_mask is all ones when low >= range<<17 (LPS), else zero */
    lps_mask= ((c->range<<17) - c->low)>>31;

    c->low -= (c->range<<17) & lps_mask;
    c->range += (RangeLPS - c->range) & lps_mask;

    s^=lps_mask;
    *state= (ff_h264_mlps_state+128)[s];
    bit= s&1;

    /* lps_mask is reused as the renormalization shift count */
    lps_mask= ff_h264_norm_shift[c->range];
    c->range<<= lps_mask;
    c->low <<= lps_mask;
    if(!(c->low & CABAC_MASK))
        refill2(c);
#endif /* BRANCHLESS_CABAC_DECODER */
#endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
    return bit;
}
577 |
|
578 |
/**
 * Out-of-line wrapper around get_cabac_inline(); the noinline attribute keeps
 * the compiler from expanding the decoder at every call site.
 * @return the decoded bit (0 or 1)
 */
static int __attribute((noinline)) get_cabac_noinline(CABACContext *c, uint8_t * const state){
    return get_cabac_inline(c,state);
}
581 |
|
582 |
/**
 * Decode one context-coded binary symbol; plain wrapper around
 * get_cabac_inline().
 * @param state context state, read and updated
 * @return the decoded bit (0 or 1)
 */
static int get_cabac(CABACContext *c, uint8_t * const state){
    return get_cabac_inline(c,state);
}
585 |
|
586 |
/**
 * Decode one bypass-coded bit (no context, range left unchanged).
 * low is doubled and refilled if needed, then compared against range<<17.
 * The asm variant is disabled (#if 0) and kept only for reference.
 * @return the decoded bit (0 or 1)
 */
static int get_cabac_bypass(CABACContext *c){
#if 0 //not faster
    int bit;
    asm volatile(
        "movl "RANGE "(%1), %%ebx \n\t"
        "movl "LOW "(%1), %%eax \n\t"
        "shl $17, %%ebx \n\t"
        "add %%eax, %%eax \n\t"
        "sub %%ebx, %%eax \n\t"
        "cdq \n\t"
        "and %%edx, %%ebx \n\t"
        "add %%ebx, %%eax \n\t"
        "test %%ax, %%ax \n\t"
        " jnz 1f \n\t"
        "movl "BYTE "(%1), %%"REG_b" \n\t"
        "subl $0xFFFF, %%eax \n\t"
        "movzwl (%%"REG_b"), %%ecx \n\t"
        "bswap %%ecx \n\t"
        "shrl $15, %%ecx \n\t"
        "addl $2, %%"REG_b" \n\t"
        "addl %%ecx, %%eax \n\t"
        "movl %%"REG_b", "BYTE "(%1) \n\t"
        "1: \n\t"
        "movl %%eax, "LOW "(%1) \n\t"

        :"=&d"(bit)
        :"r"(c)
        : "%eax", "%"REG_b, "%ecx", "memory"
    );
    return bit+1;
#else
    int range;
    c->low += c->low;

    if(!(c->low & CABAC_MASK))
        refill(c);

    range= c->range<<17;
    if(c->low < range){
        return 0;
    }else{
        c->low -= range;
        return 1;
    }
#endif
}
632 |
|
633 |
|
634 |
/**
 * Decode one bypass-coded bit and apply it to val as a sign, without
 * branching on the decoded bit.
 * In the C path, mask is all ones when the decoded bit is 0, and
 * (val^mask)-mask then negates val.
 * @param val value whose sign is selected by the decoded bit
 * @return val if the decoded bit is 1, -val if it is 0
 */
static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
    asm volatile(
        "movl "RANGE "(%1), %%ebx \n\t"
        "movl "LOW "(%1), %%eax \n\t"
        "shl $17, %%ebx \n\t"
        "add %%eax, %%eax \n\t"
        "sub %%ebx, %%eax \n\t"
        "cdq \n\t"
        "and %%edx, %%ebx \n\t"
        "add %%ebx, %%eax \n\t"
        "xor %%edx, %%ecx \n\t"
        "sub %%edx, %%ecx \n\t"
        "test %%ax, %%ax \n\t"
        " jnz 1f \n\t"
        "mov "BYTE "(%1), %%"REG_b" \n\t"
        "subl $0xFFFF, %%eax \n\t"
        "movzwl (%%"REG_b"), %%edx \n\t"
        "bswap %%edx \n\t"
        "shrl $15, %%edx \n\t"
        "add $2, %%"REG_b" \n\t"
        "addl %%edx, %%eax \n\t"
        "mov %%"REG_b", "BYTE "(%1) \n\t"
        "1: \n\t"
        "movl %%eax, "LOW "(%1) \n\t"

        :"+c"(val)
        :"r"(c)
        : "%eax", "%"REG_b, "%edx", "memory"
    );
    return val;
#else
    int range, mask;
    c->low += c->low;

    if(!(c->low & CABAC_MASK))
        refill(c);

    range= c->range<<17;
    c->low -= range;
    /* relies on >> of a negative int being an arithmetic shift */
    mask= c->low >> 31;
    range &= mask;
    c->low += range;
    return (val^mask)-mask;
#endif
}
680 |
|
681 |
//FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note: I'll kill you if you move my code away from under my fingers before I am finished with it!)
|
682 |
//FIXME use some macros to avoid duplicating get_cabac (can't be done yet as that would make optimization work hard)
|
683 |
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
|
684 |
/**
 * x86 asm decoder for the significance map of a coefficient block.
 *
 * Walks the context bytes from significant_coeff_ctx_base up to (but not
 * including) base + max_coeff - 1. For each position a significance flag is
 * decoded from the context at that address; when it is set, a second flag
 * is decoded from the context 61 bytes further on, the position (context
 * address minus base) is stored through index, and a set second flag ends
 * the scan. If the loop runs to completion the final position is stored too.
 *
 * NOTE(review): pointers are cast to int here, which looks 32-bit only -
 * confirm this is never built for x86-64.
 *
 * @param c                          decoder context, low/range/bytestream updated
 * @param max_coeff                  number of coefficient positions
 * @param significant_coeff_ctx_base first significance context
 * @param index                      output array receiving the stored positions
 * @return the number of positions written to index
 */
static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){
    void *end= significant_coeff_ctx_base + max_coeff - 1;
    int minusstart= -(int)significant_coeff_ctx_base;
    int minusindex= 4-(int)index;
    int coeff_count;
    asm volatile(
        "movl "RANGE "(%3), %%esi \n\t"
        "movl "LOW "(%3), %%ebx \n\t"

        "2: \n\t"

        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")

        "test $1, %%edx \n\t"
        " jz 3f \n\t"

        BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")

        "mov %2, %%"REG_a" \n\t"
        "movl %4, %%ecx \n\t"
        "add %1, %%"REG_c" \n\t"
        "movl %%ecx, (%%"REG_a") \n\t"

        "test $1, %%edx \n\t"
        " jnz 4f \n\t"

        "add $4, %%"REG_a" \n\t"
        "mov %%"REG_a", %2 \n\t"

        "3: \n\t"
        "add $1, %1 \n\t"
        "cmp %5, %1 \n\t"
        " jb 2b \n\t"
        "mov %2, %%"REG_a" \n\t"
        "movl %4, %%ecx \n\t"
        "add %1, %%"REG_c" \n\t"
        "movl %%ecx, (%%"REG_a") \n\t"
        "4: \n\t"
        "add %6, %%eax \n\t"
        "shr $2, %%eax \n\t"

        "movl %%esi, "RANGE "(%3) \n\t"
        "movl %%ebx, "LOW "(%3) \n\t"
        :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)\
        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)\
        : "%"REG_c, "%ebx", "%edx", "%esi", "memory"\
    );
    return coeff_count;
}
733 |
|
734 |
/**
 * x86 asm decoder for the significance map of an 8x8 block.
 *
 * Iterates a position counter ("last") from 0 while it stays below 63. For
 * each position the significance context is picked as
 * significant_coeff_ctx_base + sig_off[position]; when the decoded flag is
 * set, a second flag is decoded from the context at offset 15 +
 * last_coeff_flag_offset_8x8[position] from the base, the position is stored
 * through index, and a set second flag ends the scan. If the loop runs to
 * completion the final position is stored too.
 *
 * NOTE(review): index is cast to int here, which looks 32-bit only -
 * confirm this is never built for x86-64.
 *
 * @param c                          decoder context, low/range/bytestream updated
 * @param significant_coeff_ctx_base base of the significance contexts
 * @param index                      output array receiving the stored positions
 * @param sig_off                    per-position significance context offsets
 * @return the number of positions written to index
 */
static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *sig_off){
    int minusindex= 4-(int)index;
    int coeff_count;
    long last=0;
    asm volatile(
        "movl "RANGE "(%3), %%esi \n\t"
        "movl "LOW "(%3), %%ebx \n\t"

        "mov %1, %%"REG_D" \n\t"
        "2: \n\t"

        "mov %6, %%"REG_a" \n\t"
        "movzbl (%%"REG_a", %%"REG_D"), %%edi \n\t"
        "add %5, %%"REG_D" \n\t"

        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")

        "mov %1, %%edi \n\t"
        "test $1, %%edx \n\t"
        " jz 3f \n\t"

        "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
        "add %5, %%"REG_D" \n\t"

        BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")

        "mov %2, %%"REG_a" \n\t"
        "mov %1, %%edi \n\t"
        "movl %%edi, (%%"REG_a") \n\t"

        "test $1, %%edx \n\t"
        " jnz 4f \n\t"

        "add $4, %%"REG_a" \n\t"
        "mov %%"REG_a", %2 \n\t"

        "3: \n\t"
        "addl $1, %%edi \n\t"
        "mov %%edi, %1 \n\t"
        "cmpl $63, %%edi \n\t"
        " jb 2b \n\t"
        "mov %2, %%"REG_a" \n\t"
        "movl %%edi, (%%"REG_a") \n\t"
        "4: \n\t"
        "addl %4, %%eax \n\t"
        "shr $2, %%eax \n\t"

        "movl %%esi, "RANGE "(%3) \n\t"
        "movl %%ebx, "LOW "(%3) \n\t"
        :"=&a"(coeff_count),"+m"(last), "+m"(index)\
        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\
        : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"\
    );
    return coeff_count;
}
789 |
#endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */ |
790 |
|
791 |
/**
|
792 |
*
|
793 |
* @return the number of bytes read or 0 if no end
|
794 |
*/
|
795 |
static int get_cabac_terminate(CABACContext *c){ |
796 |
c->range -= 2;
|
797 |
if(c->low < c->range<<17){ |
798 |
renorm_cabac_decoder_once(c); |
799 |
return 0; |
800 |
}else{
|
801 |
return c->bytestream - c->bytestream_start;
|
802 |
} |
803 |
} |
804 |
|
805 |
/**
|
806 |
* get (truncated) unnary binarization.
|
807 |
*/
|
808 |
static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){ |
809 |
int i;
|
810 |
|
811 |
for(i=0; i<max; i++){ |
812 |
if(get_cabac(c, state)==0) |
813 |
return i;
|
814 |
|
815 |
if(i< max_index) state++;
|
816 |
} |
817 |
|
818 |
return truncated ? max : -1; |
819 |
} |
820 |
|
821 |
/**
|
822 |
* get unary exp golomb k-th order binarization.
|
823 |
*/
|
824 |
static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){ |
825 |
int i, v;
|
826 |
int m= 1<<k; |
827 |
|
828 |
if(get_cabac(c, state)==0) |
829 |
return 0; |
830 |
|
831 |
if(0 < max_index) state++; |
832 |
|
833 |
for(i=1; i<max; i++){ |
834 |
if(get_cabac(c, state)==0){ |
835 |
if(is_signed && get_cabac_bypass(c)){
|
836 |
return -i;
|
837 |
}else
|
838 |
return i;
|
839 |
} |
840 |
|
841 |
if(i < max_index) state++;
|
842 |
} |
843 |
|
844 |
while(get_cabac_bypass(c)){
|
845 |
i+= m; |
846 |
m+= m; |
847 |
} |
848 |
|
849 |
v=0;
|
850 |
while(m>>=1){ |
851 |
v+= v + get_cabac_bypass(c); |
852 |
} |
853 |
i += v; |
854 |
|
855 |
if(is_signed && get_cabac_bypass(c)){
|
856 |
return -i;
|
857 |
}else
|
858 |
return i;
|
859 |
} |