ffmpeg / libavcodec / cabac.h @ 5b21bdab
History | View | Annotate | Download (28.9 KB)
1 |
/*
|
---|---|
2 |
* H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
|
3 |
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
|
4 |
*
|
5 |
* This file is part of FFmpeg.
|
6 |
*
|
7 |
* FFmpeg is free software; you can redistribute it and/or
|
8 |
* modify it under the terms of the GNU Lesser General Public
|
9 |
* License as published by the Free Software Foundation; either
|
10 |
* version 2.1 of the License, or (at your option) any later version.
|
11 |
*
|
12 |
* FFmpeg is distributed in the hope that it will be useful,
|
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
15 |
* Lesser General Public License for more details.
|
16 |
*
|
17 |
* You should have received a copy of the GNU Lesser General Public
|
18 |
* License along with FFmpeg; if not, write to the Free Software
|
19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
20 |
*/
|
21 |
|
22 |
/**
|
23 |
* @file cabac.h
|
24 |
* Context Adaptive Binary Arithmetic Coder.
|
25 |
*/
|
26 |
|
27 |
#ifndef FFMPEG_CABAC_H
|
28 |
#define FFMPEG_CABAC_H
|
29 |
|
30 |
#include "bitstream.h" |
31 |
|
32 |
//#undef NDEBUG
|
33 |
#include <assert.h> |
34 |
#ifdef ARCH_X86
|
35 |
#include "x86_cpu.h" |
36 |
#endif
|
37 |
|
38 |
/* Number of bitstream bits consumed per refill; refill() advances the read pointer by CABAC_BITS/8 bytes. */
#define CABAC_BITS 16 |
39 |
/* Mask covering the low CABAC_BITS bits of c->low; the decoder refills whenever (low & CABAC_MASK) is zero. */
#define CABAC_MASK ((1<<CABAC_BITS)-1) |
40 |
/* When defined, get_cabac_inline() uses its branch-free variants (it is tested with #ifndef there). */
#define BRANCHLESS_CABAC_DECODER 1 |
41 |
/* Enabling this define selects the inline-asm variants inside renorm_cabac_decoder_once(). */
//#define ARCH_X86_DISABLED 1
|
42 |
|
43 |
/**
 * State shared by the CABAC encoder and decoder routines in this header.
 *
 * The field order is significant: the inline asm in this file addresses
 * low, range and bytestream through the fixed byte offsets given by the
 * LOW, RANGE and BYTE string macros, so reordering fields (or enabling
 * STRICT_LIMITS, which inserts symCount before the pointers) changes
 * those offsets.
 */
typedef struct CABACContext{ |
44 |
/* Coder offset register; the decoder keeps it scaled by CABAC_BITS+1 bits relative to range. */
int low;
|
45 |
/* Current interval width; renormalised whenever it falls below 0x100. */
int range;
|
46 |
/* Encoder only: count of pending bits that put_cabac_bit() emits inverted after the next resolved bit. */
int outstanding_count;
|
47 |
#ifdef STRICT_LIMITS
|
48 |
/* Incremented once per encoded symbol by the put_cabac*() functions. */
int symCount;
|
49 |
#endif
|
50 |
/* Start of the input buffer; get_cabac_terminate() reports bytestream - bytestream_start. */
const uint8_t *bytestream_start;
|
51 |
/* Next input byte to be fetched by refill()/refill2(). */
const uint8_t *bytestream;
|
52 |
/* Not read by any code in this header; presumably the end of the input buffer — confirm against the initialiser. */
const uint8_t *bytestream_end;
|
53 |
/* Bit writer used by the encoder side (put_bits / flush_put_bits). */
PutBitContext pb; |
54 |
}CABACContext; |
55 |
|
56 |
/* Combined next-state table; the branch-free decoder indexes it as (ff_h264_mlps_state+128)[s], where s has been XOR-ed with the LPS mask. */
extern uint8_t ff_h264_mlps_state[4*64]; |
57 |
/* LPS sub-range lookup, indexed in this file as 2*(range&0xC0) + state. */
extern uint8_t ff_h264_lps_range[4*2*64]; ///< rangeTabLPS |
58 |
/* Next state after coding the most probable symbol, indexed by the current state byte. */
extern uint8_t ff_h264_mps_state[2*64]; ///< transIdxMPS |
59 |
/* Next state after coding the least probable symbol, indexed by the current state byte. */
extern uint8_t ff_h264_lps_state[2*64]; ///< transIdxLPS |
60 |
/* Shift-count lookup used for renormalisation (indexed by a range value) and by refill2() to locate the lowest set bit of low. */
extern const uint8_t ff_h264_norm_shift[512]; |
61 |
|
62 |
|
63 |
/* Encoder setup; defined outside this header. Presumably attaches buf/buf_size to c->pb — TODO confirm in cabac.c. */
void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size); |
64 |
/* Decoder setup; defined outside this header. Presumably primes low/range and the bytestream pointers from buf — TODO confirm in cabac.c. */
void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size); |
65 |
/* Defined outside this header. NOTE(review): likely fills the ff_h264_* state tables declared above — confirm. */
void ff_init_cabac_states(CABACContext *c);
|
66 |
|
67 |
|
68 |
/**
 * Write one resolved bit, then flush every pending outstanding bit.
 * Each outstanding bit is written as the complement (1-b) of the resolved
 * bit; outstanding_count is zero when the function returns.
 */
static inline void put_cabac_bit(CABACContext *c, int b){
    const int inverse = 1 - b;

    put_bits(&c->pb, 1, b);

    while (c->outstanding_count) {
        put_bits(&c->pb, 1, inverse);
        c->outstanding_count--;
    }
}
74 |
|
75 |
/**
 * Encoder renormalisation: double range and low until range is at least
 * 0x100. Before each doubling the top of low decides whether a bit can be
 * emitted (0 or 1) or has to be deferred as an outstanding bit.
 */
static inline void renorm_cabac_encoder(CABACContext *c){
    while (c->range < 0x100) {
        //FIXME optimize
        if (c->low >= 0x200) {
            /* upper half: a 1 is certain */
            put_cabac_bit(c, 1);
            c->low -= 0x200;
        } else if (c->low >= 0x100) {
            /* middle: outcome still undecided, postpone it */
            c->outstanding_count++;
            c->low -= 0x100;
        } else {
            /* lower half: a 0 is certain */
            put_cabac_bit(c, 0);
        }

        c->range += c->range;
        c->low   += c->low;
    }
}
92 |
|
93 |
#if 0
|
94 |
/**
 * Encode one context-coded bin.
 * @param state context state byte; its low bit is the current MPS value,
 *              and it is replaced by the next state from the MPS/LPS tables
 * @param bit   bin value to encode (compared against the MPS bit)
 */
static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
    const int s         = *state;
    const int range_lps = ff_h264_lps_range[2*(c->range&0xC0) + s];

    if (bit != (s & 1)) {
        /* least probable symbol: jump over the MPS sub-interval */
        c->low  += c->range - range_lps;
        c->range = range_lps;
        *state   = ff_h264_lps_state[s];
    } else {
        /* most probable symbol: keep the lower sub-interval */
        c->range -= range_lps;
        *state    = ff_h264_mps_state[s];
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
112 |
|
113 |
/**
 * Encode one bin with a caller-supplied, fixed LPS range (no context state).
 * A zero bit keeps the lower sub-interval; a non-zero bit selects the
 * upper sub-interval of width RangeLPS.
 */
static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
    assert(c->range > RangeLPS);

    if (bit) {
        c->low  += c->range - RangeLPS;
        c->range = RangeLPS;
    } else {
        c->range -= RangeLPS;
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
129 |
|
130 |
/**
|
131 |
* @param bit 0 -> write zero bit, !=0 write one bit
|
132 |
*/
|
133 |
static void put_cabac_bypass(CABACContext *c, int bit){ |
134 |
c->low += c->low; |
135 |
|
136 |
if(bit){
|
137 |
c->low += c->range; |
138 |
} |
139 |
//FIXME optimize
|
140 |
if(c->low<0x200){ |
141 |
put_cabac_bit(c, 0);
|
142 |
}else if(c->low<0x400){ |
143 |
c->outstanding_count++; |
144 |
c->low -= 0x200;
|
145 |
}else{
|
146 |
put_cabac_bit(c, 1);
|
147 |
c->low -= 0x400;
|
148 |
} |
149 |
|
150 |
#ifdef STRICT_LIMITS
|
151 |
c->symCount++; |
152 |
#endif
|
153 |
} |
154 |
|
155 |
/**
|
156 |
*
|
157 |
* @return the number of bytes written
|
158 |
*/
|
159 |
static int put_cabac_terminate(CABACContext *c, int bit){ |
160 |
c->range -= 2;
|
161 |
|
162 |
if(!bit){
|
163 |
renorm_cabac_encoder(c); |
164 |
}else{
|
165 |
c->low += c->range; |
166 |
c->range= 2;
|
167 |
|
168 |
renorm_cabac_encoder(c); |
169 |
|
170 |
assert(c->low <= 0x1FF);
|
171 |
put_cabac_bit(c, c->low>>9);
|
172 |
put_bits(&c->pb, 2, ((c->low>>7)&3)|1); |
173 |
|
174 |
flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
|
175 |
} |
176 |
|
177 |
#ifdef STRICT_LIMITS
|
178 |
c->symCount++; |
179 |
#endif
|
180 |
|
181 |
return (put_bits_count(&c->pb)+7)>>3; |
182 |
} |
183 |
|
184 |
/**
|
185 |
* put (truncated) unary binarization.
|
186 |
*/
|
187 |
static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){ |
188 |
int i;
|
189 |
|
190 |
assert(v <= max); |
191 |
|
192 |
#if 1 |
193 |
for(i=0; i<v; i++){ |
194 |
put_cabac(c, state, 1);
|
195 |
if(i < max_index) state++;
|
196 |
} |
197 |
if(truncated==0 || v<max) |
198 |
put_cabac(c, state, 0);
|
199 |
#else
|
200 |
if(v <= max_index){
|
201 |
for(i=0; i<v; i++){ |
202 |
put_cabac(c, state+i, 1);
|
203 |
} |
204 |
if(truncated==0 || v<max) |
205 |
put_cabac(c, state+i, 0);
|
206 |
}else{
|
207 |
for(i=0; i<=max_index; i++){ |
208 |
put_cabac(c, state+i, 1);
|
209 |
} |
210 |
for(; i<v; i++){
|
211 |
put_cabac(c, state+max_index, 1);
|
212 |
} |
213 |
if(truncated==0 || v<max) |
214 |
put_cabac(c, state+max_index, 0);
|
215 |
} |
216 |
#endif
|
217 |
} |
218 |
|
219 |
/**
|
220 |
* put unary exp golomb k-th order binarization.
|
221 |
*/
|
222 |
static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){ |
223 |
int i;
|
224 |
|
225 |
if(v==0) |
226 |
put_cabac(c, state, 0);
|
227 |
else{
|
228 |
const int sign= v < 0; |
229 |
|
230 |
if(is_signed) v= FFABS(v);
|
231 |
|
232 |
if(v<max){
|
233 |
for(i=0; i<v; i++){ |
234 |
put_cabac(c, state, 1);
|
235 |
if(i < max_index) state++;
|
236 |
} |
237 |
|
238 |
put_cabac(c, state, 0);
|
239 |
}else{
|
240 |
int m= 1<<k; |
241 |
|
242 |
for(i=0; i<max; i++){ |
243 |
put_cabac(c, state, 1);
|
244 |
if(i < max_index) state++;
|
245 |
} |
246 |
|
247 |
v -= max; |
248 |
while(v >= m){ //FIXME optimize |
249 |
put_cabac_bypass(c, 1);
|
250 |
v-= m; |
251 |
m+= m; |
252 |
} |
253 |
put_cabac_bypass(c, 0);
|
254 |
while(m>>=1){ |
255 |
put_cabac_bypass(c, v&m); |
256 |
} |
257 |
} |
258 |
|
259 |
if(is_signed)
|
260 |
put_cabac_bypass(c, sign); |
261 |
} |
262 |
} |
263 |
#endif /* 0 */ |
264 |
|
265 |
/**
 * Pull the next CABAC_BITS bits of input into low and advance the read
 * pointer by CABAC_BITS/8 bytes. Called when the low CABAC_BITS bits of
 * low are all zero. No bounds check is made against bytestream_end.
 */
static void refill(CABACContext *c){
    const uint8_t *src = c->bytestream;
#if CABAC_BITS == 16
    const int fresh = (src[0]<<9) + (src[1]<<1);
#else
    const int fresh = src[0]<<1;
#endif

    c->low += fresh;
    c->low -= CABAC_MASK;
    c->bytestream = src + CABAC_BITS/8;
}
274 |
|
275 |
#if ! ( defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) )
|
276 |
static void refill2(CABACContext *c){ |
277 |
int i, x;
|
278 |
|
279 |
x= c->low ^ (c->low-1);
|
280 |
i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)]; |
281 |
|
282 |
x= -CABAC_MASK; |
283 |
|
284 |
#if CABAC_BITS == 16 |
285 |
x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1); |
286 |
#else
|
287 |
x+= c->bytestream[0]<<1; |
288 |
#endif
|
289 |
|
290 |
c->low += x<<i; |
291 |
c->bytestream+= CABAC_BITS/8;
|
292 |
} |
293 |
#endif
|
294 |
|
295 |
/**
 * Decoder renormalisation: double range and low until range is at least
 * 0x100, refilling low whenever its low CABAC_BITS bits are exhausted.
 */
static inline void renorm_cabac_decoder(CABACContext *c){
    for (; c->range < 0x100; ) {
        c->range += c->range;
        c->low   += c->low;

        if ((c->low & CABAC_MASK) == 0)
            refill(c);
    }
}
303 |
|
304 |
/**
 * Perform at most one renormalisation step: if range is below 0x100,
 * both range and low are doubled. Afterwards low is refilled when its low
 * CABAC_BITS bits are all zero.
 *
 * The inline-asm alternatives are only compiled when ARCH_X86_DISABLED is
 * defined; they are kept with their original timing notes.
 */
static inline void renorm_cabac_decoder_once(CABACContext *c){
#ifdef ARCH_X86_DISABLED
    int temp;
#if 0
    //P3:683 athlon:475
    asm(
        "lea -0x100(%0), %2 \n\t"
        "shr $31, %2 \n\t" //FIXME 31->63 for x86-64
        "shl %%cl, %0 \n\t"
        "shl %%cl, %1 \n\t"
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
    );
#elif 0
    //P3:680 athlon:474
    asm(
        "cmp $0x100, %0 \n\t"
        "setb %%cl \n\t" //FIXME 31->63 for x86-64
        "shl %%cl, %0 \n\t"
        "shl %%cl, %1 \n\t"
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
    );
#elif 1
    int temp2;
    //P3:665 athlon:517
    asm(
        "lea -0x100(%0), %%eax \n\t"
        "cdq \n\t"
        "mov %0, %%eax \n\t"
        "and %%edx, %0 \n\t"
        "and %1, %%edx \n\t"
        "add %%eax, %0 \n\t"
        "add %%edx, %1 \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#elif 0
    int temp2;
    //P3:673 athlon:509
    asm(
        "cmp $0x100, %0 \n\t"
        "sbb %%edx, %%edx \n\t"
        "mov %0, %%eax \n\t"
        "and %%edx, %0 \n\t"
        "and %1, %%edx \n\t"
        "add %%eax, %0 \n\t"
        "add %%edx, %1 \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#else
    int temp2;
    //P3:677 athlon:511
    asm(
        "cmp $0x100, %0 \n\t"
        "lea (%0, %0), %%eax \n\t"
        "lea (%1, %1), %%edx \n\t"
        "cmovb %%eax, %0 \n\t"
        "cmovb %%edx, %1 \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#endif
#else
    //P3:675 athlon:476
    /* 1 when range < 0x100 (the subtraction goes negative), else 0 */
    const int needs_doubling = (uint32_t)(c->range - 0x100)>>31;

    c->range <<= needs_doubling;
    c->low   <<= needs_doubling;
#endif
    if ((c->low & CABAC_MASK) == 0)
        refill(c);
}
372 |
|
373 |
/**
 * Decode one context-coded bin.
 *
 * Three implementations are selected at compile time: branching x86 asm,
 * branch-free x86 asm (BRANCHLESS_CABAC_DECODER), and a portable C
 * fallback with the same two flavours. The string macros LOW, RANGE,
 * BYTESTART, BYTE and BYTEEND defined here are byte offsets into
 * CABACContext used by the asm; BRANCHLESS_GET_CABAC is reused by other
 * asm routines later in this file.
 *
 * @param state context state byte, updated in place to the next state
 * @return the decoded bin (0 or 1)
 */
static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){
    //FIXME gcc generates duplicate load/stores for c->low and c->range
#define LOW "0"
#define RANGE "4"
#ifdef ARCH_X86_64
#define BYTESTART "16"
#define BYTE "24"
#define BYTEEND "32"
#else
#define BYTESTART "12"
#define BYTE "16"
#define BYTEEND "20"
#endif
#if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
    int bit;

#ifndef BRANCHLESS_CABAC_DECODER
    asm volatile(
        "movzbl (%1), %0 \n\t"
        "movl "RANGE "(%2), %%ebx \n\t"
        "movl "RANGE "(%2), %%edx \n\t"
        "andl $0xC0, %%ebx \n\t"
        "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
        "movl "LOW "(%2), %%ebx \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "subl %%esi, %%edx \n\t"
        "movl %%edx, %%ecx \n\t"
        "shll $17, %%ecx \n\t"
        "cmpl %%ecx, %%ebx \n\t"
        " ja 1f \n\t"

#if 1
//athlon:4067 P3:4110
        "lea -0x100(%%edx), %%ecx \n\t"
        "shr $31, %%ecx \n\t"
        "shl %%cl, %%edx \n\t"
        "shl %%cl, %%ebx \n\t"
#else
//athlon:4057 P3:4130
        "cmp $0x100, %%edx \n\t" //FIXME avoidable
        "setb %%cl \n\t"
        "shl %%cl, %%edx \n\t"
        "shl %%cl, %%ebx \n\t"
#endif
        "movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx \n\t"
        "movb %%cl, (%1) \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "test %%bx, %%bx \n\t"
        " jnz 2f \n\t"
        "mov "BYTE "(%2), %%"REG_S" \n\t"
        "subl $0xFFFF, %%ebx \n\t"
        "movzwl (%%"REG_S"), %%ecx \n\t"
        "bswap %%ecx \n\t"
        "shrl $15, %%ecx \n\t"
        "add $2, %%"REG_S" \n\t"
        "addl %%ecx, %%ebx \n\t"
        "mov %%"REG_S", "BYTE "(%2) \n\t"
        "jmp 2f \n\t"
        "1: \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "subl %%ecx, %%ebx \n\t"
        "movl %%esi, %%edx \n\t"
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
        "shll %%cl, %%ebx \n\t"
        "shll %%cl, %%edx \n\t"
        "movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx \n\t"
        "movb %%cl, (%1) \n\t"
        "add $1, %0 \n\t"
        "test %%bx, %%bx \n\t"
        " jnz 2f \n\t"

        "mov "BYTE "(%2), %%"REG_c" \n\t"
        "movzwl (%%"REG_c"), %%esi \n\t"
        "bswap %%esi \n\t"
        "shrl $15, %%esi \n\t"
        "subl $0xFFFF, %%esi \n\t"
        "add $2, %%"REG_c" \n\t"
        "mov %%"REG_c", "BYTE "(%2) \n\t"

        "leal -1(%%ebx), %%ecx \n\t"
        "xorl %%ebx, %%ecx \n\t"
        "shrl $15, %%ecx \n\t"
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
        "neg %%ecx \n\t"
        "add $7, %%ecx \n\t"

        "shll %%cl , %%esi \n\t"
        "addl %%esi, %%ebx \n\t"
        "2: \n\t"
        "movl %%edx, "RANGE "(%2) \n\t"
        "movl %%ebx, "LOW "(%2) \n\t"
        :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or miscompiles it (for example if "+a"(bit) or "+m"(*state) is used
        :"r"(state), "r"(c)
        : "%"REG_c, "%ebx", "%edx", "%"REG_S, "memory"
    );
    bit&=1;
#else /* BRANCHLESS_CABAC_DECODER */


#if defined HAVE_FAST_CMOV
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "mov "tmp" , %%ecx \n\t"\
        "shl $17 , "tmp" \n\t"\
        "cmp "low" , "tmp" \n\t"\
        "cmova %%ecx , "range" \n\t"\
        "sbb %%ecx , %%ecx \n\t"\
        "and %%ecx , "tmp" \n\t"\
        "sub "tmp" , "low" \n\t"\
        "xor %%ecx , "ret" \n\t"
#else /* HAVE_FAST_CMOV */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "mov "tmp" , %%ecx \n\t"\
        "shl $17 , "tmp" \n\t"\
        "sub "low" , "tmp" \n\t"\
        "sar $31 , "tmp" \n\t" /*lps_mask*/\
        "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
        "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
        "add %%ecx , "range" \n\t" /*new range*/\
        "shl $17 , %%ecx \n\t"\
        "and "tmp" , %%ecx \n\t"\
        "sub %%ecx , "low" \n\t"\
        "xor "tmp" , "ret" \n\t"
#endif /* HAVE_FAST_CMOV */


#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "movzbl "statep" , "ret" \n\t"\
        "mov "range" , "tmp" \n\t"\
        "and $0xC0 , "range" \n\t"\
        "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
        "sub "range" , "tmp" \n\t"\
        BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\
        "shl %%cl , "range" \n\t"\
        "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\
        "mov "tmpbyte" , "statep" \n\t"\
        "shl %%cl , "low" \n\t"\
        "test "lowword" , "lowword" \n\t"\
        " jnz 1f \n\t"\
        "mov "BYTE"("cabac"), %%"REG_c" \n\t"\
        "movzwl (%%"REG_c") , "tmp" \n\t"\
        "bswap "tmp" \n\t"\
        "shr $15 , "tmp" \n\t"\
        "sub $0xFFFF , "tmp" \n\t"\
        "add $2 , %%"REG_c" \n\t"\
        "mov %%"REG_c" , "BYTE "("cabac") \n\t"\
        "lea -1("low") , %%ecx \n\t"\
        "xor "low" , %%ecx \n\t"\
        "shr $15 , %%ecx \n\t"\
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\
        "neg %%ecx \n\t"\
        "add $7 , %%ecx \n\t"\
        "shl %%cl , "tmp" \n\t"\
        "add "tmp" , "low" \n\t"\
        "1: \n\t"

    asm volatile(
        "movl "RANGE "(%2), %%esi \n\t"
        "movl "LOW "(%2), %%ebx \n\t"
        BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
        "movl %%esi, "RANGE "(%2) \n\t"
        "movl %%ebx, "LOW "(%2) \n\t"

        :"=&a"(bit)
        :"r"(state), "r"(c)
        : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
    );
    bit&=1;
#endif /* BRANCHLESS_CABAC_DECODER */
#else /* defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) */
    /* portable C implementation */
    int s = *state;
    int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
    int bit, lps_mask av_unused;
    int scaled_range;

    c->range -= RangeLPS;
    /* MPS sub-range aligned with the scaling of c->low */
    scaled_range = c->range<<(CABAC_BITS+1);
#ifndef BRANCHLESS_CABAC_DECODER
    if (c->low >= scaled_range) {
        /* least probable symbol */
        const int norm = ff_h264_norm_shift[RangeLPS];

        c->low  -= scaled_range;
        *state   = ff_h264_lps_state[s];
        c->range = RangeLPS<<norm;
        c->low <<= norm;
        bit      = (s&1)^1;

        if ((c->low & CABAC_MASK) == 0)
            refill2(c);
    } else {
        /* most probable symbol */
        bit    = s&1;
        *state = ff_h264_mps_state[s];
        renorm_cabac_decoder_once(c);
    }
#else /* BRANCHLESS_CABAC_DECODER */
    /* all ones when low >= scaled_range (LPS), zero otherwise */
    lps_mask = (scaled_range - c->low)>>31;

    c->low   -= scaled_range & lps_mask;
    c->range += (RangeLPS - c->range) & lps_mask;

    /* inverting s selects the LPS half of the combined transition table */
    s ^= lps_mask;
    *state = (ff_h264_mlps_state+128)[s];
    bit    = s&1;

    /* lps_mask is reused as the renormalisation shift from here on */
    lps_mask   = ff_h264_norm_shift[c->range];
    c->range <<= lps_mask;
    c->low   <<= lps_mask;
    if ((c->low & CABAC_MASK) == 0)
        refill2(c);
#endif /* BRANCHLESS_CABAC_DECODER */
#endif /* defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) */
    return bit;
}
584 |
|
585 |
/** Out-of-line (av_noinline) wrapper around get_cabac_inline(). */
static int av_noinline get_cabac_noinline(CABACContext *c, uint8_t * const state){
    const int bin = get_cabac_inline(c, state);

    return bin;
}
588 |
|
589 |
/** Decode one context-coded bin; forwards to get_cabac_inline(). */
static int get_cabac(CABACContext *c, uint8_t * const state){
    const int bin = get_cabac_inline(c, state);

    return bin;
}
592 |
|
593 |
/**
 * Decode one bypass (equiprobable) bin.
 * low is doubled (and refilled when exhausted), then compared against the
 * scaled range; the range is subtracted from low when the bin is 1.
 *
 * @return the decoded bin (0 or 1)
 */
static int get_cabac_bypass(CABACContext *c){
#if 0 //not faster
    int bit;
    asm volatile(
        "movl "RANGE "(%1), %%ebx \n\t"
        "movl "LOW "(%1), %%eax \n\t"
        "shl $17, %%ebx \n\t"
        "add %%eax, %%eax \n\t"
        "sub %%ebx, %%eax \n\t"
        "cdq \n\t"
        "and %%edx, %%ebx \n\t"
        "add %%ebx, %%eax \n\t"
        "test %%ax, %%ax \n\t"
        " jnz 1f \n\t"
        "movl "BYTE "(%1), %%"REG_b" \n\t"
        "subl $0xFFFF, %%eax \n\t"
        "movzwl (%%"REG_b"), %%ecx \n\t"
        "bswap %%ecx \n\t"
        "shrl $15, %%ecx \n\t"
        "addl $2, %%"REG_b" \n\t"
        "addl %%ecx, %%eax \n\t"
        "movl %%"REG_b", "BYTE "(%1) \n\t"
        "1: \n\t"
        "movl %%eax, "LOW "(%1) \n\t"

        :"=&d"(bit)
        :"r"(c)
        : "%eax", "%"REG_b, "%ecx", "memory"
    );
    return bit+1;
#else
    int scaled_range;

    c->low += c->low;
    if ((c->low & CABAC_MASK) == 0)
        refill(c);

    scaled_range = c->range<<(CABAC_BITS+1);
    if (c->low >= scaled_range) {
        c->low -= scaled_range;
        return 1;
    }
    return 0;
#endif
}
639 |
|
640 |
|
641 |
/**
 * Decode one bypass bin and apply it as a sign to val without branching.
 *
 * @return val when the decoded bin is 1 (low >= scaled range),
 *         -val when it is 0
 */
static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
    asm volatile(
        "movl "RANGE "(%1), %%ebx \n\t"
        "movl "LOW "(%1), %%eax \n\t"
        "shl $17, %%ebx \n\t"
        "add %%eax, %%eax \n\t"
        "sub %%ebx, %%eax \n\t"
        "cdq \n\t"
        "and %%edx, %%ebx \n\t"
        "add %%ebx, %%eax \n\t"
        "xor %%edx, %%ecx \n\t"
        "sub %%edx, %%ecx \n\t"
        "test %%ax, %%ax \n\t"
        " jnz 1f \n\t"
        "mov "BYTE "(%1), %%"REG_b" \n\t"
        "subl $0xFFFF, %%eax \n\t"
        "movzwl (%%"REG_b"), %%edx \n\t"
        "bswap %%edx \n\t"
        "shrl $15, %%edx \n\t"
        "add $2, %%"REG_b" \n\t"
        "addl %%edx, %%eax \n\t"
        "mov %%"REG_b", "BYTE "(%1) \n\t"
        "1: \n\t"
        "movl %%eax, "LOW "(%1) \n\t"

        :"+c"(val)
        :"r"(c)
        : "%eax", "%"REG_b, "%edx", "memory"
    );
    return val;
#else
    int scaled_range, sign_mask;

    c->low += c->low;
    if ((c->low & CABAC_MASK) == 0)
        refill(c);

    scaled_range = c->range<<(CABAC_BITS+1);
    c->low      -= scaled_range;
    /* all ones when the subtraction went negative, i.e. the bin is 0 */
    sign_mask    = c->low >> 31;
    /* undo the subtraction in that case */
    scaled_range &= sign_mask;
    c->low      += scaled_range;

    /* (val ^ -1) - (-1) == -val; (val ^ 0) - 0 == val */
    return (val^sign_mask)-sign_mask;
#endif
}
687 |
|
688 |
//FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note: I'll kill you if you move my code away from under my fingers before I am finished with it!)
|
689 |
//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet as that would make optimization work hard)
|
690 |
#if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
|
691 |
/**
 * Inline-asm loop decoding the significance / last-coefficient bins.
 *
 * Starting at significant_coeff_ctx_base, one bin is decoded per context
 * byte with BRANCHLESS_GET_CABAC. When that bin is set, a second bin is
 * decoded with the state byte located 61 bytes further on, and the current
 * position (context pointer minus the start pointer) is stored through
 * index. Decoding stops if the second bin is set; otherwise index advances
 * by one int and the loop continues. If the context pointer reaches
 * base + max_coeff - 1 without stopping, that final position is stored too.
 *
 * c->range and c->low are held in esi/ebx during the loop and written back
 * to the context at the end.
 *
 * @return number of positions stored, computed as
 *         (last used index pointer + 4 - initial index pointer) >> 2
 *
 * NOTE(review): the pointer arguments are cast to int for the offset
 * arithmetic — confirm this is only relied upon modulo 2^32 on 64-bit builds.
 */
static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){ |
692 |
/* loop bound: context pointer of the last coefficient position */
void *end= significant_coeff_ctx_base + max_coeff - 1; |
693 |
/* added to the running context pointer to recover the coefficient position */
int minusstart= -(int)significant_coeff_ctx_base; |
694 |
/* added to the final index pointer so that the shift by 2 yields the entry count */
int minusindex= 4-(int)index; |
695 |
int coeff_count;
|
696 |
asm volatile( |
697 |
"movl "RANGE "(%3), %%esi \n\t" |
698 |
"movl "LOW "(%3), %%ebx \n\t" |
699 |
|
700 |
"2: \n\t"
|
701 |
|
702 |
/* first bin: state byte at the current context pointer */
BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") |
703 |
|
704 |
"test $1, %%edx \n\t"
|
705 |
" jz 3f \n\t"
|
706 |
|
707 |
/* second bin: state byte 61 bytes past the current context pointer */
BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") |
708 |
|
709 |
"mov %2, %%"REG_a" \n\t" |
710 |
"movl %4, %%ecx \n\t"
|
711 |
"add %1, %%"REG_c" \n\t" |
712 |
"movl %%ecx, (%%"REG_a") \n\t" |
713 |
|
714 |
"test $1, %%edx \n\t"
|
715 |
" jnz 4f \n\t"
|
716 |
|
717 |
"add $4, %%"REG_a" \n\t" |
718 |
"mov %%"REG_a", %2 \n\t" |
719 |
|
720 |
"3: \n\t"
|
721 |
"add $1, %1 \n\t"
|
722 |
"cmp %5, %1 \n\t"
|
723 |
" jb 2b \n\t"
|
724 |
/* loop ran to the end: store the final position as well */
"mov %2, %%"REG_a" \n\t" |
725 |
"movl %4, %%ecx \n\t"
|
726 |
"add %1, %%"REG_c" \n\t" |
727 |
"movl %%ecx, (%%"REG_a") \n\t" |
728 |
"4: \n\t"
|
729 |
"add %6, %%eax \n\t"
|
730 |
"shr $2, %%eax \n\t"
|
731 |
|
732 |
"movl %%esi, "RANGE "(%3) \n\t" |
733 |
"movl %%ebx, "LOW "(%3) \n\t" |
734 |
:"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)\ |
735 |
:"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)\ |
736 |
: "%"REG_c, "%ebx", "%edx", "%esi", "memory"\ |
737 |
); |
738 |
return coeff_count;
|
739 |
} |
740 |
|
741 |
/**
 * Inline-asm loop decoding the significance / last-coefficient bins for the
 * 8x8 case, where the context of each position comes from lookup tables.
 *
 * The position counter `last` runs from 0 while it is below 63. For each
 * position the first bin uses the state byte at
 * significant_coeff_ctx_base + sig_off[last]. When that bin is set, a
 * second bin is decoded with the state byte at
 * significant_coeff_ctx_base + last_coeff_flag_offset_8x8[last] + 15
 * (last_coeff_flag_offset_8x8 is defined outside this header), and the
 * position is stored through index. Decoding stops if the second bin is
 * set; otherwise index advances by one int. If the loop ends with the
 * counter at 63, the value 63 is stored as well.
 *
 * c->range and c->low are held in esi/ebx during the loop and written back
 * to the context at the end.
 *
 * @return number of positions stored, computed as
 *         (last used index pointer + 4 - initial index pointer) >> 2
 */
static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *sig_off){ |
742 |
/* added to the final index pointer so that the shift by 2 yields the entry count */
int minusindex= 4-(int)index; |
743 |
int coeff_count;
|
744 |
/* current coefficient position, kept in memory operand %1 between iterations */
long last=0; |
745 |
asm volatile( |
746 |
"movl "RANGE "(%3), %%esi \n\t" |
747 |
"movl "LOW "(%3), %%ebx \n\t" |
748 |
|
749 |
"mov %1, %%"REG_D" \n\t" |
750 |
"2: \n\t"
|
751 |
|
752 |
"mov %6, %%"REG_a" \n\t" |
753 |
"movzbl (%%"REG_a", %%"REG_D"), %%edi \n\t" |
754 |
"add %5, %%"REG_D" \n\t" |
755 |
|
756 |
/* first bin: state byte at base + sig_off[last] */
BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") |
757 |
|
758 |
"mov %1, %%edi \n\t"
|
759 |
"test $1, %%edx \n\t"
|
760 |
" jz 3f \n\t"
|
761 |
|
762 |
"movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t" |
763 |
"add %5, %%"REG_D" \n\t" |
764 |
|
765 |
/* second bin: state byte at base + last_coeff_flag_offset_8x8[last] + 15 */
BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") |
766 |
|
767 |
"mov %2, %%"REG_a" \n\t" |
768 |
"mov %1, %%edi \n\t"
|
769 |
"movl %%edi, (%%"REG_a") \n\t" |
770 |
|
771 |
"test $1, %%edx \n\t"
|
772 |
" jnz 4f \n\t"
|
773 |
|
774 |
"add $4, %%"REG_a" \n\t" |
775 |
"mov %%"REG_a", %2 \n\t" |
776 |
|
777 |
"3: \n\t"
|
778 |
"addl $1, %%edi \n\t"
|
779 |
"mov %%edi, %1 \n\t"
|
780 |
"cmpl $63, %%edi \n\t"
|
781 |
" jb 2b \n\t"
|
782 |
/* loop ran to the end: store position 63 as well */
"mov %2, %%"REG_a" \n\t" |
783 |
"movl %%edi, (%%"REG_a") \n\t" |
784 |
"4: \n\t"
|
785 |
"addl %4, %%eax \n\t"
|
786 |
"shr $2, %%eax \n\t"
|
787 |
|
788 |
"movl %%esi, "RANGE "(%3) \n\t" |
789 |
"movl %%ebx, "LOW "(%3) \n\t" |
790 |
:"=&a"(coeff_count),"+m"(last), "+m"(index)\ |
791 |
:"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\ |
792 |
: "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"\ |
793 |
); |
794 |
return coeff_count;
|
795 |
} |
796 |
#endif /* defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) */ |
797 |
|
798 |
/**
|
799 |
*
|
800 |
* @return the number of bytes read or 0 if no end
|
801 |
*/
|
802 |
static int get_cabac_terminate(CABACContext *c){ |
803 |
c->range -= 2;
|
804 |
if(c->low < c->range<<(CABAC_BITS+1)){ |
805 |
renorm_cabac_decoder_once(c); |
806 |
return 0; |
807 |
}else{
|
808 |
return c->bytestream - c->bytestream_start;
|
809 |
} |
810 |
} |
811 |
|
812 |
#if 0
|
813 |
/**
|
814 |
* Get (truncated) unary binarization.
|
815 |
*/
|
816 |
static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
|
817 |
int i;
|
818 |
|
819 |
for(i=0; i<max; i++){
|
820 |
if(get_cabac(c, state)==0)
|
821 |
return i;
|
822 |
|
823 |
if(i< max_index) state++;
|
824 |
}
|
825 |
|
826 |
return truncated ? max : -1;
|
827 |
}
|
828 |
|
829 |
/**
|
830 |
* get unary exp golomb k-th order binarization.
|
831 |
*/
|
832 |
static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
|
833 |
int i, v;
|
834 |
int m= 1<<k;
|
835 |
|
836 |
if(get_cabac(c, state)==0)
|
837 |
return 0;
|
838 |
|
839 |
if(0 < max_index) state++;
|
840 |
|
841 |
for(i=1; i<max; i++){
|
842 |
if(get_cabac(c, state)==0){
|
843 |
if(is_signed && get_cabac_bypass(c)){
|
844 |
return -i;
|
845 |
}else
|
846 |
return i;
|
847 |
}
|
848 |
|
849 |
if(i < max_index) state++;
|
850 |
}
|
851 |
|
852 |
while(get_cabac_bypass(c)){
|
853 |
i+= m;
|
854 |
m+= m;
|
855 |
}
|
856 |
|
857 |
v=0;
|
858 |
while(m>>=1){
|
859 |
v+= v + get_cabac_bypass(c);
|
860 |
}
|
861 |
i += v;
|
862 |
|
863 |
if(is_signed && get_cabac_bypass(c)){
|
864 |
return -i;
|
865 |
}else
|
866 |
return i;
|
867 |
}
|
868 |
#endif /* 0 */
|
869 |
|
870 |
#endif /* FFMPEG_CABAC_H */ |