Statistics
| Branch: | Revision:

ffmpeg / libavcodec / cabac.h @ be449fca

History | View | Annotate | Download (24 KB)

1 d592f67f Michael Niedermayer
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5 b78e7197 Diego Biurrun
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8 d592f67f Michael Niedermayer
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10 b78e7197 Diego Biurrun
 * version 2.1 of the License, or (at your option) any later version.
11 d592f67f Michael Niedermayer
 *
12 b78e7197 Diego Biurrun
 * FFmpeg is distributed in the hope that it will be useful,
13 d592f67f Michael Niedermayer
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18 b78e7197 Diego Biurrun
 * License along with FFmpeg; if not, write to the Free Software
19 5509bffa Diego Biurrun
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 d592f67f Michael Niedermayer
 */
21 115329f1 Diego Biurrun
22 d592f67f Michael Niedermayer
/**
23
 * @file cabac.h
24
 * Context Adaptive Binary Arithmetic Coder.
25
 */
26
27 98790382 Stefano Sabatini
#ifndef AVCODEC_CABAC_H
28
#define AVCODEC_CABAC_H
29 26b4fe82 Aurelien Jacobs
30
#include "bitstream.h"
31 d592f67f Michael Niedermayer
32 2848ce84 Loren Merritt
//#undef NDEBUG
33 d592f67f Michael Niedermayer
#include <assert.h>
34 245976da Diego Biurrun
#include "libavutil/x86_cpu.h"
35 d592f67f Michael Niedermayer
36 5659b509 Michael Niedermayer
/** Number of bits fetched per refill; refill() advances the bytestream by CABAC_BITS/8 bytes. */
#define CABAC_BITS 16
/** Mask selecting the low CABAC_BITS bits of CABACContext.low; the decoder refills when they are all zero. */
#define CABAC_MASK ((1 << CABAC_BITS) - 1)
/** When defined, get_cabac_inline() is built from its branchless code paths. */
#define BRANCHLESS_CABAC_DECODER 1
//#define ARCH_X86_DISABLED 1
40 ec7eb896 Michael Niedermayer
41 d592f67f Michael Niedermayer
typedef struct CABACContext{
42
    int low;
43
    int range;
44
    int outstanding_count;
45
#ifdef STRICT_LIMITS
46
    int symCount;
47
#endif
48 e96682e6 Michael Niedermayer
    const uint8_t *bytestream_start;
49
    const uint8_t *bytestream;
50 bba83349 Michael Niedermayer
    const uint8_t *bytestream_end;
51 d592f67f Michael Niedermayer
    PutBitContext pb;
52
}CABACContext;
53
54 68a205ed Michael Niedermayer
extern uint8_t ff_h264_mlps_state[4*64];
55 a0f2c6ba Jindřich Makovička
extern uint8_t ff_h264_lps_range[4*2*64];  ///< rangeTabLPS
56 d61c4e73 Michael Niedermayer
extern uint8_t ff_h264_mps_state[2*64];     ///< transIdxMPS
57
extern uint8_t ff_h264_lps_state[2*64];     ///< transIdxLPS
58 f24a5159 Michael Niedermayer
extern const uint8_t ff_h264_norm_shift[512];
59 ec7eb896 Michael Niedermayer
60 d592f67f Michael Niedermayer
61
void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
62 e96682e6 Michael Niedermayer
void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
63 d61c4e73 Michael Niedermayer
void ff_init_cabac_states(CABACContext *c);
64 d592f67f Michael Niedermayer
65
66
/**
 * Write bit b to the output, followed by one inverted bit (1-b) for every
 * outstanding bit that was deferred; outstanding_count is zero afterwards.
 */
static inline void put_cabac_bit(CABACContext *c, int b){
    const int inverse = 1 - b;

    put_bits(&c->pb, 1, b);
    while(c->outstanding_count){
        put_bits(&c->pb, 1, inverse);
        c->outstanding_count--;
    }
}
72
73
/**
 * Encoder renormalization: double range and low until range reaches 0x100.
 * Each step either emits a resolved bit or defers it via outstanding_count.
 */
static inline void renorm_cabac_encoder(CABACContext *c){
    while(c->range < 0x100){
        //FIXME optimize
        if(c->low >= 0x200){
            put_cabac_bit(c, 1);
            c->low -= 0x200;
        }else if(c->low >= 0x100){
            /* bit value not decided yet: remember it as outstanding */
            c->outstanding_count++;
            c->low -= 0x100;
        }else{
            put_cabac_bit(c, 0);
        }

        c->low   *= 2;
        c->range *= 2;
    }
}
90
91 c46e2874 Diego Biurrun
#ifdef TEST
92 938dd846 Loren Merritt
/**
 * Encode one bit with an adaptive context.
 * @param state context state; its lowest bit is the current most probable
 *              symbol. The state is updated through the MPS/LPS tables.
 * @param bit   bit to encode
 */
static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
    const int cur       = *state;
    const int range_lps = ff_h264_lps_range[2*(c->range&0xC0) + cur];
    const int range_mps = c->range - range_lps;

    if(bit != (cur&1)){
        /* least probable symbol: move low past the MPS sub-interval */
        c->low  += range_mps;
        c->range = range_lps;
        *state   = ff_h264_lps_state[cur];
    }else{
        c->range = range_mps;
        *state   = ff_h264_mps_state[cur];
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
110
111 938dd846 Loren Merritt
/**
 * Encode one bit with a fixed LPS sub-range instead of an adaptive context.
 * @param RangeLPS width given to the bit value 1; must be below c->range
 * @param bit      0 takes the remaining range, non-zero takes RangeLPS
 */
static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
    assert(c->range > RangeLPS);

    if(bit){
        c->low  += c->range - RangeLPS;
        c->range = RangeLPS;
    }else{
        c->range -= RangeLPS;
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
127
128 61ccfcc0 Michael Niedermayer
/**
129
 * @param bit 0 -> write zero bit, !=0 write one bit
130
 */
131 938dd846 Loren Merritt
static void put_cabac_bypass(CABACContext *c, int bit){
132 d592f67f Michael Niedermayer
    c->low += c->low;
133
134
    if(bit){
135
        c->low += c->range;
136
    }
137
//FIXME optimize
138
    if(c->low<0x200){
139
        put_cabac_bit(c, 0);
140
    }else if(c->low<0x400){
141
        c->outstanding_count++;
142
        c->low -= 0x200;
143
    }else{
144
        put_cabac_bit(c, 1);
145
        c->low -= 0x400;
146
    }
147 115329f1 Diego Biurrun
148 d592f67f Michael Niedermayer
#ifdef STRICT_LIMITS
149
    c->symCount++;
150
#endif
151
}
152
153 5e20f836 Michael Niedermayer
/**
154
 *
155
 * @return the number of bytes written
156
 */
157 938dd846 Loren Merritt
static int put_cabac_terminate(CABACContext *c, int bit){
158 d592f67f Michael Niedermayer
    c->range -= 2;
159
160
    if(!bit){
161
        renorm_cabac_encoder(c);
162
    }else{
163
        c->low += c->range;
164
        c->range= 2;
165 115329f1 Diego Biurrun
166 d592f67f Michael Niedermayer
        renorm_cabac_encoder(c);
167
168
        assert(c->low <= 0x1FF);
169
        put_cabac_bit(c, c->low>>9);
170
        put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
171 115329f1 Diego Biurrun
172 d592f67f Michael Niedermayer
        flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
173
    }
174 115329f1 Diego Biurrun
175 d592f67f Michael Niedermayer
#ifdef STRICT_LIMITS
176
    c->symCount++;
177
#endif
178 5e20f836 Michael Niedermayer
179 b46243ed Alex Beregszaszi
    return (put_bits_count(&c->pb)+7)>>3;
180 d592f67f Michael Niedermayer
}
181
182 61ccfcc0 Michael Niedermayer
/**
183
 * put (truncated) unary binarization.
184
 */
185 938dd846 Loren Merritt
static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
186 61ccfcc0 Michael Niedermayer
    int i;
187 115329f1 Diego Biurrun
188 61ccfcc0 Michael Niedermayer
    assert(v <= max);
189 115329f1 Diego Biurrun
190 61ccfcc0 Michael Niedermayer
#if 1
191
    for(i=0; i<v; i++){
192
        put_cabac(c, state, 1);
193
        if(i < max_index) state++;
194
    }
195
    if(truncated==0 || v<max)
196
        put_cabac(c, state, 0);
197
#else
198
    if(v <= max_index){
199
        for(i=0; i<v; i++){
200
            put_cabac(c, state+i, 1);
201
        }
202
        if(truncated==0 || v<max)
203
            put_cabac(c, state+i, 0);
204
    }else{
205
        for(i=0; i<=max_index; i++){
206
            put_cabac(c, state+i, 1);
207
        }
208
        for(; i<v; i++){
209
            put_cabac(c, state+max_index, 1);
210
        }
211
        if(truncated==0 || v<max)
212
            put_cabac(c, state+max_index, 0);
213
    }
214
#endif
215
}
216
217
/**
218
 * put unary exp golomb k-th order binarization.
219
 */
220 938dd846 Loren Merritt
static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
221 61ccfcc0 Michael Niedermayer
    int i;
222 115329f1 Diego Biurrun
223 61ccfcc0 Michael Niedermayer
    if(v==0)
224
        put_cabac(c, state, 0);
225
    else{
226 8f8c0800 Michael Niedermayer
        const int sign= v < 0;
227 115329f1 Diego Biurrun
228 c26abfa5 Diego Biurrun
        if(is_signed) v= FFABS(v);
229 115329f1 Diego Biurrun
230 61ccfcc0 Michael Niedermayer
        if(v<max){
231
            for(i=0; i<v; i++){
232
                put_cabac(c, state, 1);
233
                if(i < max_index) state++;
234
            }
235
236
            put_cabac(c, state, 0);
237
        }else{
238
            int m= 1<<k;
239
240
            for(i=0; i<max; i++){
241
                put_cabac(c, state, 1);
242
                if(i < max_index) state++;
243
            }
244
245
            v -= max;
246
            while(v >= m){ //FIXME optimize
247
                put_cabac_bypass(c, 1);
248
                v-= m;
249
                m+= m;
250
            }
251
            put_cabac_bypass(c, 0);
252
            while(m>>=1){
253
                put_cabac_bypass(c, v&m);
254
            }
255
        }
256
257
        if(is_signed)
258
            put_cabac_bypass(c, sign);
259
    }
260
}
261 c46e2874 Diego Biurrun
#endif /* TEST */
262 61ccfcc0 Michael Niedermayer
263 ec7eb896 Michael Niedermayer
/**
 * Feed the next CABAC_BITS bits of the bytestream into low and advance the
 * read pointer by CABAC_BITS/8 bytes.
 * NOTE(review): the read is not checked against bytestream_end.
 */
static void refill(CABACContext *c){
    const uint8_t *src = c->bytestream;
#if CABAC_BITS == 16
    const int fresh = (src[0]<<9) + (src[1]<<1);
#else
    const int fresh = src[0]<<1;
#endif

    c->low += fresh;
    c->low -= CABAC_MASK;
    c->bytestream = src + CABAC_BITS/8;
}
272
273 1084771a Diego Biurrun
#if ! ( defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) )
274 ec7eb896 Michael Niedermayer
/**
 * Like refill(), but the fetched bits are shifted left before being added.
 * The shift is 7 minus the ff_h264_norm_shift entry for the lowest set bit
 * of low. Used after low was shifted by more than one bit position.
 * NOTE(review): the read is not checked against bytestream_end.
 */
static void refill2(CABACContext *c){
    const uint8_t *src = c->bytestream;
    int shift, fresh;

    /* low ^ (low-1) sets every bit up to and including the lowest set bit */
    fresh= c->low ^ (c->low-1);
    shift= 7 - ff_h264_norm_shift[fresh>>(CABAC_BITS-1)];

    fresh= -CABAC_MASK;
#if CABAC_BITS == 16
    fresh+= (src[0]<<9) + (src[1]<<1);
#else
    fresh+= src[0]<<1;
#endif

    c->low += fresh<<shift;
    c->bytestream = src + CABAC_BITS/8;
}
291 0f26eec2 Diego Biurrun
#endif
292 ec7eb896 Michael Niedermayer
293 d592f67f Michael Niedermayer
/**
 * Decoder renormalization: double range and low until range reaches 0x100,
 * refilling whenever the low CABAC_BITS bits of low have been used up.
 */
static inline void renorm_cabac_decoder(CABACContext *c){
    while(c->range < 0x100){
        c->range *= 2;
        c->low   *= 2;
        if((c->low & CABAC_MASK) == 0)
            refill(c);
    }
}
301
302 ec7eb896 Michael Niedermayer
/**
 * Single-step decoder renormalization: range and low are doubled once if
 * range is below 0x100, then low is refilled if its low CABAC_BITS bits are
 * zero. The assembly variants are only built when ARCH_X86_DISABLED is
 * defined; otherwise the C version at the end is used.
 */
static inline void renorm_cabac_decoder_once(CABACContext *c){
#ifdef ARCH_X86_DISABLED
    int temp;
#if 0
    //P3:683    athlon:475
    __asm__(
        "lea -0x100(%0), %2         \n\t"
        "shr $31, %2                \n\t"  //FIXME 31->63 for x86-64
        "shl %%cl, %0               \n\t"
        "shl %%cl, %1               \n\t"
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
    );
#elif 0
    //P3:680    athlon:474
    __asm__(
        "cmp $0x100, %0             \n\t"
        "setb %%cl                  \n\t"  //FIXME 31->63 for x86-64
        "shl %%cl, %0               \n\t"
        "shl %%cl, %1               \n\t"
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
    );
#elif 1
    int temp2;
    //P3:665    athlon:517
    __asm__(
        "lea -0x100(%0), %%eax      \n\t"
        "cltd                       \n\t"
        "mov %0, %%eax              \n\t"
        "and %%edx, %0              \n\t"
        "and %1, %%edx              \n\t"
        "add %%eax, %0              \n\t"
        "add %%edx, %1              \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#elif 0
    int temp2;
    //P3:673    athlon:509
    __asm__(
        "cmp $0x100, %0             \n\t"
        "sbb %%edx, %%edx           \n\t"
        "mov %0, %%eax              \n\t"
        "and %%edx, %0              \n\t"
        "and %1, %%edx              \n\t"
        "add %%eax, %0              \n\t"
        "add %%edx, %1              \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#else
    int temp2;
    //P3:677    athlon:511
    __asm__(
        "cmp $0x100, %0             \n\t"
        "lea (%0, %0), %%eax        \n\t"
        "lea (%1, %1), %%edx        \n\t"
        "cmovb %%eax, %0            \n\t"
        "cmovb %%edx, %1            \n\t"
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
    );
#endif
#else
    //P3:675    athlon:476
    /* top bit of the unsigned difference: 1 if range < 0x100, else 0 */
    int need_shift = (uint32_t)(c->range - 0x100) >> 31;
    c->range <<= need_shift;
    c->low   <<= need_shift;
#endif
    if((c->low & CABAC_MASK) == 0)
        refill(c);
}
370
371 5a6a6cc7 Diego Biurrun
/**
 * Decode one context-coded bin.
 *
 * Reads the context from *state, splits the range with ff_h264_lps_range,
 * updates *state, c->range and c->low, and refills low from the bytestream
 * when its low 16 bits are zero.
 *
 * The LOW/RANGE/BYTESTART/BYTE/BYTEEND macros defined here are byte offsets
 * into CABACContext used by the inline assembly. They are not undefined at
 * the end of the function, and later assembly in this header uses them too.
 *
 * @param state context state, updated in place
 * @return the decoded bit (0 or 1)
 */
static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){
    //FIXME gcc generates duplicate load/stores for c->low and c->range
#define LOW          "0"
#define RANGE        "4"
#ifdef ARCH_X86_64
#define BYTESTART   "16"
#define BYTE        "24"
#define BYTEEND     "32"
#else
#define BYTESTART   "12"
#define BYTE        "16"
#define BYTEEND     "20"
#endif
#if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
    int bit;

#ifndef BRANCHLESS_CABAC_DECODER
    __asm__ volatile(
        "movzbl (%1), %0                        \n\t"
        "movl "RANGE    "(%2), %%ebx            \n\t"
        "movl "RANGE    "(%2), %%edx            \n\t"
        "andl $0xC0, %%ebx                      \n\t"
        "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
        "movl "LOW      "(%2), %%ebx            \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "subl %%esi, %%edx                      \n\t"
        "movl %%edx, %%ecx                      \n\t"
        "shll $17, %%ecx                        \n\t"
        "cmpl %%ecx, %%ebx                      \n\t"
        " ja 1f                                 \n\t"

#if 1
        //athlon:4067 P3:4110
        "lea -0x100(%%edx), %%ecx               \n\t"
        "shr $31, %%ecx                         \n\t"
        "shl %%cl, %%edx                        \n\t"
        "shl %%cl, %%ebx                        \n\t"
#else
        //athlon:4057 P3:4130
        "cmp $0x100, %%edx                      \n\t" //FIXME avoidable
        "setb %%cl                              \n\t"
        "shl %%cl, %%edx                        \n\t"
        "shl %%cl, %%ebx                        \n\t"
#endif
        "movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx   \n\t"
        "movb %%cl, (%1)                        \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "test %%bx, %%bx                        \n\t"
        " jnz 2f                                \n\t"
        "mov  "BYTE     "(%2), %%"REG_S"        \n\t"
        "subl $0xFFFF, %%ebx                    \n\t"
        "movzwl (%%"REG_S"), %%ecx              \n\t"
        "bswap %%ecx                            \n\t"
        "shrl $15, %%ecx                        \n\t"
        "add  $2, %%"REG_S"                     \n\t"
        "addl %%ecx, %%ebx                      \n\t"
        "mov  %%"REG_S", "BYTE    "(%2)         \n\t"
        "jmp 2f                                 \n\t"
        "1:                                     \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "subl %%ecx, %%ebx                      \n\t"
        "movl %%esi, %%edx                      \n\t"
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx   \n\t"
        "shll %%cl, %%ebx                       \n\t"
        "shll %%cl, %%edx                       \n\t"
        "movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx   \n\t"
        "movb %%cl, (%1)                        \n\t"
        "add  $1, %0                            \n\t"
        "test %%bx, %%bx                        \n\t"
        " jnz 2f                                \n\t"

        "mov  "BYTE     "(%2), %%"REG_c"        \n\t"
        "movzwl (%%"REG_c"), %%esi              \n\t"
        "bswap %%esi                            \n\t"
        "shrl $15, %%esi                        \n\t"
        "subl $0xFFFF, %%esi                    \n\t"
        "add  $2, %%"REG_c"                     \n\t"
        "mov  %%"REG_c", "BYTE    "(%2)         \n\t"

        "leal -1(%%ebx), %%ecx                  \n\t"
        "xorl %%ebx, %%ecx                      \n\t"
        "shrl $15, %%ecx                        \n\t"
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx   \n\t"
        "neg %%ecx                              \n\t"
        "add $7, %%ecx                          \n\t"

        "shll %%cl , %%esi                      \n\t"
        "addl %%esi, %%ebx                      \n\t"
        "2:                                     \n\t"
        "movl %%edx, "RANGE    "(%2)            \n\t"
        "movl %%ebx, "LOW      "(%2)            \n\t"
        :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or miscompiles it (for example if "+a"(bit) or "+m"(*state) is used
        :"r"(state), "r"(c)
        : "%"REG_c, "%ebx", "%edx", "%"REG_S, "memory"
    );
    bit&=1;
#else /* BRANCHLESS_CABAC_DECODER */


#if defined HAVE_FAST_CMOV
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "mov    "tmp"       , %%ecx                                     \n\t"\
        "shl    $17         , "tmp"                                     \n\t"\
        "cmp    "low"       , "tmp"                                     \n\t"\
        "cmova  %%ecx       , "range"                                   \n\t"\
        "sbb    %%ecx       , %%ecx                                     \n\t"\
        "and    %%ecx       , "tmp"                                     \n\t"\
        "sub    "tmp"       , "low"                                     \n\t"\
        "xor    %%ecx       , "ret"                                     \n\t"
#else /* HAVE_FAST_CMOV */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "mov    "tmp"       , %%ecx                                     \n\t"\
        "shl    $17         , "tmp"                                     \n\t"\
        "sub    "low"       , "tmp"                                     \n\t"\
        "sar    $31         , "tmp"                                     \n\t" /*lps_mask*/\
        "sub    %%ecx       , "range"                                   \n\t" /*RangeLPS - range*/\
        "and    "tmp"       , "range"                                   \n\t" /*(RangeLPS - range)&lps_mask*/\
        "add    %%ecx       , "range"                                   \n\t" /*new range*/\
        "shl    $17         , %%ecx                                     \n\t"\
        "and    "tmp"       , %%ecx                                     \n\t"\
        "sub    %%ecx       , "low"                                     \n\t"\
        "xor    "tmp"       , "ret"                                     \n\t"
#endif /* HAVE_FAST_CMOV */


#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "movzbl "statep"    , "ret"                                     \n\t"\
        "mov    "range"     , "tmp"                                     \n\t"\
        "and    $0xC0       , "range"                                   \n\t"\
        "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
        "sub    "range"     , "tmp"                                     \n\t"\
        BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
        "shl    %%cl        , "range"                                   \n\t"\
        "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
        "mov    "tmpbyte"   , "statep"                                  \n\t"\
        "shl    %%cl        , "low"                                     \n\t"\
        "test   "lowword"   , "lowword"                                 \n\t"\
        " jnz   1f                                                      \n\t"\
        "mov "BYTE"("cabac"), %%"REG_c"                                 \n\t"\
        "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
        "bswap  "tmp"                                                   \n\t"\
        "shr    $15         , "tmp"                                     \n\t"\
        "sub    $0xFFFF     , "tmp"                                     \n\t"\
        "add    $2          , %%"REG_c"                                 \n\t"\
        "mov    %%"REG_c"   , "BYTE    "("cabac")                       \n\t"\
        "lea    -1("low")   , %%ecx                                     \n\t"\
        "xor    "low"       , %%ecx                                     \n\t"\
        "shr    $15         , %%ecx                                     \n\t"\
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx            \n\t"\
        "neg    %%ecx                                                   \n\t"\
        "add    $7          , %%ecx                                     \n\t"\
        "shl    %%cl        , "tmp"                                     \n\t"\
        "add    "tmp"       , "low"                                     \n\t"\
        "1:                                                             \n\t"

    __asm__ volatile(
        "movl "RANGE    "(%2), %%esi            \n\t"
        "movl "LOW      "(%2), %%ebx            \n\t"
        BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
        "movl %%esi, "RANGE    "(%2)            \n\t"
        "movl %%ebx, "LOW      "(%2)            \n\t"

        :"=&a"(bit)
        :"r"(state), "r"(c)
        : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
    );
    bit&=1;
#endif /* BRANCHLESS_CABAC_DECODER */
#else /* defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) */
    /* portable C implementation */
    int ctx = *state;
    int range_lps = ff_h264_lps_range[2*(c->range&0xC0) + ctx];
    int bit, lps_mask av_unused;

    /* range now holds the MPS sub-range */
    c->range -= range_lps;
#ifndef BRANCHLESS_CABAC_DECODER
    if(c->low >= (c->range<<(CABAC_BITS+1))){
        /* LPS path: renormalize by the table shift for range_lps */
        bit= ff_h264_norm_shift[range_lps];
        c->low -= (c->range<<(CABAC_BITS+1));
        *state= ff_h264_lps_state[ctx];
        c->range = range_lps<<bit;
        c->low <<= bit;
        bit= (ctx&1)^1;

        if((c->low & CABAC_MASK) == 0){
            refill2(c);
        }
    }else{
        /* MPS path: at most one renormalization step is needed */
        bit= ctx&1;
        *state= ff_h264_mps_state[ctx];
        renorm_cabac_decoder_once(c);
    }
#else /* BRANCHLESS_CABAC_DECODER */
    /* all ones when low lies in the LPS sub-interval, zero otherwise */
    lps_mask= ((c->range<<(CABAC_BITS+1)) - c->low)>>31;

    c->low -= (c->range<<(CABAC_BITS+1)) & lps_mask;
    c->range += (range_lps - c->range) & lps_mask;

    /* inverting ctx on LPS selects the other half of the merged table */
    ctx^=lps_mask;
    *state= ff_h264_mlps_state[128 + ctx];
    bit= ctx&1;

    /* lps_mask is reused as the renormalization shift */
    lps_mask= ff_h264_norm_shift[c->range];
    c->range<<= lps_mask;
    c->low  <<= lps_mask;
    if((c->low & CABAC_MASK) == 0)
        refill2(c);
#endif /* BRANCHLESS_CABAC_DECODER */
#endif /* defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) */
    return bit;
}
582
583 ab210908 Diego Biurrun
/**
 * Out-of-line wrapper around get_cabac_inline(); marked av_noinline.
 * @return the decoded bit
 */
static int av_noinline av_unused get_cabac_noinline(CABACContext *c, uint8_t * const state){
    const int bit = get_cabac_inline(c, state);

    return bit;
}
586
587 ab210908 Diego Biurrun
/**
 * Decode one context-coded bin; forwards to get_cabac_inline().
 * @return the decoded bit
 */
static int av_unused get_cabac(CABACContext *c, uint8_t * const state){
    const int bit = get_cabac_inline(c, state);

    return bit;
}
590
591 ab210908 Diego Biurrun
/**
 * Decode one bypass-coded bit: low is doubled (and refilled when its low
 * CABAC_BITS bits are zero) and compared against the scaled range.
 * The assembly version is compiled out.
 * @return 0 or 1
 */
static int av_unused get_cabac_bypass(CABACContext *c){
#if 0 //not faster
    int bit;
    __asm__ volatile(
        "movl "RANGE    "(%1), %%ebx            \n\t"
        "movl "LOW      "(%1), %%eax            \n\t"
        "shl $17, %%ebx                         \n\t"
        "add %%eax, %%eax                       \n\t"
        "sub %%ebx, %%eax                       \n\t"
        "cltd                                   \n\t"
        "and %%edx, %%ebx                       \n\t"
        "add %%ebx, %%eax                       \n\t"
        "test %%ax, %%ax                        \n\t"
        " jnz 1f                                \n\t"
        "movl "BYTE     "(%1), %%"REG_b"        \n\t"
        "subl $0xFFFF, %%eax                    \n\t"
        "movzwl (%%"REG_b"), %%ecx              \n\t"
        "bswap %%ecx                            \n\t"
        "shrl $15, %%ecx                        \n\t"
        "addl $2, %%"REG_b"                     \n\t"
        "addl %%ecx, %%eax                      \n\t"
        "movl %%"REG_b", "BYTE     "(%1)        \n\t"
        "1:                                     \n\t"
        "movl %%eax, "LOW      "(%1)            \n\t"

        :"=&d"(bit)
        :"r"(c)
        : "%eax", "%"REG_b, "%ecx", "memory"
    );
    return bit+1;
#else
    int scaled_range;

    c->low *= 2;
    if((c->low & CABAC_MASK) == 0)
        refill(c);

    scaled_range = c->range<<(CABAC_BITS+1);
    if(c->low >= scaled_range){
        c->low -= scaled_range;
        return 1;
    }
    return 0;
#endif
}
637
638
639 849f1035 Måns Rullgård
/**
 * Decode one bypass bin and apply it as a sign to val.
 *
 * low is doubled and compared against the range scaled by
 * 2^(CABAC_BITS+1). If low is below the scaled range, val is negated;
 * otherwise the scaled range is removed from low and val is returned
 * unchanged. The context is refilled when the low CABAC_BITS bits of
 * low are all zero.
 *
 * @param c   CABAC decoder context, updated in place
 * @param val value whose sign is selected by the decoded bin
 * @return -val or val, depending on the decoded bin
 */
static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
    /* Register use: eax = low, ebx = scaled range, ecx = val (in/out),
     * edx = sign mask, later reused as scratch for the refill.
     * The constants 17 and 0xFFFF are hard-coded here; they correspond to
     * CABAC_BITS+1 and CABAC_MASK only while CABAC_BITS is 16. */
    __asm__ volatile(
        "movl "RANGE    "(%1), %%ebx            \n\t"
        "movl "LOW      "(%1), %%eax            \n\t"
        "shl $17, %%ebx                         \n\t"
        "add %%eax, %%eax                       \n\t"
        "sub %%ebx, %%eax                       \n\t"
        /* edx = all ones if the subtraction went negative, else zero */
        "cltd                                   \n\t"
        "and %%edx, %%ebx                       \n\t"
        "add %%ebx, %%eax                       \n\t"
        /* conditional negation of val: (val ^ mask) - mask */
        "xor %%edx, %%ecx                       \n\t"
        "sub %%edx, %%ecx                       \n\t"
        /* skip the refill unless the low 16 bits of low are zero */
        "test %%ax, %%ax                        \n\t"
        " jnz 1f                                \n\t"
        "mov  "BYTE     "(%1), %%"REG_b"        \n\t"
        "subl $0xFFFF, %%eax                    \n\t"
        "movzwl (%%"REG_b"), %%edx              \n\t"
        "bswap %%edx                            \n\t"
        "shrl $15, %%edx                        \n\t"
        "add  $2, %%"REG_b"                     \n\t"
        "addl %%edx, %%eax                      \n\t"
        "mov  %%"REG_b", "BYTE     "(%1)        \n\t"
        "1:                                     \n\t"
        "movl %%eax, "LOW      "(%1)            \n\t"

        :"+c"(val)
        :"r"(c)
        : "%eax", "%"REG_b, "%edx", "memory"
    );
    return val;
#else
    int scaled_range, sign_mask;

    c->low *= 2;

    if((c->low & CABAC_MASK) == 0)
        refill(c);

    scaled_range = c->range << (CABAC_BITS+1);
    c->low      -= scaled_range;

    /* all ones when low dropped below zero, zero otherwise;
     * relies on an arithmetic right shift of a 32-bit int */
    sign_mask    = c->low >> 31;

    /* undo the subtraction only in the negative case */
    c->low      += scaled_range & sign_mask;

    return (val ^ sign_mask) - sign_mask;
#endif
}
685 ebd624b6 Michael Niedermayer
686 5e20f836 Michael Niedermayer
/**
687
 *
688
 * @return the number of bytes read or 0 if no end
689
 */
690 ab210908 Diego Biurrun
static int av_unused get_cabac_terminate(CABACContext *c){
691 f24a5159 Michael Niedermayer
    c->range -= 2;
692 3b6dc9ca Michael Niedermayer
    if(c->low < c->range<<(CABAC_BITS+1)){
693 ec7eb896 Michael Niedermayer
        renorm_cabac_decoder_once(c);
694 d592f67f Michael Niedermayer
        return 0;
695
    }else{
696 5e20f836 Michael Niedermayer
        return c->bytestream - c->bytestream_start;
697 115329f1 Diego Biurrun
    }
698 d592f67f Michael Niedermayer
}
699
700 0f26eec2 Diego Biurrun
#if 0
701 61ccfcc0 Michael Niedermayer
/**
702 1fe2d0fd Diego Biurrun
 * Get (truncated) unary binarization.
703 61ccfcc0 Michael Niedermayer
 */
704 938dd846 Loren Merritt
static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
705 61ccfcc0 Michael Niedermayer
    int i;
706 115329f1 Diego Biurrun

707
    for(i=0; i<max; i++){
708 61ccfcc0 Michael Niedermayer
        if(get_cabac(c, state)==0)
709
            return i;
710 115329f1 Diego Biurrun

711 61ccfcc0 Michael Niedermayer
        if(i< max_index) state++;
712
    }
713

714
    return truncated ? max : -1;
715
}
716

717
/**
718
 * get unary exp golomb k-th order binarization.
719
 */
720 938dd846 Loren Merritt
static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
721 61ccfcc0 Michael Niedermayer
    int i, v;
722
    int m= 1<<k;
723 115329f1 Diego Biurrun

724
    if(get_cabac(c, state)==0)
725 61ccfcc0 Michael Niedermayer
        return 0;
726 115329f1 Diego Biurrun

727 61ccfcc0 Michael Niedermayer
    if(0 < max_index) state++;
728 115329f1 Diego Biurrun

729
    for(i=1; i<max; i++){
730 61ccfcc0 Michael Niedermayer
        if(get_cabac(c, state)==0){
731
            if(is_signed && get_cabac_bypass(c)){
732
                return -i;
733
            }else
734
                return i;
735
        }
736

737
        if(i < max_index) state++;
738
    }
739 115329f1 Diego Biurrun

740 61ccfcc0 Michael Niedermayer
    while(get_cabac_bypass(c)){
741
        i+= m;
742
        m+= m;
743
    }
744 115329f1 Diego Biurrun

745 61ccfcc0 Michael Niedermayer
    v=0;
746
    while(m>>=1){
747
        v+= v + get_cabac_bypass(c);
748
    }
749
    i += v;
750

751
    if(is_signed && get_cabac_bypass(c)){
752
        return -i;
753
    }else
754
        return i;
755
}
756 0f26eec2 Diego Biurrun
#endif /* 0 */
757 26b4fe82 Aurelien Jacobs
758 98790382 Stefano Sabatini
#endif /* AVCODEC_CABAC_H */