Statistics
| Branch: | Revision:

ffmpeg / libavcodec / cabac.h @ f0b23422

History | View | Annotate | Download (28.4 KB)

1
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 *
21
 */
22

    
23
/**
24
 * @file cabac.h
25
 * Context Adaptive Binary Arithmetic Coder.
26
 */
27

    
28

    
29
//#undef NDEBUG
30
#include <assert.h>
31
#ifdef ARCH_X86
32
#include "x86_cpu.h"
33
#endif
34

    
35
/* Number of bitstream bits pulled in per decoder refill; refill() and
 * refill2() advance the read pointer by CABAC_BITS/8 bytes. */
#define CABAC_BITS 16
36
/* Mask of the low CABAC_BITS bits of CABACContext.low; the decoder refills
 * when all of these bits are zero. */
#define CABAC_MASK ((1<<CABAC_BITS)-1)
37
/* Selects the branchless code paths in get_cabac_inline(). Only tested with
 * #ifndef, so the value itself is not used. */
#define BRANCHLESS_CABAC_DECODER 1
38
/* Uncomment to select the inline-asm variants in renorm_cabac_decoder_once(). */
//#define ARCH_X86_DISABLED 1
39

    
40
/**
 * State used by the CABAC encoder and decoder routines in this header.
 *
 * NOTE: the inline-asm code paths address these fields through hard-coded
 * byte offsets (the LOW, RANGE, BYTESTART, BYTE and BYTEEND string macros),
 * so the field order and sizes must stay in sync with those macros.
 * NOTE(review): on 32-bit x86 enabling STRICT_LIMITS would shift the pointer
 * fields away from those offsets -- confirm before combining the two.
 */
typedef struct CABACContext{
41
    int low;                ///< arithmetic coder "low" register
42
    int range;              ///< arithmetic coder "range" register
43
    int outstanding_count;  ///< encoder: pending bits emitted inverted by put_cabac_bit()
44
#ifdef STRICT_LIMITS
45
    int symCount;           ///< encoder: incremented once per symbol written
46
#endif
47
    const uint8_t *bytestream_start;   ///< decoder: base used by get_cabac_terminate() to compute bytes read
48
    const uint8_t *bytestream;         ///< decoder: current read position, advanced on refill
49
    const uint8_t *bytestream_end;     ///< decoder: presumably one past the last input byte -- set outside this header
50
    PutBitContext pb;                  ///< encoder: bit writer receiving the output bits
51
}CABACContext;
52

    
53
/* Lookup tables shared by the encoder and decoder; defined outside this header. */
extern uint8_t ff_h264_mlps_state[4*64];    ///< combined next-state table, read as (ff_h264_mlps_state+128)[s] with s possibly negative
54
extern uint8_t ff_h264_lps_range[4*2*64];  ///< rangeTabLPS
55
extern uint8_t ff_h264_mps_state[2*64];     ///< transIdxMPS
56
extern uint8_t ff_h264_lps_state[2*64];     ///< transIdxLPS
57
extern const uint8_t ff_h264_norm_shift[512]; ///< shift count used to renormalise a given range value
58

    
59

    
60
/* Initialisation entry points; their implementations are not part of this
 * header, so the notes below are assumptions to be confirmed. */
/* Presumably prepares c for encoding into buf (buf_size bytes) -- TODO confirm. */
void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
61
/* Presumably prepares c for decoding from buf (buf_size bytes) -- TODO confirm. */
void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
62
/* Presumably fills the ff_h264_* state tables -- TODO confirm. */
void ff_init_cabac_states(CABACContext *c);
63

    
64

    
65
/**
 * Write one resolved bit followed by all outstanding bits.
 *
 * Every pending bit counted in c->outstanding_count is written as 1-b,
 * and the counter is left at zero.
 *
 * @param b bit value to write first
 */
static inline void put_cabac_bit(CABACContext *c, int b){
    const int inverse = 1 - b;

    put_bits(&c->pb, 1, b);

    while (c->outstanding_count) {
        put_bits(&c->pb, 1, inverse);
        c->outstanding_count--;
    }
}
71

    
72
/**
 * Renormalise the encoder: double range and low until range >= 0x100.
 *
 * Before each doubling one bit is either written (low below 0x100 -> 0,
 * low at or above 0x200 -> 1) or deferred via outstanding_count.
 */
static inline void renorm_cabac_encoder(CABACContext *c){
    //FIXME optimize
    while (c->range < 0x100) {
        if (c->low >= 0x200) {
            put_cabac_bit(c, 1);
            c->low -= 0x200;
        } else if (c->low >= 0x100) {
            /* Undecided bit: remember it and resolve on the next real bit. */
            c->outstanding_count++;
            c->low -= 0x100;
        } else {
            put_cabac_bit(c, 0);
        }

        c->range *= 2;
        c->low   *= 2;
    }
}
89

    
90
/**
 * Encode one bin with an adaptive context.
 *
 * The low bit of *state is the context's most probable symbol; *state is
 * advanced through ff_h264_mps_state / ff_h264_lps_state depending on
 * whether bit matches it.
 *
 * @param state context state, updated in place
 * @param bit   bin value to encode
 */
static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
    const int cur       = *state;
    const int lps_range = ff_h264_lps_range[2*(c->range&0xC0) + cur];
    const int mps_range = c->range - lps_range;

    if ((cur & 1) != bit) {
        /* Least probable symbol: move low past the MPS sub-interval. */
        c->low  += mps_range;
        c->range = lps_range;
        *state   = ff_h264_lps_state[cur];
    } else {
        c->range = mps_range;
        *state   = ff_h264_mps_state[cur];
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
108

    
109
/**
 * Encode one bin with a fixed, caller-supplied LPS range.
 *
 * A zero bit keeps the (range - RangeLPS) sub-interval, a non-zero bit
 * takes the RangeLPS sub-interval.
 *
 * @param RangeLPS width of the sub-interval assigned to a non-zero bit;
 *                 must be smaller than c->range
 * @param bit      bin value to encode
 */
static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){
    int remaining;

    assert(c->range > RangeLPS);

    remaining = c->range - RangeLPS;
    if (bit) {
        c->low  += remaining;
        c->range = RangeLPS;
    } else {
        c->range = remaining;
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
125

    
126
/**
127
 * @param bit 0 -> write zero bit, !=0 write one bit
128
 */
129
static void put_cabac_bypass(CABACContext *c, int bit){
130
    c->low += c->low;
131

    
132
    if(bit){
133
        c->low += c->range;
134
    }
135
//FIXME optimize
136
    if(c->low<0x200){
137
        put_cabac_bit(c, 0);
138
    }else if(c->low<0x400){
139
        c->outstanding_count++;
140
        c->low -= 0x200;
141
    }else{
142
        put_cabac_bit(c, 1);
143
        c->low -= 0x400;
144
    }
145

    
146
#ifdef STRICT_LIMITS
147
    c->symCount++;
148
#endif
149
}
150

    
151
/**
152
 *
153
 * @return the number of bytes written
154
 */
155
static int put_cabac_terminate(CABACContext *c, int bit){
156
    c->range -= 2;
157

    
158
    if(!bit){
159
        renorm_cabac_encoder(c);
160
    }else{
161
        c->low += c->range;
162
        c->range= 2;
163

    
164
        renorm_cabac_encoder(c);
165

    
166
        assert(c->low <= 0x1FF);
167
        put_cabac_bit(c, c->low>>9);
168
        put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
169

    
170
        flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
171
    }
172

    
173
#ifdef STRICT_LIMITS
174
    c->symCount++;
175
#endif
176

    
177
    return (put_bits_count(&c->pb)+7)>>3;
178
}
179

    
180
/**
181
 * put (truncated) unary binarization.
182
 */
183
static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
184
    int i;
185

    
186
    assert(v <= max);
187

    
188
#if 1
189
    for(i=0; i<v; i++){
190
        put_cabac(c, state, 1);
191
        if(i < max_index) state++;
192
    }
193
    if(truncated==0 || v<max)
194
        put_cabac(c, state, 0);
195
#else
196
    if(v <= max_index){
197
        for(i=0; i<v; i++){
198
            put_cabac(c, state+i, 1);
199
        }
200
        if(truncated==0 || v<max)
201
            put_cabac(c, state+i, 0);
202
    }else{
203
        for(i=0; i<=max_index; i++){
204
            put_cabac(c, state+i, 1);
205
        }
206
        for(; i<v; i++){
207
            put_cabac(c, state+max_index, 1);
208
        }
209
        if(truncated==0 || v<max)
210
            put_cabac(c, state+max_index, 0);
211
    }
212
#endif
213
}
214

    
215
/**
216
 * put unary exp golomb k-th order binarization.
217
 */
218
static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
219
    int i;
220

    
221
    if(v==0)
222
        put_cabac(c, state, 0);
223
    else{
224
        const int sign= v < 0;
225

    
226
        if(is_signed) v= FFABS(v);
227

    
228
        if(v<max){
229
            for(i=0; i<v; i++){
230
                put_cabac(c, state, 1);
231
                if(i < max_index) state++;
232
            }
233

    
234
            put_cabac(c, state, 0);
235
        }else{
236
            int m= 1<<k;
237

    
238
            for(i=0; i<max; i++){
239
                put_cabac(c, state, 1);
240
                if(i < max_index) state++;
241
            }
242

    
243
            v -= max;
244
            while(v >= m){ //FIXME optimize
245
                put_cabac_bypass(c, 1);
246
                v-= m;
247
                m+= m;
248
            }
249
            put_cabac_bypass(c, 0);
250
            while(m>>=1){
251
                put_cabac_bypass(c, v&m);
252
            }
253
        }
254

    
255
        if(is_signed)
256
            put_cabac_bypass(c, sign);
257
    }
258
}
259

    
260
/**
 * Pull the next CABAC_BITS bits of input into c->low.
 *
 * The new bits are added shifted left by one and CABAC_MASK is subtracted,
 * which matches callers that invoke this when the low CABAC_BITS bits of
 * c->low are all zero.
 *
 * The read pointer is advanced only while it is still below
 * c->bytestream_end, so truncated or corrupt input can no longer push it
 * arbitrarily far past the buffer. The byte loads themselves are not
 * guarded.
 * NOTE(review): assumes c->bytestream_end is one past the last input byte
 * and that the buffer has at least CABAC_BITS/8 readable bytes after it --
 * confirm against ff_init_cabac_decoder() and the callers' padding.
 */
static void refill(CABACContext *c){
#if CABAC_BITS == 16
    c->low += (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
    c->low += c->bytestream[0]<<1;
#endif
    c->low -= CABAC_MASK;

    /* Stop advancing at the end of input instead of running away. */
    if (c->bytestream < c->bytestream_end)
        c->bytestream += CABAC_BITS/8;
}
269

    
270
/**
 * Pull the next CABAC_BITS bits of input into c->low after a multi-bit
 * renormalisation.
 *
 * The position at which the new bits are inserted is derived from the
 * lowest set bit of c->low through ff_h264_norm_shift.
 *
 * The read pointer is advanced only while it is still below
 * c->bytestream_end, so truncated or corrupt input can no longer push it
 * arbitrarily far past the buffer. The byte loads themselves are not
 * guarded.
 * NOTE(review): assumes c->bytestream_end is one past the last input byte
 * and that the buffer has at least CABAC_BITS/8 readable bytes after it --
 * confirm against ff_init_cabac_decoder() and the callers' padding.
 */
static void refill2(CABACContext *c){
    int i, x;

    /* x has all bits up to and including the lowest set bit of low. */
    x = c->low ^ (c->low-1);
    i = 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];

    x = -CABAC_MASK;

#if CABAC_BITS == 16
    x += (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
    x += c->bytestream[0]<<1;
#endif

    /* x may be negative; multiply instead of left-shifting a negative
     * value, which is undefined behaviour in C. */
    c->low += x * (1 << i);

    /* Stop advancing at the end of input instead of running away. */
    if (c->bytestream < c->bytestream_end)
        c->bytestream += CABAC_BITS/8;
}
287

    
288
/**
 * Renormalise the decoder: double range and low until range >= 0x100,
 * refilling whenever the low CABAC_BITS bits of low become zero.
 */
static inline void renorm_cabac_decoder(CABACContext *c){
    for (; c->range < 0x100; ) {
        c->range *= 2;
        c->low   *= 2;
        if ((c->low & CABAC_MASK) == 0)
            refill(c);
    }
}
296

    
297
/**
 * Renormalise the decoder by at most one doubling step, without branching
 * on the range test, then refill if the low CABAC_BITS bits of low are zero.
 *
 * With ARCH_X86_DISABLED defined one of several inline-asm variants is
 * compiled (the "#elif 1" one); otherwise the plain C version is used.
 * The "P3:" / "athlon:" comments appear to be benchmark figures for each
 * variant.
 */
static inline void renorm_cabac_decoder_once(CABACContext *c){
298
#ifdef ARCH_X86_DISABLED
299
    int temp;
300
#if 0
301
    //P3:683    athlon:475
302
    asm(
303
        "lea -0x100(%0), %2         \n\t"
304
        "shr $31, %2                \n\t"  //FIXME 31->63 for x86-64
305
        "shl %%cl, %0               \n\t"
306
        "shl %%cl, %1               \n\t"
307
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
308
    );
309
#elif 0
310
    //P3:680    athlon:474
311
    asm(
312
        "cmp $0x100, %0             \n\t"
313
        "setb %%cl                  \n\t"  //FIXME 31->63 for x86-64
314
        "shl %%cl, %0               \n\t"
315
        "shl %%cl, %1               \n\t"
316
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
317
    );
318
#elif 1
319
    int temp2;
320
    //P3:665    athlon:517
321
    asm(
322
        "lea -0x100(%0), %%eax      \n\t"
323
        "cdq                        \n\t"
324
        "mov %0, %%eax              \n\t"
325
        "and %%edx, %0              \n\t"
326
        "and %1, %%edx              \n\t"
327
        "add %%eax, %0              \n\t"
328
        "add %%edx, %1              \n\t"
329
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
330
    );
331
#elif 0
332
    int temp2;
333
    //P3:673    athlon:509
334
    asm(
335
        "cmp $0x100, %0             \n\t"
336
        "sbb %%edx, %%edx           \n\t"
337
        "mov %0, %%eax              \n\t"
338
        "and %%edx, %0              \n\t"
339
        "and %1, %%edx              \n\t"
340
        "add %%eax, %0              \n\t"
341
        "add %%edx, %1              \n\t"
342
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
343
    );
344
#else
345
    int temp2;
346
    //P3:677    athlon:511
347
    asm(
348
        "cmp $0x100, %0             \n\t"
349
        "lea (%0, %0), %%eax        \n\t"
350
        "lea (%1, %1), %%edx        \n\t"
351
        "cmovb %%eax, %0            \n\t"
352
        "cmovb %%edx, %1            \n\t"
353
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
354
    );
355
#endif
356
#else
357
    //P3:675    athlon:476
    /* shift is 1 when range - 0x100 is negative (bit 31 set after the
     * unsigned conversion), i.e. range < 0x100, and 0 otherwise. */
358
    int shift= (uint32_t)(c->range - 0x100)>>31;
359
    c->range<<= shift;
360
    c->low  <<= shift;
361
#endif
362
    if(!(c->low & CABAC_MASK))
363
        refill(c);
364
}
365

    
366
/**
 * Decode one context-coded bin.
 *
 * Updates *state, c->low and c->range, and advances c->bytestream when a
 * refill is needed. Inline asm is used when ARCH_X86 is defined and the
 * build is not (PIC && __GNUC__); otherwise the C code at the end of the
 * function is used. In both cases BRANCHLESS_CABAC_DECODER selects between
 * a branching and a branchless variant.
 *
 * The LOW/RANGE/BYTESTART/BYTE/BYTEEND and BRANCHLESS_GET_CABAC* macros
 * defined inside this function are not undefined afterwards and are reused
 * by asm blocks in later functions of this header.
 *
 * @param state context state, updated in place
 * @return the decoded bin (0 or 1)
 */
static int av_always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
367
    //FIXME gcc generates duplicate load/stores for c->low and c->range
368
    /* Byte offsets of CABACContext fields, as strings for pasting into asm templates. */
#define LOW          "0"
369
#define RANGE        "4"
370
#ifdef ARCH_X86_64
371
#define BYTESTART   "16"
372
#define BYTE        "24"
373
#define BYTEEND     "32"
374
#else
375
#define BYTESTART   "12"
376
#define BYTE        "16"
377
#define BYTEEND     "20"
378
#endif
379
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
380
    int bit;
381

    
382
#ifndef BRANCHLESS_CABAC_DECODER
383
    asm volatile(
384
        "movzbl (%1), %0                        \n\t"
385
        "movl "RANGE    "(%2), %%ebx            \n\t"
386
        "movl "RANGE    "(%2), %%edx            \n\t"
387
        "andl $0xC0, %%ebx                      \n\t"
388
        "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
389
        "movl "LOW      "(%2), %%ebx            \n\t"
390
//eax:state ebx:low, edx:range, esi:RangeLPS
391
        "subl %%esi, %%edx                      \n\t"
392
        "movl %%edx, %%ecx                      \n\t"
393
        "shll $17, %%ecx                        \n\t"
394
        "cmpl %%ecx, %%ebx                      \n\t"
395
        " ja 1f                                 \n\t"
396

    
397
#if 1
398
        //athlon:4067 P3:4110
399
        "lea -0x100(%%edx), %%ecx               \n\t"
400
        "shr $31, %%ecx                         \n\t"
401
        "shl %%cl, %%edx                        \n\t"
402
        "shl %%cl, %%ebx                        \n\t"
403
#else
404
        //athlon:4057 P3:4130
405
        "cmp $0x100, %%edx                      \n\t" //FIXME avoidable
406
        "setb %%cl                              \n\t"
407
        "shl %%cl, %%edx                        \n\t"
408
        "shl %%cl, %%ebx                        \n\t"
409
#endif
410
        "movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx   \n\t"
411
        "movb %%cl, (%1)                        \n\t"
412
//eax:state ebx:low, edx:range, esi:RangeLPS
413
        "test %%bx, %%bx                        \n\t"
414
        " jnz 2f                                \n\t"
415
        "mov  "BYTE     "(%2), %%"REG_S"        \n\t"
416
        "subl $0xFFFF, %%ebx                    \n\t"
417
        "movzwl (%%"REG_S"), %%ecx              \n\t"
418
        "bswap %%ecx                            \n\t"
419
        "shrl $15, %%ecx                        \n\t"
420
        "add  $2, %%"REG_S"                     \n\t"
421
        "addl %%ecx, %%ebx                      \n\t"
422
        "mov  %%"REG_S", "BYTE    "(%2)         \n\t"
423
        "jmp 2f                                 \n\t"
424
        "1:                                     \n\t"
425
//eax:state ebx:low, edx:range, esi:RangeLPS
426
        "subl %%ecx, %%ebx                      \n\t"
427
        "movl %%esi, %%edx                      \n\t"
428
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx   \n\t"
429
        "shll %%cl, %%ebx                       \n\t"
430
        "shll %%cl, %%edx                       \n\t"
431
        "movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx   \n\t"
432
        "movb %%cl, (%1)                        \n\t"
433
        "add  $1, %0                            \n\t"
434
        "test %%bx, %%bx                        \n\t"
435
        " jnz 2f                                \n\t"
436

    
437
        "mov  "BYTE     "(%2), %%"REG_c"        \n\t"
438
        "movzwl (%%"REG_c"), %%esi              \n\t"
439
        "bswap %%esi                            \n\t"
440
        "shrl $15, %%esi                        \n\t"
441
        "subl $0xFFFF, %%esi                    \n\t"
442
        "add  $2, %%"REG_c"                     \n\t"
443
        "mov  %%"REG_c", "BYTE    "(%2)         \n\t"
444

    
445
        "leal -1(%%ebx), %%ecx                  \n\t"
446
        "xorl %%ebx, %%ecx                      \n\t"
447
        "shrl $15, %%ecx                        \n\t"
448
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx   \n\t"
449
        "neg %%ecx                              \n\t"
450
        "add $7, %%ecx                          \n\t"
451

    
452
        "shll %%cl , %%esi                      \n\t"
453
        "addl %%esi, %%ebx                      \n\t"
454
        "2:                                     \n\t"
455
        "movl %%edx, "RANGE    "(%2)            \n\t"
456
        "movl %%ebx, "LOW      "(%2)            \n\t"
457
        :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or misscompiles it (for example if "+a"(bit) or "+m"(*state) is used
458
        :"r"(state), "r"(c)
459
        : "%"REG_c, "%ebx", "%edx", "%"REG_S, "memory"
460
    );
461
    bit&=1;
462
#else /* BRANCHLESS_CABAC_DECODER */
463

    
464

    
465
#if defined HAVE_FAST_CMOV
466
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
467
        "mov    "tmp"       , %%ecx                                     \n\t"\
468
        "shl    $17         , "tmp"                                     \n\t"\
469
        "cmp    "low"       , "tmp"                                     \n\t"\
470
        "cmova  %%ecx       , "range"                                   \n\t"\
471
        "sbb    %%ecx       , %%ecx                                     \n\t"\
472
        "and    %%ecx       , "tmp"                                     \n\t"\
473
        "sub    "tmp"       , "low"                                     \n\t"\
474
        "xor    %%ecx       , "ret"                                     \n\t"
475
#else /* HAVE_FAST_CMOV */
476
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
477
        "mov    "tmp"       , %%ecx                                     \n\t"\
478
        "shl    $17         , "tmp"                                     \n\t"\
479
        "sub    "low"       , "tmp"                                     \n\t"\
480
        "sar    $31         , "tmp"                                     \n\t" /*lps_mask*/\
481
        "sub    %%ecx       , "range"                                   \n\t" /*RangeLPS - range*/\
482
        "and    "tmp"       , "range"                                   \n\t" /*(RangeLPS - range)&lps_mask*/\
483
        "add    %%ecx       , "range"                                   \n\t" /*new range*/\
484
        "shl    $17         , %%ecx                                     \n\t"\
485
        "and    "tmp"       , %%ecx                                     \n\t"\
486
        "sub    %%ecx       , "low"                                     \n\t"\
487
        "xor    "tmp"       , "ret"                                     \n\t"
488
#endif /* HAVE_FAST_CMOV */
489

    
490

    
491
#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
492
        "movzbl "statep"    , "ret"                                     \n\t"\
493
        "mov    "range"     , "tmp"                                     \n\t"\
494
        "and    $0xC0       , "range"                                   \n\t"\
495
        "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
496
        "sub    "range"     , "tmp"                                     \n\t"\
497
        BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
498
        "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
499
        "shl    %%cl        , "range"                                   \n\t"\
500
        "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
501
        "mov    "tmpbyte"   , "statep"                                  \n\t"\
502
        "shl    %%cl        , "low"                                     \n\t"\
503
        "test   "lowword"   , "lowword"                                 \n\t"\
504
        " jnz   1f                                                      \n\t"\
505
        "mov "BYTE"("cabac"), %%"REG_c"                                 \n\t"\
506
        "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
507
        "bswap  "tmp"                                                   \n\t"\
508
        "shr    $15         , "tmp"                                     \n\t"\
509
        "sub    $0xFFFF     , "tmp"                                     \n\t"\
510
        "add    $2          , %%"REG_c"                                 \n\t"\
511
        "mov    %%"REG_c"   , "BYTE    "("cabac")                       \n\t"\
512
        "lea    -1("low")   , %%ecx                                     \n\t"\
513
        "xor    "low"       , %%ecx                                     \n\t"\
514
        "shr    $15         , %%ecx                                     \n\t"\
515
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx            \n\t"\
516
        "neg    %%ecx                                                   \n\t"\
517
        "add    $7          , %%ecx                                     \n\t"\
518
        "shl    %%cl        , "tmp"                                     \n\t"\
519
        "add    "tmp"       , "low"                                     \n\t"\
520
        "1:                                                             \n\t"
521

    
522
    asm volatile(
523
        "movl "RANGE    "(%2), %%esi            \n\t"
524
        "movl "LOW      "(%2), %%ebx            \n\t"
525
        BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
526
        "movl %%esi, "RANGE    "(%2)            \n\t"
527
        "movl %%ebx, "LOW      "(%2)            \n\t"
528

    
529
        :"=&a"(bit)
530
        :"r"(state), "r"(c)
531
        : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
532
    );
533
    bit&=1;
534
#endif /* BRANCHLESS_CABAC_DECODER */
535
#else /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
536
    int s = *state;
537
    int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + s];
538
    int bit, lps_mask attribute_unused;
539

    
540
    /* Tentatively shrink range to the MPS sub-interval. */
    c->range -= RangeLPS;
541
#ifndef BRANCHLESS_CABAC_DECODER
542
    if(c->low < (c->range<<(CABAC_BITS+1))){
543
        /* MPS path: the bin is the low bit of the state. */
        bit= s&1;
544
        *state= ff_h264_mps_state[s];
545
        renorm_cabac_decoder_once(c);
546
    }else{
547
        /* LPS path: bit temporarily holds the renormalisation shift. */
        bit= ff_h264_norm_shift[RangeLPS];
548
        c->low -= (c->range<<(CABAC_BITS+1));
549
        *state= ff_h264_lps_state[s];
550
        c->range = RangeLPS<<bit;
551
        c->low <<= bit;
552
        bit= (s&1)^1;
553

    
554
        if(!(c->low & CABAC_MASK)){
555
            refill2(c);
556
        }
557
    }
558
#else /* BRANCHLESS_CABAC_DECODER */
    /* lps_mask is all ones when low exceeds the scaled MPS range (LPS),
     * else zero; this relies on >> of a negative int being arithmetic.
     * NOTE(review): for low == range<<(CABAC_BITS+1) this picks the MPS
     * path while the branching variant above picks LPS -- presumably that
     * case cannot occur; confirm. */
559
    lps_mask= ((c->range<<(CABAC_BITS+1)) - c->low)>>31;
560

    
561
    c->low -= (c->range<<(CABAC_BITS+1)) & lps_mask;
562
    c->range += (RangeLPS - c->range) & lps_mask;
563

    
    /* On LPS s becomes ~s: a negative index into the table centred at +128,
     * whose low bit is the inverted MPS. */
564
    s^=lps_mask;
565
    *state= (ff_h264_mlps_state+128)[s];
566
    bit= s&1;
567

    
    /* lps_mask is reused here as the renormalisation shift count. */
568
    lps_mask= ff_h264_norm_shift[c->range];
569
    c->range<<= lps_mask;
570
    c->low  <<= lps_mask;
571
    if(!(c->low & CABAC_MASK))
572
        refill2(c);
573
#endif /* BRANCHLESS_CABAC_DECODER */
574
#endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
575
    return bit;
576
}
577

    
578
/**
 * Out-of-line wrapper around get_cabac_inline().
 *
 * @param state context state, updated in place
 * @return the decoded bin
 */
static int av_noinline get_cabac_noinline(CABACContext *c, uint8_t * const state){
    const int bin = get_cabac_inline(c, state);

    return bin;
}
581

    
582
/**
 * Decode one context-coded bin; forwards to get_cabac_inline().
 *
 * @param state context state, updated in place
 * @return the decoded bin
 */
static int get_cabac(CABACContext *c, uint8_t * const state){
    const int bin = get_cabac_inline(c, state);

    return bin;
}
585

    
586
/**
 * Decode one bypass bin (no context; range is left unchanged).
 *
 * The disabled asm block is an alternative kept for reference.
 *
 * @return the decoded bin (0 or 1)
 */
static int get_cabac_bypass(CABACContext *c){
#if 0 //not faster
    int bit;
    asm volatile(
        "movl "RANGE    "(%1), %%ebx            \n\t"
        "movl "LOW      "(%1), %%eax            \n\t"
        "shl $17, %%ebx                         \n\t"
        "add %%eax, %%eax                       \n\t"
        "sub %%ebx, %%eax                       \n\t"
        "cdq                                    \n\t"
        "and %%edx, %%ebx                       \n\t"
        "add %%ebx, %%eax                       \n\t"
        "test %%ax, %%ax                        \n\t"
        " jnz 1f                                \n\t"
        "movl "BYTE     "(%1), %%"REG_b"        \n\t"
        "subl $0xFFFF, %%eax                    \n\t"
        "movzwl (%%"REG_b"), %%ecx              \n\t"
        "bswap %%ecx                            \n\t"
        "shrl $15, %%ecx                        \n\t"
        "addl $2, %%"REG_b"                     \n\t"
        "addl %%ecx, %%eax                      \n\t"
        "movl %%"REG_b", "BYTE     "(%1)        \n\t"
        "1:                                     \n\t"
        "movl %%eax, "LOW      "(%1)            \n\t"

        :"=&d"(bit)
        :"r"(c)
        : "%eax", "%"REG_b, "%ecx", "memory"
    );
    return bit+1;
#else
    int scaled_range;

    c->low *= 2;

    if ((c->low & CABAC_MASK) == 0)
        refill(c);

    scaled_range = c->range<<(CABAC_BITS+1);
    if (c->low >= scaled_range) {
        c->low -= scaled_range;
        return 1;
    }
    return 0;
#endif
}
632

    
633

    
634
/**
 * Decode one bypass bin and apply it to val as a sign, without branching
 * on the bin value.
 *
 * In the C path the result is val when low is at least the scaled range
 * after doubling (bin 1), and -val otherwise (bin 0). The asm path is used
 * when ARCH_X86 is defined and the build is not (PIC && __GNUC__).
 *
 * @param val value whose sign is selected by the decoded bin
 * @return val or -val
 */
static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
635
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
636
    asm volatile(
637
        "movl "RANGE    "(%1), %%ebx            \n\t"
638
        "movl "LOW      "(%1), %%eax            \n\t"
639
        "shl $17, %%ebx                         \n\t"
640
        "add %%eax, %%eax                       \n\t"
641
        "sub %%ebx, %%eax                       \n\t"
642
        "cdq                                    \n\t"
643
        "and %%edx, %%ebx                       \n\t"
644
        "add %%ebx, %%eax                       \n\t"
645
        "xor %%edx, %%ecx                       \n\t"
646
        "sub %%edx, %%ecx                       \n\t"
647
        "test %%ax, %%ax                        \n\t"
648
        " jnz 1f                                \n\t"
649
        "mov  "BYTE     "(%1), %%"REG_b"        \n\t"
650
        "subl $0xFFFF, %%eax                    \n\t"
651
        "movzwl (%%"REG_b"), %%edx              \n\t"
652
        "bswap %%edx                            \n\t"
653
        "shrl $15, %%edx                        \n\t"
654
        "add  $2, %%"REG_b"                     \n\t"
655
        "addl %%edx, %%eax                      \n\t"
656
        "mov  %%"REG_b", "BYTE     "(%1)        \n\t"
657
        "1:                                     \n\t"
658
        "movl %%eax, "LOW      "(%1)            \n\t"
659

    
660
        :"+c"(val)
661
        :"r"(c)
662
        : "%eax", "%"REG_b, "%edx", "memory"
663
    );
664
    return val;
665
#else
666
    int range, mask;
667
    c->low += c->low;
668

    
669
    if(!(c->low & CABAC_MASK))
670
        refill(c);
671

    
672
    range= c->range<<(CABAC_BITS+1);
673
    c->low -= range;
    /* mask is all ones when low went negative, else zero (relies on >> of
     * a negative int being arithmetic). */
674
    mask= c->low >> 31;
    /* Undo the subtraction when it underflowed. */
675
    range &= mask;
676
    c->low += range;
    /* (val ^ -1) - (-1) == -val; with mask == 0 val is returned unchanged. */
677
    return (val^mask)-mask;
678
#endif
679
}
680

    
681
//FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note: I'll kill you if you move my code away from under my fingers before I am finished with it!)
682
//FIXME use some macros to avoid duplicating get_cabac (can't be done yet as that would make optimization work hard)
683
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
684
/**
 * Inline-asm loop that decodes significance / last-coefficient flags and
 * records the positions of significant coefficients in index[].
 *
 * For each context byte from significant_coeff_ctx_base up to (but not
 * including) base + max_coeff - 1, one bin is decoded with
 * BRANCHLESS_GET_CABAC. When it is set, a second bin is decoded from the
 * context 61 bytes further on and the current position (context pointer
 * minus the original base) is stored through the index pointer; a set
 * second bin ends the loop. If the loop runs to completion the final
 * position is stored as well.
 *
 * NOTE(review): minusstart and minusindex are built by casting pointers to
 * int, which truncates on targets with 64-bit pointers -- confirm this is
 * only built for 32-bit x86.
 *
 * @return (final index write pointer + minusindex) >> 2, which appears to
 *         be the number of entries stored in index[]
 */
static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){
685
    void *end= significant_coeff_ctx_base + max_coeff - 1;
686
    int minusstart= -(int)significant_coeff_ctx_base;
687
    int minusindex= 4-(int)index;
688
    int coeff_count;
689
    asm volatile(
690
        "movl "RANGE    "(%3), %%esi            \n\t"
691
        "movl "LOW      "(%3), %%ebx            \n\t"
692

    
693
        "2:                                     \n\t"
694

    
695
        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
696

    
697
        "test $1, %%edx                         \n\t"
698
        " jz 3f                                 \n\t"
699

    
700
        BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
701

    
702
        "mov  %2, %%"REG_a"                     \n\t"
703
        "movl %4, %%ecx                         \n\t"
704
        "add  %1, %%"REG_c"                     \n\t"
705
        "movl %%ecx, (%%"REG_a")                \n\t"
706

    
707
        "test $1, %%edx                         \n\t"
708
        " jnz 4f                                \n\t"
709

    
710
        "add  $4, %%"REG_a"                     \n\t"
711
        "mov  %%"REG_a", %2                     \n\t"
712

    
713
        "3:                                     \n\t"
714
        "add  $1, %1                            \n\t"
715
        "cmp  %5, %1                            \n\t"
716
        " jb 2b                                 \n\t"
717
        "mov  %2, %%"REG_a"                     \n\t"
718
        "movl %4, %%ecx                         \n\t"
719
        "add  %1, %%"REG_c"                     \n\t"
720
        "movl %%ecx, (%%"REG_a")                \n\t"
721
        "4:                                     \n\t"
722
        "add  %6, %%eax                         \n\t"
723
        "shr $2, %%eax                          \n\t"
724

    
725
        "movl %%esi, "RANGE    "(%3)            \n\t"
726
        "movl %%ebx, "LOW      "(%3)            \n\t"
727
        :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)\
728
        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)\
729
        : "%"REG_c, "%ebx", "%edx", "%esi", "memory"\
730
    );
731
    return coeff_count;
732
}
733

    
734
/**
 * Inline-asm loop that decodes significance / last-coefficient flags for
 * up to 63 positions and records significant positions in index[].
 *
 * For each position "last" (starting at 0) the significance context is
 * significant_coeff_ctx_base + sig_off[last]. When the decoded bin is set,
 * a second bin is decoded from the context at
 * significant_coeff_ctx_base + last_coeff_flag_offset_8x8[last] + 15 and
 * the position is stored through the index pointer; a set second bin ends
 * the loop. The loop stops once the position reaches 63, in which case
 * that final position is stored as well.
 *
 * NOTE(review): last_coeff_flag_offset_8x8 is referenced by symbol name
 * and is not declared in this header -- presumably provided by the
 * including file; confirm.
 * NOTE(review): minusindex is built by casting a pointer to int, which
 * truncates on targets with 64-bit pointers -- confirm this is only built
 * for 32-bit x86.
 *
 * @return (final index write pointer + minusindex) >> 2, which appears to
 *         be the number of entries stored in index[]
 */
static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *sig_off){
735
    int minusindex= 4-(int)index;
736
    int coeff_count;
737
    long last=0;
738
    asm volatile(
739
        "movl "RANGE    "(%3), %%esi            \n\t"
740
        "movl "LOW      "(%3), %%ebx            \n\t"
741

    
742
        "mov %1, %%"REG_D"                      \n\t"
743
        "2:                                     \n\t"
744

    
745
        "mov %6, %%"REG_a"                      \n\t"
746
        "movzbl (%%"REG_a", %%"REG_D"), %%edi   \n\t"
747
        "add %5, %%"REG_D"                      \n\t"
748

    
749
        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
750

    
751
        "mov %1, %%edi                          \n\t"
752
        "test $1, %%edx                         \n\t"
753
        " jz 3f                                 \n\t"
754

    
755
        "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
756
        "add %5, %%"REG_D"                      \n\t"
757

    
758
        BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
759

    
760
        "mov %2, %%"REG_a"                      \n\t"
761
        "mov %1, %%edi                          \n\t"
762
        "movl %%edi, (%%"REG_a")                \n\t"
763

    
764
        "test $1, %%edx                         \n\t"
765
        " jnz 4f                                \n\t"
766

    
767
        "add $4, %%"REG_a"                      \n\t"
768
        "mov %%"REG_a", %2                      \n\t"
769

    
770
        "3:                                     \n\t"
771
        "addl $1, %%edi                         \n\t"
772
        "mov %%edi, %1                          \n\t"
773
        "cmpl $63, %%edi                        \n\t"
774
        " jb 2b                                 \n\t"
775
        "mov %2, %%"REG_a"                      \n\t"
776
        "movl %%edi, (%%"REG_a")                \n\t"
777
        "4:                                     \n\t"
778
        "addl %4, %%eax                         \n\t"
779
        "shr $2, %%eax                          \n\t"
780

    
781
        "movl %%esi, "RANGE    "(%3)            \n\t"
782
        "movl %%ebx, "LOW      "(%3)            \n\t"
783
        :"=&a"(coeff_count),"+m"(last), "+m"(index)\
784
        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\
785
        : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"\
786
    );
787
    return coeff_count;
788
}
789
#endif /* defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__)) */
790

    
791
/**
792
 *
793
 * @return the number of bytes read or 0 if no end
794
 */
795
static int get_cabac_terminate(CABACContext *c){
796
    c->range -= 2;
797
    if(c->low < c->range<<(CABAC_BITS+1)){
798
        renorm_cabac_decoder_once(c);
799
        return 0;
800
    }else{
801
        return c->bytestream - c->bytestream_start;
802
    }
803
}
804

    
805
/**
806
 * get (truncated) unnary binarization.
807
 */
808
static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
809
    int i;
810

    
811
    for(i=0; i<max; i++){
812
        if(get_cabac(c, state)==0)
813
            return i;
814

    
815
        if(i< max_index) state++;
816
    }
817

    
818
    return truncated ? max : -1;
819
}
820

    
821
/**
822
 * get unary exp golomb k-th order binarization.
823
 */
824
static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
825
    int i, v;
826
    int m= 1<<k;
827

    
828
    if(get_cabac(c, state)==0)
829
        return 0;
830

    
831
    if(0 < max_index) state++;
832

    
833
    for(i=1; i<max; i++){
834
        if(get_cabac(c, state)==0){
835
            if(is_signed && get_cabac_bypass(c)){
836
                return -i;
837
            }else
838
                return i;
839
        }
840

    
841
        if(i < max_index) state++;
842
    }
843

    
844
    while(get_cabac_bypass(c)){
845
        i+= m;
846
        m+= m;
847
    }
848

    
849
    v=0;
850
    while(m>>=1){
851
        v+= v + get_cabac_bypass(c);
852
    }
853
    i += v;
854

    
855
    if(is_signed && get_cabac_bypass(c)){
856
        return -i;
857
    }else
858
        return i;
859
}