Statistics
| Branch: | Revision:

ffmpeg / libavcodec / cabac.h @ 7e14b808

History | View | Annotate | Download (28.9 KB)

1
/*
2
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
3
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

    
22
/**
23
 * @file cabac.h
24
 * Context Adaptive Binary Arithmetic Coder.
25
 */
26

    
27
#ifndef FFMPEG_CABAC_H
28
#define FFMPEG_CABAC_H
29

    
30
#include "bitstream.h"
31

    
32
//#undef NDEBUG
33
#include <assert.h>
34
#ifdef ARCH_X86
35
#include "x86_cpu.h"
36
#endif
37

    
38
/** Number of bitstream bits pulled in per refill; refill() advances the read pointer by CABAC_BITS/8 bytes. */
#define CABAC_BITS 16
39
/** Mask of the low CABAC_BITS bits of CABACContext.low; the decoder refills when these bits are all zero. */
#define CABAC_MASK ((1<<CABAC_BITS)-1)
40
/** When defined, get_cabac_inline() compiles its branchless decode paths instead of the branching ones. */
#define BRANCHLESS_CABAC_DECODER 1
41
/* Uncommenting this makes renorm_cabac_decoder_once() use its inline-asm variants instead of the C shift. */
//#define ARCH_X86_DISABLED 1
42

    
43
/**
 * State shared by the CABAC encoder and decoder routines in this header.
 *
 * The inline x86 assembly in get_cabac_inline() and the functions after it
 * addresses low, range and bytestream through hard-coded byte offsets
 * (the LOW, RANGE and BYTE string macros), so the order and types of the
 * leading fields must not be changed.
 * NOTE(review): on 32-bit x86, defining STRICT_LIMITS inserts symCount in
 * front of the pointers and would appear to invalidate those offsets --
 * confirm before enabling both together.
 */
typedef struct CABACContext{
44
    int low;                ///< coder "low" value; the decoder compares it against range<<(CABAC_BITS+1)
45
    int range;              ///< current interval width; renormalized while it is below 0x100
46
    int outstanding_count;  ///< encoder only: pending bits that put_cabac_bit() emits as the inverse of the next written bit
47
#ifdef STRICT_LIMITS
48
    int symCount;           ///< incremented once per encoded symbol by the put_cabac*() functions
49
#endif
50
    const uint8_t *bytestream_start; ///< start of decoder input; get_cabac_terminate() reports bytestream - bytestream_start
51
    const uint8_t *bytestream;       ///< current decoder read position, advanced by refill()/refill2()
52
    const uint8_t *bytestream_end;   ///< presumably the end of the decoder input; not read anywhere in this header -- TODO confirm
53
    PutBitContext pb;                ///< bit writer used by the encoder functions
54
}CABACContext;
55

    
56
/* Combined MPS/LPS state transition table; get_cabac_inline() indexes it at
 * offset +128 with the state XORed by the LPS mask. */
extern uint8_t ff_h264_mlps_state[4*64];
57
/* Indexed as 2*(range&0xC0) + state by the coder functions in this header. */
extern uint8_t ff_h264_lps_range[4*2*64];  ///< rangeTabLPS
58
extern uint8_t ff_h264_mps_state[2*64];     ///< transIdxMPS
59
extern uint8_t ff_h264_lps_state[2*64];     ///< transIdxLPS
60
/* Renormalization shift count looked up by range (and by a low-derived value in refill2()). */
extern const uint8_t ff_h264_norm_shift[512];
61

    
62

    
63
/* The three initializers below are only declared here; their definitions live
 * in another file, so their exact behaviour is not visible from this header. */
void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
64
void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
65
void ff_init_cabac_states(CABACContext *c);
66

    
67

    
68
/**
 * Write one resolved bit, then flush every pending "outstanding" bit as its inverse.
 * @param b the bit value (0 or 1) to write first
 */
static inline void put_cabac_bit(CABACContext *c, int b)
{
    put_bits(&c->pb, 1, b);

    /* Each outstanding bit takes the opposite value of the bit just written. */
    while (c->outstanding_count) {
        put_bits(&c->pb, 1, 1 - b);
        c->outstanding_count--;
    }
}
74

    
75
/**
 * Encoder renormalization: double range and low until range is at least 0x100,
 * emitting or deferring one bit per doubling depending on where low lies.
 */
static inline void renorm_cabac_encoder(CABACContext *c)
{
    while (c->range < 0x100) {
        //FIXME optimize
        if (c->low >= 0x200) {
            /* upper part of the interval: the bit is known to be 1 */
            put_cabac_bit(c, 1);
            c->low -= 0x200;
        } else if (c->low >= 0x100) {
            /* straddling the middle: defer the decision */
            c->outstanding_count++;
            c->low -= 0x100;
        } else {
            /* lower part of the interval: the bit is known to be 0 */
            put_cabac_bit(c, 0);
        }

        c->range *= 2;
        c->low   *= 2;
    }
}
92

    
93
#ifdef TEST
94
/**
 * Encode one bin with an adaptive context.
 * @param state context byte; its low bit is the current MPS value. Updated in place
 *              through ff_h264_mps_state / ff_h264_lps_state.
 * @param bit   the bin value to encode
 */
static void put_cabac(CABACContext *c, uint8_t * const state, int bit)
{
    const int lps_range = ff_h264_lps_range[2 * (c->range & 0xC0) + *state];

    if (bit != ((*state) & 1)) {
        /* least probable symbol: move low past the MPS sub-interval */
        c->low  += c->range - lps_range;
        c->range = lps_range;
        *state   = ff_h264_lps_state[*state];
    } else {
        /* most probable symbol: keep low, shrink range */
        c->range -= lps_range;
        *state    = ff_h264_mps_state[*state];
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
112

    
113
/**
 * Encode one bin using a caller-supplied LPS range instead of an adaptive context.
 * @param RangeLPS width of the sub-interval selected by a non-zero bit; must be below c->range
 * @param bit      0 keeps the lower (range - RangeLPS) part, non-zero selects the RangeLPS part
 */
static void put_cabac_static(CABACContext *c, int RangeLPS, int bit)
{
    assert(c->range > RangeLPS);

    if (bit) {
        c->low  += c->range - RangeLPS;
        c->range = RangeLPS;
    } else {
        c->range -= RangeLPS;
    }

    renorm_cabac_encoder(c);

#ifdef STRICT_LIMITS
    c->symCount++;
#endif
}
129

    
130
/**
131
 * @param bit 0 -> write zero bit, !=0 write one bit
132
 */
133
static void put_cabac_bypass(CABACContext *c, int bit){
134
    c->low += c->low;
135

    
136
    if(bit){
137
        c->low += c->range;
138
    }
139
//FIXME optimize
140
    if(c->low<0x200){
141
        put_cabac_bit(c, 0);
142
    }else if(c->low<0x400){
143
        c->outstanding_count++;
144
        c->low -= 0x200;
145
    }else{
146
        put_cabac_bit(c, 1);
147
        c->low -= 0x400;
148
    }
149

    
150
#ifdef STRICT_LIMITS
151
    c->symCount++;
152
#endif
153
}
154

    
155
/**
156
 *
157
 * @return the number of bytes written
158
 */
159
static int put_cabac_terminate(CABACContext *c, int bit){
160
    c->range -= 2;
161

    
162
    if(!bit){
163
        renorm_cabac_encoder(c);
164
    }else{
165
        c->low += c->range;
166
        c->range= 2;
167

    
168
        renorm_cabac_encoder(c);
169

    
170
        assert(c->low <= 0x1FF);
171
        put_cabac_bit(c, c->low>>9);
172
        put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
173

    
174
        flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
175
    }
176

    
177
#ifdef STRICT_LIMITS
178
    c->symCount++;
179
#endif
180

    
181
    return (put_bits_count(&c->pb)+7)>>3;
182
}
183

    
184
/**
185
 * put (truncated) unary binarization.
186
 */
187
static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
188
    int i;
189

    
190
    assert(v <= max);
191

    
192
#if 1
193
    for(i=0; i<v; i++){
194
        put_cabac(c, state, 1);
195
        if(i < max_index) state++;
196
    }
197
    if(truncated==0 || v<max)
198
        put_cabac(c, state, 0);
199
#else
200
    if(v <= max_index){
201
        for(i=0; i<v; i++){
202
            put_cabac(c, state+i, 1);
203
        }
204
        if(truncated==0 || v<max)
205
            put_cabac(c, state+i, 0);
206
    }else{
207
        for(i=0; i<=max_index; i++){
208
            put_cabac(c, state+i, 1);
209
        }
210
        for(; i<v; i++){
211
            put_cabac(c, state+max_index, 1);
212
        }
213
        if(truncated==0 || v<max)
214
            put_cabac(c, state+max_index, 0);
215
    }
216
#endif
217
}
218

    
219
/**
220
 * put unary exp golomb k-th order binarization.
221
 */
222
static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
223
    int i;
224

    
225
    if(v==0)
226
        put_cabac(c, state, 0);
227
    else{
228
        const int sign= v < 0;
229

    
230
        if(is_signed) v= FFABS(v);
231

    
232
        if(v<max){
233
            for(i=0; i<v; i++){
234
                put_cabac(c, state, 1);
235
                if(i < max_index) state++;
236
            }
237

    
238
            put_cabac(c, state, 0);
239
        }else{
240
            int m= 1<<k;
241

    
242
            for(i=0; i<max; i++){
243
                put_cabac(c, state, 1);
244
                if(i < max_index) state++;
245
            }
246

    
247
            v -= max;
248
            while(v >= m){ //FIXME optimize
249
                put_cabac_bypass(c, 1);
250
                v-= m;
251
                m+= m;
252
            }
253
            put_cabac_bypass(c, 0);
254
            while(m>>=1){
255
                put_cabac_bypass(c, v&m);
256
            }
257
        }
258

    
259
        if(is_signed)
260
            put_cabac_bypass(c, sign);
261
    }
262
}
263
#endif /* TEST */
264

    
265
/**
 * Feed the next CABAC_BITS bits of input into c->low.
 *
 * Called when the low CABAC_BITS bits of c->low are all zero. The read
 * pointer is only advanced while it is still below bytestream_end, so a
 * truncated or corrupt stream can no longer walk the pointer arbitrarily far
 * past the input buffer; at the end the same trailing bytes are re-read.
 * NOTE(review): assumes ff_init_cabac_decoder() sets bytestream_end to the end
 * of the input and that the buffer carries a few padding bytes -- confirm.
 */
static void refill(CABACContext *c){
#if CABAC_BITS == 16
    c->low += (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
    c->low += c->bytestream[0]<<1;
#endif
    c->low -= CABAC_MASK;
    /* stop advancing once the end of the input has been reached */
    if (c->bytestream < c->bytestream_end)
        c->bytestream += CABAC_BITS/8;
}
274

    
275
#if ! ( defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) )
276
/**
 * Refill variant for use after a multi-bit renormalization shift.
 *
 * The position of the lowest set bit of c->low (via ff_h264_norm_shift)
 * determines how far the freshly read bits must be shifted up before they
 * are added. As in refill(), the read pointer is only advanced while it is
 * below bytestream_end so that damaged input cannot run it off the buffer.
 * NOTE(review): assumes ff_init_cabac_decoder() sets bytestream_end to the end
 * of the input and that the buffer carries a few padding bytes -- confirm.
 */
static void refill2(CABACContext *c){
    int i, x;

    /* isolate the lowest set bit of low and turn it into a shift amount */
    x = c->low ^ (c->low-1);
    i = 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)];

    x = -CABAC_MASK;

#if CABAC_BITS == 16
    x += (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
    x += c->bytestream[0]<<1;
#endif

    /* x may be negative: multiply instead of left-shifting a negative int */
    c->low += x * (1 << i);
    if (c->bytestream < c->bytestream_end)
        c->bytestream += CABAC_BITS/8;
}
293
#endif
294

    
295
/**
 * Decoder renormalization: double range and low until range is at least
 * 0x100, refilling whenever the low CABAC_BITS bits of low become zero.
 */
static inline void renorm_cabac_decoder(CABACContext *c)
{
    while (c->range < 0x100) {
        c->range *= 2;
        c->low   *= 2;
        if ((c->low & CABAC_MASK) == 0)
            refill(c);
    }
}
303

    
304
/**
 * Perform at most one renormalization step: if range is below 0x100, double
 * range and low; then refill when the low CABAC_BITS bits of low are zero.
 *
 * With ARCH_X86_DISABLED defined one of several inline-asm variants is
 * used (the "#elif 1" one is the active choice); otherwise a branch-free C
 * shift is used. The P3/athlon figures are the original author's timings.
 */
static inline void renorm_cabac_decoder_once(CABACContext *c){
305
#ifdef ARCH_X86_DISABLED
306
    int temp;
307
#if 0
308
    /* variant: (range-0x100)>>31 into ecx, then shift range and low by cl */
    //P3:683    athlon:475
309
    asm(
310
        "lea -0x100(%0), %2         \n\t"
311
        "shr $31, %2                \n\t"  //FIXME 31->63 for x86-64
312
        "shl %%cl, %0               \n\t"
313
        "shl %%cl, %1               \n\t"
314
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
315
    );
316
#elif 0
317
    /* variant: compare against 0x100, setb into cl, then shift by cl */
    //P3:680    athlon:474
318
    asm(
319
        "cmp $0x100, %0             \n\t"
320
        "setb %%cl                  \n\t"  //FIXME 31->63 for x86-64
321
        "shl %%cl, %0               \n\t"
322
        "shl %%cl, %1               \n\t"
323
        : "+r"(c->range), "+r"(c->low), "+c"(temp)
324
    );
325
#elif 1
326
    int temp2;
327
    /* variant: cltd turns the sign of range-0x100 into a mask in edx; the masked
     * range and low are then added onto themselves */
    //P3:665    athlon:517
328
    asm(
329
        "lea -0x100(%0), %%eax      \n\t"
330
        "cltd                       \n\t"
331
        "mov %0, %%eax              \n\t"
332
        "and %%edx, %0              \n\t"
333
        "and %1, %%edx              \n\t"
334
        "add %%eax, %0              \n\t"
335
        "add %%edx, %1              \n\t"
336
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
337
    );
338
#elif 0
339
    int temp2;
340
    /* variant: same masking scheme, with the mask produced by cmp/sbb */
    //P3:673    athlon:509
341
    asm(
342
        "cmp $0x100, %0             \n\t"
343
        "sbb %%edx, %%edx           \n\t"
344
        "mov %0, %%eax              \n\t"
345
        "and %%edx, %0              \n\t"
346
        "and %1, %%edx              \n\t"
347
        "add %%eax, %0              \n\t"
348
        "add %%edx, %1              \n\t"
349
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
350
    );
351
#else
352
    int temp2;
353
    /* variant: compute the doubled values with lea and select them with cmovb */
    //P3:677    athlon:511
354
    asm(
355
        "cmp $0x100, %0             \n\t"
356
        "lea (%0, %0), %%eax        \n\t"
357
        "lea (%1, %1), %%edx        \n\t"
358
        "cmovb %%eax, %0            \n\t"
359
        "cmovb %%edx, %1            \n\t"
360
        : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2)
361
    );
362
#endif
363
#else
364
    //P3:675    athlon:476
365
    /* shift is 1 when range-0x100 has its top bit set (range < 0x100), else 0 */
    int shift= (uint32_t)(c->range - 0x100)>>31;
366
    c->range<<= shift;
367
    c->low  <<= shift;
368
#endif
369
    /* all buffered input bits consumed: fetch the next CABAC_BITS bits */
    if(!(c->low & CABAC_MASK))
370
        refill(c);
371
}
372

    
373
/**
 * Decode one context-coded bin.
 *
 * On x86 builds with enough free registers an inline-asm implementation is
 * used; everywhere else the portable C code at the end of the function runs.
 * The LOW/RANGE/BYTE* string macros and the BRANCHLESS_GET_CABAC* macros
 * defined inside this function are also used by functions further down in
 * this header.
 *
 * @param state context byte; its low bit is the current MPS value. Updated in place.
 * @return the decoded bin, 0 or 1
 */
static av_always_inline int get_cabac_inline(CABACContext *c, uint8_t * const state){
    //FIXME gcc generates duplicate load/stores for c->low and c->range
/* byte offsets of the CABACContext fields as seen by the inline asm */
#define LOW          "0"
#define RANGE        "4"
#ifdef ARCH_X86_64
#define BYTESTART   "16"
#define BYTE        "24"
#define BYTEEND     "32"
#else
#define BYTESTART   "12"
#define BYTE        "16"
#define BYTEEND     "20"
#endif
#if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
    int bit;

#ifndef BRANCHLESS_CABAC_DECODER
    asm volatile(
        "movzbl (%1), %0                        \n\t"
        "movl "RANGE    "(%2), %%ebx            \n\t"
        "movl "RANGE    "(%2), %%edx            \n\t"
        "andl $0xC0, %%ebx                      \n\t"
        "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
        "movl "LOW      "(%2), %%ebx            \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "subl %%esi, %%edx                      \n\t"
        "movl %%edx, %%ecx                      \n\t"
        "shll $17, %%ecx                        \n\t"
        "cmpl %%ecx, %%ebx                      \n\t"
        " ja 1f                                 \n\t"

#if 1
        //athlon:4067 P3:4110
        "lea -0x100(%%edx), %%ecx               \n\t"
        "shr $31, %%ecx                         \n\t"
        "shl %%cl, %%edx                        \n\t"
        "shl %%cl, %%ebx                        \n\t"
#else
        //athlon:4057 P3:4130
        "cmp $0x100, %%edx                      \n\t" //FIXME avoidable
        "setb %%cl                              \n\t"
        "shl %%cl, %%edx                        \n\t"
        "shl %%cl, %%ebx                        \n\t"
#endif
        "movzbl "MANGLE(ff_h264_mps_state)"(%0), %%ecx   \n\t"
        "movb %%cl, (%1)                        \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "test %%bx, %%bx                        \n\t"
        " jnz 2f                                \n\t"
        "mov  "BYTE     "(%2), %%"REG_S"        \n\t"
        "subl $0xFFFF, %%ebx                    \n\t"
        "movzwl (%%"REG_S"), %%ecx              \n\t"
        "bswap %%ecx                            \n\t"
        "shrl $15, %%ecx                        \n\t"
        "add  $2, %%"REG_S"                     \n\t"
        "addl %%ecx, %%ebx                      \n\t"
        "mov  %%"REG_S", "BYTE    "(%2)         \n\t"
        "jmp 2f                                 \n\t"
        "1:                                     \n\t"
//eax:state ebx:low, edx:range, esi:RangeLPS
        "subl %%ecx, %%ebx                      \n\t"
        "movl %%esi, %%edx                      \n\t"
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx   \n\t"
        "shll %%cl, %%ebx                       \n\t"
        "shll %%cl, %%edx                       \n\t"
        "movzbl "MANGLE(ff_h264_lps_state)"(%0), %%ecx   \n\t"
        "movb %%cl, (%1)                        \n\t"
        "add  $1, %0                            \n\t"
        "test %%bx, %%bx                        \n\t"
        " jnz 2f                                \n\t"

        "mov  "BYTE     "(%2), %%"REG_c"        \n\t"
        "movzwl (%%"REG_c"), %%esi              \n\t"
        "bswap %%esi                            \n\t"
        "shrl $15, %%esi                        \n\t"
        "subl $0xFFFF, %%esi                    \n\t"
        "add  $2, %%"REG_c"                     \n\t"
        "mov  %%"REG_c", "BYTE    "(%2)         \n\t"

        "leal -1(%%ebx), %%ecx                  \n\t"
        "xorl %%ebx, %%ecx                      \n\t"
        "shrl $15, %%ecx                        \n\t"
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx   \n\t"
        "neg %%ecx                              \n\t"
        "add $7, %%ecx                          \n\t"

        "shll %%cl , %%esi                      \n\t"
        "addl %%esi, %%ebx                      \n\t"
        "2:                                     \n\t"
        "movl %%edx, "RANGE    "(%2)            \n\t"
        "movl %%ebx, "LOW      "(%2)            \n\t"
        :"=&a"(bit) //FIXME this is fragile gcc either runs out of registers or miscompiles it (for example if "+a"(bit) or "+m"(*state) is used
        :"r"(state), "r"(c)
        : "%"REG_c, "%ebx", "%edx", "%"REG_S, "memory"
    );
    bit &= 1;
#else /* BRANCHLESS_CABAC_DECODER */


#if defined HAVE_FAST_CMOV
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "mov    "tmp"       , %%ecx                                     \n\t"\
        "shl    $17         , "tmp"                                     \n\t"\
        "cmp    "low"       , "tmp"                                     \n\t"\
        "cmova  %%ecx       , "range"                                   \n\t"\
        "sbb    %%ecx       , %%ecx                                     \n\t"\
        "and    %%ecx       , "tmp"                                     \n\t"\
        "sub    "tmp"       , "low"                                     \n\t"\
        "xor    %%ecx       , "ret"                                     \n\t"
#else /* HAVE_FAST_CMOV */
#define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "mov    "tmp"       , %%ecx                                     \n\t"\
        "shl    $17         , "tmp"                                     \n\t"\
        "sub    "low"       , "tmp"                                     \n\t"\
        "sar    $31         , "tmp"                                     \n\t" /*lps_mask*/\
        "sub    %%ecx       , "range"                                   \n\t" /*RangeLPS - range*/\
        "and    "tmp"       , "range"                                   \n\t" /*(RangeLPS - range)&lps_mask*/\
        "add    %%ecx       , "range"                                   \n\t" /*new range*/\
        "shl    $17         , %%ecx                                     \n\t"\
        "and    "tmp"       , %%ecx                                     \n\t"\
        "sub    %%ecx       , "low"                                     \n\t"\
        "xor    "tmp"       , "ret"                                     \n\t"
#endif /* HAVE_FAST_CMOV */


#define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "movzbl "statep"    , "ret"                                     \n\t"\
        "mov    "range"     , "tmp"                                     \n\t"\
        "and    $0xC0       , "range"                                   \n\t"\
        "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\
        "sub    "range"     , "tmp"                                     \n\t"\
        BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
        "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx          \n\t"\
        "shl    %%cl        , "range"                                   \n\t"\
        "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp"          \n\t"\
        "mov    "tmpbyte"   , "statep"                                  \n\t"\
        "shl    %%cl        , "low"                                     \n\t"\
        "test   "lowword"   , "lowword"                                 \n\t"\
        " jnz   1f                                                      \n\t"\
        "mov "BYTE"("cabac"), %%"REG_c"                                 \n\t"\
        "movzwl (%%"REG_c")     , "tmp"                                 \n\t"\
        "bswap  "tmp"                                                   \n\t"\
        "shr    $15         , "tmp"                                     \n\t"\
        "sub    $0xFFFF     , "tmp"                                     \n\t"\
        "add    $2          , %%"REG_c"                                 \n\t"\
        "mov    %%"REG_c"   , "BYTE    "("cabac")                       \n\t"\
        "lea    -1("low")   , %%ecx                                     \n\t"\
        "xor    "low"       , %%ecx                                     \n\t"\
        "shr    $15         , %%ecx                                     \n\t"\
        "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx            \n\t"\
        "neg    %%ecx                                                   \n\t"\
        "add    $7          , %%ecx                                     \n\t"\
        "shl    %%cl        , "tmp"                                     \n\t"\
        "add    "tmp"       , "low"                                     \n\t"\
        "1:                                                             \n\t"

    asm volatile(
        "movl "RANGE    "(%2), %%esi            \n\t"
        "movl "LOW      "(%2), %%ebx            \n\t"
        BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
        "movl %%esi, "RANGE    "(%2)            \n\t"
        "movl %%ebx, "LOW      "(%2)            \n\t"

        :"=&a"(bit)
        :"r"(state), "r"(c)
        : "%"REG_c, "%ebx", "%edx", "%esi", "memory"
    );
    bit &= 1;
#endif /* BRANCHLESS_CABAC_DECODER */
#else /* defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) */
    /* portable C implementation */
    int ctx = *state;
    const int lps_range = ff_h264_lps_range[2 * (c->range & 0xC0) + ctx];
    int bit;

    /* tentatively assume the MPS: range now holds the MPS sub-interval */
    c->range -= lps_range;
#ifndef BRANCHLESS_CABAC_DECODER
    if (c->low >= (c->range << (CABAC_BITS + 1))) {
        /* LPS path: drop the MPS sub-interval and renormalize by table lookup */
        const int shift = ff_h264_norm_shift[lps_range];

        c->low  -= (c->range << (CABAC_BITS + 1));
        *state   = ff_h264_lps_state[ctx];
        c->range = lps_range << shift;
        c->low <<= shift;
        bit      = (ctx & 1) ^ 1;

        if ((c->low & CABAC_MASK) == 0)
            refill2(c);
    } else {
        /* MPS path: at most one renormalization step is needed */
        bit    = ctx & 1;
        *state = ff_h264_mps_state[ctx];
        renorm_cabac_decoder_once(c);
    }
#else /* BRANCHLESS_CABAC_DECODER */
    {
        /* all ones when the LPS was coded, zero for the MPS */
        const int lps_mask = ((c->range << (CABAC_BITS + 1)) - c->low) >> 31;
        int shift;

        c->low   -= (c->range << (CABAC_BITS + 1)) & lps_mask;
        c->range += (lps_range - c->range) & lps_mask;

        /* inverting the state selects the LPS half of the combined table */
        ctx   ^= lps_mask;
        *state = (ff_h264_mlps_state + 128)[ctx];
        bit    = ctx & 1;

        shift      = ff_h264_norm_shift[c->range];
        c->range <<= shift;
        c->low   <<= shift;
        if ((c->low & CABAC_MASK) == 0)
            refill2(c);
    }
#endif /* BRANCHLESS_CABAC_DECODER */
#endif /* defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) */
    return bit;
}
584

    
585
/**
 * Out-of-line wrapper around get_cabac_inline(); marked av_noinline.
 * @return the decoded bin, 0 or 1
 */
static int av_noinline get_cabac_noinline(CABACContext *c, uint8_t * const state)
{
    const int bin = get_cabac_inline(c, state);

    return bin;
}
588

    
589
/**
 * Decode one context-coded bin; plain wrapper around get_cabac_inline().
 * @return the decoded bin, 0 or 1
 */
static int get_cabac(CABACContext *c, uint8_t * const state)
{
    const int bin = get_cabac_inline(c, state);

    return bin;
}
592

    
593
/**
 * Decode one bypass bin.
 * An x86 inline-asm variant of this function was kept here under "#if 0"
 * with the remark "not faster"; only the C implementation is retained.
 * @return the decoded bin, 0 or 1
 */
static int get_cabac_bypass(CABACContext *c)
{
    int scaled_range;

    c->low *= 2;

    if ((c->low & CABAC_MASK) == 0)
        refill(c);

    scaled_range = c->range << (CABAC_BITS + 1);
    if (c->low >= scaled_range) {
        c->low -= scaled_range;
        return 1;
    }
    return 0;
}
639

    
640

    
641
/**
 * Decode one bypass bin and apply it as a sign to val.
 * @param val value whose sign is selected by the decoded bin
 * @return val when the bin is 1, -val when the bin is 0
 */
static av_always_inline int get_cabac_bypass_sign(CABACContext *c, int val){
#if defined(ARCH_X86) && !(defined(PIC) && defined(__GNUC__))
    asm volatile(
        "movl "RANGE    "(%1), %%ebx            \n\t"
        "movl "LOW      "(%1), %%eax            \n\t"
        "shl $17, %%ebx                         \n\t"
        "add %%eax, %%eax                       \n\t"
        "sub %%ebx, %%eax                       \n\t"
        "cltd                                   \n\t"
        "and %%edx, %%ebx                       \n\t"
        "add %%ebx, %%eax                       \n\t"
        "xor %%edx, %%ecx                       \n\t"
        "sub %%edx, %%ecx                       \n\t"
        "test %%ax, %%ax                        \n\t"
        " jnz 1f                                \n\t"
        "mov  "BYTE     "(%1), %%"REG_b"        \n\t"
        "subl $0xFFFF, %%eax                    \n\t"
        "movzwl (%%"REG_b"), %%edx              \n\t"
        "bswap %%edx                            \n\t"
        "shrl $15, %%edx                        \n\t"
        "add  $2, %%"REG_b"                     \n\t"
        "addl %%edx, %%eax                      \n\t"
        "mov  %%"REG_b", "BYTE     "(%1)        \n\t"
        "1:                                     \n\t"
        "movl %%eax, "LOW      "(%1)            \n\t"

        :"+c"(val)
        :"r"(c)
        : "%eax", "%"REG_b, "%edx", "memory"
    );
    return val;
#else
    int scaled_range, sign_mask;

    c->low *= 2;

    if ((c->low & CABAC_MASK) == 0)
        refill(c);

    scaled_range = c->range << (CABAC_BITS + 1);
    c->low      -= scaled_range;
    /* all ones when low went negative (bin 0), zero otherwise (bin 1) */
    sign_mask    = c->low >> 31;
    /* undo the subtraction for bin 0 */
    c->low      += scaled_range & sign_mask;
    /* conditional negation of val */
    return (val ^ sign_mask) - sign_mask;
#endif
}
687

    
688
//FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note: I'll kill you if you move my code away from under my fingers before I am finished with it!)
689
//FIXME use some macros to avoid duplicating get_cabac (cannot be done yet as that would make optimization work hard)
690
#if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
691
/**
 * x86 inline-asm loop decoding the significance map of one block.
 *
 * For each coefficient position the bin at the current context byte is
 * decoded; when it is 1, a second bin is decoded with the context byte 61
 * bytes further on and the position (context pointer minus the initial
 * base) is stored through the index write pointer. A 1 in the second bin
 * ends the loop; otherwise the write pointer moves to the next int. If the
 * loop runs until the context pointer reaches base + max_coeff - 1, that
 * final position is stored as well.
 * NOTE(review): the +61 context is presumably the matching "last coefficient"
 * context -- verify against the caller.
 *
 * @param c      decoder context; range and low are kept in esi/ebx during the loop and written back at the end
 * @param index  output array receiving the stored positions
 * @return the number of positions written to index
 */
static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){
692
    /* loop bound for the context pointer */
    void *end= significant_coeff_ctx_base + max_coeff - 1;
693
    /* negated start address: adding the current context pointer yields the position */
    int minusstart= -(int)significant_coeff_ctx_base;
694
    /* 4 - start of index: added to the last write address and divided by 4 to give the count */
    int minusindex= 4-(int)index;
695
    int coeff_count;
696
    asm volatile(
697
        "movl "RANGE    "(%3), %%esi            \n\t"
698
        "movl "LOW      "(%3), %%ebx            \n\t"
699

    
700
        "2:                                     \n\t"
701

    
702
        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
703

    
704
        "test $1, %%edx                         \n\t"
705
        " jz 3f                                 \n\t"
706

    
707
        BRANCHLESS_GET_CABAC("%%edx", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
708

    
709
        "mov  %2, %%"REG_a"                     \n\t"
710
        "movl %4, %%ecx                         \n\t"
711
        "add  %1, %%"REG_c"                     \n\t"
712
        "movl %%ecx, (%%"REG_a")                \n\t"
713

    
714
        "test $1, %%edx                         \n\t"
715
        " jnz 4f                                \n\t"
716

    
717
        "add  $4, %%"REG_a"                     \n\t"
718
        "mov  %%"REG_a", %2                     \n\t"
719

    
720
        "3:                                     \n\t"
721
        "add  $1, %1                            \n\t"
722
        "cmp  %5, %1                            \n\t"
723
        " jb 2b                                 \n\t"
724
        "mov  %2, %%"REG_a"                     \n\t"
725
        "movl %4, %%ecx                         \n\t"
726
        "add  %1, %%"REG_c"                     \n\t"
727
        "movl %%ecx, (%%"REG_a")                \n\t"
728
        "4:                                     \n\t"
729
        "add  %6, %%eax                         \n\t"
730
        "shr $2, %%eax                          \n\t"
731

    
732
        "movl %%esi, "RANGE    "(%3)            \n\t"
733
        "movl %%ebx, "LOW      "(%3)            \n\t"
734
        :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)\
735
        :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)\
736
        : "%"REG_c, "%ebx", "%edx", "%esi", "memory"\
737
    );
738
    return coeff_count;
739
}
740

    
741
/**
 * x86 inline-asm loop decoding the significance map of an 8x8 block.
 *
 * A position counter ("last") runs from 0 up to 63. For each position the
 * context at significant_coeff_ctx_base + sig_off[position] is used to
 * decode a bin; when it is 1, a second bin is decoded with the context at
 * significant_coeff_ctx_base + last_coeff_flag_offset_8x8[position] + 15
 * and the position is stored through the index write pointer. A 1 in the
 * second bin ends the loop; otherwise the write pointer moves to the next
 * int. When the counter reaches 63 that value is stored as well.
 * last_coeff_flag_offset_8x8 is not declared in this header; it has to be
 * provided by the including file.
 *
 * @param c       decoder context; range and low are kept in esi/ebx during the loop and written back at the end
 * @param index   output array receiving the stored positions
 * @param sig_off per-position context offsets for the first bin
 * @return the number of positions written to index
 */
static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, const uint8_t *sig_off){
742
    /* 4 - start of index: added to the last write address and divided by 4 to give the count */
    int minusindex= 4-(int)index;
743
    int coeff_count;
744
    /* current coefficient position, kept in memory between iterations */
    long last=0;
745
    asm volatile(
746
        "movl "RANGE    "(%3), %%esi            \n\t"
747
        "movl "LOW      "(%3), %%ebx            \n\t"
748

    
749
        "mov %1, %%"REG_D"                      \n\t"
750
        "2:                                     \n\t"
751

    
752
        "mov %6, %%"REG_a"                      \n\t"
753
        "movzbl (%%"REG_a", %%"REG_D"), %%edi   \n\t"
754
        "add %5, %%"REG_D"                      \n\t"
755

    
756
        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
757

    
758
        "mov %1, %%edi                          \n\t"
759
        "test $1, %%edx                         \n\t"
760
        " jz 3f                                 \n\t"
761

    
762
        "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
763
        "add %5, %%"REG_D"                      \n\t"
764

    
765
        BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%"REG_D")", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
766

    
767
        "mov %2, %%"REG_a"                      \n\t"
768
        "mov %1, %%edi                          \n\t"
769
        "movl %%edi, (%%"REG_a")                \n\t"
770

    
771
        "test $1, %%edx                         \n\t"
772
        " jnz 4f                                \n\t"
773

    
774
        "add $4, %%"REG_a"                      \n\t"
775
        "mov %%"REG_a", %2                      \n\t"
776

    
777
        "3:                                     \n\t"
778
        "addl $1, %%edi                         \n\t"
779
        "mov %%edi, %1                          \n\t"
780
        "cmpl $63, %%edi                        \n\t"
781
        " jb 2b                                 \n\t"
782
        "mov %2, %%"REG_a"                      \n\t"
783
        "movl %%edi, (%%"REG_a")                \n\t"
784
        "4:                                     \n\t"
785
        "addl %4, %%eax                         \n\t"
786
        "shr $2, %%eax                          \n\t"
787

    
788
        "movl %%esi, "RANGE    "(%3)            \n\t"
789
        "movl %%ebx, "LOW      "(%3)            \n\t"
790
        :"=&a"(coeff_count),"+m"(last), "+m"(index)\
791
        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\
792
        : "%"REG_c, "%ebx", "%edx", "%esi", "%"REG_D, "memory"\
793
    );
794
    return coeff_count;
795
}
796
#endif /* defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS) */
797

    
798
/**
799
 *
800
 * @return the number of bytes read or 0 if no end
801
 */
802
static int get_cabac_terminate(CABACContext *c){
803
    c->range -= 2;
804
    if(c->low < c->range<<(CABAC_BITS+1)){
805
        renorm_cabac_decoder_once(c);
806
        return 0;
807
    }else{
808
        return c->bytestream - c->bytestream_start;
809
    }
810
}
811

    
812
#if 0
813
/**
814
 * Get (truncated) unary binarization.
815
 */
816
static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
817
    int i;
818

819
    for(i=0; i<max; i++){
820
        if(get_cabac(c, state)==0)
821
            return i;
822

823
        if(i< max_index) state++;
824
    }
825

826
    return truncated ? max : -1;
827
}
828

829
/**
830
 * get unary exp golomb k-th order binarization.
831
 */
832
static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
833
    int i, v;
834
    int m= 1<<k;
835

836
    if(get_cabac(c, state)==0)
837
        return 0;
838

839
    if(0 < max_index) state++;
840

841
    for(i=1; i<max; i++){
842
        if(get_cabac(c, state)==0){
843
            if(is_signed && get_cabac_bypass(c)){
844
                return -i;
845
            }else
846
                return i;
847
        }
848

849
        if(i < max_index) state++;
850
    }
851

852
    while(get_cabac_bypass(c)){
853
        i+= m;
854
        m+= m;
855
    }
856

857
    v=0;
858
    while(m>>=1){
859
        v+= v + get_cabac_bypass(c);
860
    }
861
    i += v;
862

863
    if(is_signed && get_cabac_bypass(c)){
864
        return -i;
865
    }else
866
        return i;
867
}
868
#endif /* 0 */
869

    
870
#endif /* FFMPEG_CABAC_H */