## Revision fbb6b49d

View differences:

libavcodec/ac3dsp.c
42 42
```    }
```
43 43
```}
```
44 44

45
```static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
```
46
```{
```
47
```    int i, v = 0;
```
48
```    for (i = 0; i < len; i++)
```
49
```        v |= abs(src[i]);
```
50
```    return v;
```
51
```}
```
52

45 53
```av_cold void ff_ac3dsp_init(AC3DSPContext *c)
```
46 54
```{
```
47 55
```    c->ac3_exponent_min = ac3_exponent_min_c;
```
56
```    c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c;
```
48 57

49 58
```    if (HAVE_MMX)
```
50 59
```        ff_ac3dsp_init_x86(c);
```
libavcodec/ac3dsp.h
35 35
```     * @param nb_coefs  number of frequency coefficients.
```
36 36
```     */
```
37 37
```    void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
```
38

39
```    /**
```
40
```     * Calculate the maximum MSB of the absolute value of each element in an
```
41
```     * array of int16_t.
```
42
```     * @param src input array
```
43
```     *            constraints: 16-byte aligned; values must be in range [-32767,32767]
```
44
```     * @param len number of values in the array
```
45
```     *            constraints: multiple of 16 greater than 0
```
46
```     * @return    a value with the same MSB as max(abs(src[]))
```
47
```     */
```
48
```    int (*ac3_max_msb_abs_int16)(const int16_t *src, int len);
```
38 49
```} AC3DSPContext;
```
39 50

40 51
```void ff_ac3dsp_init    (AC3DSPContext *c);
```
libavcodec/ac3enc_fixed.c
270 270
``` * @param len number of values in the array
```
271 271
``` * @return    log2(max(abs(src[])))
```
272 272
``` */
```
273
```static int log2_tab(int16_t *tab, int n)
```
273
```static int log2_tab(AC3EncodeContext *s, int16_t *src, int len)
```
274 274
```{
```
275
```    int i, v;
```
276

277
```    v = 0;
```
278
```    for (i = 0; i < n; i++)
```
279
```        v |= abs(tab[i]);
```
280

275
```    int v = s->ac3dsp.ac3_max_msb_abs_int16(src, len);
```
281 276
```    return av_log2(v);
```
282 277
```}
```
283 278

......
308 303
``` */
```
309 304
```static int normalize_samples(AC3EncodeContext *s)
```
310 305
```{
```
311
```    int v = 14 - log2_tab(s->windowed_samples, AC3_WINDOW_SIZE);
```
306
```    int v = 14 - log2_tab(s, s->windowed_samples, AC3_WINDOW_SIZE);
```
312 307
```    lshift_tab(s->windowed_samples, AC3_WINDOW_SIZE, v);
```
313 308
```    return v - 9;
```
314 309
```}
```
libavcodec/x86/ac3dsp.asm
65 65
```%endif
```
66 66
```%undef PMINUB
```
67 67
```%undef LOOP_ALIGN
```
68

69
```;-----------------------------------------------------------------------------
```
70
```; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len)
```
71
```;
```
72
```; This function uses 2 different methods to calculate a valid result.
```
73
```; 1) logical 'or' of abs of each element
```
74
```;        This is used for ssse3 because of the pabsw instruction.
```
75
```;        It is also used for mmx because of the lack of min/max instructions.
```
76
```; 2) calculate min/max for the array, then or(abs(min),abs(max))
```
77
```;        This is used for mmxext and sse2 because they have pminsw/pmaxsw.
```
78
```;-----------------------------------------------------------------------------
```
79

80
```%macro AC3_MAX_MSB_ABS_INT16 2
```
81
```cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
```
82
```    pxor        m2, m2
```
83
```    pxor        m3, m3
```
84
```.loop:
```
85
```%ifidn %2, min_max
```
86
```    mova        m0, [srcq]
```
87
```    mova        m1, [srcq+mmsize]
```
88
```    pminsw      m2, m0
```
89
```    pminsw      m2, m1
```
90
```    pmaxsw      m3, m0
```
91
```    pmaxsw      m3, m1
```
92
```%else ; or_abs
```
93
```%ifidn %1, mmx
```
94
```    mova        m0, [srcq]
```
95
```    mova        m1, [srcq+mmsize]
```
96
```    ABS2        m0, m1, m3, m4
```
97
```%else ; ssse3
```
98
```    ; using memory args is faster for ssse3
```
99
```    pabsw       m0, [srcq]
```
100
```    pabsw       m1, [srcq+mmsize]
```
101
```%endif
```
102
```    por         m2, m0
```
103
```    por         m2, m1
```
104
```%endif
```
105
```    add       srcq, mmsize*2
```
106
```    sub       lend, mmsize
```
107
```    ja .loop
```
108
```%ifidn %2, min_max
```
109
```    ABS2        m2, m3, m0, m1
```
110
```    por         m2, m3
```
111
```%endif
```
112
```%ifidn mmsize, 16
```
113
```    mova        m0, m2
```
114
```    punpckhqdq  m0, m0
```
115
```    por         m2, m0
```
116
```%endif
```
117
```    PSHUFLW     m0, m2, 0xe
```
118
```    por         m2, m0
```
119
```    PSHUFLW     m0, m2, 0x1
```
120
```    por         m2, m0
```
121
```    movd       eax, m2
```
122
```    and        eax, 0xFFFF
```
123
```    RET
```
124
```%endmacro
```
125

126
```INIT_MMX
```
127
```%define ABS2 ABS2_MMX
```
128
```%define PSHUFLW pshufw
```
129
```AC3_MAX_MSB_ABS_INT16 mmx, or_abs
```
130
```%define ABS2 ABS2_MMX2
```
131
```AC3_MAX_MSB_ABS_INT16 mmxext, min_max
```
132
```INIT_XMM
```
133
```%define PSHUFLW pshuflw
```
134
```AC3_MAX_MSB_ABS_INT16 sse2, min_max
```
135
```%define ABS2 ABS2_SSSE3
```
136
```AC3_MAX_MSB_ABS_INT16 ssse3, or_abs
```
libavcodec/x86/ac3dsp_mmx.c
27 27
```extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
```
28 28
```extern void ff_ac3_exponent_min_sse2  (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
```
29 29

30
```extern int ff_ac3_max_msb_abs_int16_mmx   (const int16_t *src, int len);
```
31
```extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
```
32
```extern int ff_ac3_max_msb_abs_int16_sse2  (const int16_t *src, int len);
```
33
```extern int ff_ac3_max_msb_abs_int16_ssse3 (const int16_t *src, int len);
```
34

30 35
```av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
```
31 36
```{
```
32 37
```    int mm_flags = av_get_cpu_flags();
```
......
34 39
```#if HAVE_YASM
```
35 40
```    if (mm_flags & AV_CPU_FLAG_MMX) {
```
36 41
```        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
```
42
```        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
```
37 43
```    }
```
38 44
```    if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
```
39 45
```        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
```
46
```        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
```
40 47
```    }
```
41 48
```    if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
```
42 49
```        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
```
50
```        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
```
51
```    }
```
52
```    if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
```
53
```        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
```
43 54
```    }
```
44 55
```#endif
```
45 56
```}
```

Also available in: Unified diff