Revision fbb6b49d

View differences:

libavcodec/ac3dsp.c
42 42
    }
43 43
}
44 44

  
45
/* Plain C implementation installed by default in ff_ac3dsp_init().
 * ORs together abs() of every element of src[0..len-1]; the result is not
 * the maximum itself, but it has the same most-significant set bit as the
 * largest absolute value in the array. */
static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
46
{
47
    int i, v = 0;
48
    for (i = 0; i < len; i++)
49
        v |= abs(src[i]);
50
    return v;
51
}
52

  
45 53
av_cold void ff_ac3dsp_init(AC3DSPContext *c)
46 54
{
47 55
    c->ac3_exponent_min = ac3_exponent_min_c;
56
    c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c;
48 57

  
49 58
    if (HAVE_MMX)
50 59
        ff_ac3dsp_init_x86(c);
libavcodec/ac3dsp.h
35 35
     * @param nb_coefs  number of frequency coefficients.
36 36
     */
37 37
    void (*ac3_exponent_min)(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
38

  
39
    /**
40
     * Calculate the maximum MSB of the absolute value of each element in an
41
     * array of int16_t.
42
     * @param src input array
43
     *            constraints: align 16. values must be in range [-32767,32767]
44
     * @param len number of values in the array
45
     *            constraints: multiple of 16 greater than 0
46
     * @return    a value with the same MSB as max(abs(src[]))
47
     */
48
    int (*ac3_max_msb_abs_int16)(const int16_t *src, int len);
38 49
} AC3DSPContext;
39 50

  
40 51
void ff_ac3dsp_init    (AC3DSPContext *c);
libavcodec/ac3enc_fixed.c
270 270
 * @param n   number of values in the array
271 271
 * @return    log2(max(abs(tab[])))
272 272
 */
273
/* NOTE(review): the doc comment above still names the parameters `tab` and
 * `n`; the updated signature takes `s`, `src` and `len` - the @param lines
 * should be brought in line with it. */
static int log2_tab(int16_t *tab, int n)
273
static int log2_tab(AC3EncodeContext *s, int16_t *src, int len)
274 274
{
275
    int i, v;
276

  
277
    v = 0;
278
    for (i = 0; i < n; i++)
279
        v |= abs(tab[i]);
280

  
275
    /* The OR-of-abs scan is delegated to the function pointer held in
     * s->ac3dsp; only the position of the top set bit of v is consumed by
     * av_log2() below. */
    int v = s->ac3dsp.ac3_max_msb_abs_int16(src, len);
281 276
    return av_log2(v);
282 277
}
283 278

  
......
308 303
 */
309 304
static int normalize_samples(AC3EncodeContext *s)
310 305
{
311
    int v = 14 - log2_tab(s->windowed_samples, AC3_WINDOW_SIZE);
306
    /* v is 14 minus the log2 of the largest magnitude in the windowed
     * samples, i.e. the distance between that value's top set bit and
     * bit 14. */
    int v = 14 - log2_tab(s, s->windowed_samples, AC3_WINDOW_SIZE);
312 307
    /* presumably shifts every sample left by v bits in place - verify
     * against lshift_tab() */
    lshift_tab(s->windowed_samples, AC3_WINDOW_SIZE, v);
313 308
    return v - 9;
314 309
}
libavcodec/x86/ac3dsp.asm
65 65
%endif
66 66
%undef PMINUB
67 67
%undef LOOP_ALIGN
68

  
69
;-----------------------------------------------------------------------------
70
; int ff_ac3_max_msb_abs_int16(const int16_t *src, int len)
71
;
72
; This function uses 2 different methods to calculate a valid result.
73
; 1) logical 'or' of abs of each element
74
;        This is used for ssse3 because of the pabsw instruction.
75
;        It is also used for mmx because of the lack of min/max instructions.
76
; 2) calculate min/max for the array, then or(abs(min),abs(max))
77
;        This is used for mmxext and sse2 because they have pminsw/pmaxsw.
78
;-----------------------------------------------------------------------------
79

  
80
; Macro arguments:
;   %1  suffix appended to the generated function name; it is also compared
;       against "mmx" to pick the ABS2 path instead of the pabsw path
;   %2  method selector: "min_max" selects the pminsw/pmaxsw loop, anything
;       else takes the or_abs loop
; ABS2 and PSHUFLW are not defined here; they must be %define'd before the
; macro is instantiated.
%macro AC3_MAX_MSB_ABS_INT16 2
81
cglobal ac3_max_msb_abs_int16_%1, 2,2,5, src, len
82
    ; m2 = OR accumulator (or_abs) or running minimum (min_max)
    pxor        m2, m2
83
    ; m3 = running maximum (min_max); passed to ABS2 in the mmx path
    pxor        m3, m3
84
.loop:
85
%ifidn %2, min_max
86
    mova        m0, [srcq]
87
    mova        m1, [srcq+mmsize]
88
    ; m2/m3 start at zero, so the running min stays <= 0 and the running
    ; max stays >= 0
    pminsw      m2, m0
89
    pminsw      m2, m1
90
    pmaxsw      m3, m0
91
    pmaxsw      m3, m1
92
%else ; or_abs
93
%ifidn %1, mmx
94
    mova        m0, [srcq]
95
    mova        m1, [srcq+mmsize]
96
    ; m3 and m4 are presumably scratch registers for ABS2 - confirm against
    ; the ABS2_MMX definition
    ABS2        m0, m1, m3, m4
97
%else ; ssse3
98
    ; using memory args is faster for ssse3
99
    pabsw       m0, [srcq]
100
    pabsw       m1, [srcq+mmsize]
101
%endif
102
    por         m2, m0
103
    por         m2, m1
104
%endif
105
    ; two registers of mmsize bytes were consumed, which is mmsize int16
    ; values, so the pointer advances by 2*mmsize bytes and the element
    ; count drops by mmsize
    add       srcq, mmsize*2
106
    sub       lend, mmsize
107
    ; continue while the subtraction left a non-zero result without borrow
    ja .loop
108
%ifidn %2, min_max
109
    ; combine the two extremes: or(abs(min), abs(max))
    ABS2        m2, m3, m0, m1
110
    por         m2, m3
111
%endif
    ; horizontal reduction: OR all 16-bit lanes of m2 into lane 0
112
%ifidn mmsize, 16
113
    ; fold the upper 64 bits onto the lower 64 bits (xmm only)
    mova        m0, m2
114
    punpckhqdq  m0, m0
115
    por         m2, m0
116
%endif
117
    ; 0xe moves words 2 and 3 down to words 0 and 1
    PSHUFLW     m0, m2, 0xe
118
    por         m2, m0
119
    ; 0x1 moves word 1 down to word 0
    PSHUFLW     m0, m2, 0x1
120
    por         m2, m0
121
    movd       eax, m2
122
    ; only the low word holds the reduced result
    and        eax, 0xFFFF
123
    RET
124
%endmacro
125

  
126
; Instantiate the macro once per instruction set. ABS2 and PSHUFLW are
; redefined between instantiations and stay in effect until redefined.
INIT_MMX
127
%define ABS2 ABS2_MMX
128
; NOTE(review): pshufw was introduced with MMXEXT/SSE, not baseline MMX, yet
; it is expanded into the "mmx" variant below, which the C init code selects
; on AV_CPU_FLAG_MMX alone - confirm this cannot run on an MMX-only CPU.
%define PSHUFLW pshufw
129
AC3_MAX_MSB_ABS_INT16 mmx, or_abs
130
; ABS2_MMX2 remains the active ABS2 for both the mmxext and sse2 variants
%define ABS2 ABS2_MMX2
131
AC3_MAX_MSB_ABS_INT16 mmxext, min_max
132
INIT_XMM
133
%define PSHUFLW pshuflw
134
AC3_MAX_MSB_ABS_INT16 sse2, min_max
135
; NOTE(review): with arguments "ssse3, or_abs" the macro body reaches pabsw
; directly and never expands ABS2, so this define looks unused here - confirm.
%define ABS2 ABS2_SSSE3
136
AC3_MAX_MSB_ABS_INT16 ssse3, or_abs
libavcodec/x86/ac3dsp_mmx.c
27 27
extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int nb_coefs);
28 28
extern void ff_ac3_exponent_min_sse2  (uint8_t *exp, int num_reuse_blocks, int nb_coefs);
29 29

  
30
extern int ff_ac3_max_msb_abs_int16_mmx   (const int16_t *src, int len);
31
extern int ff_ac3_max_msb_abs_int16_mmxext(const int16_t *src, int len);
32
extern int ff_ac3_max_msb_abs_int16_sse2  (const int16_t *src, int len);
33
extern int ff_ac3_max_msb_abs_int16_ssse3 (const int16_t *src, int len);
34

  
30 35
/* Install the x86 assembly versions of the AC-3 DSP functions into *c,
 * based on the flags returned by av_get_cpu_flags().  The checks are
 * independent ifs, each overwriting the pointer set by an earlier one, so
 * the last matching branch determines the function used.  All assignments
 * are compiled only when HAVE_YASM is set. */
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
31 36
{
32 37
    int mm_flags = av_get_cpu_flags();
......
34 39
#if HAVE_YASM
35 40
    if (mm_flags & AV_CPU_FLAG_MMX) {
36 41
        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
42
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
37 43
    }
38 44
    if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
39 45
        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
46
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmxext;
40 47
    }
41 48
    /* NOTE(review): the runtime flag is SSE2 but the build-time gate is
     * HAVE_SSE - confirm this is the intended configuration macro. */
    if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
42 49
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
50
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
51
    }
52
    /* Only ac3_max_msb_abs_int16 is replaced here; ac3_exponent_min keeps
     * whatever an earlier branch assigned. */
    if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
53
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;
43 54
    }
44 55
#endif
45 56
}

Also available in: Unified diff