Revision cc4d3dd3

View differences:

libavcodec/ac3dsp.c
50 50
    return v;
51 51
}
52 52

  
53
/**
 * Left-shift every element of an int16_t array in place (C reference version).
 *
 * The shift is carried out on an unsigned copy of each sample: left-shifting
 * a negative signed value is undefined behaviour in ISO C, whereas the
 * unsigned shift followed by truncation back to 16 bits is well defined on
 * the two's-complement targets this code is built for.
 *
 * @param src    array modified in place
 * @param len    number of elements in src
 * @param shift  left shift amount; 0 leaves the array untouched and the value
 *               must be smaller than the bit width of unsigned int
 */
static void ac3_lshift_int16_c(int16_t *src, int len, unsigned int shift)
{
    int i;

    if (shift > 0) {
        for (i = 0; i < len; i++)
            src[i] = (int16_t)((unsigned int)src[i] << shift);
    }
}
62

  
63
/**
 * Shift every element of an int32_t array in place (C reference version).
 *
 * A positive shift moves bits left, a negative shift moves them right, and
 * zero leaves the array untouched. The left shift is done on an unsigned copy
 * because left-shifting a negative signed value is undefined behaviour in
 * ISO C. The right shift stays on the signed value so the compiler's signed
 * (sign-propagating on the supported targets) shift is used.
 *
 * @param src    array modified in place
 * @param len    number of elements in src
 * @param shift  shift amount (negative = right, positive = left); its
 *               magnitude must be smaller than 32
 */
static void ac3_shift_int32_c(int32_t *src, int len, int shift)
{
    int i;

    if (shift > 0) {
        for (i = 0; i < len; i++)
            src[i] = (int32_t)((uint32_t)src[i] << shift);
    } else if (shift < 0) {
        shift = -shift;
        for (i = 0; i < len; i++)
            src[i] >>= shift;
    }
}
76

  
53 77
av_cold void ff_ac3dsp_init(AC3DSPContext *c)
54 78
{
55 79
    c->ac3_exponent_min = ac3_exponent_min_c;
56 80
    c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c;
81
    c->ac3_lshift_int16 = ac3_lshift_int16_c;
82
    c->ac3_shift_int32  = ac3_shift_int32_c;
57 83

  
58 84
    if (HAVE_MMX)
59 85
        ff_ac3dsp_init_x86(c);
libavcodec/ac3dsp.h
46 46
     * @return    a value with the same MSB as max(abs(src[]))
47 47
     */
48 48
    int (*ac3_max_msb_abs_int16)(const int16_t *src, int len);
49

  
50
    /**
51
     * Left-shift each value in an array of int16_t by a specified amount.
52
     * @param src    input array
53
     *               constraints: align 16
54
     * @param len    number of values in the array
55
     *               constraints: multiple of 32 greater than 0
56
     * @param shift  left shift amount
57
     *               constraints: range [0,15]
58
     */
59
    void (*ac3_lshift_int16)(int16_t *src, int len, unsigned int shift);
60

  
61
    /**
62
     * Shift each value in an array of int32_t by a specified amount.
63
     * @param src    input array
64
     *               constraints: align 16
65
     * @param len    number of values in the array
66
     *               constraints: multiple of 16 greater than 0
67
     * @param shift  shift amount (negative = right, positive = left)
68
     *               constraints: range [-31,31]
69
     */
70
    void (*ac3_shift_int32)(int32_t *src, int len, int shift);
49 71
} AC3DSPContext;
50 72

  
51 73
void ff_ac3dsp_init    (AC3DSPContext *c);
libavcodec/ac3enc_fixed.c
278 278

  
279 279

  
280 280
/**
281
 * Left-shift each value in an array by a specified amount.
282
 * @param tab    input array
283
 * @param n      number of values in the array
284
 * @param lshift left shift amount
285
 */
286
static void lshift_tab(int16_t *tab, int n, unsigned int lshift)
{
    int i;

    if (lshift > 0) {
        /* Shift an unsigned copy: left-shifting a negative signed sample is
         * undefined behaviour in ISO C. The result is truncated back to
         * 16 bits when stored. */
        for (i = 0; i < n; i++)
            tab[i] = (int16_t)((unsigned int)tab[i] << lshift);
    }
}
295

  
296

  
297
/**
298
 * Shift each value in an array by a specified amount.
299
 * @param src    input array
300
 * @param n      number of values in the array
301
 * @param shift  shift amount (negative=right, positive=left)
302
 */
303
static void shift_int32(int32_t *src, int n, int shift)
{
    int i;

    if (shift > 0) {
        /* Shift an unsigned copy: left-shifting a negative signed value is
         * undefined behaviour in ISO C. */
        for (i = 0; i < n; i++)
            src[i] = (int32_t)((uint32_t)src[i] << shift);
    } else if (shift < 0) {
        /* Negative amounts mean a right shift by the magnitude. */
        shift = -shift;
        for (i = 0; i < n; i++)
            src[i] >>= shift;
    }
}
316

  
317

  
318
/**
319 281
 * Normalize the input samples to use the maximum available precision.
320 282
 * This assumes signed 16-bit input samples.
321 283
 *
......
324 286
static int normalize_samples(AC3EncodeContext *s)
{
    /* Left shift amount, derived from log2_tab() over the windowed samples. */
    int v = 14 - log2_tab(s, s->windowed_samples, AC3_WINDOW_SIZE);

    /* Shift the samples exactly once, through the DSP function pointer.
     * The superseded direct lshift_tab() call is dropped; keeping both
     * calls would shift the samples twice. */
    s->ac3dsp.ac3_lshift_int16(s->windowed_samples, AC3_WINDOW_SIZE, v);
    return 9 - v;
}
330 292

  
......
339 301
    for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
340 302
        AC3Block *block = &s->blocks[blk];
341 303
        for (ch = 0; ch < s->channels; ch++) {
342
            shift_int32(block->mdct_coef[ch], AC3_MAX_COEFS,
343
                        block->coeff_shift[ch]);
304
            s->ac3dsp.ac3_shift_int32(block->mdct_coef[ch], AC3_MAX_COEFS,
305
                                      block->coeff_shift[ch]);
344 306
        }
345 307
    }
346 308
}
libavcodec/x86/ac3dsp.asm
133 133
AC3_MAX_MSB_ABS_INT16 sse2, min_max
134 134
%define ABS2 ABS2_SSSE3
135 135
AC3_MAX_MSB_ABS_INT16 ssse3, or_abs
136

  
137
;-----------------------------------------------------------------------------
138
; macro used for ff_ac3_lshift_int16() and ff_ac3_shift_int32()
139
;-----------------------------------------------------------------------------
140

  
141
; Shift four consecutive mmsize-byte vectors in place.
; Loads m1-m4 from [%1] .. [%1+mmsize*3], applies instruction %2 with
; operand %3 to each of them, stores the results back to the same addresses
; and finally advances %1 by mmsize*4 bytes. m1-m4 are overwritten.
%macro AC3_SHIFT_4MM 3 ; src/dst, shift instruction, shift amount
142
    mova  m1, [%1         ]
143
    mova  m2, [%1+mmsize  ]
144
    mova  m3, [%1+mmsize*2]
145
    mova  m4, [%1+mmsize*3]
146
    %2    m1, %3
147
    %2    m2, %3
148
    %2    m3, %3
149
    %2    m4, %3
150
    mova  [%1         ], m1
151
    mova  [%1+mmsize  ], m2
152
    mova  [%1+mmsize*2], m3
153
    mova  [%1+mmsize*3], m4
154
    ; step the pointer past the four vectors just processed
    add   %1, mmsize*4
155
%endmacro
156

  
157
;-----------------------------------------------------------------------------
158
; void ff_ac3_lshift_int16(int16_t *src, int len, unsigned int shift)
159
;-----------------------------------------------------------------------------
160

  
161
; Emits ac3_lshift_int16_<%1>; %1 is only used as the name suffix.
; It is instantiated below once after INIT_MMX and once after INIT_XMM.
%macro AC3_LSHIFT_INT16 1
162
cglobal ac3_lshift_int16_%1, 3,3,5, src, len, shift
163
    ; a zero shift leaves the array untouched, so skip the loop entirely
    test   shiftd, shiftd
164
    jz .end
165
    ; psllw takes its shift count from a vector register
    movd       m0, shiftd
166
    ALIGN 8
167
.loop:
168
    AC3_SHIFT_4MM srcq, psllw, m0
169
    ; one AC3_SHIFT_4MM pass covers mmsize*4 bytes, i.e. mmsize*2 int16 values
    sub      lend, mmsize*2
170
    ; loop again while the subtraction left a non-zero count without borrow
    ja .loop
171
.end:
172
    REP_RET
173
%endmacro
174

  
175
INIT_MMX
176
AC3_LSHIFT_INT16 mmx
177
INIT_XMM
178
AC3_LSHIFT_INT16 sse2
179

  
180
;-----------------------------------------------------------------------------
181
; void ff_ac3_shift_int32(int32_t *src, int len, int shift)
182
;-----------------------------------------------------------------------------
183

  
184
; Emits ac3_shift_int32_<%1>; %1 is only used as the name suffix.
; A positive shift takes the pslld (left) loop, a negative shift is negated
; and takes the psrad (arithmetic right) loop, and zero returns immediately.
%macro AC3_SHIFT_INT32 1
185
cglobal ac3_shift_int32_%1, 3,3,5, src, len, shift
186
    ; flags from this single test drive both branches below
    test   shiftd, shiftd
187
    je .end
188
    js .shift_right
189
    movd       m0, shiftd
190
.loop_left:
191
    AC3_SHIFT_4MM srcq, pslld, m0
192
    ; one AC3_SHIFT_4MM pass covers mmsize*4 bytes, i.e. mmsize int32 values
    sub      lend, mmsize
193
    ja .loop_left
194
    jmp .end
195
.shift_right:
196
    ; turn the negative amount into the positive count psrad expects
    neg    shiftd
197
    movd       m0, shiftd
198
.loop_right:
199
    AC3_SHIFT_4MM srcq, psrad, m0
200
    sub      lend, mmsize
201
    ja .loop_right
202
.end:
203
    REP_RET
204
%endmacro
205

  
206
INIT_MMX
207
AC3_SHIFT_INT32 mmx
208
INIT_XMM
209
AC3_SHIFT_INT32 sse2
libavcodec/x86/ac3dsp_mmx.c
32 32
extern int ff_ac3_max_msb_abs_int16_sse2  (const int16_t *src, int len);
33 33
extern int ff_ac3_max_msb_abs_int16_ssse3 (const int16_t *src, int len);
34 34

  
35
/* Prototypes for the MMX and SSE2 int16 left-shift routines.
 * NOTE(review): presumably the symbols emitted by AC3_LSHIFT_INT16 in
 * x86/ac3dsp.asm, with the ff_ prefix added by cglobal -- confirm. */
extern void ff_ac3_lshift_int16_mmx (int16_t *src, int len, unsigned int lshift);
36
extern void ff_ac3_lshift_int16_sse2(int16_t *src, int len, unsigned int lshift);
37

  
38
/* Prototypes for the MMX and SSE2 int32 shift routines
 * (negative shift = right, positive = left, per the AC3DSPContext docs). */
extern void ff_ac3_shift_int32_mmx (int32_t *src, int len, int shift);
39
extern void ff_ac3_shift_int32_sse2(int32_t *src, int len, int shift);
40

  
35 41
av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c)
36 42
{
37 43
    int mm_flags = av_get_cpu_flags();
......
40 46
    if (mm_flags & AV_CPU_FLAG_MMX) {
41 47
        c->ac3_exponent_min = ff_ac3_exponent_min_mmx;
42 48
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_mmx;
49
        c->ac3_lshift_int16 = ff_ac3_lshift_int16_mmx;
50
        c->ac3_shift_int32  = ff_ac3_shift_int32_mmx;
43 51
    }
44 52
    if (mm_flags & AV_CPU_FLAG_MMX2 && HAVE_MMX2) {
45 53
        c->ac3_exponent_min = ff_ac3_exponent_min_mmxext;
......
48 56
    if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) {
49 57
        c->ac3_exponent_min = ff_ac3_exponent_min_sse2;
50 58
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_sse2;
59

  
60
        if (!(mm_flags & AV_CPU_FLAG_SSE2SLOW)) {
61
            c->ac3_lshift_int16 = ff_ac3_lshift_int16_sse2;
62
            c->ac3_shift_int32  = ff_ac3_shift_int32_sse2;
63
        }
51 64
    }
52 65
    if (mm_flags & AV_CPU_FLAG_SSSE3 && HAVE_SSSE3) {
53 66
        c->ac3_max_msb_abs_int16 = ff_ac3_max_msb_abs_int16_ssse3;

Also available in: Unified diff