Revision fe2ff6d2
libavcodec/Makefile | ||
---|---|---|
12 | 12 |
bitstream_filter.o \ |
13 | 13 |
dsputil.o \ |
14 | 14 |
faanidct.o \ |
15 |
fmtconvert.o \ |
|
15 | 16 |
imgconvert.o \ |
16 | 17 |
jrevdct.o \ |
17 | 18 |
opt.o \ |
libavcodec/aac.h | ||
---|---|---|
35 | 35 |
#include "fft.h" |
36 | 36 |
#include "mpeg4audio.h" |
37 | 37 |
#include "sbr.h" |
38 |
#include "fmtconvert.h" |
|
38 | 39 |
|
39 | 40 |
#include <stdint.h> |
40 | 41 |
|
... | ... | |
268 | 269 |
FFTContext mdct; |
269 | 270 |
FFTContext mdct_small; |
270 | 271 |
DSPContext dsp; |
272 |
FmtConvertContext fmt_conv; |
|
271 | 273 |
int random_state; |
272 | 274 |
/** @} */ |
273 | 275 |
|
libavcodec/aacdec.c | ||
---|---|---|
85 | 85 |
#include "get_bits.h" |
86 | 86 |
#include "dsputil.h" |
87 | 87 |
#include "fft.h" |
88 |
#include "fmtconvert.h" |
|
88 | 89 |
#include "lpc.h" |
89 | 90 |
|
90 | 91 |
#include "aac.h" |
... | ... | |
562 | 563 |
ff_aac_sbr_init(); |
563 | 564 |
|
564 | 565 |
dsputil_init(&ac->dsp, avctx); |
566 |
ff_fmt_convert_init(&ac->fmt_conv, avctx); |
|
565 | 567 |
|
566 | 568 |
ac->random_state = 0x1f2e3d4c; |
567 | 569 |
|
... | ... | |
2032 | 2034 |
*data_size = data_size_tmp; |
2033 | 2035 |
|
2034 | 2036 |
if (samples) |
2035 |
ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
|
|
2037 |
ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
|
|
2036 | 2038 |
|
2037 | 2039 |
if (ac->output_configured) |
2038 | 2040 |
ac->output_configured = OC_LOCKED; |
libavcodec/ac3dec.c | ||
---|---|---|
193 | 193 |
ff_mdct_init(&s->imdct_512, 9, 1, 1.0); |
194 | 194 |
ff_kbd_window_init(s->window, 5.0, 256); |
195 | 195 |
dsputil_init(&s->dsp, avctx); |
196 |
ff_fmt_convert_init(&s->fmt_conv, avctx); |
|
196 | 197 |
av_lfg_init(&s->dith_state, 0); |
197 | 198 |
|
198 | 199 |
/* set scale value for float to int16 conversion */ |
... | ... | |
1255 | 1256 |
} else { |
1256 | 1257 |
gain *= s->dynamic_range[0]; |
1257 | 1258 |
} |
1258 |
s->dsp.int32_to_float_fmul_scalar(s->transform_coeffs[ch], s->fixed_coeffs[ch], gain, 256);
|
|
1259 |
s->fmt_conv.int32_to_float_fmul_scalar(s->transform_coeffs[ch], s->fixed_coeffs[ch], gain, 256);
|
|
1259 | 1260 |
} |
1260 | 1261 |
|
1261 | 1262 |
/* apply spectral extension to high frequency bins */ |
... | ... | |
1407 | 1408 |
av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n"); |
1408 | 1409 |
err = 1; |
1409 | 1410 |
} |
1410 |
s->dsp.float_to_int16_interleave(out_samples, output, 256, s->out_channels);
|
|
1411 |
s->fmt_conv.float_to_int16_interleave(out_samples, output, 256, s->out_channels);
|
|
1411 | 1412 |
out_samples += 256 * s->out_channels; |
1412 | 1413 |
} |
1413 | 1414 |
*data_size = s->num_blocks * 256 * avctx->channels * sizeof (int16_t); |
libavcodec/ac3dec.h | ||
---|---|---|
55 | 55 |
#include "get_bits.h" |
56 | 56 |
#include "dsputil.h" |
57 | 57 |
#include "fft.h" |
58 |
#include "fmtconvert.h" |
|
58 | 59 |
|
59 | 60 |
/* override ac3.h to include coupling channel */ |
60 | 61 |
#undef AC3_MAX_CHANNELS |
... | ... | |
190 | 191 |
|
191 | 192 |
///@defgroup opt optimization |
192 | 193 |
DSPContext dsp; ///< for optimization |
194 |
FmtConvertContext fmt_conv; ///< optimized conversion functions |
|
193 | 195 |
float mul_bias; ///< scaling for float_to_int16 conversion |
194 | 196 |
///@} |
195 | 197 |
|
libavcodec/arm/Makefile | ||
---|---|---|
9 | 9 |
OBJS += arm/dsputil_init_arm.o \ |
10 | 10 |
arm/dsputil_arm.o \ |
11 | 11 |
arm/fft_init_arm.o \ |
12 |
arm/fmtconvert_init_arm.o \ |
|
12 | 13 |
arm/jrevdct_arm.o \ |
13 | 14 |
arm/mpegvideo_arm.o \ |
14 | 15 |
arm/simple_idct_arm.o \ |
... | ... | |
22 | 23 |
arm/dsputil_armv6.o \ |
23 | 24 |
arm/simple_idct_armv6.o \ |
24 | 25 |
|
26 |
VFP-OBJS-$(HAVE_ARMV6) += arm/fmtconvert_vfp.o \ |
|
27 |
|
|
25 | 28 |
OBJS-$(HAVE_ARMVFP) += arm/dsputil_vfp.o \ |
26 | 29 |
arm/dsputil_init_vfp.o \ |
30 |
$(VFP-OBJS-yes) |
|
27 | 31 |
|
28 | 32 |
OBJS-$(HAVE_IWMMXT) += arm/dsputil_iwmmxt.o \ |
29 | 33 |
arm/mpegvideo_iwmmxt.o \ |
... | ... | |
52 | 56 |
|
53 | 57 |
OBJS-$(HAVE_NEON) += arm/dsputil_init_neon.o \ |
54 | 58 |
arm/dsputil_neon.o \ |
59 |
arm/fmtconvert_neon.o \ |
|
55 | 60 |
arm/int_neon.o \ |
56 | 61 |
arm/mpegvideo_neon.o \ |
57 | 62 |
arm/simple_idct_neon.o \ |
libavcodec/arm/dsputil_init_neon.c | ||
---|---|---|
153 | 153 |
int len); |
154 | 154 |
void ff_butterflies_float_neon(float *v1, float *v2, int len); |
155 | 155 |
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); |
156 |
void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src, |
|
157 |
float mul, int len); |
|
158 | 156 |
void ff_vector_fmul_reverse_neon(float *dst, const float *src0, |
159 | 157 |
const float *src1, int len); |
160 | 158 |
void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1, |
... | ... | |
162 | 160 |
|
163 | 161 |
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, |
164 | 162 |
int len); |
165 |
void ff_float_to_int16_neon(int16_t *, const float *, long); |
|
166 |
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); |
|
167 | 163 |
|
168 | 164 |
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize); |
169 | 165 |
|
... | ... | |
308 | 304 |
c->vector_fmul_scalar = ff_vector_fmul_scalar_neon; |
309 | 305 |
c->butterflies_float = ff_butterflies_float_neon; |
310 | 306 |
c->scalarproduct_float = ff_scalarproduct_float_neon; |
311 |
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon; |
|
312 | 307 |
c->vector_fmul_reverse = ff_vector_fmul_reverse_neon; |
313 | 308 |
c->vector_fmul_add = ff_vector_fmul_add_neon; |
314 | 309 |
c->vector_clipf = ff_vector_clipf_neon; |
... | ... | |
319 | 314 |
c->sv_fmul_scalar[0] = ff_sv_fmul_scalar_2_neon; |
320 | 315 |
c->sv_fmul_scalar[1] = ff_sv_fmul_scalar_4_neon; |
321 | 316 |
|
322 |
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { |
|
323 |
c->float_to_int16 = ff_float_to_int16_neon; |
|
324 |
c->float_to_int16_interleave = ff_float_to_int16_interleave_neon; |
|
325 |
} |
|
326 |
|
|
327 | 317 |
if (CONFIG_VORBIS_DECODER) |
328 | 318 |
c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon; |
329 | 319 |
|
libavcodec/arm/dsputil_init_vfp.c | ||
---|---|---|
25 | 25 |
const float *src1, int len); |
26 | 26 |
void ff_vector_fmul_reverse_vfp(float *dst, const float *src0, |
27 | 27 |
const float *src1, int len); |
28 |
void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len); |
|
29 | 28 |
|
30 | 29 |
void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx) |
31 | 30 |
{ |
32 | 31 |
c->vector_fmul = ff_vector_fmul_vfp; |
33 | 32 |
c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp; |
34 |
#if HAVE_ARMV6 |
|
35 |
c->float_to_int16 = ff_float_to_int16_vfp; |
|
36 |
#endif |
|
37 | 33 |
} |
libavcodec/arm/dsputil_neon.S | ||
---|---|---|
400 | 400 |
bx lr |
401 | 401 |
endfunc |
402 | 402 |
|
403 |
function ff_float_to_int16_neon, export=1 |
|
404 |
subs r2, r2, #8 |
|
405 |
vld1.64 {d0-d1}, [r1,:128]! |
|
406 |
vcvt.s32.f32 q8, q0, #16 |
|
407 |
vld1.64 {d2-d3}, [r1,:128]! |
|
408 |
vcvt.s32.f32 q9, q1, #16 |
|
409 |
beq 3f |
|
410 |
bics ip, r2, #15 |
|
411 |
beq 2f |
|
412 |
1: subs ip, ip, #16 |
|
413 |
vshrn.s32 d4, q8, #16 |
|
414 |
vld1.64 {d0-d1}, [r1,:128]! |
|
415 |
vcvt.s32.f32 q0, q0, #16 |
|
416 |
vshrn.s32 d5, q9, #16 |
|
417 |
vld1.64 {d2-d3}, [r1,:128]! |
|
418 |
vcvt.s32.f32 q1, q1, #16 |
|
419 |
vshrn.s32 d6, q0, #16 |
|
420 |
vst1.64 {d4-d5}, [r0,:128]! |
|
421 |
vshrn.s32 d7, q1, #16 |
|
422 |
vld1.64 {d16-d17},[r1,:128]! |
|
423 |
vcvt.s32.f32 q8, q8, #16 |
|
424 |
vld1.64 {d18-d19},[r1,:128]! |
|
425 |
vcvt.s32.f32 q9, q9, #16 |
|
426 |
vst1.64 {d6-d7}, [r0,:128]! |
|
427 |
bne 1b |
|
428 |
ands r2, r2, #15 |
|
429 |
beq 3f |
|
430 |
2: vld1.64 {d0-d1}, [r1,:128]! |
|
431 |
vshrn.s32 d4, q8, #16 |
|
432 |
vcvt.s32.f32 q0, q0, #16 |
|
433 |
vld1.64 {d2-d3}, [r1,:128]! |
|
434 |
vshrn.s32 d5, q9, #16 |
|
435 |
vcvt.s32.f32 q1, q1, #16 |
|
436 |
vshrn.s32 d6, q0, #16 |
|
437 |
vst1.64 {d4-d5}, [r0,:128]! |
|
438 |
vshrn.s32 d7, q1, #16 |
|
439 |
vst1.64 {d6-d7}, [r0,:128]! |
|
440 |
bx lr |
|
441 |
3: vshrn.s32 d4, q8, #16 |
|
442 |
vshrn.s32 d5, q9, #16 |
|
443 |
vst1.64 {d4-d5}, [r0,:128]! |
|
444 |
bx lr |
|
445 |
endfunc |
|
446 |
|
|
447 |
function ff_float_to_int16_interleave_neon, export=1 |
|
448 |
cmp r3, #2 |
|
449 |
ldrlt r1, [r1] |
|
450 |
blt ff_float_to_int16_neon |
|
451 |
bne 4f |
|
452 |
|
|
453 |
ldr r3, [r1] |
|
454 |
ldr r1, [r1, #4] |
|
455 |
|
|
456 |
subs r2, r2, #8 |
|
457 |
vld1.64 {d0-d1}, [r3,:128]! |
|
458 |
vcvt.s32.f32 q8, q0, #16 |
|
459 |
vld1.64 {d2-d3}, [r3,:128]! |
|
460 |
vcvt.s32.f32 q9, q1, #16 |
|
461 |
vld1.64 {d20-d21},[r1,:128]! |
|
462 |
vcvt.s32.f32 q10, q10, #16 |
|
463 |
vld1.64 {d22-d23},[r1,:128]! |
|
464 |
vcvt.s32.f32 q11, q11, #16 |
|
465 |
beq 3f |
|
466 |
bics ip, r2, #15 |
|
467 |
beq 2f |
|
468 |
1: subs ip, ip, #16 |
|
469 |
vld1.64 {d0-d1}, [r3,:128]! |
|
470 |
vcvt.s32.f32 q0, q0, #16 |
|
471 |
vsri.32 q10, q8, #16 |
|
472 |
vld1.64 {d2-d3}, [r3,:128]! |
|
473 |
vcvt.s32.f32 q1, q1, #16 |
|
474 |
vld1.64 {d24-d25},[r1,:128]! |
|
475 |
vcvt.s32.f32 q12, q12, #16 |
|
476 |
vld1.64 {d26-d27},[r1,:128]! |
|
477 |
vsri.32 q11, q9, #16 |
|
478 |
vst1.64 {d20-d21},[r0,:128]! |
|
479 |
vcvt.s32.f32 q13, q13, #16 |
|
480 |
vst1.64 {d22-d23},[r0,:128]! |
|
481 |
vsri.32 q12, q0, #16 |
|
482 |
vld1.64 {d16-d17},[r3,:128]! |
|
483 |
vsri.32 q13, q1, #16 |
|
484 |
vst1.64 {d24-d25},[r0,:128]! |
|
485 |
vcvt.s32.f32 q8, q8, #16 |
|
486 |
vld1.64 {d18-d19},[r3,:128]! |
|
487 |
vcvt.s32.f32 q9, q9, #16 |
|
488 |
vld1.64 {d20-d21},[r1,:128]! |
|
489 |
vcvt.s32.f32 q10, q10, #16 |
|
490 |
vld1.64 {d22-d23},[r1,:128]! |
|
491 |
vcvt.s32.f32 q11, q11, #16 |
|
492 |
vst1.64 {d26-d27},[r0,:128]! |
|
493 |
bne 1b |
|
494 |
ands r2, r2, #15 |
|
495 |
beq 3f |
|
496 |
2: vsri.32 q10, q8, #16 |
|
497 |
vld1.64 {d0-d1}, [r3,:128]! |
|
498 |
vcvt.s32.f32 q0, q0, #16 |
|
499 |
vld1.64 {d2-d3}, [r3,:128]! |
|
500 |
vcvt.s32.f32 q1, q1, #16 |
|
501 |
vld1.64 {d24-d25},[r1,:128]! |
|
502 |
vcvt.s32.f32 q12, q12, #16 |
|
503 |
vsri.32 q11, q9, #16 |
|
504 |
vld1.64 {d26-d27},[r1,:128]! |
|
505 |
vcvt.s32.f32 q13, q13, #16 |
|
506 |
vst1.64 {d20-d21},[r0,:128]! |
|
507 |
vsri.32 q12, q0, #16 |
|
508 |
vst1.64 {d22-d23},[r0,:128]! |
|
509 |
vsri.32 q13, q1, #16 |
|
510 |
vst1.64 {d24-d27},[r0,:128]! |
|
511 |
bx lr |
|
512 |
3: vsri.32 q10, q8, #16 |
|
513 |
vsri.32 q11, q9, #16 |
|
514 |
vst1.64 {d20-d23},[r0,:128]! |
|
515 |
bx lr |
|
516 |
|
|
517 |
4: push {r4-r8,lr} |
|
518 |
cmp r3, #4 |
|
519 |
lsl ip, r3, #1 |
|
520 |
blt 4f |
|
521 |
|
|
522 |
@ 4 channels |
|
523 |
5: ldmia r1!, {r4-r7} |
|
524 |
mov lr, r2 |
|
525 |
mov r8, r0 |
|
526 |
vld1.64 {d16-d17},[r4,:128]! |
|
527 |
vcvt.s32.f32 q8, q8, #16 |
|
528 |
vld1.64 {d18-d19},[r5,:128]! |
|
529 |
vcvt.s32.f32 q9, q9, #16 |
|
530 |
vld1.64 {d20-d21},[r6,:128]! |
|
531 |
vcvt.s32.f32 q10, q10, #16 |
|
532 |
vld1.64 {d22-d23},[r7,:128]! |
|
533 |
vcvt.s32.f32 q11, q11, #16 |
|
534 |
6: subs lr, lr, #8 |
|
535 |
vld1.64 {d0-d1}, [r4,:128]! |
|
536 |
vcvt.s32.f32 q0, q0, #16 |
|
537 |
vsri.32 q9, q8, #16 |
|
538 |
vld1.64 {d2-d3}, [r5,:128]! |
|
539 |
vcvt.s32.f32 q1, q1, #16 |
|
540 |
vsri.32 q11, q10, #16 |
|
541 |
vld1.64 {d4-d5}, [r6,:128]! |
|
542 |
vcvt.s32.f32 q2, q2, #16 |
|
543 |
vzip.32 d18, d22 |
|
544 |
vld1.64 {d6-d7}, [r7,:128]! |
|
545 |
vcvt.s32.f32 q3, q3, #16 |
|
546 |
vzip.32 d19, d23 |
|
547 |
vst1.64 {d18}, [r8], ip |
|
548 |
vsri.32 q1, q0, #16 |
|
549 |
vst1.64 {d22}, [r8], ip |
|
550 |
vsri.32 q3, q2, #16 |
|
551 |
vst1.64 {d19}, [r8], ip |
|
552 |
vzip.32 d2, d6 |
|
553 |
vst1.64 {d23}, [r8], ip |
|
554 |
vzip.32 d3, d7 |
|
555 |
beq 7f |
|
556 |
vld1.64 {d16-d17},[r4,:128]! |
|
557 |
vcvt.s32.f32 q8, q8, #16 |
|
558 |
vst1.64 {d2}, [r8], ip |
|
559 |
vld1.64 {d18-d19},[r5,:128]! |
|
560 |
vcvt.s32.f32 q9, q9, #16 |
|
561 |
vst1.64 {d6}, [r8], ip |
|
562 |
vld1.64 {d20-d21},[r6,:128]! |
|
563 |
vcvt.s32.f32 q10, q10, #16 |
|
564 |
vst1.64 {d3}, [r8], ip |
|
565 |
vld1.64 {d22-d23},[r7,:128]! |
|
566 |
vcvt.s32.f32 q11, q11, #16 |
|
567 |
vst1.64 {d7}, [r8], ip |
|
568 |
b 6b |
|
569 |
7: vst1.64 {d2}, [r8], ip |
|
570 |
vst1.64 {d6}, [r8], ip |
|
571 |
vst1.64 {d3}, [r8], ip |
|
572 |
vst1.64 {d7}, [r8], ip |
|
573 |
subs r3, r3, #4 |
|
574 |
popeq {r4-r8,pc} |
|
575 |
cmp r3, #4 |
|
576 |
add r0, r0, #8 |
|
577 |
bge 5b |
|
578 |
|
|
579 |
@ 2 channels |
|
580 |
4: cmp r3, #2 |
|
581 |
blt 4f |
|
582 |
ldmia r1!, {r4-r5} |
|
583 |
mov lr, r2 |
|
584 |
mov r8, r0 |
|
585 |
tst lr, #8 |
|
586 |
vld1.64 {d16-d17},[r4,:128]! |
|
587 |
vcvt.s32.f32 q8, q8, #16 |
|
588 |
vld1.64 {d18-d19},[r5,:128]! |
|
589 |
vcvt.s32.f32 q9, q9, #16 |
|
590 |
vld1.64 {d20-d21},[r4,:128]! |
|
591 |
vcvt.s32.f32 q10, q10, #16 |
|
592 |
vld1.64 {d22-d23},[r5,:128]! |
|
593 |
vcvt.s32.f32 q11, q11, #16 |
|
594 |
beq 6f |
|
595 |
subs lr, lr, #8 |
|
596 |
beq 7f |
|
597 |
vsri.32 d18, d16, #16 |
|
598 |
vsri.32 d19, d17, #16 |
|
599 |
vld1.64 {d16-d17},[r4,:128]! |
|
600 |
vcvt.s32.f32 q8, q8, #16 |
|
601 |
vst1.32 {d18[0]}, [r8], ip |
|
602 |
vsri.32 d22, d20, #16 |
|
603 |
vst1.32 {d18[1]}, [r8], ip |
|
604 |
vsri.32 d23, d21, #16 |
|
605 |
vst1.32 {d19[0]}, [r8], ip |
|
606 |
vst1.32 {d19[1]}, [r8], ip |
|
607 |
vld1.64 {d18-d19},[r5,:128]! |
|
608 |
vcvt.s32.f32 q9, q9, #16 |
|
609 |
vst1.32 {d22[0]}, [r8], ip |
|
610 |
vst1.32 {d22[1]}, [r8], ip |
|
611 |
vld1.64 {d20-d21},[r4,:128]! |
|
612 |
vcvt.s32.f32 q10, q10, #16 |
|
613 |
vst1.32 {d23[0]}, [r8], ip |
|
614 |
vst1.32 {d23[1]}, [r8], ip |
|
615 |
vld1.64 {d22-d23},[r5,:128]! |
|
616 |
vcvt.s32.f32 q11, q11, #16 |
|
617 |
6: subs lr, lr, #16 |
|
618 |
vld1.64 {d0-d1}, [r4,:128]! |
|
619 |
vcvt.s32.f32 q0, q0, #16 |
|
620 |
vsri.32 d18, d16, #16 |
|
621 |
vld1.64 {d2-d3}, [r5,:128]! |
|
622 |
vcvt.s32.f32 q1, q1, #16 |
|
623 |
vsri.32 d19, d17, #16 |
|
624 |
vld1.64 {d4-d5}, [r4,:128]! |
|
625 |
vcvt.s32.f32 q2, q2, #16 |
|
626 |
vld1.64 {d6-d7}, [r5,:128]! |
|
627 |
vcvt.s32.f32 q3, q3, #16 |
|
628 |
vst1.32 {d18[0]}, [r8], ip |
|
629 |
vsri.32 d22, d20, #16 |
|
630 |
vst1.32 {d18[1]}, [r8], ip |
|
631 |
vsri.32 d23, d21, #16 |
|
632 |
vst1.32 {d19[0]}, [r8], ip |
|
633 |
vsri.32 d2, d0, #16 |
|
634 |
vst1.32 {d19[1]}, [r8], ip |
|
635 |
vsri.32 d3, d1, #16 |
|
636 |
vst1.32 {d22[0]}, [r8], ip |
|
637 |
vsri.32 d6, d4, #16 |
|
638 |
vst1.32 {d22[1]}, [r8], ip |
|
639 |
vsri.32 d7, d5, #16 |
|
640 |
vst1.32 {d23[0]}, [r8], ip |
|
641 |
vst1.32 {d23[1]}, [r8], ip |
|
642 |
beq 6f |
|
643 |
vld1.64 {d16-d17},[r4,:128]! |
|
644 |
vcvt.s32.f32 q8, q8, #16 |
|
645 |
vst1.32 {d2[0]}, [r8], ip |
|
646 |
vst1.32 {d2[1]}, [r8], ip |
|
647 |
vld1.64 {d18-d19},[r5,:128]! |
|
648 |
vcvt.s32.f32 q9, q9, #16 |
|
649 |
vst1.32 {d3[0]}, [r8], ip |
|
650 |
vst1.32 {d3[1]}, [r8], ip |
|
651 |
vld1.64 {d20-d21},[r4,:128]! |
|
652 |
vcvt.s32.f32 q10, q10, #16 |
|
653 |
vst1.32 {d6[0]}, [r8], ip |
|
654 |
vst1.32 {d6[1]}, [r8], ip |
|
655 |
vld1.64 {d22-d23},[r5,:128]! |
|
656 |
vcvt.s32.f32 q11, q11, #16 |
|
657 |
vst1.32 {d7[0]}, [r8], ip |
|
658 |
vst1.32 {d7[1]}, [r8], ip |
|
659 |
bgt 6b |
|
660 |
6: vst1.32 {d2[0]}, [r8], ip |
|
661 |
vst1.32 {d2[1]}, [r8], ip |
|
662 |
vst1.32 {d3[0]}, [r8], ip |
|
663 |
vst1.32 {d3[1]}, [r8], ip |
|
664 |
vst1.32 {d6[0]}, [r8], ip |
|
665 |
vst1.32 {d6[1]}, [r8], ip |
|
666 |
vst1.32 {d7[0]}, [r8], ip |
|
667 |
vst1.32 {d7[1]}, [r8], ip |
|
668 |
b 8f |
|
669 |
7: vsri.32 d18, d16, #16 |
|
670 |
vsri.32 d19, d17, #16 |
|
671 |
vst1.32 {d18[0]}, [r8], ip |
|
672 |
vsri.32 d22, d20, #16 |
|
673 |
vst1.32 {d18[1]}, [r8], ip |
|
674 |
vsri.32 d23, d21, #16 |
|
675 |
vst1.32 {d19[0]}, [r8], ip |
|
676 |
vst1.32 {d19[1]}, [r8], ip |
|
677 |
vst1.32 {d22[0]}, [r8], ip |
|
678 |
vst1.32 {d22[1]}, [r8], ip |
|
679 |
vst1.32 {d23[0]}, [r8], ip |
|
680 |
vst1.32 {d23[1]}, [r8], ip |
|
681 |
8: subs r3, r3, #2 |
|
682 |
add r0, r0, #4 |
|
683 |
popeq {r4-r8,pc} |
|
684 |
|
|
685 |
@ 1 channel |
|
686 |
4: ldr r4, [r1],#4 |
|
687 |
tst r2, #8 |
|
688 |
mov lr, r2 |
|
689 |
mov r5, r0 |
|
690 |
vld1.64 {d0-d1}, [r4,:128]! |
|
691 |
vcvt.s32.f32 q0, q0, #16 |
|
692 |
vld1.64 {d2-d3}, [r4,:128]! |
|
693 |
vcvt.s32.f32 q1, q1, #16 |
|
694 |
bne 8f |
|
695 |
6: subs lr, lr, #16 |
|
696 |
vld1.64 {d4-d5}, [r4,:128]! |
|
697 |
vcvt.s32.f32 q2, q2, #16 |
|
698 |
vld1.64 {d6-d7}, [r4,:128]! |
|
699 |
vcvt.s32.f32 q3, q3, #16 |
|
700 |
vst1.16 {d0[1]}, [r5,:16], ip |
|
701 |
vst1.16 {d0[3]}, [r5,:16], ip |
|
702 |
vst1.16 {d1[1]}, [r5,:16], ip |
|
703 |
vst1.16 {d1[3]}, [r5,:16], ip |
|
704 |
vst1.16 {d2[1]}, [r5,:16], ip |
|
705 |
vst1.16 {d2[3]}, [r5,:16], ip |
|
706 |
vst1.16 {d3[1]}, [r5,:16], ip |
|
707 |
vst1.16 {d3[3]}, [r5,:16], ip |
|
708 |
beq 7f |
|
709 |
vld1.64 {d0-d1}, [r4,:128]! |
|
710 |
vcvt.s32.f32 q0, q0, #16 |
|
711 |
vld1.64 {d2-d3}, [r4,:128]! |
|
712 |
vcvt.s32.f32 q1, q1, #16 |
|
713 |
7: vst1.16 {d4[1]}, [r5,:16], ip |
|
714 |
vst1.16 {d4[3]}, [r5,:16], ip |
|
715 |
vst1.16 {d5[1]}, [r5,:16], ip |
|
716 |
vst1.16 {d5[3]}, [r5,:16], ip |
|
717 |
vst1.16 {d6[1]}, [r5,:16], ip |
|
718 |
vst1.16 {d6[3]}, [r5,:16], ip |
|
719 |
vst1.16 {d7[1]}, [r5,:16], ip |
|
720 |
vst1.16 {d7[3]}, [r5,:16], ip |
|
721 |
bgt 6b |
|
722 |
pop {r4-r8,pc} |
|
723 |
8: subs lr, lr, #8 |
|
724 |
vst1.16 {d0[1]}, [r5,:16], ip |
|
725 |
vst1.16 {d0[3]}, [r5,:16], ip |
|
726 |
vst1.16 {d1[1]}, [r5,:16], ip |
|
727 |
vst1.16 {d1[3]}, [r5,:16], ip |
|
728 |
vst1.16 {d2[1]}, [r5,:16], ip |
|
729 |
vst1.16 {d2[3]}, [r5,:16], ip |
|
730 |
vst1.16 {d3[1]}, [r5,:16], ip |
|
731 |
vst1.16 {d3[3]}, [r5,:16], ip |
|
732 |
popeq {r4-r8,pc} |
|
733 |
vld1.64 {d0-d1}, [r4,:128]! |
|
734 |
vcvt.s32.f32 q0, q0, #16 |
|
735 |
vld1.64 {d2-d3}, [r4,:128]! |
|
736 |
vcvt.s32.f32 q1, q1, #16 |
|
737 |
b 6b |
|
738 |
endfunc |
|
739 |
|
|
740 | 403 |
function ff_vector_fmul_neon, export=1 |
741 | 404 |
subs r3, r3, #8 |
742 | 405 |
vld1.64 {d0-d3}, [r1,:128]! |
... | ... | |
1050 | 713 |
bx lr |
1051 | 714 |
endfunc |
1052 | 715 |
|
1053 |
function ff_int32_to_float_fmul_scalar_neon, export=1 |
|
1054 |
VFP vdup.32 q0, d0[0] |
|
1055 |
VFP len .req r2 |
|
1056 |
NOVFP vdup.32 q0, r2 |
|
1057 |
NOVFP len .req r3 |
|
1058 |
|
|
1059 |
vld1.32 {q1},[r1,:128]! |
|
1060 |
vcvt.f32.s32 q3, q1 |
|
1061 |
vld1.32 {q2},[r1,:128]! |
|
1062 |
vcvt.f32.s32 q8, q2 |
|
1063 |
1: subs len, len, #8 |
|
1064 |
pld [r1, #16] |
|
1065 |
vmul.f32 q9, q3, q0 |
|
1066 |
vmul.f32 q10, q8, q0 |
|
1067 |
beq 2f |
|
1068 |
vld1.32 {q1},[r1,:128]! |
|
1069 |
vcvt.f32.s32 q3, q1 |
|
1070 |
vld1.32 {q2},[r1,:128]! |
|
1071 |
vcvt.f32.s32 q8, q2 |
|
1072 |
vst1.32 {q9}, [r0,:128]! |
|
1073 |
vst1.32 {q10},[r0,:128]! |
|
1074 |
b 1b |
|
1075 |
2: vst1.32 {q9}, [r0,:128]! |
|
1076 |
vst1.32 {q10},[r0,:128]! |
|
1077 |
bx lr |
|
1078 |
.unreq len |
|
1079 |
endfunc |
|
1080 |
|
|
1081 | 716 |
function ff_vector_fmul_reverse_neon, export=1 |
1082 | 717 |
add r2, r2, r3, lsl #2 |
1083 | 718 |
sub r2, r2, #32 |
libavcodec/arm/dsputil_vfp.S | ||
---|---|---|
131 | 131 |
vpop {d8-d15} |
132 | 132 |
bx lr |
133 | 133 |
endfunc |
134 |
|
|
135 |
#if HAVE_ARMV6 |
|
136 |
/** |
|
137 |
* ARM VFP optimized float to int16 conversion. |
|
138 |
* Assume that len is a positive number and is multiple of 8, destination |
|
139 |
* buffer is at least 4 bytes aligned (8 bytes alignment is better for |
|
140 |
* performance), little endian byte sex |
|
141 |
*/ |
|
142 |
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len) |
|
143 |
function ff_float_to_int16_vfp, export=1 |
|
144 |
push {r4-r8,lr} |
|
145 |
vpush {d8-d11} |
|
146 |
vldmia r1!, {s16-s23} |
|
147 |
vcvt.s32.f32 s0, s16 |
|
148 |
vcvt.s32.f32 s1, s17 |
|
149 |
vcvt.s32.f32 s2, s18 |
|
150 |
vcvt.s32.f32 s3, s19 |
|
151 |
vcvt.s32.f32 s4, s20 |
|
152 |
vcvt.s32.f32 s5, s21 |
|
153 |
vcvt.s32.f32 s6, s22 |
|
154 |
vcvt.s32.f32 s7, s23 |
|
155 |
1: |
|
156 |
subs r2, r2, #8 |
|
157 |
vmov r3, r4, s0, s1 |
|
158 |
vmov r5, r6, s2, s3 |
|
159 |
vmov r7, r8, s4, s5 |
|
160 |
vmov ip, lr, s6, s7 |
|
161 |
vldmiagt r1!, {s16-s23} |
|
162 |
ssat r4, #16, r4 |
|
163 |
ssat r3, #16, r3 |
|
164 |
ssat r6, #16, r6 |
|
165 |
ssat r5, #16, r5 |
|
166 |
pkhbt r3, r3, r4, lsl #16 |
|
167 |
pkhbt r4, r5, r6, lsl #16 |
|
168 |
vcvtgt.s32.f32 s0, s16 |
|
169 |
vcvtgt.s32.f32 s1, s17 |
|
170 |
vcvtgt.s32.f32 s2, s18 |
|
171 |
vcvtgt.s32.f32 s3, s19 |
|
172 |
vcvtgt.s32.f32 s4, s20 |
|
173 |
vcvtgt.s32.f32 s5, s21 |
|
174 |
vcvtgt.s32.f32 s6, s22 |
|
175 |
vcvtgt.s32.f32 s7, s23 |
|
176 |
ssat r8, #16, r8 |
|
177 |
ssat r7, #16, r7 |
|
178 |
ssat lr, #16, lr |
|
179 |
ssat ip, #16, ip |
|
180 |
pkhbt r5, r7, r8, lsl #16 |
|
181 |
pkhbt r6, ip, lr, lsl #16 |
|
182 |
stmia r0!, {r3-r6} |
|
183 |
bgt 1b |
|
184 |
|
|
185 |
vpop {d8-d11} |
|
186 |
pop {r4-r8,pc} |
|
187 |
endfunc |
|
188 |
#endif |
libavcodec/arm/fmtconvert_init_arm.c | ||
---|---|---|
1 |
/* |
|
2 |
* ARM optimized Format Conversion Utils |
|
3 |
* |
|
4 |
* This file is part of FFmpeg. |
|
5 |
* |
|
6 |
* FFmpeg is free software; you can redistribute it and/or |
|
7 |
* modify it under the terms of the GNU Lesser General Public |
|
8 |
* License as published by the Free Software Foundation; either |
|
9 |
* version 2.1 of the License, or (at your option) any later version. |
|
10 |
* |
|
11 |
* FFmpeg is distributed in the hope that it will be useful, |
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 |
* Lesser General Public License for more details. |
|
15 |
* |
|
16 |
* You should have received a copy of the GNU Lesser General Public |
|
17 |
* License along with FFmpeg; if not, write to the Free Software |
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
19 |
*/ |
|
20 |
|
|
21 |
#include <stdint.h> |
|
22 |
|
|
23 |
#include "libavcodec/avcodec.h" |
|
24 |
#include "libavcodec/fmtconvert.h" |
|
25 |
|
|
26 |
void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src, |
|
27 |
float mul, int len); |
|
28 |
|
|
29 |
void ff_float_to_int16_neon(int16_t *dst, const float *src, long len); |
|
30 |
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int); |
|
31 |
|
|
32 |
void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len); |
|
33 |
|
|
34 |
void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx) |
|
35 |
{ |
|
36 |
if (HAVE_ARMVFP && HAVE_ARMV6) { |
|
37 |
c->float_to_int16 = ff_float_to_int16_vfp; |
|
38 |
} |
|
39 |
|
|
40 |
if (HAVE_NEON) { |
|
41 |
c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon; |
|
42 |
|
|
43 |
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { |
|
44 |
c->float_to_int16 = ff_float_to_int16_neon; |
|
45 |
c->float_to_int16_interleave = ff_float_to_int16_interleave_neon; |
|
46 |
} |
|
47 |
} |
|
48 |
} |
libavcodec/arm/fmtconvert_neon.S | ||
---|---|---|
1 |
/* |
|
2 |
* ARM NEON optimised Format Conversion Utils |
|
3 |
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> |
|
4 |
* |
|
5 |
* This file is part of FFmpeg. |
|
6 |
* |
|
7 |
* FFmpeg is free software; you can redistribute it and/or |
|
8 |
* modify it under the terms of the GNU Lesser General Public |
|
9 |
* License as published by the Free Software Foundation; either |
|
10 |
* version 2.1 of the License, or (at your option) any later version. |
|
11 |
* |
|
12 |
* FFmpeg is distributed in the hope that it will be useful, |
|
13 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
14 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
15 |
* Lesser General Public License for more details. |
|
16 |
* |
|
17 |
* You should have received a copy of the GNU Lesser General Public |
|
18 |
* License along with FFmpeg; if not, write to the Free Software |
|
19 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
20 |
*/ |
|
21 |
|
|
22 |
#include "config.h" |
|
23 |
#include "asm.S" |
|
24 |
|
|
25 |
preserve8 |
|
26 |
.text |
|
27 |
|
|
28 |
function ff_float_to_int16_neon, export=1 |
|
29 |
subs r2, r2, #8 |
|
30 |
vld1.64 {d0-d1}, [r1,:128]! |
|
31 |
vcvt.s32.f32 q8, q0, #16 |
|
32 |
vld1.64 {d2-d3}, [r1,:128]! |
|
33 |
vcvt.s32.f32 q9, q1, #16 |
|
34 |
beq 3f |
|
35 |
bics ip, r2, #15 |
|
36 |
beq 2f |
|
37 |
1: subs ip, ip, #16 |
|
38 |
vshrn.s32 d4, q8, #16 |
|
39 |
vld1.64 {d0-d1}, [r1,:128]! |
|
40 |
vcvt.s32.f32 q0, q0, #16 |
|
41 |
vshrn.s32 d5, q9, #16 |
|
42 |
vld1.64 {d2-d3}, [r1,:128]! |
|
43 |
vcvt.s32.f32 q1, q1, #16 |
|
44 |
vshrn.s32 d6, q0, #16 |
|
45 |
vst1.64 {d4-d5}, [r0,:128]! |
|
46 |
vshrn.s32 d7, q1, #16 |
|
47 |
vld1.64 {d16-d17},[r1,:128]! |
|
48 |
vcvt.s32.f32 q8, q8, #16 |
|
49 |
vld1.64 {d18-d19},[r1,:128]! |
|
50 |
vcvt.s32.f32 q9, q9, #16 |
|
51 |
vst1.64 {d6-d7}, [r0,:128]! |
|
52 |
bne 1b |
|
53 |
ands r2, r2, #15 |
|
54 |
beq 3f |
|
55 |
2: vld1.64 {d0-d1}, [r1,:128]! |
|
56 |
vshrn.s32 d4, q8, #16 |
|
57 |
vcvt.s32.f32 q0, q0, #16 |
|
58 |
vld1.64 {d2-d3}, [r1,:128]! |
|
59 |
vshrn.s32 d5, q9, #16 |
|
60 |
vcvt.s32.f32 q1, q1, #16 |
|
61 |
vshrn.s32 d6, q0, #16 |
|
62 |
vst1.64 {d4-d5}, [r0,:128]! |
|
63 |
vshrn.s32 d7, q1, #16 |
|
64 |
vst1.64 {d6-d7}, [r0,:128]! |
|
65 |
bx lr |
|
66 |
3: vshrn.s32 d4, q8, #16 |
|
67 |
vshrn.s32 d5, q9, #16 |
|
68 |
vst1.64 {d4-d5}, [r0,:128]! |
|
69 |
bx lr |
|
70 |
endfunc |
|
71 |
|
|
72 |
function ff_float_to_int16_interleave_neon, export=1 |
|
73 |
cmp r3, #2 |
|
74 |
ldrlt r1, [r1] |
|
75 |
blt ff_float_to_int16_neon |
|
76 |
bne 4f |
|
77 |
|
|
78 |
ldr r3, [r1] |
|
79 |
ldr r1, [r1, #4] |
|
80 |
|
|
81 |
subs r2, r2, #8 |
|
82 |
vld1.64 {d0-d1}, [r3,:128]! |
|
83 |
vcvt.s32.f32 q8, q0, #16 |
|
84 |
vld1.64 {d2-d3}, [r3,:128]! |
|
85 |
vcvt.s32.f32 q9, q1, #16 |
|
86 |
vld1.64 {d20-d21},[r1,:128]! |
|
87 |
vcvt.s32.f32 q10, q10, #16 |
|
88 |
vld1.64 {d22-d23},[r1,:128]! |
|
89 |
vcvt.s32.f32 q11, q11, #16 |
|
90 |
beq 3f |
|
91 |
bics ip, r2, #15 |
|
92 |
beq 2f |
|
93 |
1: subs ip, ip, #16 |
|
94 |
vld1.64 {d0-d1}, [r3,:128]! |
|
95 |
vcvt.s32.f32 q0, q0, #16 |
|
96 |
vsri.32 q10, q8, #16 |
|
97 |
vld1.64 {d2-d3}, [r3,:128]! |
|
98 |
vcvt.s32.f32 q1, q1, #16 |
|
99 |
vld1.64 {d24-d25},[r1,:128]! |
|
100 |
vcvt.s32.f32 q12, q12, #16 |
|
101 |
vld1.64 {d26-d27},[r1,:128]! |
|
102 |
vsri.32 q11, q9, #16 |
|
103 |
vst1.64 {d20-d21},[r0,:128]! |
|
104 |
vcvt.s32.f32 q13, q13, #16 |
|
105 |
vst1.64 {d22-d23},[r0,:128]! |
|
106 |
vsri.32 q12, q0, #16 |
|
107 |
vld1.64 {d16-d17},[r3,:128]! |
|
108 |
vsri.32 q13, q1, #16 |
|
109 |
vst1.64 {d24-d25},[r0,:128]! |
|
110 |
vcvt.s32.f32 q8, q8, #16 |
|
111 |
vld1.64 {d18-d19},[r3,:128]! |
|
112 |
vcvt.s32.f32 q9, q9, #16 |
|
113 |
vld1.64 {d20-d21},[r1,:128]! |
|
114 |
vcvt.s32.f32 q10, q10, #16 |
|
115 |
vld1.64 {d22-d23},[r1,:128]! |
|
116 |
vcvt.s32.f32 q11, q11, #16 |
|
117 |
vst1.64 {d26-d27},[r0,:128]! |
|
118 |
bne 1b |
|
119 |
ands r2, r2, #15 |
|
120 |
beq 3f |
|
121 |
2: vsri.32 q10, q8, #16 |
|
122 |
vld1.64 {d0-d1}, [r3,:128]! |
|
123 |
vcvt.s32.f32 q0, q0, #16 |
|
124 |
vld1.64 {d2-d3}, [r3,:128]! |
|
125 |
vcvt.s32.f32 q1, q1, #16 |
|
126 |
vld1.64 {d24-d25},[r1,:128]! |
|
127 |
vcvt.s32.f32 q12, q12, #16 |
|
128 |
vsri.32 q11, q9, #16 |
|
129 |
vld1.64 {d26-d27},[r1,:128]! |
|
130 |
vcvt.s32.f32 q13, q13, #16 |
|
131 |
vst1.64 {d20-d21},[r0,:128]! |
|
132 |
vsri.32 q12, q0, #16 |
|
133 |
vst1.64 {d22-d23},[r0,:128]! |
|
134 |
vsri.32 q13, q1, #16 |
|
135 |
vst1.64 {d24-d27},[r0,:128]! |
|
136 |
bx lr |
|
137 |
3: vsri.32 q10, q8, #16 |
|
138 |
vsri.32 q11, q9, #16 |
|
139 |
vst1.64 {d20-d23},[r0,:128]! |
|
140 |
bx lr |
|
141 |
|
|
142 |
4: push {r4-r8,lr} |
|
143 |
cmp r3, #4 |
|
144 |
lsl ip, r3, #1 |
|
145 |
blt 4f |
|
146 |
|
|
147 |
@ 4 channels |
|
148 |
5: ldmia r1!, {r4-r7} |
|
149 |
mov lr, r2 |
|
150 |
mov r8, r0 |
|
151 |
vld1.64 {d16-d17},[r4,:128]! |
|
152 |
vcvt.s32.f32 q8, q8, #16 |
|
153 |
vld1.64 {d18-d19},[r5,:128]! |
|
154 |
vcvt.s32.f32 q9, q9, #16 |
|
155 |
vld1.64 {d20-d21},[r6,:128]! |
|
156 |
vcvt.s32.f32 q10, q10, #16 |
|
157 |
vld1.64 {d22-d23},[r7,:128]! |
|
158 |
vcvt.s32.f32 q11, q11, #16 |
|
159 |
6: subs lr, lr, #8 |
|
160 |
vld1.64 {d0-d1}, [r4,:128]! |
|
161 |
vcvt.s32.f32 q0, q0, #16 |
|
162 |
vsri.32 q9, q8, #16 |
|
163 |
vld1.64 {d2-d3}, [r5,:128]! |
|
164 |
vcvt.s32.f32 q1, q1, #16 |
|
165 |
vsri.32 q11, q10, #16 |
|
166 |
vld1.64 {d4-d5}, [r6,:128]! |
|
167 |
vcvt.s32.f32 q2, q2, #16 |
|
168 |
vzip.32 d18, d22 |
|
169 |
vld1.64 {d6-d7}, [r7,:128]! |
|
170 |
vcvt.s32.f32 q3, q3, #16 |
|
171 |
vzip.32 d19, d23 |
|
172 |
vst1.64 {d18}, [r8], ip |
|
173 |
vsri.32 q1, q0, #16 |
|
174 |
vst1.64 {d22}, [r8], ip |
|
175 |
vsri.32 q3, q2, #16 |
|
176 |
vst1.64 {d19}, [r8], ip |
|
177 |
vzip.32 d2, d6 |
|
178 |
vst1.64 {d23}, [r8], ip |
|
179 |
vzip.32 d3, d7 |
|
180 |
beq 7f |
|
181 |
vld1.64 {d16-d17},[r4,:128]! |
|
182 |
vcvt.s32.f32 q8, q8, #16 |
|
183 |
vst1.64 {d2}, [r8], ip |
|
184 |
vld1.64 {d18-d19},[r5,:128]! |
|
185 |
vcvt.s32.f32 q9, q9, #16 |
|
186 |
vst1.64 {d6}, [r8], ip |
|
187 |
vld1.64 {d20-d21},[r6,:128]! |
|
188 |
vcvt.s32.f32 q10, q10, #16 |
|
189 |
vst1.64 {d3}, [r8], ip |
|
190 |
vld1.64 {d22-d23},[r7,:128]! |
|
191 |
vcvt.s32.f32 q11, q11, #16 |
|
192 |
vst1.64 {d7}, [r8], ip |
|
193 |
b 6b |
|
194 |
7: vst1.64 {d2}, [r8], ip |
|
195 |
vst1.64 {d6}, [r8], ip |
|
196 |
vst1.64 {d3}, [r8], ip |
|
197 |
vst1.64 {d7}, [r8], ip |
|
198 |
subs r3, r3, #4 |
|
199 |
popeq {r4-r8,pc} |
|
200 |
cmp r3, #4 |
|
201 |
add r0, r0, #8 |
|
202 |
bge 5b |
|
203 |
|
|
204 |
@ 2 channels |
|
205 |
4: cmp r3, #2 |
|
206 |
blt 4f |
|
207 |
ldmia r1!, {r4-r5} |
|
208 |
mov lr, r2 |
|
209 |
mov r8, r0 |
|
210 |
tst lr, #8 |
|
211 |
vld1.64 {d16-d17},[r4,:128]! |
|
212 |
vcvt.s32.f32 q8, q8, #16 |
|
213 |
vld1.64 {d18-d19},[r5,:128]! |
|
214 |
vcvt.s32.f32 q9, q9, #16 |
|
215 |
vld1.64 {d20-d21},[r4,:128]! |
|
216 |
vcvt.s32.f32 q10, q10, #16 |
|
217 |
vld1.64 {d22-d23},[r5,:128]! |
|
218 |
vcvt.s32.f32 q11, q11, #16 |
|
219 |
beq 6f |
|
220 |
subs lr, lr, #8 |
|
221 |
beq 7f |
|
222 |
vsri.32 d18, d16, #16 |
|
223 |
vsri.32 d19, d17, #16 |
|
224 |
vld1.64 {d16-d17},[r4,:128]! |
|
225 |
vcvt.s32.f32 q8, q8, #16 |
|
226 |
vst1.32 {d18[0]}, [r8], ip |
|
227 |
vsri.32 d22, d20, #16 |
|
228 |
vst1.32 {d18[1]}, [r8], ip |
|
229 |
vsri.32 d23, d21, #16 |
|
230 |
vst1.32 {d19[0]}, [r8], ip |
|
231 |
vst1.32 {d19[1]}, [r8], ip |
|
232 |
vld1.64 {d18-d19},[r5,:128]! |
|
233 |
vcvt.s32.f32 q9, q9, #16 |
|
234 |
vst1.32 {d22[0]}, [r8], ip |
|
235 |
vst1.32 {d22[1]}, [r8], ip |
|
236 |
vld1.64 {d20-d21},[r4,:128]! |
|
237 |
vcvt.s32.f32 q10, q10, #16 |
|
238 |
vst1.32 {d23[0]}, [r8], ip |
|
239 |
vst1.32 {d23[1]}, [r8], ip |
|
240 |
vld1.64 {d22-d23},[r5,:128]! |
|
241 |
vcvt.s32.f32 q11, q11, #16 |
|
242 |
6: subs lr, lr, #16 |
|
243 |
vld1.64 {d0-d1}, [r4,:128]! |
|
244 |
vcvt.s32.f32 q0, q0, #16 |
|
245 |
vsri.32 d18, d16, #16 |
|
246 |
vld1.64 {d2-d3}, [r5,:128]! |
|
247 |
vcvt.s32.f32 q1, q1, #16 |
|
248 |
vsri.32 d19, d17, #16 |
|
249 |
vld1.64 {d4-d5}, [r4,:128]! |
|
250 |
vcvt.s32.f32 q2, q2, #16 |
|
251 |
vld1.64 {d6-d7}, [r5,:128]! |
|
252 |
vcvt.s32.f32 q3, q3, #16 |
|
253 |
vst1.32 {d18[0]}, [r8], ip |
|
254 |
vsri.32 d22, d20, #16 |
|
255 |
vst1.32 {d18[1]}, [r8], ip |
|
256 |
vsri.32 d23, d21, #16 |
|
257 |
vst1.32 {d19[0]}, [r8], ip |
|
258 |
vsri.32 d2, d0, #16 |
|
259 |
vst1.32 {d19[1]}, [r8], ip |
|
260 |
vsri.32 d3, d1, #16 |
|
261 |
vst1.32 {d22[0]}, [r8], ip |
|
262 |
vsri.32 d6, d4, #16 |
|
263 |
vst1.32 {d22[1]}, [r8], ip |
|
264 |
vsri.32 d7, d5, #16 |
|
265 |
vst1.32 {d23[0]}, [r8], ip |
|
266 |
vst1.32 {d23[1]}, [r8], ip |
|
267 |
beq 6f |
|
268 |
vld1.64 {d16-d17},[r4,:128]! |
|
269 |
vcvt.s32.f32 q8, q8, #16 |
|
270 |
vst1.32 {d2[0]}, [r8], ip |
|
271 |
vst1.32 {d2[1]}, [r8], ip |
|
272 |
vld1.64 {d18-d19},[r5,:128]! |
|
273 |
vcvt.s32.f32 q9, q9, #16 |
|
274 |
vst1.32 {d3[0]}, [r8], ip |
|
275 |
vst1.32 {d3[1]}, [r8], ip |
|
276 |
vld1.64 {d20-d21},[r4,:128]! |
|
277 |
vcvt.s32.f32 q10, q10, #16 |
|
278 |
vst1.32 {d6[0]}, [r8], ip |
|
279 |
vst1.32 {d6[1]}, [r8], ip |
|
280 |
vld1.64 {d22-d23},[r5,:128]! |
|
281 |
vcvt.s32.f32 q11, q11, #16 |
|
282 |
vst1.32 {d7[0]}, [r8], ip |
|
283 |
vst1.32 {d7[1]}, [r8], ip |
|
284 |
bgt 6b |
|
285 |
6: vst1.32 {d2[0]}, [r8], ip |
|
286 |
vst1.32 {d2[1]}, [r8], ip |
|
287 |
vst1.32 {d3[0]}, [r8], ip |
|
288 |
vst1.32 {d3[1]}, [r8], ip |
|
289 |
vst1.32 {d6[0]}, [r8], ip |
|
290 |
vst1.32 {d6[1]}, [r8], ip |
|
291 |
vst1.32 {d7[0]}, [r8], ip |
|
292 |
vst1.32 {d7[1]}, [r8], ip |
|
293 |
b 8f |
|
294 |
7: vsri.32 d18, d16, #16 |
|
295 |
vsri.32 d19, d17, #16 |
|
296 |
vst1.32 {d18[0]}, [r8], ip |
|
297 |
vsri.32 d22, d20, #16 |
|
298 |
vst1.32 {d18[1]}, [r8], ip |
|
299 |
vsri.32 d23, d21, #16 |
|
300 |
vst1.32 {d19[0]}, [r8], ip |
|
301 |
vst1.32 {d19[1]}, [r8], ip |
|
302 |
vst1.32 {d22[0]}, [r8], ip |
|
303 |
vst1.32 {d22[1]}, [r8], ip |
|
304 |
vst1.32 {d23[0]}, [r8], ip |
|
305 |
vst1.32 {d23[1]}, [r8], ip |
|
306 |
8: subs r3, r3, #2 |
|
307 |
add r0, r0, #4 |
|
308 |
popeq {r4-r8,pc} |
|
309 |
|
|
310 |
@ 1 channel |
|
311 |
4: ldr r4, [r1],#4 |
|
312 |
tst r2, #8 |
|
313 |
mov lr, r2 |
|
314 |
mov r5, r0 |
|
315 |
vld1.64 {d0-d1}, [r4,:128]! |
|
316 |
vcvt.s32.f32 q0, q0, #16 |
|
317 |
vld1.64 {d2-d3}, [r4,:128]! |
|
318 |
vcvt.s32.f32 q1, q1, #16 |
|
319 |
bne 8f |
|
320 |
6: subs lr, lr, #16 |
|
321 |
vld1.64 {d4-d5}, [r4,:128]! |
|
322 |
vcvt.s32.f32 q2, q2, #16 |
|
323 |
vld1.64 {d6-d7}, [r4,:128]! |
|
324 |
vcvt.s32.f32 q3, q3, #16 |
|
325 |
vst1.16 {d0[1]}, [r5,:16], ip |
|
326 |
vst1.16 {d0[3]}, [r5,:16], ip |
|
327 |
vst1.16 {d1[1]}, [r5,:16], ip |
|
328 |
vst1.16 {d1[3]}, [r5,:16], ip |
|
329 |
vst1.16 {d2[1]}, [r5,:16], ip |
|
330 |
vst1.16 {d2[3]}, [r5,:16], ip |
|
331 |
vst1.16 {d3[1]}, [r5,:16], ip |
|
332 |
vst1.16 {d3[3]}, [r5,:16], ip |
|
333 |
beq 7f |
|
334 |
vld1.64 {d0-d1}, [r4,:128]! |
|
335 |
vcvt.s32.f32 q0, q0, #16 |
|
336 |
vld1.64 {d2-d3}, [r4,:128]! |
|
337 |
vcvt.s32.f32 q1, q1, #16 |
|
338 |
7: vst1.16 {d4[1]}, [r5,:16], ip |
|
339 |
vst1.16 {d4[3]}, [r5,:16], ip |
|
340 |
vst1.16 {d5[1]}, [r5,:16], ip |
|
341 |
vst1.16 {d5[3]}, [r5,:16], ip |
|
342 |
vst1.16 {d6[1]}, [r5,:16], ip |
|
343 |
vst1.16 {d6[3]}, [r5,:16], ip |
|
344 |
vst1.16 {d7[1]}, [r5,:16], ip |
|
345 |
vst1.16 {d7[3]}, [r5,:16], ip |
|
346 |
bgt 6b |
|
347 |
pop {r4-r8,pc} |
|
348 |
8: subs lr, lr, #8 |
|
349 |
vst1.16 {d0[1]}, [r5,:16], ip |
|
350 |
vst1.16 {d0[3]}, [r5,:16], ip |
|
351 |
vst1.16 {d1[1]}, [r5,:16], ip |
|
352 |
vst1.16 {d1[3]}, [r5,:16], ip |
|
353 |
vst1.16 {d2[1]}, [r5,:16], ip |
|
354 |
vst1.16 {d2[3]}, [r5,:16], ip |
|
355 |
vst1.16 {d3[1]}, [r5,:16], ip |
|
356 |
vst1.16 {d3[3]}, [r5,:16], ip |
|
357 |
popeq {r4-r8,pc} |
|
358 |
vld1.64 {d0-d1}, [r4,:128]! |
|
359 |
vcvt.s32.f32 q0, q0, #16 |
|
360 |
vld1.64 {d2-d3}, [r4,:128]! |
|
361 |
vcvt.s32.f32 q1, q1, #16 |
|
362 |
b 6b |
|
363 |
endfunc |
|
364 |
|
|
365 |
/*
 * Convert an array of 32-bit integers to float and multiply every element
 * by one scalar (NEON version).
 *
 * r0 = dst (float output), r1 = src (int input). Both are accessed with a
 * :128 alignment qualifier, so they have to be 16-byte aligned.
 * The scalar and the length arrive in different registers depending on the
 * VFP/NOVFP line prefixes (see below); the length is aliased to "len".
 *
 * len is decreased by 8 per iteration and the loop is only left when it
 * reaches exactly zero (beq), so it has to be a positive multiple of 8.
 *
 * The loop is software pipelined: the first 8 inputs are loaded and converted
 * before the loop, and each iteration multiplies the block converted earlier
 * while loading/converting the following one.
 */
function ff_int32_to_float_fmul_scalar_neon, export=1 |
|
366 |
/* VFP-prefixed lines: scalar is taken from d0[0], length from r2.
 * NOVFP-prefixed lines: scalar is taken from r2, length from r3.
 * NOTE(review): VFP/NOVFP are not defined in the visible source; presumably
 * macros from asm.S selecting the float argument-passing convention - confirm.
 * Either way q0 ends up holding the scalar in all four lanes. */
VFP vdup.32 q0, d0[0] |
|
367 |
VFP len .req r2 |
|
368 |
NOVFP vdup.32 q0, r2 |
|
369 |
NOVFP len .req r3 |
|
370 |
|
|
371 |
/* Prologue: load the first 8 integers (q1, q2) and convert them (q3, q8). */
vld1.32 {q1},[r1,:128]! |
|
372 |
vcvt.f32.s32 q3, q1 |
|
373 |
vld1.32 {q2},[r1,:128]! |
|
374 |
vcvt.f32.s32 q8, q2 |
|
375 |
/* Loop: the flags set by this subs decide the beq below. */
1: subs len, len, #8 |
|
376 |
pld [r1, #16] |
|
377 |
/* Scale the 8 values converted previously into q9/q10. */
vmul.f32 q9, q3, q0 |
|
378 |
vmul.f32 q10, q8, q0 |
|
379 |
/* Last block: skip loading more input and just store the products. */
beq 2f |
|
380 |
/* Load and convert the next 8 inputs before storing the current products. */
vld1.32 {q1},[r1,:128]! |
|
381 |
vcvt.f32.s32 q3, q1 |
|
382 |
vld1.32 {q2},[r1,:128]! |
|
383 |
vcvt.f32.s32 q8, q2 |
|
384 |
vst1.32 {q9}, [r0,:128]! |
|
385 |
vst1.32 {q10},[r0,:128]! |
|
386 |
b 1b |
|
387 |
/* Epilogue: store the final 8 results and return. */
2: vst1.32 {q9}, [r0,:128]! |
|
388 |
vst1.32 {q10},[r0,:128]! |
|
389 |
bx lr |
|
390 |
.unreq len |
|
391 |
endfunc |
libavcodec/arm/fmtconvert_vfp.S | ||
---|---|---|
1 |
/* |
|
2 |
* Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net> |
|
3 |
* |
|
4 |
* This file is part of FFmpeg. |
|
5 |
* |
|
6 |
* FFmpeg is free software; you can redistribute it and/or |
|
7 |
* modify it under the terms of the GNU Lesser General Public |
|
8 |
* License as published by the Free Software Foundation; either |
|
9 |
* version 2.1 of the License, or (at your option) any later version. |
|
10 |
* |
|
11 |
* FFmpeg is distributed in the hope that it will be useful, |
|
12 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
13 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
14 |
* Lesser General Public License for more details. |
|
15 |
* |
|
16 |
* You should have received a copy of the GNU Lesser General Public |
|
17 |
* License along with FFmpeg; if not, write to the Free Software |
|
18 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
19 |
*/ |
|
20 |
|
|
21 |
#include "config.h" |
|
22 |
#include "asm.S" |
|
23 |
|
|
24 |
.syntax unified |
|
25 |
|
|
26 |
/** |
|
27 |
* ARM VFP optimized float to int16 conversion. |
|
28 |
* Assume that len is a positive number and is a multiple of 8, destination |
|
29 |
* buffer is at least 4 bytes aligned (8 bytes alignment is better for |
|
30 |
* performance), little endian byte order |
|
31 |
*/ |
|
32 |
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len) |
|
33 |
/*
 * Register usage: r0 = dst (advanced by stmia), r1 = src (advanced by
 * vldmia), r2 = remaining element count.
 * s16-s23 stage 8 input floats, s0-s7 hold their integer conversions, and
 * r3-r8, ip and lr hold the values while they are saturated and packed.
 * Eight samples are produced per loop iteration and written as four 32-bit
 * words, each holding two 16-bit samples (first sample in the low half).
 */
function ff_float_to_int16_vfp, export=1 |
|
34 |
/* Save the core registers used as scratch; lr is also used as scratch
 * below and is restored straight into pc on return. */
push {r4-r8,lr} |
|
35 |
/* d8-d11 are the double-precision view of s16-s23, which are overwritten
 * by the loads below. */
vpush {d8-d11} |
|
36 |
/* Prologue: load the first 8 floats and convert them to 32-bit integers. */
vldmia r1!, {s16-s23} |
|
37 |
vcvt.s32.f32 s0, s16 |
|
38 |
vcvt.s32.f32 s1, s17 |
|
39 |
vcvt.s32.f32 s2, s18 |
|
40 |
vcvt.s32.f32 s3, s19 |
|
41 |
vcvt.s32.f32 s4, s20 |
|
42 |
vcvt.s32.f32 s5, s21 |
|
43 |
vcvt.s32.f32 s6, s22 |
|
44 |
vcvt.s32.f32 s7, s23 |
|
45 |
1: |
|
46 |
/* The flags set here drive every "gt"-suffixed instruction in the loop
 * body as well as the closing bgt; nothing in between updates them. */
subs r2, r2, #8 |
|
47 |
/* Move the 8 converted integers from s0-s7 into core registers. */
vmov r3, r4, s0, s1 |
|
48 |
vmov r5, r6, s2, s3 |
|
49 |
vmov r7, r8, s4, s5 |
|
50 |
vmov ip, lr, s6, s7 |
|
51 |
/* If more input remains, start loading the next 8 floats now. */
vldmiagt r1!, {s16-s23} |
|
52 |
/* Saturate samples 0-3 to the signed 16-bit range. */
ssat r4, #16, r4 |
|
53 |
ssat r3, #16, r3 |
|
54 |
ssat r6, #16, r6 |
|
55 |
ssat r5, #16, r5 |
|
56 |
/* Pack two samples per word: low half from the first operand, high half
 * from the second operand shifted left by 16. */
pkhbt r3, r3, r4, lsl #16 |
|
57 |
pkhbt r4, r5, r6, lsl #16 |
|
58 |
/* If more input remains, convert the next 8 floats (s0-s7 are free again
 * after the vmov instructions above). */
vcvtgt.s32.f32 s0, s16 |
|
59 |
vcvtgt.s32.f32 s1, s17 |
|
60 |
vcvtgt.s32.f32 s2, s18 |
|
61 |
vcvtgt.s32.f32 s3, s19 |
|
62 |
vcvtgt.s32.f32 s4, s20 |
|
63 |
vcvtgt.s32.f32 s5, s21 |
|
64 |
vcvtgt.s32.f32 s6, s22 |
|
65 |
vcvtgt.s32.f32 s7, s23 |
|
66 |
/* Saturate and pack samples 4-7. */
ssat r8, #16, r8 |
|
67 |
ssat r7, #16, r7 |
|
68 |
ssat lr, #16, lr |
|
69 |
ssat ip, #16, ip |
|
70 |
pkhbt r5, r7, r8, lsl #16 |
|
71 |
pkhbt r6, ip, lr, lsl #16 |
|
72 |
/* Store 4 words = 8 packed 16-bit samples and advance dst. */
stmia r0!, {r3-r6} |
|
73 |
bgt 1b |
|
74 |
|
|
75 |
/* Restore the saved VFP and core registers; popping into pc returns. */
vpop {d8-d11} |
|
76 |
pop {r4-r8,pc} |
|
77 |
endfunc |
libavcodec/binkaudio.c | ||
---|---|---|
33 | 33 |
#include "get_bits.h" |
34 | 34 |
#include "dsputil.h" |
35 | 35 |
#include "fft.h" |
36 |
#include "fmtconvert.h" |
|
36 | 37 |
|
37 | 38 |
extern const uint16_t ff_wma_critical_freqs[25]; |
38 | 39 |
|
... | ... | |
43 | 44 |
AVCodecContext *avctx; |
44 | 45 |
GetBitContext gb; |
45 | 46 |
DSPContext dsp; |
47 |
FmtConvertContext fmt_conv; |
|
46 | 48 |
int first; |
47 | 49 |
int channels; |
48 | 50 |
int frame_len; ///< transform size (samples) |
... | ... | |
71 | 73 |
|
72 | 74 |
s->avctx = avctx; |
73 | 75 |
dsputil_init(&s->dsp, avctx); |
76 |
ff_fmt_convert_init(&s->fmt_conv, avctx); |
|
74 | 77 |
|
75 | 78 |
/* determine frame length */ |
76 | 79 |
if (avctx->sample_rate < 22050) { |
... | ... | |
222 | 225 |
ff_rdft_calc(&s->trans.rdft, coeffs); |
223 | 226 |
} |
224 | 227 |
|
225 |
s->dsp.float_to_int16_interleave(out, (const float **)s->coeffs_ptr, s->frame_len, s->channels); |
|
228 |
s->fmt_conv.float_to_int16_interleave(out, (const float **)s->coeffs_ptr, |
|
229 |
s->frame_len, s->channels); |
|
226 | 230 |
|
227 | 231 |
if (!s->first) { |
228 | 232 |
int count = s->overlap_len * s->channels; |
libavcodec/dca.c | ||
---|---|---|
40 | 40 |
#include "dca.h" |
41 | 41 |
#include "synth_filter.h" |
42 | 42 |
#include "dcadsp.h" |
43 |
#include "fmtconvert.h" |
|
43 | 44 |
|
44 | 45 |
//#define TRACE |
45 | 46 |
|
... | ... | |
347 | 348 |
FFTContext imdct; |
348 | 349 |
SynthFilterContext synth; |
349 | 350 |
DCADSPContext dcadsp; |
351 |
FmtConvertContext fmt_conv; |
|
350 | 352 |
} DCAContext; |
351 | 353 |
|
352 | 354 |
static const uint16_t dca_vlc_offs[] = { |
... | ... | |
1115 | 1117 |
block[m] = get_bitalloc(&s->gb, &dca_smpl_bitalloc[abits], sel); |
1116 | 1118 |
} |
1117 | 1119 |
|
1118 |
s->dsp.int32_to_float_fmul_scalar(subband_samples[k][l],
|
|
1120 |
s->fmt_conv.int32_to_float_fmul_scalar(subband_samples[k][l],
|
|
1119 | 1121 |
block, rscale, 8); |
1120 | 1122 |
} |
1121 | 1123 |
|
... | ... | |
1802 | 1804 |
} |
1803 | 1805 |
} |
1804 | 1806 |
|
1805 |
s->dsp.float_to_int16_interleave(samples, s->samples_chanptr, 256, channels);
|
|
1807 |
s->fmt_conv.float_to_int16_interleave(samples, s->samples_chanptr, 256, channels);
|
|
1806 | 1808 |
samples += 256 * channels; |
1807 | 1809 |
} |
1808 | 1810 |
|
... | ... | |
1835 | 1837 |
ff_mdct_init(&s->imdct, 6, 1, 1.0); |
1836 | 1838 |
ff_synth_filter_init(&s->synth); |
1837 | 1839 |
ff_dcadsp_init(&s->dcadsp); |
1840 |
ff_fmt_convert_init(&s->fmt_conv, avctx); |
|
1838 | 1841 |
|
1839 | 1842 |
for (i = 0; i < DCA_PRIM_CHANNELS_MAX+1; i++) |
1840 | 1843 |
s->samples_chanptr[i] = s->samples + i * 256; |
libavcodec/dsputil.c | ||
---|---|---|
3867 | 3867 |
return p; |
3868 | 3868 |
} |
3869 | 3869 |
|
3870 |
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){ |
|
3871 |
int i; |
|
3872 |
for(i=0; i<len; i++) |
|
3873 |
dst[i] = src[i] * mul; |
|
3874 |
} |
|
3875 |
|
|
3876 | 3870 |
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini, |
3877 | 3871 |
uint32_t maxi, uint32_t maxisign) |
3878 | 3872 |
{ |
... | ... | |
3918 | 3912 |
} |
3919 | 3913 |
} |
3920 | 3914 |
|
3921 |
static av_always_inline int float_to_int16_one(const float *src){ |
|
3922 |
return av_clip_int16(lrintf(*src)); |
|
3923 |
} |
|
3924 |
|
|
3925 |
static void ff_float_to_int16_c(int16_t *dst, const float *src, long len){ |
|
3926 |
int i; |
|
3927 |
for(i=0; i<len; i++) |
|
3928 |
dst[i] = float_to_int16_one(src+i); |
|
3929 |
} |
|
3930 |
|
|
3931 |
static void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){ |
|
3932 |
int i,j,c; |
|
3933 |
if(channels==2){ |
|
3934 |
for(i=0; i<len; i++){ |
|
3935 |
dst[2*i] = float_to_int16_one(src[0]+i); |
|
3936 |
dst[2*i+1] = float_to_int16_one(src[1]+i); |
|
3937 |
} |
|
3938 |
}else{ |
|
3939 |
for(c=0; c<channels; c++) |
|
3940 |
for(i=0, j=c; i<len; i++, j+=channels) |
|
3941 |
dst[j] = float_to_int16_one(src[c]+i); |
|
3942 |
} |
|
3943 |
} |
|
3944 |
|
|
3945 | 3915 |
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift) |
3946 | 3916 |
{ |
3947 | 3917 |
int res = 0; |
... | ... | |
4437 | 4407 |
c->vector_fmul_reverse = vector_fmul_reverse_c; |
4438 | 4408 |
c->vector_fmul_add = vector_fmul_add_c; |
4439 | 4409 |
c->vector_fmul_window = vector_fmul_window_c; |
4440 |
c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c; |
|
4441 | 4410 |
c->vector_clipf = vector_clipf_c; |
4442 |
c->float_to_int16 = ff_float_to_int16_c; |
|
4443 |
c->float_to_int16_interleave = ff_float_to_int16_interleave_c; |
|
4444 | 4411 |
c->scalarproduct_int16 = scalarproduct_int16_c; |
4445 | 4412 |
c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c; |
4446 | 4413 |
c->scalarproduct_float = scalarproduct_float_c; |
libavcodec/dsputil.h | ||
---|---|---|
392 | 392 |
/* assume len is a multiple of 4, and arrays are 16-byte aligned */ |
393 | 393 |
void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len); |
394 | 394 |
/* assume len is a multiple of 8, and arrays are 16-byte aligned */ |
395 |
void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len); |
|
396 | 395 |
void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */); |
397 | 396 |
/** |
398 | 397 |
* Multiply a vector of floats by a scalar float. Source and |
... | ... | |
445 | 444 |
*/ |
446 | 445 |
void (*butterflies_float)(float *restrict v1, float *restrict v2, int len); |
447 | 446 |
|
448 |
/* convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */ |
|
449 |
void (*float_to_int16)(int16_t *dst, const float *src, long len); |
|
450 |
void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels); |
|
451 |
|
|
452 | 447 |
/* (I)DCT */ |
453 | 448 |
void (*fdct)(DCTELEM *block/* align 16*/); |
454 | 449 |
void (*fdct248)(DCTELEM *block/* align 16*/); |
libavcodec/fmtconvert.c | ||
---|---|---|
1 |
/* |
|
2 |
* Format Conversion Utils |
|
3 |
* Copyright (c) 2000, 2001 Fabrice Bellard |
|
4 |
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
|
5 |
* |
|
6 |
* This file is part of FFmpeg. |
|
7 |
* |
|
8 |
* FFmpeg is free software; you can redistribute it and/or |
|
9 |
* modify it under the terms of the GNU Lesser General Public |
|
10 |
* License as published by the Free Software Foundation; either |
|
11 |
* version 2.1 of the License, or (at your option) any later version. |
|
12 |
* |
|
13 |
* FFmpeg is distributed in the hope that it will be useful, |
|
14 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
15 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
16 |
* Lesser General Public License for more details. |
|
17 |
* |
|
18 |
* You should have received a copy of the GNU Lesser General Public |
|
19 |
* License along with FFmpeg; if not, write to the Free Software |
|
20 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
21 |
*/ |
|
22 |
|
|
23 |
#include "avcodec.h" |
|
24 |
#include "fmtconvert.h" |
|
25 |
|
|
26 |
/**
 * Scale an array of integers into floats (portable C implementation).
 *
 * Each of the first len elements of src is converted to float, multiplied
 * by mul and stored at the same position in dst. Nothing is read or
 * written when len is zero or negative.
 *
 * @param dst destination array of float
 * @param src source array of int
 * @param mul factor every converted element is multiplied by
 * @param len number of elements to convert
 */
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len)
{
    int remaining = len;

    while (remaining > 0) {
        *dst++ = *src++ * mul;
        remaining--;
    }
}
|
31 |
|
|
32 |
/**
 * Convert a single float sample to an integer clipped by av_clip_int16().
 *
 * The sample is rounded to an integer with lrintf() and the rounded value
 * is then handed to av_clip_int16().
 * NOTE(review): lrintf() returns long; the value is implicitly converted to
 * av_clip_int16()'s parameter type before it is clipped - confirm that this
 * is acceptable for inputs far outside the 16-bit range.
 *
 * @param src pointer to the sample to convert
 * @return the value returned by av_clip_int16() for the rounded sample
 */
static av_always_inline int float_to_int16_one(const float *src)
{
    const long rounded = lrintf(*src);

    return av_clip_int16(rounded);
}
|
35 |
|
|
36 |
/**
 * Convert an array of float to an array of int16_t (portable C version).
 *
 * Every element is converted independently with float_to_int16_one().
 * Nothing is read or written when len is zero or negative.
 *
 * @param dst destination array of int16_t
 * @param src source array of float
 * @param len number of elements to convert
 */
static void float_to_int16_c(int16_t *dst, const float *src, long len)
{
    /* The index has the same type as len: an int counter would overflow
     * (undefined behaviour) before reaching a len above INT_MAX on
     * platforms where long is wider than int. */
    long i;

    for (i = 0; i < len; i++)
        dst[i] = float_to_int16_one(src + i);
}
|
42 |
|
|
43 |
/**
 * Convert several planar float arrays to one interleaved int16_t array
 * (portable C version).
 *
 * Sample i of channel c is converted with float_to_int16_one() and stored
 * at dst[i * channels + c]. The two-channel case has a dedicated loop that
 * writes both channels of a sample pair together.
 *
 * @param dst      destination array, len * channels elements
 * @param src      array of channels pointers, each to len floats
 * @param len      number of samples per channel
 * @param channels number of channels
 */
static void float_to_int16_interleave_c(int16_t *dst, const float **src,
                                        long len, int channels)
{
    /* i counts up to len and j reaches len * channels, so both use long:
     * int indices would overflow (undefined behaviour) for large buffers
     * on platforms where long is wider than int. */
    long i, j;
    int c;

    if (channels == 2) {
        for (i = 0; i < len; i++) {
            dst[2*i]     = float_to_int16_one(src[0] + i);
            dst[2*i + 1] = float_to_int16_one(src[1] + i);
        }
    } else {
        for (c = 0; c < channels; c++)
            for (i = 0, j = c; i < len; i++, j += channels)
                dst[j] = float_to_int16_one(src[c] + i);
    }
}
|
58 |
|
|
59 |
/**
 * Fill a FmtConvertContext with conversion function pointers.
 *
 * The three pointers are first set to the C implementations from this
 * file. Afterwards the ARM, PPC and x86 init functions are called, in that
 * order, for each of ARCH_ARM, ARCH_PPC and HAVE_MMX that is non-zero.
 * NOTE(review): those functions presumably replace some pointers with
 * optimized versions; their bodies are not part of this file - confirm.
 *
 * @param c     context to initialize
 * @param avctx codec context, passed on unchanged to the per-architecture
 *              init functions
 */
av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
{
    /* C defaults */
    c->float_to_int16_interleave  = float_to_int16_interleave_c;
    c->float_to_int16             = float_to_int16_c;
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;

    /* per-architecture initialization */
    if (ARCH_ARM) {
        ff_fmt_convert_init_arm(c, avctx);
    }
    if (ARCH_PPC) {
        ff_fmt_convert_init_ppc(c, avctx);
    }
    if (HAVE_MMX) {
        ff_fmt_convert_init_x86(c, avctx);
    }
}
libavcodec/fmtconvert.h | ||
---|---|---|
1 |
/* |
|
2 |
* Format Conversion Utils |
|
3 |
* Copyright (c) 2000, 2001 Fabrice Bellard |
|
4 |
* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> |
|
5 |
* |
|
6 |
* This file is part of FFmpeg. |
|
7 |
* |
|
8 |
* FFmpeg is free software; you can redistribute it and/or |
|
9 |
* modify it under the terms of the GNU Lesser General Public |
|
10 |
* License as published by the Free Software Foundation; either |
|
11 |
* version 2.1 of the License, or (at your option) any later version. |
|
12 |
* |
|
13 |
* FFmpeg is distributed in the hope that it will be useful, |
|
14 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
15 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
|
16 |
* Lesser General Public License for more details. |
|
17 |
* |
|
18 |
* You should have received a copy of the GNU Lesser General Public |
|
19 |
* License along with FFmpeg; if not, write to the Free Software |
|
20 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
|
21 |
*/ |
|
22 |
|
|
23 |
#ifndef AVCODEC_FMTCONVERT_H |
|
24 |
#define AVCODEC_FMTCONVERT_H |
|
25 |
|
|
26 |
#include "avcodec.h" |
|
27 |
|
|
28 |
typedef struct FmtConvertContext { |
|
29 |
/** |
|
30 |
* Convert an array of int32_t to float and multiply by a float value. |
|
31 |
* @param dst destination array of float. |
|
32 |
* constraints: 16-byte aligned |
|
33 |
* @param src source array of int32_t. |
|
34 |
* constraints: 16-byte aligned |
|
35 |
* @param len number of elements to convert. |
|
36 |
* constraints: multiple of 8 |
|
37 |
*/ |
|
38 |
void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len); |
|
39 |
|
|
40 |
/** |
|
41 |
* Convert an array of float to an array of int16_t. |
|
42 |
* |
|
43 |
* Convert floats in the range [-32768.0,32767.0] to ints |
|
44 |
* without rescaling |
|
45 |
* |
|
46 |
* @param dst destination array of int16_t. |
|
47 |
* constraints: 16-byte aligned |
|
48 |
* @param src source array of float. |
|
49 |
* constraints: 16-byte aligned |
|
50 |
* @param len number of elements to convert. |
|
51 |
* constraints: multiple of 8 |
|
52 |
*/ |
|
53 |
void (*float_to_int16)(int16_t *dst, const float *src, long len); |
|
54 |
|
|
55 |
/** |
|
56 |
* Convert multiple arrays of float to an interleaved array of int16_t. |
|
57 |
* |
|
58 |
* Convert floats in the range [-32768.0,32767.0] to ints |
|
59 |
* without rescaling |
|
60 |
* |
Also available in: Unified diff