Revision c73d99e6

View differences:

libavcodec/Makefile
12 12
       bitstream_filter.o                                               \
13 13
       dsputil.o                                                        \
14 14
       faanidct.o                                                       \
15
       fmtconvert.o                                                     \
15 16
       imgconvert.o                                                     \
16 17
       jrevdct.o                                                        \
17 18
       opt.o                                                            \
libavcodec/aac.h
35 35
#include "fft.h"
36 36
#include "mpeg4audio.h"
37 37
#include "sbr.h"
38
#include "fmtconvert.h"
38 39

  
39 40
#include <stdint.h>
40 41

  
......
268 269
    FFTContext mdct;
269 270
    FFTContext mdct_small;
270 271
    DSPContext dsp;
272
    FmtConvertContext fmt_conv;
271 273
    int random_state;
272 274
    /** @} */
273 275

  
libavcodec/aacdec.c
85 85
#include "get_bits.h"
86 86
#include "dsputil.h"
87 87
#include "fft.h"
88
#include "fmtconvert.h"
88 89
#include "lpc.h"
89 90

  
90 91
#include "aac.h"
......
562 563
    ff_aac_sbr_init();
563 564

  
564 565
    dsputil_init(&ac->dsp, avctx);
566
    ff_fmt_convert_init(&ac->fmt_conv, avctx);
565 567

  
566 568
    ac->random_state = 0x1f2e3d4c;
567 569

  
......
2032 2034
    *data_size = data_size_tmp;
2033 2035

  
2034 2036
    if (samples)
2035
        ac->dsp.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
2037
        ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
2036 2038

  
2037 2039
    if (ac->output_configured)
2038 2040
        ac->output_configured = OC_LOCKED;
libavcodec/ac3dec.c
193 193
    ff_mdct_init(&s->imdct_512, 9, 1, 1.0);
194 194
    ff_kbd_window_init(s->window, 5.0, 256);
195 195
    dsputil_init(&s->dsp, avctx);
196
    ff_fmt_convert_init(&s->fmt_conv, avctx);
196 197
    av_lfg_init(&s->dith_state, 0);
197 198

  
198 199
    /* set scale value for float to int16 conversion */
......
1255 1256
        } else {
1256 1257
            gain *= s->dynamic_range[0];
1257 1258
        }
1258
        s->dsp.int32_to_float_fmul_scalar(s->transform_coeffs[ch], s->fixed_coeffs[ch], gain, 256);
1259
        s->fmt_conv.int32_to_float_fmul_scalar(s->transform_coeffs[ch], s->fixed_coeffs[ch], gain, 256);
1259 1260
    }
1260 1261

  
1261 1262
    /* apply spectral extension to high frequency bins */
......
1407 1408
            av_log(avctx, AV_LOG_ERROR, "error decoding the audio block\n");
1408 1409
            err = 1;
1409 1410
        }
1410
        s->dsp.float_to_int16_interleave(out_samples, output, 256, s->out_channels);
1411
        s->fmt_conv.float_to_int16_interleave(out_samples, output, 256, s->out_channels);
1411 1412
        out_samples += 256 * s->out_channels;
1412 1413
    }
1413 1414
    *data_size = s->num_blocks * 256 * avctx->channels * sizeof (int16_t);
libavcodec/ac3dec.h
55 55
#include "get_bits.h"
56 56
#include "dsputil.h"
57 57
#include "fft.h"
58
#include "fmtconvert.h"
58 59

  
59 60
/* override ac3.h to include coupling channel */
60 61
#undef AC3_MAX_CHANNELS
......
190 191

  
191 192
///@defgroup opt optimization
192 193
    DSPContext dsp;                         ///< for optimization
194
    FmtConvertContext fmt_conv;             ///< optimized conversion functions
193 195
    float mul_bias;                         ///< scaling for float_to_int16 conversion
194 196
///@}
195 197

  
libavcodec/arm/Makefile
9 9
OBJS                                   += arm/dsputil_init_arm.o        \
10 10
                                          arm/dsputil_arm.o             \
11 11
                                          arm/fft_init_arm.o            \
12
                                          arm/fmtconvert_init_arm.o     \
12 13
                                          arm/jrevdct_arm.o             \
13 14
                                          arm/mpegvideo_arm.o           \
14 15
                                          arm/simple_idct_arm.o         \
......
22 23
                                          arm/dsputil_armv6.o           \
23 24
                                          arm/simple_idct_armv6.o       \
24 25

  
26
VFP-OBJS-$(HAVE_ARMV6)                 += arm/fmtconvert_vfp.o          \
27

  
25 28
OBJS-$(HAVE_ARMVFP)                    += arm/dsputil_vfp.o             \
26 29
                                          arm/dsputil_init_vfp.o        \
30
                                          $(VFP-OBJS-yes)
27 31

  
28 32
OBJS-$(HAVE_IWMMXT)                    += arm/dsputil_iwmmxt.o          \
29 33
                                          arm/mpegvideo_iwmmxt.o        \
......
52 56

  
53 57
OBJS-$(HAVE_NEON)                      += arm/dsputil_init_neon.o       \
54 58
                                          arm/dsputil_neon.o            \
59
                                          arm/fmtconvert_neon.o         \
55 60
                                          arm/int_neon.o                \
56 61
                                          arm/mpegvideo_neon.o          \
57 62
                                          arm/simple_idct_neon.o        \
libavcodec/arm/dsputil_init_neon.c
153 153
                              int len);
154 154
void ff_butterflies_float_neon(float *v1, float *v2, int len);
155 155
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len);
156
void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src,
157
                                        float mul, int len);
158 156
void ff_vector_fmul_reverse_neon(float *dst, const float *src0,
159 157
                                 const float *src1, int len);
160 158
void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1,
......
162 160

  
163 161
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max,
164 162
                          int len);
165
void ff_float_to_int16_neon(int16_t *, const float *, long);
166
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
167 163

  
168 164
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
169 165

  
......
308 304
    c->vector_fmul_scalar         = ff_vector_fmul_scalar_neon;
309 305
    c->butterflies_float          = ff_butterflies_float_neon;
310 306
    c->scalarproduct_float        = ff_scalarproduct_float_neon;
311
    c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon;
312 307
    c->vector_fmul_reverse        = ff_vector_fmul_reverse_neon;
313 308
    c->vector_fmul_add            = ff_vector_fmul_add_neon;
314 309
    c->vector_clipf               = ff_vector_clipf_neon;
......
319 314
    c->sv_fmul_scalar[0] = ff_sv_fmul_scalar_2_neon;
320 315
    c->sv_fmul_scalar[1] = ff_sv_fmul_scalar_4_neon;
321 316

  
322
    if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
323
        c->float_to_int16            = ff_float_to_int16_neon;
324
        c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
325
    }
326

  
327 317
    if (CONFIG_VORBIS_DECODER)
328 318
        c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
329 319

  
libavcodec/arm/dsputil_init_vfp.c
25 25
                        const float *src1, int len);
26 26
void ff_vector_fmul_reverse_vfp(float *dst, const float *src0,
27 27
                                const float *src1, int len);
28
void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len);
29 28

  
30 29
void ff_dsputil_init_vfp(DSPContext* c, AVCodecContext *avctx)
31 30
{
32 31
    c->vector_fmul = ff_vector_fmul_vfp;
33 32
    c->vector_fmul_reverse = ff_vector_fmul_reverse_vfp;
34
#if HAVE_ARMV6
35
    c->float_to_int16 = ff_float_to_int16_vfp;
36
#endif
37 33
}
libavcodec/arm/dsputil_neon.S
400 400
        bx              lr
401 401
endfunc
402 402

  
403
function ff_float_to_int16_neon, export=1
404
        subs            r2,  r2,  #8
405
        vld1.64         {d0-d1},  [r1,:128]!
406
        vcvt.s32.f32    q8,  q0,  #16
407
        vld1.64         {d2-d3},  [r1,:128]!
408
        vcvt.s32.f32    q9,  q1,  #16
409
        beq             3f
410
        bics            ip,  r2,  #15
411
        beq             2f
412
1:      subs            ip,  ip,  #16
413
        vshrn.s32       d4,  q8,  #16
414
        vld1.64         {d0-d1},  [r1,:128]!
415
        vcvt.s32.f32    q0,  q0,  #16
416
        vshrn.s32       d5,  q9,  #16
417
        vld1.64         {d2-d3},  [r1,:128]!
418
        vcvt.s32.f32    q1,  q1,  #16
419
        vshrn.s32       d6,  q0,  #16
420
        vst1.64         {d4-d5},  [r0,:128]!
421
        vshrn.s32       d7,  q1,  #16
422
        vld1.64         {d16-d17},[r1,:128]!
423
        vcvt.s32.f32    q8,  q8,  #16
424
        vld1.64         {d18-d19},[r1,:128]!
425
        vcvt.s32.f32    q9,  q9,  #16
426
        vst1.64         {d6-d7},  [r0,:128]!
427
        bne             1b
428
        ands            r2,  r2,  #15
429
        beq             3f
430
2:      vld1.64         {d0-d1},  [r1,:128]!
431
        vshrn.s32       d4,  q8,  #16
432
        vcvt.s32.f32    q0,  q0,  #16
433
        vld1.64         {d2-d3},  [r1,:128]!
434
        vshrn.s32       d5,  q9,  #16
435
        vcvt.s32.f32    q1,  q1,  #16
436
        vshrn.s32       d6,  q0,  #16
437
        vst1.64         {d4-d5},  [r0,:128]!
438
        vshrn.s32       d7,  q1,  #16
439
        vst1.64         {d6-d7},  [r0,:128]!
440
        bx              lr
441
3:      vshrn.s32       d4,  q8,  #16
442
        vshrn.s32       d5,  q9,  #16
443
        vst1.64         {d4-d5},  [r0,:128]!
444
        bx              lr
445
endfunc
446

  
447
function ff_float_to_int16_interleave_neon, export=1
448
        cmp             r3, #2
449
        ldrlt           r1, [r1]
450
        blt             ff_float_to_int16_neon
451
        bne             4f
452

  
453
        ldr             r3, [r1]
454
        ldr             r1, [r1, #4]
455

  
456
        subs            r2,  r2,  #8
457
        vld1.64         {d0-d1},  [r3,:128]!
458
        vcvt.s32.f32    q8,  q0,  #16
459
        vld1.64         {d2-d3},  [r3,:128]!
460
        vcvt.s32.f32    q9,  q1,  #16
461
        vld1.64         {d20-d21},[r1,:128]!
462
        vcvt.s32.f32    q10, q10, #16
463
        vld1.64         {d22-d23},[r1,:128]!
464
        vcvt.s32.f32    q11, q11, #16
465
        beq             3f
466
        bics            ip,  r2,  #15
467
        beq             2f
468
1:      subs            ip,  ip,  #16
469
        vld1.64         {d0-d1},  [r3,:128]!
470
        vcvt.s32.f32    q0,  q0,  #16
471
        vsri.32         q10, q8,  #16
472
        vld1.64         {d2-d3},  [r3,:128]!
473
        vcvt.s32.f32    q1,  q1,  #16
474
        vld1.64         {d24-d25},[r1,:128]!
475
        vcvt.s32.f32    q12, q12, #16
476
        vld1.64         {d26-d27},[r1,:128]!
477
        vsri.32         q11, q9,  #16
478
        vst1.64         {d20-d21},[r0,:128]!
479
        vcvt.s32.f32    q13, q13, #16
480
        vst1.64         {d22-d23},[r0,:128]!
481
        vsri.32         q12, q0,  #16
482
        vld1.64         {d16-d17},[r3,:128]!
483
        vsri.32         q13, q1,  #16
484
        vst1.64         {d24-d25},[r0,:128]!
485
        vcvt.s32.f32    q8,  q8,  #16
486
        vld1.64         {d18-d19},[r3,:128]!
487
        vcvt.s32.f32    q9,  q9,  #16
488
        vld1.64         {d20-d21},[r1,:128]!
489
        vcvt.s32.f32    q10, q10, #16
490
        vld1.64         {d22-d23},[r1,:128]!
491
        vcvt.s32.f32    q11, q11, #16
492
        vst1.64         {d26-d27},[r0,:128]!
493
        bne             1b
494
        ands            r2,  r2,  #15
495
        beq             3f
496
2:      vsri.32         q10, q8,  #16
497
        vld1.64         {d0-d1},  [r3,:128]!
498
        vcvt.s32.f32    q0,  q0,  #16
499
        vld1.64         {d2-d3},  [r3,:128]!
500
        vcvt.s32.f32    q1,  q1,  #16
501
        vld1.64         {d24-d25},[r1,:128]!
502
        vcvt.s32.f32    q12, q12, #16
503
        vsri.32         q11, q9,  #16
504
        vld1.64         {d26-d27},[r1,:128]!
505
        vcvt.s32.f32    q13, q13, #16
506
        vst1.64         {d20-d21},[r0,:128]!
507
        vsri.32         q12, q0,  #16
508
        vst1.64         {d22-d23},[r0,:128]!
509
        vsri.32         q13, q1,  #16
510
        vst1.64         {d24-d27},[r0,:128]!
511
        bx              lr
512
3:      vsri.32         q10, q8,  #16
513
        vsri.32         q11, q9,  #16
514
        vst1.64         {d20-d23},[r0,:128]!
515
        bx              lr
516

  
517
4:      push            {r4-r8,lr}
518
        cmp             r3,  #4
519
        lsl             ip,  r3,  #1
520
        blt             4f
521

  
522
        @ 4 channels
523
5:      ldmia           r1!, {r4-r7}
524
        mov             lr,  r2
525
        mov             r8,  r0
526
        vld1.64         {d16-d17},[r4,:128]!
527
        vcvt.s32.f32    q8,  q8,  #16
528
        vld1.64         {d18-d19},[r5,:128]!
529
        vcvt.s32.f32    q9,  q9,  #16
530
        vld1.64         {d20-d21},[r6,:128]!
531
        vcvt.s32.f32    q10, q10, #16
532
        vld1.64         {d22-d23},[r7,:128]!
533
        vcvt.s32.f32    q11, q11, #16
534
6:      subs            lr,  lr,  #8
535
        vld1.64         {d0-d1},  [r4,:128]!
536
        vcvt.s32.f32    q0,  q0,  #16
537
        vsri.32         q9,  q8,  #16
538
        vld1.64         {d2-d3},  [r5,:128]!
539
        vcvt.s32.f32    q1,  q1,  #16
540
        vsri.32         q11, q10, #16
541
        vld1.64         {d4-d5},  [r6,:128]!
542
        vcvt.s32.f32    q2,  q2,  #16
543
        vzip.32         d18, d22
544
        vld1.64         {d6-d7},  [r7,:128]!
545
        vcvt.s32.f32    q3,  q3,  #16
546
        vzip.32         d19, d23
547
        vst1.64         {d18},    [r8], ip
548
        vsri.32         q1,  q0,  #16
549
        vst1.64         {d22},    [r8], ip
550
        vsri.32         q3,  q2,  #16
551
        vst1.64         {d19},    [r8], ip
552
        vzip.32         d2,  d6
553
        vst1.64         {d23},    [r8], ip
554
        vzip.32         d3,  d7
555
        beq             7f
556
        vld1.64         {d16-d17},[r4,:128]!
557
        vcvt.s32.f32    q8,  q8,  #16
558
        vst1.64         {d2},     [r8], ip
559
        vld1.64         {d18-d19},[r5,:128]!
560
        vcvt.s32.f32    q9,  q9,  #16
561
        vst1.64         {d6},     [r8], ip
562
        vld1.64         {d20-d21},[r6,:128]!
563
        vcvt.s32.f32    q10, q10, #16
564
        vst1.64         {d3},     [r8], ip
565
        vld1.64         {d22-d23},[r7,:128]!
566
        vcvt.s32.f32    q11, q11, #16
567
        vst1.64         {d7},     [r8], ip
568
        b               6b
569
7:      vst1.64         {d2},     [r8], ip
570
        vst1.64         {d6},     [r8], ip
571
        vst1.64         {d3},     [r8], ip
572
        vst1.64         {d7},     [r8], ip
573
        subs            r3,  r3,  #4
574
        popeq           {r4-r8,pc}
575
        cmp             r3,  #4
576
        add             r0,  r0,  #8
577
        bge             5b
578

  
579
        @ 2 channels
580
4:      cmp             r3,  #2
581
        blt             4f
582
        ldmia           r1!, {r4-r5}
583
        mov             lr,  r2
584
        mov             r8,  r0
585
        tst             lr,  #8
586
        vld1.64         {d16-d17},[r4,:128]!
587
        vcvt.s32.f32    q8,  q8,  #16
588
        vld1.64         {d18-d19},[r5,:128]!
589
        vcvt.s32.f32    q9,  q9,  #16
590
        vld1.64         {d20-d21},[r4,:128]!
591
        vcvt.s32.f32    q10, q10, #16
592
        vld1.64         {d22-d23},[r5,:128]!
593
        vcvt.s32.f32    q11, q11, #16
594
        beq             6f
595
        subs            lr,  lr,  #8
596
        beq             7f
597
        vsri.32         d18, d16, #16
598
        vsri.32         d19, d17, #16
599
        vld1.64         {d16-d17},[r4,:128]!
600
        vcvt.s32.f32    q8,  q8,  #16
601
        vst1.32         {d18[0]}, [r8], ip
602
        vsri.32         d22, d20, #16
603
        vst1.32         {d18[1]}, [r8], ip
604
        vsri.32         d23, d21, #16
605
        vst1.32         {d19[0]}, [r8], ip
606
        vst1.32         {d19[1]}, [r8], ip
607
        vld1.64         {d18-d19},[r5,:128]!
608
        vcvt.s32.f32    q9,  q9,  #16
609
        vst1.32         {d22[0]}, [r8], ip
610
        vst1.32         {d22[1]}, [r8], ip
611
        vld1.64         {d20-d21},[r4,:128]!
612
        vcvt.s32.f32    q10, q10, #16
613
        vst1.32         {d23[0]}, [r8], ip
614
        vst1.32         {d23[1]}, [r8], ip
615
        vld1.64         {d22-d23},[r5,:128]!
616
        vcvt.s32.f32    q11, q11, #16
617
6:      subs            lr,  lr,  #16
618
        vld1.64         {d0-d1},  [r4,:128]!
619
        vcvt.s32.f32    q0,  q0,  #16
620
        vsri.32         d18, d16, #16
621
        vld1.64         {d2-d3},  [r5,:128]!
622
        vcvt.s32.f32    q1,  q1,  #16
623
        vsri.32         d19, d17, #16
624
        vld1.64         {d4-d5},  [r4,:128]!
625
        vcvt.s32.f32    q2,  q2,  #16
626
        vld1.64         {d6-d7},  [r5,:128]!
627
        vcvt.s32.f32    q3,  q3,  #16
628
        vst1.32         {d18[0]}, [r8], ip
629
        vsri.32         d22, d20, #16
630
        vst1.32         {d18[1]}, [r8], ip
631
        vsri.32         d23, d21, #16
632
        vst1.32         {d19[0]}, [r8], ip
633
        vsri.32         d2,  d0,  #16
634
        vst1.32         {d19[1]}, [r8], ip
635
        vsri.32         d3,  d1,  #16
636
        vst1.32         {d22[0]}, [r8], ip
637
        vsri.32         d6,  d4,  #16
638
        vst1.32         {d22[1]}, [r8], ip
639
        vsri.32         d7,  d5,  #16
640
        vst1.32         {d23[0]}, [r8], ip
641
        vst1.32         {d23[1]}, [r8], ip
642
        beq             6f
643
        vld1.64         {d16-d17},[r4,:128]!
644
        vcvt.s32.f32    q8,  q8,  #16
645
        vst1.32         {d2[0]},  [r8], ip
646
        vst1.32         {d2[1]},  [r8], ip
647
        vld1.64         {d18-d19},[r5,:128]!
648
        vcvt.s32.f32    q9,  q9,  #16
649
        vst1.32         {d3[0]},  [r8], ip
650
        vst1.32         {d3[1]},  [r8], ip
651
        vld1.64         {d20-d21},[r4,:128]!
652
        vcvt.s32.f32    q10, q10, #16
653
        vst1.32         {d6[0]},  [r8], ip
654
        vst1.32         {d6[1]},  [r8], ip
655
        vld1.64         {d22-d23},[r5,:128]!
656
        vcvt.s32.f32    q11, q11, #16
657
        vst1.32         {d7[0]},  [r8], ip
658
        vst1.32         {d7[1]},  [r8], ip
659
        bgt             6b
660
6:      vst1.32         {d2[0]},  [r8], ip
661
        vst1.32         {d2[1]},  [r8], ip
662
        vst1.32         {d3[0]},  [r8], ip
663
        vst1.32         {d3[1]},  [r8], ip
664
        vst1.32         {d6[0]},  [r8], ip
665
        vst1.32         {d6[1]},  [r8], ip
666
        vst1.32         {d7[0]},  [r8], ip
667
        vst1.32         {d7[1]},  [r8], ip
668
        b               8f
669
7:      vsri.32         d18, d16, #16
670
        vsri.32         d19, d17, #16
671
        vst1.32         {d18[0]}, [r8], ip
672
        vsri.32         d22, d20, #16
673
        vst1.32         {d18[1]}, [r8], ip
674
        vsri.32         d23, d21, #16
675
        vst1.32         {d19[0]}, [r8], ip
676
        vst1.32         {d19[1]}, [r8], ip
677
        vst1.32         {d22[0]}, [r8], ip
678
        vst1.32         {d22[1]}, [r8], ip
679
        vst1.32         {d23[0]}, [r8], ip
680
        vst1.32         {d23[1]}, [r8], ip
681
8:      subs            r3,  r3,  #2
682
        add             r0,  r0,  #4
683
        popeq           {r4-r8,pc}
684

  
685
        @ 1 channel
686
4:      ldr             r4,  [r1],#4
687
        tst             r2,  #8
688
        mov             lr,  r2
689
        mov             r5,  r0
690
        vld1.64         {d0-d1},  [r4,:128]!
691
        vcvt.s32.f32    q0,  q0,  #16
692
        vld1.64         {d2-d3},  [r4,:128]!
693
        vcvt.s32.f32    q1,  q1,  #16
694
        bne             8f
695
6:      subs            lr,  lr,  #16
696
        vld1.64         {d4-d5},  [r4,:128]!
697
        vcvt.s32.f32    q2,  q2,  #16
698
        vld1.64         {d6-d7},  [r4,:128]!
699
        vcvt.s32.f32    q3,  q3,  #16
700
        vst1.16         {d0[1]},  [r5,:16], ip
701
        vst1.16         {d0[3]},  [r5,:16], ip
702
        vst1.16         {d1[1]},  [r5,:16], ip
703
        vst1.16         {d1[3]},  [r5,:16], ip
704
        vst1.16         {d2[1]},  [r5,:16], ip
705
        vst1.16         {d2[3]},  [r5,:16], ip
706
        vst1.16         {d3[1]},  [r5,:16], ip
707
        vst1.16         {d3[3]},  [r5,:16], ip
708
        beq             7f
709
        vld1.64         {d0-d1},  [r4,:128]!
710
        vcvt.s32.f32    q0,  q0,  #16
711
        vld1.64         {d2-d3},  [r4,:128]!
712
        vcvt.s32.f32    q1,  q1,  #16
713
7:      vst1.16         {d4[1]},  [r5,:16], ip
714
        vst1.16         {d4[3]},  [r5,:16], ip
715
        vst1.16         {d5[1]},  [r5,:16], ip
716
        vst1.16         {d5[3]},  [r5,:16], ip
717
        vst1.16         {d6[1]},  [r5,:16], ip
718
        vst1.16         {d6[3]},  [r5,:16], ip
719
        vst1.16         {d7[1]},  [r5,:16], ip
720
        vst1.16         {d7[3]},  [r5,:16], ip
721
        bgt             6b
722
        pop             {r4-r8,pc}
723
8:      subs            lr,  lr,  #8
724
        vst1.16         {d0[1]},  [r5,:16], ip
725
        vst1.16         {d0[3]},  [r5,:16], ip
726
        vst1.16         {d1[1]},  [r5,:16], ip
727
        vst1.16         {d1[3]},  [r5,:16], ip
728
        vst1.16         {d2[1]},  [r5,:16], ip
729
        vst1.16         {d2[3]},  [r5,:16], ip
730
        vst1.16         {d3[1]},  [r5,:16], ip
731
        vst1.16         {d3[3]},  [r5,:16], ip
732
        popeq           {r4-r8,pc}
733
        vld1.64         {d0-d1},  [r4,:128]!
734
        vcvt.s32.f32    q0,  q0,  #16
735
        vld1.64         {d2-d3},  [r4,:128]!
736
        vcvt.s32.f32    q1,  q1,  #16
737
        b               6b
738
endfunc
739

  
740 403
function ff_vector_fmul_neon, export=1
741 404
        subs            r3,  r3,  #8
742 405
        vld1.64         {d0-d3},  [r1,:128]!
......
1050 713
        bx              lr
1051 714
endfunc
1052 715

  
1053
function ff_int32_to_float_fmul_scalar_neon, export=1
1054
VFP     vdup.32         q0,  d0[0]
1055
VFP     len     .req    r2
1056
NOVFP   vdup.32         q0,  r2
1057
NOVFP   len     .req    r3
1058

  
1059
        vld1.32         {q1},[r1,:128]!
1060
        vcvt.f32.s32    q3,  q1
1061
        vld1.32         {q2},[r1,:128]!
1062
        vcvt.f32.s32    q8,  q2
1063
1:      subs            len, len, #8
1064
        pld             [r1, #16]
1065
        vmul.f32        q9,  q3,  q0
1066
        vmul.f32        q10, q8,  q0
1067
        beq             2f
1068
        vld1.32         {q1},[r1,:128]!
1069
        vcvt.f32.s32    q3,  q1
1070
        vld1.32         {q2},[r1,:128]!
1071
        vcvt.f32.s32    q8,  q2
1072
        vst1.32         {q9}, [r0,:128]!
1073
        vst1.32         {q10},[r0,:128]!
1074
        b               1b
1075
2:      vst1.32         {q9}, [r0,:128]!
1076
        vst1.32         {q10},[r0,:128]!
1077
        bx              lr
1078
        .unreq  len
1079
endfunc
1080

  
1081 716
function ff_vector_fmul_reverse_neon, export=1
1082 717
        add             r2,  r2,  r3,  lsl #2
1083 718
        sub             r2,  r2,  #32
libavcodec/arm/dsputil_vfp.S
131 131
        vpop            {d8-d15}
132 132
        bx              lr
133 133
endfunc
134

  
135
#if HAVE_ARMV6
136
/**
137
 * ARM VFP optimized float to int16 conversion.
138
 * Assume that len is a positive number and is a multiple of 8, destination
139
 * buffer is at least 4-byte aligned (8-byte alignment is better for
140
 * performance), little-endian byte order
141
 */
142
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len)
143
function ff_float_to_int16_vfp, export=1
144
        push            {r4-r8,lr}
145
        vpush           {d8-d11}
146
        vldmia          r1!, {s16-s23}
147
        vcvt.s32.f32    s0,  s16
148
        vcvt.s32.f32    s1,  s17
149
        vcvt.s32.f32    s2,  s18
150
        vcvt.s32.f32    s3,  s19
151
        vcvt.s32.f32    s4,  s20
152
        vcvt.s32.f32    s5,  s21
153
        vcvt.s32.f32    s6,  s22
154
        vcvt.s32.f32    s7,  s23
155
1:
156
        subs            r2,  r2,  #8
157
        vmov            r3,  r4,  s0, s1
158
        vmov            r5,  r6,  s2, s3
159
        vmov            r7,  r8,  s4, s5
160
        vmov            ip,  lr,  s6, s7
161
        vldmiagt        r1!, {s16-s23}
162
        ssat            r4,  #16, r4
163
        ssat            r3,  #16, r3
164
        ssat            r6,  #16, r6
165
        ssat            r5,  #16, r5
166
        pkhbt           r3,  r3,  r4, lsl #16
167
        pkhbt           r4,  r5,  r6, lsl #16
168
        vcvtgt.s32.f32  s0,  s16
169
        vcvtgt.s32.f32  s1,  s17
170
        vcvtgt.s32.f32  s2,  s18
171
        vcvtgt.s32.f32  s3,  s19
172
        vcvtgt.s32.f32  s4,  s20
173
        vcvtgt.s32.f32  s5,  s21
174
        vcvtgt.s32.f32  s6,  s22
175
        vcvtgt.s32.f32  s7,  s23
176
        ssat            r8,  #16, r8
177
        ssat            r7,  #16, r7
178
        ssat            lr,  #16, lr
179
        ssat            ip,  #16, ip
180
        pkhbt           r5,  r7,  r8, lsl #16
181
        pkhbt           r6,  ip,  lr, lsl #16
182
        stmia           r0!, {r3-r6}
183
        bgt             1b
184

  
185
        vpop            {d8-d11}
186
        pop             {r4-r8,pc}
187
endfunc
188
#endif
libavcodec/arm/fmtconvert_init_arm.c
1
/*
2
 * ARM optimized Format Conversion Utils
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

  
21
#include <stdint.h>
22

  
23
#include "libavcodec/avcodec.h"
24
#include "libavcodec/fmtconvert.h"
25

  
26
void ff_int32_to_float_fmul_scalar_neon(float *dst, const int *src,
27
                                        float mul, int len);
28

  
29
void ff_float_to_int16_neon(int16_t *dst, const float *src, long len);
30
void ff_float_to_int16_interleave_neon(int16_t *, const float **, long, int);
31

  
32
void ff_float_to_int16_vfp(int16_t *dst, const float *src, long len);
33

  
34
void ff_fmt_convert_init_arm(FmtConvertContext *c, AVCodecContext *avctx)
35
{
36
    if (HAVE_ARMVFP && HAVE_ARMV6) {
37
        c->float_to_int16 = ff_float_to_int16_vfp;
38
    }
39

  
40
    if (HAVE_NEON) {
41
        c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_neon;
42

  
43
        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
44
            c->float_to_int16            = ff_float_to_int16_neon;
45
            c->float_to_int16_interleave = ff_float_to_int16_interleave_neon;
46
        }
47
    }
48
}
libavcodec/arm/fmtconvert_neon.S
1
/*
2
 * ARM NEON optimised Format Conversion Utils
3
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21

  
22
#include "config.h"
23
#include "asm.S"
24

  
25
        preserve8
26
        .text
27

  
28
function ff_float_to_int16_neon, export=1
29
        subs            r2,  r2,  #8
30
        vld1.64         {d0-d1},  [r1,:128]!
31
        vcvt.s32.f32    q8,  q0,  #16
32
        vld1.64         {d2-d3},  [r1,:128]!
33
        vcvt.s32.f32    q9,  q1,  #16
34
        beq             3f
35
        bics            ip,  r2,  #15
36
        beq             2f
37
1:      subs            ip,  ip,  #16
38
        vshrn.s32       d4,  q8,  #16
39
        vld1.64         {d0-d1},  [r1,:128]!
40
        vcvt.s32.f32    q0,  q0,  #16
41
        vshrn.s32       d5,  q9,  #16
42
        vld1.64         {d2-d3},  [r1,:128]!
43
        vcvt.s32.f32    q1,  q1,  #16
44
        vshrn.s32       d6,  q0,  #16
45
        vst1.64         {d4-d5},  [r0,:128]!
46
        vshrn.s32       d7,  q1,  #16
47
        vld1.64         {d16-d17},[r1,:128]!
48
        vcvt.s32.f32    q8,  q8,  #16
49
        vld1.64         {d18-d19},[r1,:128]!
50
        vcvt.s32.f32    q9,  q9,  #16
51
        vst1.64         {d6-d7},  [r0,:128]!
52
        bne             1b
53
        ands            r2,  r2,  #15
54
        beq             3f
55
2:      vld1.64         {d0-d1},  [r1,:128]!
56
        vshrn.s32       d4,  q8,  #16
57
        vcvt.s32.f32    q0,  q0,  #16
58
        vld1.64         {d2-d3},  [r1,:128]!
59
        vshrn.s32       d5,  q9,  #16
60
        vcvt.s32.f32    q1,  q1,  #16
61
        vshrn.s32       d6,  q0,  #16
62
        vst1.64         {d4-d5},  [r0,:128]!
63
        vshrn.s32       d7,  q1,  #16
64
        vst1.64         {d6-d7},  [r0,:128]!
65
        bx              lr
66
3:      vshrn.s32       d4,  q8,  #16
67
        vshrn.s32       d5,  q9,  #16
68
        vst1.64         {d4-d5},  [r0,:128]!
69
        bx              lr
70
endfunc
71

  
72
@ Convert several planar float channels to one interleaved int16 stream.
@ Register use, as read from the code below:
@   r0 = destination, r1 = array of per-channel source pointers,
@   r2 = samples per channel, r3 = channel count.
@ Fewer than 2 channels tail-calls ff_float_to_int16_neon on the first
@ source pointer, exactly 2 channels uses the dedicated stereo path, and
@ more than 2 channels are consumed in groups of 4, then 2, then 1 with
@ an output stride of 2 * channels bytes (kept in ip).
@ Samples are consumed in batches of 8 or 16 and loaded with 16-byte
@ alignment hints, so r2 is expected to be a positive multiple of 8.
@ The numeric labels are reused; Nf/Nb refer to the nearest label N
@ forward/backward.
function ff_float_to_int16_interleave_neon, export=1
73
        cmp             r3, #2
74
        ldrlt           r1, [r1]                @ channels < 2: pass the first channel pointer as src
75
        blt             ff_float_to_int16_neon  @ tail call, r0 and r2 are already in place
76
        bne             4f                      @ channels > 2: generic path below
77

  
78
        ldr             r3, [r1]                @ stereo: r3 = first channel, r1 = second channel
79
        ldr             r1, [r1, #4]
80

  
81
        subs            r2,  r2,  #8            @ r2 = len - 8, Z set when len == 8
82
        vld1.64         {d0-d1},  [r3,:128]!
83
        vcvt.s32.f32    q8,  q0,  #16           @ q8/q9 = first channel, fixed point with 16 fraction bits
84
        vld1.64         {d2-d3},  [r3,:128]!
85
        vcvt.s32.f32    q9,  q1,  #16
86
        vld1.64         {d20-d21},[r1,:128]!
87
        vcvt.s32.f32    q10, q10, #16           @ q10/q11 = second channel
88
        vld1.64         {d22-d23},[r1,:128]!
89
        vcvt.s32.f32    q11, q11, #16
90
        beq             3f                      @ exactly 8 samples per channel
91
        bics            ip,  r2,  #15           @ ip = (len - 8) rounded down to a multiple of 16
92
        beq             2f
93
1:      subs            ip,  ip,  #16           @ stereo main loop, 16 samples per channel per iteration
94
        vld1.64         {d0-d1},  [r3,:128]!
95
        vcvt.s32.f32    q0,  q0,  #16
96
        vsri.32         q10, q8,  #16           @ each lane: upper half stays (second channel), lower half = upper half of first channel
97
        vld1.64         {d2-d3},  [r3,:128]!
98
        vcvt.s32.f32    q1,  q1,  #16
99
        vld1.64         {d24-d25},[r1,:128]!
100
        vcvt.s32.f32    q12, q12, #16
101
        vld1.64         {d26-d27},[r1,:128]!
102
        vsri.32         q11, q9,  #16
103
        vst1.64         {d20-d21},[r0,:128]!
104
        vcvt.s32.f32    q13, q13, #16
105
        vst1.64         {d22-d23},[r0,:128]!
106
        vsri.32         q12, q0,  #16
107
        vld1.64         {d16-d17},[r3,:128]!    @ preload and convert 8 samples per channel for the next round
108
        vsri.32         q13, q1,  #16
109
        vst1.64         {d24-d25},[r0,:128]!
110
        vcvt.s32.f32    q8,  q8,  #16
111
        vld1.64         {d18-d19},[r3,:128]!
112
        vcvt.s32.f32    q9,  q9,  #16
113
        vld1.64         {d20-d21},[r1,:128]!
114
        vcvt.s32.f32    q10, q10, #16
115
        vld1.64         {d22-d23},[r1,:128]!
116
        vcvt.s32.f32    q11, q11, #16
117
        vst1.64         {d26-d27},[r0,:128]!
118
        bne             1b                      @ tests the flags set by the subs at 1:
119
        ands            r2,  r2,  #15           @ samples left beyond the 8 pending in q8-q11
120
        beq             3f
121
2:      vsri.32         q10, q8,  #16           @ stereo tail: 8 pending samples plus 8 more per channel
122
        vld1.64         {d0-d1},  [r3,:128]!
123
        vcvt.s32.f32    q0,  q0,  #16
124
        vld1.64         {d2-d3},  [r3,:128]!
125
        vcvt.s32.f32    q1,  q1,  #16
126
        vld1.64         {d24-d25},[r1,:128]!
127
        vcvt.s32.f32    q12, q12, #16
128
        vsri.32         q11, q9,  #16
129
        vld1.64         {d26-d27},[r1,:128]!
130
        vcvt.s32.f32    q13, q13, #16
131
        vst1.64         {d20-d21},[r0,:128]!
132
        vsri.32         q12, q0,  #16
133
        vst1.64         {d22-d23},[r0,:128]!
134
        vsri.32         q13, q1,  #16
135
        vst1.64         {d24-d27},[r0,:128]!
136
        bx              lr
137
3:      vsri.32         q10, q8,  #16           @ stereo tail: only the 8 pending samples per channel remain
138
        vsri.32         q11, q9,  #16
139
        vst1.64         {d20-d23},[r0,:128]!
140
        bx              lr
141

  
142
4:      push            {r4-r8,lr}              @ generic path for more than 2 channels
143
        cmp             r3,  #4
144
        lsl             ip,  r3,  #1            @ ip = 2 * channels = bytes per output frame
145
        blt             4f                      @ fewer than 4 channels: skip to the 2-channel group
146

  
147
        @ 4 channels
148
5:      ldmia           r1!, {r4-r7}            @ fetch the next four channel pointers
149
        mov             lr,  r2                 @ lr = samples left for this group
150
        mov             r8,  r0                 @ r8 = write cursor for this group
151
        vld1.64         {d16-d17},[r4,:128]!
152
        vcvt.s32.f32    q8,  q8,  #16
153
        vld1.64         {d18-d19},[r5,:128]!
154
        vcvt.s32.f32    q9,  q9,  #16
155
        vld1.64         {d20-d21},[r6,:128]!
156
        vcvt.s32.f32    q10, q10, #16
157
        vld1.64         {d22-d23},[r7,:128]!
158
        vcvt.s32.f32    q11, q11, #16
159
6:      subs            lr,  lr,  #8            @ 8 frames per iteration: 4 from q8-q11, 4 from q0-q3
160
        vld1.64         {d0-d1},  [r4,:128]!
161
        vcvt.s32.f32    q0,  q0,  #16
162
        vsri.32         q9,  q8,  #16           @ q9 lanes = int16 pair from the r4 and r5 channels
163
        vld1.64         {d2-d3},  [r5,:128]!
164
        vcvt.s32.f32    q1,  q1,  #16
165
        vsri.32         q11, q10, #16           @ q11 lanes = int16 pair from the r6 and r7 channels
166
        vld1.64         {d4-d5},  [r6,:128]!
167
        vcvt.s32.f32    q2,  q2,  #16
168
        vzip.32         d18, d22                @ after the zips d18, d22, d19, d23 each hold one 4-channel frame
169
        vld1.64         {d6-d7},  [r7,:128]!
170
        vcvt.s32.f32    q3,  q3,  #16
171
        vzip.32         d19, d23
172
        vst1.64         {d18},    [r8], ip      @ 8 bytes per frame, then advance by the frame stride
173
        vsri.32         q1,  q0,  #16
174
        vst1.64         {d22},    [r8], ip
175
        vsri.32         q3,  q2,  #16
176
        vst1.64         {d19},    [r8], ip
177
        vzip.32         d2,  d6
178
        vst1.64         {d23},    [r8], ip
179
        vzip.32         d3,  d7
180
        beq             7f                      @ lr reached 0: flush the last 4 frames at 7:
181
        vld1.64         {d16-d17},[r4,:128]!
182
        vcvt.s32.f32    q8,  q8,  #16
183
        vst1.64         {d2},     [r8], ip
184
        vld1.64         {d18-d19},[r5,:128]!
185
        vcvt.s32.f32    q9,  q9,  #16
186
        vst1.64         {d6},     [r8], ip
187
        vld1.64         {d20-d21},[r6,:128]!
188
        vcvt.s32.f32    q10, q10, #16
189
        vst1.64         {d3},     [r8], ip
190
        vld1.64         {d22-d23},[r7,:128]!
191
        vcvt.s32.f32    q11, q11, #16
192
        vst1.64         {d7},     [r8], ip
193
        b               6b
194
7:      vst1.64         {d2},     [r8], ip
195
        vst1.64         {d6},     [r8], ip
196
        vst1.64         {d3},     [r8], ip
197
        vst1.64         {d7},     [r8], ip
198
        subs            r3,  r3,  #4            @ four channels done
199
        popeq           {r4-r8,pc}              @ no channels left: return
200
        cmp             r3,  #4
201
        add             r0,  r0,  #8            @ next group starts 4 int16 further into each frame
202
        bge             5b
203

  
204
        @ 2 channels
205
4:      cmp             r3,  #2
206
        blt             4f                      @ only one channel left
207
        ldmia           r1!, {r4-r5}            @ fetch the next two channel pointers
208
        mov             lr,  r2
209
        mov             r8,  r0
210
        tst             lr,  #8                 @ Z set when the length has bit 3 clear
211
        vld1.64         {d16-d17},[r4,:128]!
212
        vcvt.s32.f32    q8,  q8,  #16
213
        vld1.64         {d18-d19},[r5,:128]!
214
        vcvt.s32.f32    q9,  q9,  #16
215
        vld1.64         {d20-d21},[r4,:128]!
216
        vcvt.s32.f32    q10, q10, #16
217
        vld1.64         {d22-d23},[r5,:128]!
218
        vcvt.s32.f32    q11, q11, #16
219
        beq             6f                      @ bit 3 clear: go straight to the 16-frame loop
220
        subs            lr,  lr,  #8            @ otherwise peel one batch of 8 frames first
221
        beq             7f                      @ that batch was all there is
222
        vsri.32         d18, d16, #16           @ pack channel pairs, one 32-bit lane per frame
223
        vsri.32         d19, d17, #16
224
        vld1.64         {d16-d17},[r4,:128]!
225
        vcvt.s32.f32    q8,  q8,  #16
226
        vst1.32         {d18[0]}, [r8], ip      @ 4 bytes per frame, then advance by the frame stride
227
        vsri.32         d22, d20, #16
228
        vst1.32         {d18[1]}, [r8], ip
229
        vsri.32         d23, d21, #16
230
        vst1.32         {d19[0]}, [r8], ip
231
        vst1.32         {d19[1]}, [r8], ip
232
        vld1.64         {d18-d19},[r5,:128]!
233
        vcvt.s32.f32    q9,  q9,  #16
234
        vst1.32         {d22[0]}, [r8], ip
235
        vst1.32         {d22[1]}, [r8], ip
236
        vld1.64         {d20-d21},[r4,:128]!
237
        vcvt.s32.f32    q10, q10, #16
238
        vst1.32         {d23[0]}, [r8], ip
239
        vst1.32         {d23[1]}, [r8], ip
240
        vld1.64         {d22-d23},[r5,:128]!
241
        vcvt.s32.f32    q11, q11, #16
242
6:      subs            lr,  lr,  #16           @ 16 frames per iteration: 8 from q8-q11, 8 from q0-q3
243
        vld1.64         {d0-d1},  [r4,:128]!
244
        vcvt.s32.f32    q0,  q0,  #16
245
        vsri.32         d18, d16, #16
246
        vld1.64         {d2-d3},  [r5,:128]!
247
        vcvt.s32.f32    q1,  q1,  #16
248
        vsri.32         d19, d17, #16
249
        vld1.64         {d4-d5},  [r4,:128]!
250
        vcvt.s32.f32    q2,  q2,  #16
251
        vld1.64         {d6-d7},  [r5,:128]!
252
        vcvt.s32.f32    q3,  q3,  #16
253
        vst1.32         {d18[0]}, [r8], ip
254
        vsri.32         d22, d20, #16
255
        vst1.32         {d18[1]}, [r8], ip
256
        vsri.32         d23, d21, #16
257
        vst1.32         {d19[0]}, [r8], ip
258
        vsri.32         d2,  d0,  #16
259
        vst1.32         {d19[1]}, [r8], ip
260
        vsri.32         d3,  d1,  #16
261
        vst1.32         {d22[0]}, [r8], ip
262
        vsri.32         d6,  d4,  #16
263
        vst1.32         {d22[1]}, [r8], ip
264
        vsri.32         d7,  d5,  #16
265
        vst1.32         {d23[0]}, [r8], ip
266
        vst1.32         {d23[1]}, [r8], ip
267
        beq             6f                      @ lr reached 0: flush the last 8 frames at the next 6:
268
        vld1.64         {d16-d17},[r4,:128]!
269
        vcvt.s32.f32    q8,  q8,  #16
270
        vst1.32         {d2[0]},  [r8], ip
271
        vst1.32         {d2[1]},  [r8], ip
272
        vld1.64         {d18-d19},[r5,:128]!
273
        vcvt.s32.f32    q9,  q9,  #16
274
        vst1.32         {d3[0]},  [r8], ip
275
        vst1.32         {d3[1]},  [r8], ip
276
        vld1.64         {d20-d21},[r4,:128]!
277
        vcvt.s32.f32    q10, q10, #16
278
        vst1.32         {d6[0]},  [r8], ip
279
        vst1.32         {d6[1]},  [r8], ip
280
        vld1.64         {d22-d23},[r5,:128]!
281
        vcvt.s32.f32    q11, q11, #16
282
        vst1.32         {d7[0]},  [r8], ip
283
        vst1.32         {d7[1]},  [r8], ip
284
        bgt             6b
285
6:      vst1.32         {d2[0]},  [r8], ip
286
        vst1.32         {d2[1]},  [r8], ip
287
        vst1.32         {d3[0]},  [r8], ip
288
        vst1.32         {d3[1]},  [r8], ip
289
        vst1.32         {d6[0]},  [r8], ip
290
        vst1.32         {d6[1]},  [r8], ip
291
        vst1.32         {d7[0]},  [r8], ip
292
        vst1.32         {d7[1]},  [r8], ip
293
        b               8f
294
7:      vsri.32         d18, d16, #16           @ length was exactly 8: pack and store the single batch
295
        vsri.32         d19, d17, #16
296
        vst1.32         {d18[0]}, [r8], ip
297
        vsri.32         d22, d20, #16
298
        vst1.32         {d18[1]}, [r8], ip
299
        vsri.32         d23, d21, #16
300
        vst1.32         {d19[0]}, [r8], ip
301
        vst1.32         {d19[1]}, [r8], ip
302
        vst1.32         {d22[0]}, [r8], ip
303
        vst1.32         {d22[1]}, [r8], ip
304
        vst1.32         {d23[0]}, [r8], ip
305
        vst1.32         {d23[1]}, [r8], ip
306
8:      subs            r3,  r3,  #2            @ two channels done
307
        add             r0,  r0,  #4            @ next group starts 2 int16 further into each frame
308
        popeq           {r4-r8,pc}              @ no channels left: return
309

  
310
        @ 1 channel
311
4:      ldr             r4,  [r1],#4            @ fetch the last channel pointer
312
        tst             r2,  #8                 @ Z clear when the length has bit 3 set
313
        mov             lr,  r2
314
        mov             r5,  r0
315
        vld1.64         {d0-d1},  [r4,:128]!
316
        vcvt.s32.f32    q0,  q0,  #16
317
        vld1.64         {d2-d3},  [r4,:128]!
318
        vcvt.s32.f32    q1,  q1,  #16
319
        bne             8f                      @ bit 3 set: peel one batch of 8 samples at 8:
320
6:      subs            lr,  lr,  #16           @ 16 samples per iteration: 8 from q0/q1, 8 from q2/q3
321
        vld1.64         {d4-d5},  [r4,:128]!
322
        vcvt.s32.f32    q2,  q2,  #16
323
        vld1.64         {d6-d7},  [r4,:128]!
324
        vcvt.s32.f32    q3,  q3,  #16
325
        vst1.16         {d0[1]},  [r5,:16], ip  @ odd 16-bit lanes are the upper halves, i.e. the int16 samples
326
        vst1.16         {d0[3]},  [r5,:16], ip
327
        vst1.16         {d1[1]},  [r5,:16], ip
328
        vst1.16         {d1[3]},  [r5,:16], ip
329
        vst1.16         {d2[1]},  [r5,:16], ip
330
        vst1.16         {d2[3]},  [r5,:16], ip
331
        vst1.16         {d3[1]},  [r5,:16], ip
332
        vst1.16         {d3[3]},  [r5,:16], ip
333
        beq             7f                      @ lr reached 0: skip the preload
334
        vld1.64         {d0-d1},  [r4,:128]!
335
        vcvt.s32.f32    q0,  q0,  #16
336
        vld1.64         {d2-d3},  [r4,:128]!
337
        vcvt.s32.f32    q1,  q1,  #16
338
7:      vst1.16         {d4[1]},  [r5,:16], ip
339
        vst1.16         {d4[3]},  [r5,:16], ip
340
        vst1.16         {d5[1]},  [r5,:16], ip
341
        vst1.16         {d5[3]},  [r5,:16], ip
342
        vst1.16         {d6[1]},  [r5,:16], ip
343
        vst1.16         {d6[3]},  [r5,:16], ip
344
        vst1.16         {d7[1]},  [r5,:16], ip
345
        vst1.16         {d7[3]},  [r5,:16], ip
346
        bgt             6b                      @ tests the flags set by the subs at 6:
347
        pop             {r4-r8,pc}
348
8:      subs            lr,  lr,  #8            @ peeled batch of 8 samples
349
        vst1.16         {d0[1]},  [r5,:16], ip
350
        vst1.16         {d0[3]},  [r5,:16], ip
351
        vst1.16         {d1[1]},  [r5,:16], ip
352
        vst1.16         {d1[3]},  [r5,:16], ip
353
        vst1.16         {d2[1]},  [r5,:16], ip
354
        vst1.16         {d2[3]},  [r5,:16], ip
355
        vst1.16         {d3[1]},  [r5,:16], ip
356
        vst1.16         {d3[3]},  [r5,:16], ip
357
        popeq           {r4-r8,pc}              @ length was exactly 8: return
358
        vld1.64         {d0-d1},  [r4,:128]!
359
        vcvt.s32.f32    q0,  q0,  #16
360
        vld1.64         {d2-d3},  [r4,:128]!
361
        vcvt.s32.f32    q1,  q1,  #16
362
        b               6b
363
endfunc
364

  
365
@ Convert int32 values to float and multiply each by one scalar.
@ Register use, as read from the code below:
@   r0 = destination floats, r1 = source int32 values (both accessed
@   with 16-byte alignment hints).
@   On VFP-prefixed lines the scalar is taken from d0[0] and the length
@   from r2; on NOVFP-prefixed lines the scalar is taken from r2 and the
@   length from r3.
@ NOTE(review): VFP/NOVFP are not defined in this chunk; presumably they
@ are asm.S macros that keep a line only for the matching float argument
@ passing convention - confirm.
@ 8 values are loaded before the length is tested and the loop exits
@ only when the counter reaches exactly 0, so the length is expected to
@ be a positive multiple of 8.
function ff_int32_to_float_fmul_scalar_neon, export=1
366
VFP     vdup.32         q0,  d0[0]
367
VFP     len     .req    r2
368
NOVFP   vdup.32         q0,  r2
369
NOVFP   len     .req    r3
370

  
371
        vld1.32         {q1},[r1,:128]!
372
        vcvt.f32.s32    q3,  q1                 @ q3/q8 = first 8 values as float
373
        vld1.32         {q2},[r1,:128]!
374
        vcvt.f32.s32    q8,  q2
375
1:      subs            len, len, #8            @ 8 values per iteration
376
        pld             [r1, #16]
377
        vmul.f32        q9,  q3,  q0            @ scale by the broadcast scalar in q0
378
        vmul.f32        q10, q8,  q0
379
        beq             2f                      @ counter reached 0: store the last 8 results at 2:
380
        vld1.32         {q1},[r1,:128]!         @ load and convert the next 8 before storing
381
        vcvt.f32.s32    q3,  q1
382
        vld1.32         {q2},[r1,:128]!
383
        vcvt.f32.s32    q8,  q2
384
        vst1.32         {q9}, [r0,:128]!
385
        vst1.32         {q10},[r0,:128]!
386
        b               1b
387
2:      vst1.32         {q9}, [r0,:128]!
388
        vst1.32         {q10},[r0,:128]!
389
        bx              lr
390
        .unreq  len
391
endfunc
libavcodec/arm/fmtconvert_vfp.S
1
/*
2
 * Copyright (c) 2008 Siarhei Siamashka <ssvb@users.sourceforge.net>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

  
21
#include "config.h"
22
#include "asm.S"
23

  
24
        .syntax unified
25

  
26
/**
27
 * ARM VFP optimized float to int16 conversion.
28
 * Assume that len is a positive number and is a multiple of 8, destination
29
 * buffer is at least 4 bytes aligned (8 bytes alignment is better for
30
 * performance), little-endian byte order
31
 */
32
@ void ff_float_to_int16_vfp(int16_t *dst, const float *src, int len)
33
@ Register use, as read from the code below: r0 = dst, r1 = src,
@ r2 = len. Each iteration converts 8 floats with VFP, saturates the
@ integers to 16 bits with ssat and packs them two per word with pkhbt.
@ The conversion of the next 8 floats is overlapped with the packing of
@ the current 8 and is predicated on more samples remaining (gt).
function ff_float_to_int16_vfp, export=1
34
        push            {r4-r8,lr}
35
        vpush           {d8-d11}                @ saved because s16-s23 are used as the load buffer
36
        vldmia          r1!, {s16-s23}          @ load the first 8 floats
37
        vcvt.s32.f32    s0,  s16
38
        vcvt.s32.f32    s1,  s17
39
        vcvt.s32.f32    s2,  s18
40
        vcvt.s32.f32    s3,  s19
41
        vcvt.s32.f32    s4,  s20
42
        vcvt.s32.f32    s5,  s21
43
        vcvt.s32.f32    s6,  s22
44
        vcvt.s32.f32    s7,  s23
45
1:
46
        subs            r2,  r2,  #8            @ 8 samples per iteration, flags drive every gt below
47
        vmov            r3,  r4,  s0, s1        @ move the 8 converted integers to core registers
48
        vmov            r5,  r6,  s2, s3
49
        vmov            r7,  r8,  s4, s5
50
        vmov            ip,  lr,  s6, s7
51
        vldmiagt        r1!, {s16-s23}          @ fetch the next 8 floats only if samples remain
52
        ssat            r4,  #16, r4            @ saturate to the signed 16-bit range
53
        ssat            r3,  #16, r3
54
        ssat            r6,  #16, r6
55
        ssat            r5,  #16, r5
56
        pkhbt           r3,  r3,  r4, lsl #16   @ r3 = sample 0 in the low half, sample 1 in the high half
57
        pkhbt           r4,  r5,  r6, lsl #16   @ r4 = samples 2 and 3
58
        vcvtgt.s32.f32  s0,  s16
59
        vcvtgt.s32.f32  s1,  s17
60
        vcvtgt.s32.f32  s2,  s18
61
        vcvtgt.s32.f32  s3,  s19
62
        vcvtgt.s32.f32  s4,  s20
63
        vcvtgt.s32.f32  s5,  s21
64
        vcvtgt.s32.f32  s6,  s22
65
        vcvtgt.s32.f32  s7,  s23
66
        ssat            r8,  #16, r8
67
        ssat            r7,  #16, r7
68
        ssat            lr,  #16, lr
69
        ssat            ip,  #16, ip
70
        pkhbt           r5,  r7,  r8, lsl #16   @ r5 = samples 4 and 5
71
        pkhbt           r6,  ip,  lr, lsl #16   @ r6 = samples 6 and 7
72
        stmia           r0!, {r3-r6}            @ store 8 int16 samples (16 bytes)
73
        bgt             1b
74

  
75
        vpop            {d8-d11}
76
        pop             {r4-r8,pc}
77
endfunc
libavcodec/binkaudio.c
33 33
#include "get_bits.h"
34 34
#include "dsputil.h"
35 35
#include "fft.h"
36
#include "fmtconvert.h"
36 37

  
37 38
extern const uint16_t ff_wma_critical_freqs[25];
38 39

  
......
43 44
    AVCodecContext *avctx;
44 45
    GetBitContext gb;
45 46
    DSPContext dsp;
47
    FmtConvertContext fmt_conv;
46 48
    int first;
47 49
    int channels;
48 50
    int frame_len;          ///< transform size (samples)
......
71 73

  
72 74
    s->avctx = avctx;
73 75
    dsputil_init(&s->dsp, avctx);
76
    ff_fmt_convert_init(&s->fmt_conv, avctx);
74 77

  
75 78
    /* determine frame length */
76 79
    if (avctx->sample_rate < 22050) {
......
222 225
            ff_rdft_calc(&s->trans.rdft, coeffs);
223 226
    }
224 227

  
225
    s->dsp.float_to_int16_interleave(out, (const float **)s->coeffs_ptr, s->frame_len, s->channels);
228
    s->fmt_conv.float_to_int16_interleave(out, (const float **)s->coeffs_ptr,
229
                                          s->frame_len, s->channels);
226 230

  
227 231
    if (!s->first) {
228 232
        int count = s->overlap_len * s->channels;
libavcodec/dca.c
40 40
#include "dca.h"
41 41
#include "synth_filter.h"
42 42
#include "dcadsp.h"
43
#include "fmtconvert.h"
43 44

  
44 45
//#define TRACE
45 46

  
......
347 348
    FFTContext imdct;
348 349
    SynthFilterContext synth;
349 350
    DCADSPContext dcadsp;
351
    FmtConvertContext fmt_conv;
350 352
} DCAContext;
351 353

  
352 354
static const uint16_t dca_vlc_offs[] = {
......
1115 1117
                        block[m] = get_bitalloc(&s->gb, &dca_smpl_bitalloc[abits], sel);
1116 1118
                }
1117 1119

  
1118
                s->dsp.int32_to_float_fmul_scalar(subband_samples[k][l],
1120
                s->fmt_conv.int32_to_float_fmul_scalar(subband_samples[k][l],
1119 1121
                                                  block, rscale, 8);
1120 1122
            }
1121 1123

  
......
1802 1804
            }
1803 1805
        }
1804 1806

  
1805
        s->dsp.float_to_int16_interleave(samples, s->samples_chanptr, 256, channels);
1807
        s->fmt_conv.float_to_int16_interleave(samples, s->samples_chanptr, 256, channels);
1806 1808
        samples += 256 * channels;
1807 1809
    }
1808 1810

  
......
1835 1837
    ff_mdct_init(&s->imdct, 6, 1, 1.0);
1836 1838
    ff_synth_filter_init(&s->synth);
1837 1839
    ff_dcadsp_init(&s->dcadsp);
1840
    ff_fmt_convert_init(&s->fmt_conv, avctx);
1838 1841

  
1839 1842
    for (i = 0; i < DCA_PRIM_CHANNELS_MAX+1; i++)
1840 1843
        s->samples_chanptr[i] = s->samples + i * 256;
libavcodec/dsputil.c
3867 3867
    return p;
3868 3868
}
3869 3869

  
3870
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
3871
    int i;
3872
    for(i=0; i<len; i++)
3873
        dst[i] = src[i] * mul;
3874
}
3875

  
3876 3870
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
3877 3871
                   uint32_t maxi, uint32_t maxisign)
3878 3872
{
......
3918 3912
    }
3919 3913
}
3920 3914

  
3921
static av_always_inline int float_to_int16_one(const float *src){
3922
    return av_clip_int16(lrintf(*src));
3923
}
3924

  
3925
static void ff_float_to_int16_c(int16_t *dst, const float *src, long len){
3926
    int i;
3927
    for(i=0; i<len; i++)
3928
        dst[i] = float_to_int16_one(src+i);
3929
}
3930

  
3931
static void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, int channels){
3932
    int i,j,c;
3933
    if(channels==2){
3934
        for(i=0; i<len; i++){
3935
            dst[2*i]   = float_to_int16_one(src[0]+i);
3936
            dst[2*i+1] = float_to_int16_one(src[1]+i);
3937
        }
3938
    }else{
3939
        for(c=0; c<channels; c++)
3940
            for(i=0, j=c; i<len; i++, j+=channels)
3941
                dst[j] = float_to_int16_one(src[c]+i);
3942
    }
3943
}
3944

  
3945 3915
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
3946 3916
{
3947 3917
    int res = 0;
......
4437 4407
    c->vector_fmul_reverse = vector_fmul_reverse_c;
4438 4408
    c->vector_fmul_add = vector_fmul_add_c;
4439 4409
    c->vector_fmul_window = vector_fmul_window_c;
4440
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
4441 4410
    c->vector_clipf = vector_clipf_c;
4442
    c->float_to_int16 = ff_float_to_int16_c;
4443
    c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
4444 4411
    c->scalarproduct_int16 = scalarproduct_int16_c;
4445 4412
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
4446 4413
    c->scalarproduct_float = scalarproduct_float_c;
libavcodec/dsputil.h
392 392
    /* assume len is a multiple of 4, and arrays are 16-byte aligned */
393 393
    void (*vector_fmul_window)(float *dst, const float *src0, const float *src1, const float *win, int len);
394 394
    /* assume len is a multiple of 8, and arrays are 16-byte aligned */
395
    void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
396 395
    void (*vector_clipf)(float *dst /* align 16 */, const float *src /* align 16 */, float min, float max, int len /* align 16 */);
397 396
    /**
398 397
     * Multiply a vector of floats by a scalar float.  Source and
......
445 444
     */
446 445
    void (*butterflies_float)(float *restrict v1, float *restrict v2, int len);
447 446

  
448
    /* convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
449
    void (*float_to_int16)(int16_t *dst, const float *src, long len);
450
    void (*float_to_int16_interleave)(int16_t *dst, const float **src, long len, int channels);
451

  
452 447
    /* (I)DCT */
453 448
    void (*fdct)(DCTELEM *block/* align 16*/);
454 449
    void (*fdct248)(DCTELEM *block/* align 16*/);
libavcodec/fmtconvert.c
1
/*
2
 * Format Conversion Utils
3
 * Copyright (c) 2000, 2001 Fabrice Bellard
4
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

  
23
#include "avcodec.h"
24
#include "fmtconvert.h"
25

  
26
/**
 * Plain C version of int32_to_float_fmul_scalar: for every index i in
 * [0, len) store src[i] * mul into dst[i]. A len of 0 or less writes nothing.
 */
static void int32_to_float_fmul_scalar_c(float *dst, const int *src, float mul, int len){
27
    int i;
28
    for(i=0; i<len; i++)
29
        dst[i] = src[i] * mul; /* the int operand is converted to float for the multiply */
30
}
31

  
32
/**
 * Convert the single float at src: round it with lrintf() and pass the
 * result through av_clip_int16().
 * NOTE(review): av_clip_int16() is defined outside this chunk; presumably
 * it clamps to the int16_t range - confirm.
 */
static av_always_inline int float_to_int16_one(const float *src){
33
    return av_clip_int16(lrintf(*src));
34
}
35

  
36
/**
 * Plain C version of float_to_int16: convert src[0..len-1] one element at
 * a time with float_to_int16_one() and store the results in dst.
 */
static void float_to_int16_c(int16_t *dst, const float *src, long len)
37
{
38
    int i; /* NOTE(review): index is int while len is long */
39
    for(i=0; i<len; i++)
40
        dst[i] = float_to_int16_one(src+i);
41
}
42

  
43
/**
 * Plain C version of float_to_int16_interleave: src[c] points to the
 * samples of channel c; sample i of channel c is converted with
 * float_to_int16_one() and stored at dst[i * channels + c].
 * Two channels are handled by a dedicated loop that walks the output
 * sequentially; any other channel count is converted channel by channel
 * with a stride of channels elements.
 */
static void float_to_int16_interleave_c(int16_t *dst, const float **src,
44
                                        long len, int channels)
45
{
46
    int i,j,c; /* NOTE(review): i is int while len is long */
47
    if(channels==2){
48
        for(i=0; i<len; i++){
49
            dst[2*i]   = float_to_int16_one(src[0]+i);
50
            dst[2*i+1] = float_to_int16_one(src[1]+i);
51
        }
52
    }else{
53
        for(c=0; c<channels; c++)
54
            for(i=0, j=c; i<len; i++, j+=channels) /* j = output index of sample i for channel c */
55
                dst[j] = float_to_int16_one(src[c]+i);
56
    }
57
}
58

  
59
/**
 * Fill in a FmtConvertContext: install the C implementations from this
 * file, then call each architecture-specific init whose guard is true so
 * it can replace them.
 * NOTE(review): ARCH_ARM, ARCH_PPC and HAVE_MMX are not defined in this
 * chunk; presumably they are build-configuration constants - confirm.
 *
 * @param c     context whose function pointers are set
 * @param avctx codec context, only forwarded to the per-architecture inits
 */
av_cold void ff_fmt_convert_init(FmtConvertContext *c, AVCodecContext *avctx)
60
{
61
    c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_c;
62
    c->float_to_int16             = float_to_int16_c;
63
    c->float_to_int16_interleave  = float_to_int16_interleave_c;
64

  
65
    if (ARCH_ARM) ff_fmt_convert_init_arm(c, avctx);
66
    if (ARCH_PPC) ff_fmt_convert_init_ppc(c, avctx);
67
    if (HAVE_MMX) ff_fmt_convert_init_x86(c, avctx);
68
}
libavcodec/fmtconvert.h
1
/*
2
 * Format Conversion Utils
3
 * Copyright (c) 2000, 2001 Fabrice Bellard
4
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22

  
23
#ifndef AVCODEC_FMTCONVERT_H
24
#define AVCODEC_FMTCONVERT_H
25

  
26
#include "avcodec.h"
27

  
28
typedef struct FmtConvertContext {
29
    /**
30
     * Convert an array of int32_t to float and multiply by a float value.
31
     * @param dst destination array of float.
32
     *            constraints: 16-byte aligned
33
     * @param src source array of int32_t.
34
     *            constraints: 16-byte aligned
35
     * @param len number of elements to convert.
36
     *            constraints: multiple of 8
37
     */
38
    void (*int32_to_float_fmul_scalar)(float *dst, const int *src, float mul, int len);
39

  
40
    /**
41
     * Convert an array of float to an array of int16_t.
42
     *
43
     * Convert floats in the range [-32768.0,32767.0] to ints
44
     * without rescaling
45
     *
46
     * @param dst destination array of int16_t.
47
     *            constraints: 16-byte aligned
48
     * @param src source array of float.
49
     *            constraints: 16-byte aligned
50
     * @param len number of elements to convert.
51
     *            constraints: multiple of 8
52
     */
53
    void (*float_to_int16)(int16_t *dst, const float *src, long len);
54

  
55
    /**
56
     * Convert multiple arrays of float to an interleaved array of int16_t.
57
     *
58
     * Convert floats in the range [-32768.0,32767.0] to ints
59
     * without rescaling
60
     *
61
     * @param dst destination array of interleaved int16_t.
... This diff was truncated because it exceeds the maximum size that can be displayed.

Also available in: Unified diff