ffmpeg / libavcodec / x86 / fmtconvert.asm @ 888fa31e
History | View | Annotate | Download (2.56 KB)
1 | c73d99e6 | Justin Ruggles | ;****************************************************************************** |
---|---|---|---|
2 | ;* x86 optimized Format Conversion Utils |
||
3 | ;* Copyright (c) 2008 Loren Merritt |
||
4 | ;* |
||
5 | 2912e87a | Mans Rullgard | ;* This file is part of Libav. |
6 | c73d99e6 | Justin Ruggles | ;* |
7 | 2912e87a | Mans Rullgard | ;* Libav is free software; you can redistribute it and/or |
8 | c73d99e6 | Justin Ruggles | ;* modify it under the terms of the GNU Lesser General Public |
9 | ;* License as published by the Free Software Foundation; either |
||
10 | ;* version 2.1 of the License, or (at your option) any later version. |
||
11 | ;* |
||
12 | 2912e87a | Mans Rullgard | ;* Libav is distributed in the hope that it will be useful, |
13 | c73d99e6 | Justin Ruggles | ;* but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | ;* Lesser General Public License for more details. |
||
16 | ;* |
||
17 | ;* You should have received a copy of the GNU Lesser General Public |
||
18 | 2912e87a | Mans Rullgard | ;* License along with Libav; if not, write to the Free Software |
19 | 888fa31e | Diego Biurrun | ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
20 | c73d99e6 | Justin Ruggles | ;****************************************************************************** |
21 | |||
22 | %include "x86inc.asm" |
||
23 | |||
24 | section .text align=16 |
||
25 | |||
;------------------------------------------------------------------------------
; PSWAPD_SSE dst, src
;   Stand-in for "pswapd" built on pshufw: dst receives the two 32-bit halves
;   of the 64-bit src in exchanged order.
;   %1 = destination MMX register, %2 = source operand.
;------------------------------------------------------------------------------
%macro PSWAPD_SSE 2
    pshufw    %1, %2, 0x4e      ; 0x4e = 01_00_11_10b: dst words = src words 2,3,0,1
%endmacro
||
;------------------------------------------------------------------------------
; PSWAPD_3DN1 dst, src
;   Three-instruction stand-in for "pswapd": dst receives the two 32-bit
;   halves of src in exchanged order.
;   %1 = destination MMX register, %2 = source MMX register.
;   %1 and %2 must be different registers: %2 is read again by the final
;   unpack, after %1 has already been overwritten.
;------------------------------------------------------------------------------
%macro PSWAPD_3DN1 2
    movq      %1, %2            ; dst = src
    psrlq     %1, 32            ; dst.lo = src.hi, dst.hi = 0
    punpckldq %1, %2            ; dst.hi = src.lo  ->  dst = { src.hi, src.lo }
%endmacro
||
34 | |||
;------------------------------------------------------------------------------
; FLOAT_TO_INT16_INTERLEAVE6 suffix
;   Emits one function:
;
;   void float_to_int16_interleave6_<suffix>(int16_t *dst, const float **src,
;                                            int len)
;
;   src is an array of six float pointers (one per channel). Every loop
;   iteration converts two consecutive samples from each of the six channels
;   to saturated int16 and stores them to dst as two interleaved 6-channel
;   frames (12 words = 24 bytes).
;
;   The conversion and swap mnemonics ("cvtps2pi", "pswapd") are used as
;   written here; the caller of this macro may alias them with %define
;   before instantiating it.
;
;   The loop is bottom-tested: the body runs once before len is checked, and
;   len is consumed two samples at a time.
;
;   cglobal arguments "2,7,0" follow x86inc conventions (presumably: 2
;   arguments loaded, 7 general-purpose registers, 0 XMM registers -- see
;   x86inc.asm to confirm).
;------------------------------------------------------------------------------
%macro FLOAT_TO_INT16_INTERLEAVE6 1
; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
cglobal float_to_int16_interleave6_%1, 2,7,0, dst, src, src1, src2, src3, src4, src5
%ifdef ARCH_X86_64
    ; Keep the sample counter in r10d. It is copied out of r2d before src1q
    ; is written below (src1 is the third named register and presumably
    ; aliases r2 under x86inc naming -- confirm against x86inc.asm).
    %define lend r10d
    mov       lend, r2d
%else
    ; The counter is accessed through r2m, x86inc's memory form of argument 2,
    ; and is decremented in place by the loop.
    %define lend dword r2m
%endif

    ; Fetch the channel pointers src[1..5], then src[0] last because srcq
    ; itself is the base of the pointer array until that final load.
    mov       src1q, [srcq+1*gprsize]
    mov       src2q, [srcq+2*gprsize]
    mov       src3q, [srcq+3*gprsize]
    mov       src4q, [srcq+4*gprsize]
    mov       src5q, [srcq+5*gprsize]
    mov       srcq,  [srcq]

    ; Turn src1..src5 into byte offsets relative to channel 0, so that only
    ; srcq has to be advanced inside the loop.
    sub       src1q, srcq
    sub       src2q, srcq
    sub       src3q, srcq
    sub       src4q, srcq
    sub       src5q, srcq

.loop:
    ; Convert two floats per channel to two int32 each.
    ; Naming below: a..f = channels 0..5, digit = sample index (0 or 1).
    cvtps2pi  mm0, [srcq]           ; mm0 = a0 a1        (dwords)
    cvtps2pi  mm1, [srcq+src1q]     ; mm1 = b0 b1
    cvtps2pi  mm2, [srcq+src2q]     ; mm2 = c0 c1
    cvtps2pi  mm3, [srcq+src3q]     ; mm3 = d0 d1
    cvtps2pi  mm4, [srcq+src4q]     ; mm4 = e0 e1
    cvtps2pi  mm5, [srcq+src5q]     ; mm5 = f0 f1

    ; Narrow to int16 with signed saturation, pairing channels 0/3, 1/4, 2/5.
    packssdw  mm0, mm3              ; mm0 = a0 a1 d0 d1  (words)
    packssdw  mm1, mm4              ; mm1 = b0 b1 e0 e1
    packssdw  mm2, mm5              ; mm2 = c0 c1 f0 f1

    ; Shuffle the twelve words into frame order.
    pswapd    mm3, mm0              ; mm3 = d0 d1 a0 a1
    punpcklwd mm0, mm1              ; mm0 = a0 b0 a1 b1
    punpckhwd mm1, mm2              ; mm1 = e0 f0 e1 f1
    punpcklwd mm2, mm3              ; mm2 = c0 d0 c1 d1
    pswapd    mm3, mm0              ; mm3 = a1 b1 a0 b0
    punpckldq mm0, mm2              ; mm0 = a0 b0 c0 d0
    punpckhdq mm2, mm1              ; mm2 = c1 d1 e1 f1
    punpckldq mm1, mm3              ; mm1 = e0 f0 a1 b1

    ; Write out 24 bytes: a0 b0 c0 d0 | e0 f0 a1 b1 | c1 d1 e1 f1
    movq      [dstq   ], mm0
    movq      [dstq+16], mm2
    movq      [dstq+ 8], mm1

    add       srcq, 8               ; two floats consumed per channel
    add       dstq, 24              ; twelve int16 produced
    sub       lend, 2               ; must stay last: jg tests the flags it sets
    jg        .loop

    emms                            ; leave MMX state before returning
    RET
%endmacro ; FLOAT_TO_INT16_INTERLEAVE6
||
83 | |||
;------------------------------------------------------------------------------
; Instantiate float_to_int16_interleave6_{sse,3dnow,3dn2}.
; The aliases below are set up immediately before each instantiation and the
; order matters: every %define / %undef affects all instantiations after it.
;------------------------------------------------------------------------------

; sse: cvtps2pi is assembled as written; pswapd expands to the pshufw macro.
%define pswapd PSWAPD_SSE
FLOAT_TO_INT16_INTERLEAVE6 sse

; 3dnow: conversion is replaced by pf2id, pswapd by the three-instruction macro.
; NOTE(review): pf2id and cvtps2pi may not round identically -- confirm
; against the ISA manuals if bit-exact output across variants matters.
%define cvtps2pi pf2id
%define pswapd PSWAPD_3DN1
FLOAT_TO_INT16_INTERLEAVE6 3dnow

; 3dn2: still pf2id, but the pswapd alias is dropped so the bare "pswapd"
; mnemonic reaches the assembler.
%undef pswapd
FLOAT_TO_INT16_INTERLEAVE6 3dn2

; Restore the real cvtps2pi mnemonic for anything that follows.
%undef cvtps2pi