ffmpeg / libavcodec / ps2 / dsputil_mmi.c @ 8dbe5856
History | View | Annotate | Download (5.51 KB)
1 |
/*
|
---|---|
2 |
* MMI optimized DSP utils
|
3 |
* Copyright (c) 2000, 2001 Fabrice Bellard
|
4 |
*
|
5 |
* MMI optimization by Leon van Stuivenberg
|
6 |
* clear_blocks_mmi() by BroadQ
|
7 |
*
|
8 |
* This file is part of FFmpeg.
|
9 |
*
|
10 |
* FFmpeg is free software; you can redistribute it and/or
|
11 |
* modify it under the terms of the GNU Lesser General Public
|
12 |
* License as published by the Free Software Foundation; either
|
13 |
* version 2.1 of the License, or (at your option) any later version.
|
14 |
*
|
15 |
* FFmpeg is distributed in the hope that it will be useful,
|
16 |
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
17 |
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
18 |
* Lesser General Public License for more details.
|
19 |
*
|
20 |
* You should have received a copy of the GNU Lesser General Public
|
21 |
* License along with FFmpeg; if not, write to the Free Software
|
22 |
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
23 |
*/
|
24 |
|
25 |
#include "libavcodec/dsputil.h" |
26 |
#include "mmi.h" |
27 |
|
28 |
/* Prototypes for the MMI IDCT routines that dsputil_init_mmi() installs as
 * idct_put, idct_add and idct; their definitions are not in the visible
 * part of this file. */
void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block); |
29 |
void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block); |
30 |
void ff_mmi_idct(DCTELEM *block);
|
31 |
|
32 |
/**
 * Zero the 768 bytes starting at blocks using 128-bit "sq $0" stores.
 *
 * Each loop iteration clears 64 bytes (four quadword stores) and the loop
 * runs until the pointer reaches blocks + 768.
 *
 * @param blocks start of the area to clear. NOTE(review): presumably six
 *               64-coefficient blocks of 16-bit DCTELEM, 16-byte aligned as
 *               sq requires -- confirm against the callers.
 */
static void clear_blocks_mmi(DCTELEM * blocks)
{
    __asm__ volatile(
        ".set noreorder \n"
        "addiu $9, %0, 768 \n"      /* $9 = end address */
        "nop \n"
        "1: \n"
        "sq $0, 0(%0) \n"
        "move $8, %0 \n"            /* $8 = base of the current 64-byte chunk */
        "addi %0, %0, 64 \n"
        "sq $0, 16($8) \n"
        "slt $10, %0, $9 \n"        /* $10 = (next chunk < end) */
        "sq $0, 32($8) \n"
        "bnez $10, 1b \n"
        "sq $0, 48($8) \n"          /* executes in the branch delay slot */
        ".set reorder \n"
        : "+r" (blocks)
        :
        /* $10 is written by the slt above, so it has to be declared as
         * clobbered together with the $8/$9 scratch registers. */
        : "$8", "$9", "$10", "memory" );
}
50 |
|
51 |
|
52 |
/**
 * Copy eight rows of eight source bytes into block, widening every byte by
 * interleaving it with a zero byte (pextlb with $0).
 *
 * Loads and stores are software-pipelined over the three scratch registers
 * $8, $9 and $10, so the instruction order below matters.
 *
 * @param block     destination, written as eight 16-byte rows at byte
 *                  offsets 0, 16, ..., 112 (sq presumably needs 16-byte
 *                  alignment -- confirm with callers)
 * @param pixels    source rows, read with 64-bit ld (presumably 8-byte
 *                  aligned -- confirm with callers)
 * @param line_size byte distance between consecutive source rows
 */
static void get_pixels_mmi(DCTELEM *block, const uint8_t *pixels, int line_size)
{
    __asm__ volatile(
        ".set push \n\t"
        ".set mips3 \n\t"
        /* prime the pipeline: rows 0..2 -> $8, $9, $10 */
        "ld $8, 0(%0) \n\t"
        "add %0, %0, %2 \n\t"
        "ld $9, 0(%0) \n\t"
        "add %0, %0, %2 \n\t"
        "ld $10, 0(%0) \n\t"
        /* store row 0, fetch row 3 */
        "pextlb $8, $0, $8 \n\t"
        "sq $8, 0(%1) \n\t"
        "add %0, %0, %2 \n\t"
        "ld $8, 0(%0) \n\t"
        /* store row 1, fetch row 4 */
        "pextlb $9, $0, $9 \n\t"
        "sq $9, 16(%1) \n\t"
        "add %0, %0, %2 \n\t"
        "ld $9, 0(%0) \n\t"
        /* store row 2, fetch row 5 */
        "pextlb $10, $0, $10 \n\t"
        "sq $10, 32(%1) \n\t"
        "add %0, %0, %2 \n\t"
        "ld $10, 0(%0) \n\t"
        /* store row 3, fetch row 6 */
        "pextlb $8, $0, $8 \n\t"
        "sq $8, 48(%1) \n\t"
        "add %0, %0, %2 \n\t"
        "ld $8, 0(%0) \n\t"
        /* store row 4, fetch row 7 */
        "pextlb $9, $0, $9 \n\t"
        "sq $9, 64(%1) \n\t"
        "add %0, %0, %2 \n\t"
        "ld $9, 0(%0) \n\t"
        /* drain: store rows 5, 6 and 7 */
        "pextlb $10, $0, $10 \n\t"
        "sq $10, 80(%1) \n\t"
        "pextlb $8, $0, $8 \n\t"
        "sq $8, 96(%1) \n\t"
        "pextlb $9, $0, $9 \n\t"
        "sq $9, 112(%1) \n\t"
        ".set pop \n\t"
        : "+r" (pixels)
        : "r" (block), "r" (line_size)
        : "$8", "$9", "$10", "memory" );
}
91 |
|
92 |
|
93 |
/**
 * Copy rows of 8 bytes from pixels to block.
 *
 * The source is read with an ldr/ldl pair, so it does not have to be
 * aligned; the destination is written with a single 64-bit sd.
 *
 * @param block     destination, advanced by line_size after each row
 * @param pixels    source, advanced by line_size after each row
 * @param line_size byte stride applied to both pointers
 * @param h         row count; it is decremented once per row and the loop
 *                  repeats while the result is greater than zero, so at
 *                  least one row is always copied
 */
static void put_pixels8_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    __asm__ volatile(
        ".set push \n\t"
        ".set mips3 \n\t"
        "1: \n\t"
        "ldr $8, 0(%1) \n\t"        /* low part of the unaligned doubleword */
        "addiu %2, %2, -1 \n\t"     /* one row consumed */
        "ldl $8, 7(%1) \n\t"        /* high part of the unaligned doubleword */
        "add %1, %1, %3 \n\t"
        "sd $8, 0(%0) \n\t"
        "add %0, %0, %3 \n\t"
        "bgtz %2, 1b \n\t"
        ".set pop \n\t"
        : "+r" (block), "+r" (pixels), "+r" (h)
        : "r" (line_size)
        : "$8", "memory" );
}
110 |
|
111 |
|
112 |
/**
 * Copy rows of 16 bytes from pixels to block, two rows per loop iteration.
 *
 * Each row is read as two unaligned 64-bit halves (ldr/ldl pairs), merged
 * into one 128-bit register with pcpyld and written with a single sq.
 *
 * @param block     destination, advanced by 2 * line_size per iteration
 *                  (sq presumably needs 16-byte alignment -- confirm with
 *                  callers)
 * @param pixels    source, advanced by 2 * line_size per iteration
 * @param line_size byte stride applied to both pointers
 * @param h         row count; it is decremented by 2 per iteration and the
 *                  loop repeats while the result is greater than zero, so
 *                  at least two rows are always copied
 */
static void put_pixels16_mmi(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    __asm__ volatile (
        ".set push \n\t"
        ".set mips3 \n\t"
        "1: \n\t"
        /* first row: $8 = bytes 0..7, $9 = bytes 8..15 */
        "ldr $8, 0(%1) \n\t"
        "add $11, %1, %3 \n\t"      /* $11 = second source row */
        "ldl $8, 7(%1) \n\t"
        "add $10, %0, %3 \n\t"      /* $10 = second destination row */
        "ldr $9, 8(%1) \n\t"
        "ldl $9, 15(%1) \n\t"
        /* second row: $12 = bytes 0..7, $13 = bytes 8..15 */
        "ldr $12, 0($11) \n\t"
        "add %1, $11, %3 \n\t"      /* source now points past both rows */
        "ldl $12, 7($11) \n\t"
        "pcpyld $8, $9, $8 \n\t"    /* merge the halves of the first row */
        "sq $8, 0(%0) \n\t"
        "ldr $13, 8($11) \n\t"
        "addiu %2, %2, -2 \n\t"     /* two rows consumed */
        "ldl $13, 15($11) \n\t"
        "add %0, $10, %3 \n\t"      /* destination now points past both rows */
        "pcpyld $12, $13, $12 \n\t" /* merge the halves of the second row */
        "sq $12, 0($10) \n\t"
        "bgtz %2, 1b \n\t"
        ".set pop \n\t"
        : "+r" (block), "+r" (pixels), "+r" (h)
        : "r" (line_size)
        : "$8", "$9", "$10", "$11", "$12", "$13", "memory" );
}
140 |
|
141 |
|
142 |
/**
 * Install the MMI implementations into a DSPContext.
 *
 * - get_pixels is always replaced.
 * - clear_blocks and the full-pel put_pixels / put_no_rnd_pixels entries
 *   ([0][0] for 16-wide, [1][0] for 8-wide) are replaced unless the codec
 *   is H.264 with bits_per_raw_sample above 8.
 * - The IDCT hooks and the LIBMPEG2 permutation are selected when
 *   avctx->idct_algo is FF_IDCT_AUTO or FF_IDCT_PS2.
 *
 * @param c     context whose function pointers are overwritten
 * @param avctx codec context supplying idct_algo, codec_id and
 *              bits_per_raw_sample
 */
void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
{
    const int algo         = avctx->idct_algo;
    const int deep_h264    = avctx->codec_id == CODEC_ID_H264 &&
                             avctx->bits_per_raw_sample > 8;
    const int use_mmi_idct = algo == FF_IDCT_AUTO || algo == FF_IDCT_PS2;

    c->get_pixels = get_pixels_mmi;

    if (!deep_h264) {
        c->clear_blocks = clear_blocks_mmi;

        /* 8-pixel-wide copies */
        c->put_pixels_tab[1][0]        = put_pixels8_mmi;
        c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmi;

        /* 16-pixel-wide copies */
        c->put_pixels_tab[0][0]        = put_pixels16_mmi;
        c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
    }

    if (use_mmi_idct) {
        c->idct_put              = ff_mmi_idct_put;
        c->idct_add              = ff_mmi_idct_add;
        c->idct                  = ff_mmi_idct;
        c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
    }
}
166 |
|