ffmpeg / libavcodec / alpha / mpegvideo_alpha.c @ b550bfaa
History | View | Annotate | Download (4.47 KB)
/*
 * Alpha optimized DSP utils
 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 |
|
22 |
#include "asm.h" |
23 |
#include "dsputil.h" |
24 |
#include "mpegvideo.h" |
25 |
|
26 |
/**
 * Dequantize one intra block in place with the H.263 rule, handling four
 * 16-bit coefficients per 64-bit word.
 *
 * Every non-zero coefficient becomes level * (2 * qscale) plus a rounding
 * term carrying the sign of the level.  The rounding term is
 * (qscale - 1) | 1, or 0 when s->h263_aic is set.  Coefficient 0 (DC) is
 * handled separately: it is scaled by s->y_dc_scale (n < 4) or
 * s->c_dc_scale (otherwise) when h263_aic is clear, and is left at its
 * incoming value when h263_aic is set.
 *
 * @param s      codec context; h263_aic, mb_intra, y_dc_scale and
 *               c_dc_scale are read
 * @param block  64 coefficients, accessed as 64-bit words via ldq()/stq()
 *               (presumably requires 8-byte alignment -- confirm in asm.h)
 * @param n      block index, used only to pick the DC scale
 * @param qscale quantizer scale
 */
static void dct_unquantize_h263_intra_axp(MpegEncContext *s, DCTELEM *block,
                                          int n, int qscale)
{
    int i, n_coeffs;
    uint64_t qmul, qadd;
    uint64_t correction;
    DCTELEM *orig_block = block;
    /* Capture the incoming DC up front so block0 is defined on every
       path, including h263_aic where no DC scaling is applied. */
    DCTELEM block0 = block[0];

    qadd = WORD_VEC((qscale - 1) | 1);
    qmul = qscale << 1;
    /* This mask kills spill from negative subwords to the next subword:
       (qmul - 1) leaks out of the multiplication and 1 out of the
       addition of the negated qadd. */
    correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */

    if (!s->h263_aic) {
        if (n < 4)
            block0 = block[0] * s->y_dc_scale;
        else
            block0 = block[0] * s->c_dc_scale;
    } else {
        qadd = 0;
    }
    n_coeffs = 63; // does not always use zigzag table

    for (i = 0; i <= n_coeffs; block += 4, i += 4) {
        uint64_t levels, negmask, zeros, add;

        levels = ldq(block);
        if (levels == 0)
            continue;

#ifdef __alpha_max__
        /* I don't think the speed difference justifies runtime
           detection. */
        negmask = maxsw4(levels, -1); /* negative -> ffff (-1) */
        negmask = minsw4(negmask, 0); /* positive -> 0000 (0) */
#else
        negmask = cmpbge(WORD_VEC(0x7fff), levels);
        negmask &= (negmask >> 1) | (1 << 7);
        negmask = zap(-1, negmask);
#endif

        zeros = cmpbge(0, levels);
        zeros &= zeros >> 1;
        /* zeros |= zeros << 1 is not needed since qadd <= 255, so
           zapping the lower byte suffices. */

        levels *= qmul;
        levels -= correction & (negmask << 16);

        /* Negate qadd for negative levels. */
        add = qadd ^ negmask;
        add += WORD_VEC(0x0001) & negmask;
        /* Set qadd to 0 for levels == 0. */
        add = zap(add, zeros);

        levels += add;

        stq(levels, block);
    }

    /* The loop starts at index 0 and therefore also applied the AC rule to
       the DC coefficient; overwrite it with the value prepared above.
       NOTE(review): with h263_aic set this now restores the incoming DC
       instead of leaving it multiplied by 2 * qscale.  That assumes the
       caller has already reconstructed the DC in that mode -- confirm
       against the generic C dequantizer. */
    if (s->mb_intra)
        orig_block[0] = block0;
}
90 |
|
91 |
/**
 * Dequantize one inter block in place with the H.263 rule, handling four
 * 16-bit coefficients per 64-bit word.
 *
 * Every non-zero coefficient becomes level * (2 * qscale) plus
 * (qscale - 1) | 1 carrying the sign of the level; zero coefficients stay
 * zero.
 *
 * @param s      codec context; intra_scantable.raster_end and
 *               block_last_index are read
 * @param block  coefficients, accessed as 64-bit words via ldq()/stq()
 *               (presumably requires 8-byte alignment -- confirm in asm.h)
 * @param n      block index into s->block_last_index
 * @param qscale quantizer scale
 */
static void dct_unquantize_h263_inter_axp(MpegEncContext *s, DCTELEM *block,
                                          int n, int qscale)
{
    const uint64_t round_vec = WORD_VEC((qscale - 1) | 1);
    const uint64_t scale     = qscale << 1;
    /* Amount that leaks from a negative subword into its upper neighbour:
       (scale - 1) from the multiplication plus 1 from the addition. */
    const uint64_t spill     = WORD_VEC((scale - 1) + 1);
    /* NOTE(review): this is the inter path but the bound is taken from
       intra_scantable -- confirm that is intended. */
    const int last = s->intra_scantable.raster_end[s->block_last_index[n]];
    int pos;

    for (pos = 0; pos <= last; pos += 4, block += 4) {
        uint64_t quad = ldq(block);
        uint64_t sign, zero_bytes, bias;

        /* Nothing to do when all four coefficients are zero. */
        if (quad != 0) {
#ifdef __alpha_max__
            /* The speed difference is not considered worth runtime
               detection. */
            sign = maxsw4(quad, -1); /* negative -> ffff (-1) */
            sign = minsw4(sign, 0);  /* positive -> 0000 (0) */
#else
            sign  = cmpbge(WORD_VEC(0x7fff), quad);
            sign &= (sign >> 1) | (1 << 7);
            sign  = zap(-1, sign);
#endif

            /* Only the low byte of each zero subword needs to be marked:
               the rounding term is <= 255, so zapping that byte is
               enough. */
            zero_bytes  = cmpbge(0, quad);
            zero_bytes &= zero_bytes >> 1;

            quad *= scale;
            quad -= spill & (sign << 16);

            /* Two's-complement negate the rounding term in negative
               lanes, then clear it in lanes whose level was zero. */
            bias  = round_vec ^ sign;
            bias += WORD_VEC(0x0001) & sign;
            bias  = zap(bias, zero_bytes);

            quad += bias;

            stq(quad, block);
        }
    }
}
142 |
|
143 |
/**
 * Point the context's H.263 intra and inter dequantizer hooks at the
 * Alpha implementations defined in this file.
 *
 * @param s codec context whose function pointers are overwritten
 */
void MPV_common_init_axp(MpegEncContext *s)
{
    s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_axp;
    s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_axp;
}