Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ a8d73e56

History | View | Annotate | Download (98.6 KB)

1 791e7b83 Michael Niedermayer
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22
#include "cabac.h"
23
24
#include "mpegvideo.h"
25
26
#undef NDEBUG
27
#include <assert.h>
28
29
#define MAX_DECOMPOSITIONS 8
30
#define MAX_PLANES 4
31
#define DWTELEM int
32
#define QROOT 8 
33
34
static const int8_t quant3[256]={
35
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
51
};
52
static const int8_t quant3b[256]={
53
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
};
70
static const int8_t quant5[256]={
71
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
72
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
80
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
87
};
88
static const int8_t quant7[256]={
89
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
92
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
93
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
98
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
105
};
106
static const int8_t quant9[256]={
107
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
109
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
116
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
123
};
124
static const int8_t quant11[256]={
125
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
126
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
128
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
134
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
141
};
142
static const int8_t quant13[256]={
143
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
144
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
147
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
152
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
159
};
160
161
#define OBMC_MAX 64
162
#if 0 //64*cubic
163
static const uint8_t obmc32[1024]={
164
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
165
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
166
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
167
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
168
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
169
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
170
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
171
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
172
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
173
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
174
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
175
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
176
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
177
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
178
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
179
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
180
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
182
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
183
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
184
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
185
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
186
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
187
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
188
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
189
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
190
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
191
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
192
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
193
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
194
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
195
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
196
//error:0.000022
197
};
198
static const uint8_t obmc16[256]={
199
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
200
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
201
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
202
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
203
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
204
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
205
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
206
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
207
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
209
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
210
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
211
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
212
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
213
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
214
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
215
//error:0.000033
216
};
217
#elif 1 // 64*linear
218
static const uint8_t obmc32[1024]={
219
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
220
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
221
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
222
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
223
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
224
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
225
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
226
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
227
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
228
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
229
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
230
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
231
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
232
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
233
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
234
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
235
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
237
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
238
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
239
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
240
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
241
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
242
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
243
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
244
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
245
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
246
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
247
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
248
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
249
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
250
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
251
 //error:0.000020
252
};
253
static const uint8_t obmc16[256]={
254
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
255
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
256
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
257
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
258
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
259
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
260
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
261
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
262
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
264
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
265
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
266
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
267
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
268
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
269
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
270
//error:0.000015
271
};
272
#else //64*cos
273
static const uint8_t obmc32[1024]={
274
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
275
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
276
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
277
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
278
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
279
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
280
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
281
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
282
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
283
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
284
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
285
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
286
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
287
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
288
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
289
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
290
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
292
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
293
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
294
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
295
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
296
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
297
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
298
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
299
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
300
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
301
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
302
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
303
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
304
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
305
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
306
//error:0.000022
307
};
308
static const uint8_t obmc16[256]={
309
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
310
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
311
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
312
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
313
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
314
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
315
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
316
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
317
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
319
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
320
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
321
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
322
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
323
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
324
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
325
//error:0.000022
326
};
327
#endif
328
329
330
typedef struct SubBand{
331
    int level;
332
    int stride;
333
    int width;
334
    int height;
335
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
336
    DWTELEM *buf;
337
    struct SubBand *parent;
338
    uint8_t state[/*7*2*/ 7 + 512][32];
339
}SubBand;
340
341
typedef struct Plane{
342
    int width;
343
    int height;
344
    SubBand band[MAX_DECOMPOSITIONS][4];
345
}Plane;
346
347
typedef struct SnowContext{
348
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
349
350
    AVCodecContext *avctx;
351
    CABACContext c;
352
    DSPContext dsp;
353
    AVFrame input_picture;
354
    AVFrame current_picture;
355
    AVFrame last_picture;
356
    AVFrame mconly_picture;
357
//     uint8_t q_context[16];
358
    uint8_t header_state[32];
359
    int keyframe;
360
    int version;
361
    int spatial_decomposition_type;
362
    int temporal_decomposition_type;
363
    int spatial_decomposition_count;
364
    int temporal_decomposition_count;
365
    DWTELEM *spatial_dwt_buffer;
366
    DWTELEM *pred_buffer;
367
    int colorspace_type;
368
    int chroma_h_shift;
369
    int chroma_v_shift;
370
    int spatial_scalability;
371
    int qlog;
372
    int mv_scale;
373
    int qbias;
374
#define QBIAS_SHIFT 3
375
    int b_width; //FIXME remove?
376
    int b_height; //FIXME remove?
377
    Plane plane[MAX_PLANES];
378
    SubBand mb_band;
379
    SubBand mv_band[2];
380
    
381
    uint16_t *mb_type;
382
    uint8_t *mb_mean;
383
    uint32_t *dummy;
384
    int16_t (*motion_val8)[2];
385
    int16_t (*motion_val16)[2];
386
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
387
}SnowContext;
388
389
#define QEXPSHIFT 7 //FIXME try to change this to 0
390
static const uint8_t qexp[8]={
391
    128, 140, 152, 166, 181, 197, 215, 235
392
//   64,  70,  76,  83,  91,  99, 108, 117
393
//   32,  35,  38,  41,  45,  49,  54,  59
394
//   16,  17,  19,  21,  23,  25,  27,  29
395
//    8,   9,  10,  10,  11,  12,  13,  15
396
};
397
398
static inline int mirror(int v, int m){
399
    if     (v<0) return -v;
400
    else if(v>m) return 2*m-v;
401
    else         return v;
402
}
403
404
static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
405
    int i;
406
407
    if(v){
408
        const int a= ABS(v);
409
        const int e= av_log2(a);
410
#if 1
411
        const int el= FFMIN(e, 10);   
412
        put_cabac(c, state+0, 0);
413
414
        for(i=0; i<el; i++){
415
            put_cabac(c, state+1+i, 1);  //1..10
416
        }
417
        for(; i<e; i++){
418
            put_cabac(c, state+1+9, 1);  //1..10
419
        }
420
        put_cabac(c, state+1+FFMIN(i,9), 0);
421
422
        for(i=e-1; i>=el; i--){
423
            put_cabac(c, state+22+9, (a>>i)&1); //22..31
424
        }
425
        for(; i>=0; i--){
426
            put_cabac(c, state+22+i, (a>>i)&1); //22..31
427
        }
428
429
        if(is_signed)
430
            put_cabac(c, state+11 + el, v < 0); //11..21
431
#else
432
        
433
        put_cabac(c, state+0, 0);
434
        if(e<=9){
435
            for(i=0; i<e; i++){
436
                put_cabac(c, state+1+i, 1);  //1..10
437
            }
438
            put_cabac(c, state+1+i, 0);
439
440
            for(i=e-1; i>=0; i--){
441
                put_cabac(c, state+22+i, (a>>i)&1); //22..31
442
            }
443
444
            if(is_signed)
445
                put_cabac(c, state+11 + e, v < 0); //11..21
446
        }else{
447
            for(i=0; i<e; i++){
448
                put_cabac(c, state+1+FFMIN(i,9), 1);  //1..10
449
            }
450
            put_cabac(c, state+1+FFMIN(i,9), 0);
451
452
            for(i=e-1; i>=0; i--){
453
                put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
454
            }
455
456
            if(is_signed)
457
                put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
458
        }
459
#endif
460
    }else{
461
        put_cabac(c, state+0, 1);
462
    }
463
}
464
465
static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
466
    if(get_cabac(c, state+0))
467
        return 0;
468
    else{
469
        int i, e, a, el;
470
 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
471
        for(e=0; e<10; e++){ 
472
            if(get_cabac(c, state + 1 + e)==0) // 1..10
473
                break;
474
        }
475
        el= e;
476
 
477
        if(e==10){
478
            while(get_cabac(c, state + 1 + 9)) //10
479
                e++;
480
        }
481
        a= 1;
482
        for(i=e-1; i>=el; i--){
483
            a += a + get_cabac(c, state+22+9); //31
484
        }
485
        for(; i>=0; i--){
486
            a += a + get_cabac(c, state+22+i); //22..31
487
        }
488
489
        if(is_signed && get_cabac(c, state+11 + el)) //11..21
490
            return -a;
491
        else
492
            return a;
493
    }
494
}
495
496
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
497
    const int mirror_left= !highpass;
498
    const int mirror_right= (width&1) ^ highpass;
499
    const int w= (width>>1) - 1 + (highpass & width);
500
    int i;
501
502
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
503
    if(mirror_left){
504
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
505
        dst += dst_step;
506
        src += src_step;
507
    }
508
    
509
    for(i=0; i<w; i++){
510
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
511
    }
512
    
513
    if(mirror_right){
514
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
515
    }
516
}
517
518
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
519
    const int mirror_left= !highpass;
520
    const int mirror_right= (width&1) ^ highpass;
521
    const int w= (width>>1) - 1 + (highpass & width);
522
    int i;
523
524
    if(mirror_left){
525
        int r= 3*2*ref[0];
526
        r += r>>4;
527
        r += r>>8;
528
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
529
        dst += dst_step;
530
        src += src_step;
531
    }
532
    
533
    for(i=0; i<w; i++){
534
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
535
        r += r>>4;
536
        r += r>>8;
537
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
538
    }
539
    
540
    if(mirror_right){
541
        int r= 3*2*ref[w*ref_step];
542
        r += r>>4;
543
        r += r>>8;
544
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
545
    }
546
}
547
548
549
static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
550
    int x, i;
551
    
552
    for(x=start; x<width; x+=2){
553
        int64_t sum=0;
554
555
        for(i=0; i<n; i++){
556
            int x2= x + 2*i - n + 1;
557
            if     (x2<     0) x2= -x2;
558
            else if(x2>=width) x2= 2*width-x2-2;
559
            sum += coeffs[i]*(int64_t)dst[x2];
560
        }
561
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
562
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
563
    }
564
}
565
566
static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
567
    int x, y, i;
568
    for(y=start; y<height; y+=2){
569
        for(x=0; x<width; x++){
570
            int64_t sum=0;
571
    
572
            for(i=0; i<n; i++){
573
                int y2= y + 2*i - n + 1;
574
                if     (y2<      0) y2= -y2;
575
                else if(y2>=height) y2= 2*height-y2-2;
576
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
577
            }
578
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
579
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
580
        }
581
    }
582
}
583
584
#define SCALEX 1
585
#define LX0 0
586
#define LX1 1
587
588 de890c9b Michael Niedermayer
#if 0 // more accurate 9/7
589 791e7b83 Michael Niedermayer
#define N1 2
590
#define SHIFT1 14
591
#define COEFFS1 (int[]){-25987,-25987}
592
#define N2 2
593
#define SHIFT2 19
594
#define COEFFS2 (int[]){-27777,-27777}
595
#define N3 2
596
#define SHIFT3 15
597
#define COEFFS3 (int[]){28931,28931}
598
#define N4 2
599
#define SHIFT4 15
600
#define COEFFS4 (int[]){14533,14533}
601
#elif 1 // 13/7 CRF
602
#define N1 4
603
#define SHIFT1 4
604
#define COEFFS1 (int[]){1,-9,-9,1}
605 de890c9b Michael Niedermayer
#define N2 4
606 791e7b83 Michael Niedermayer
#define SHIFT2 4
607
#define COEFFS2 (int[]){-1,5,5,-1}
608
#define N3 0
609
#define SHIFT3 1
610
#define COEFFS3 NULL
611
#define N4 0
612
#define SHIFT4 1
613
#define COEFFS4 NULL
614
#elif 1 // 3/5
615
#define LX0 1
616
#define LX1 0
617
#define SCALEX 0.5
618
#define N1 2
619
#define SHIFT1 1
620
#define COEFFS1 (int[]){1,1}
621
#define N2 2
622
#define SHIFT2 2
623
#define COEFFS2 (int[]){-1,-1}
624
#define N3 0
625
#define SHIFT3 0
626
#define COEFFS3 NULL
627
#define N4 0
628
#define SHIFT4 0
629
#define COEFFS4 NULL
630
#elif 1 // 11/5 
631
#define N1 0
632
#define SHIFT1 1
633
#define COEFFS1 NULL
634
#define N2 2
635
#define SHIFT2 2
636
#define COEFFS2 (int[]){-1,-1}
637
#define N3 2
638
#define SHIFT3 0
639
#define COEFFS3 (int[]){-1,-1}
640
#define N4 4
641
#define SHIFT4 7
642
#define COEFFS4 (int[]){-5,29,29,-5}
643
#define SCALEX 4
644
#elif 1 // 9/7 CDF
645
#define N1 2
646
#define SHIFT1 7
647
#define COEFFS1 (int[]){-203,-203}
648
#define N2 2
649
#define SHIFT2 12
650
#define COEFFS2 (int[]){-217,-217}
651
#define N3 2
652
#define SHIFT3 7
653
#define COEFFS3 (int[]){113,113}
654
#define N4 2
655
#define SHIFT4 9
656
#define COEFFS4 (int[]){227,227}
657
#define SCALEX 1
658
#elif 1 // 7/5 CDF
659
#define N1 0
660
#define SHIFT1 1
661
#define COEFFS1 NULL
662
#define N2 2
663
#define SHIFT2 2
664
#define COEFFS2 (int[]){-1,-1}
665
#define N3 2
666
#define SHIFT3 0
667
#define COEFFS3 (int[]){-1,-1}
668
#define N4 2
669
#define SHIFT4 4
670
#define COEFFS4 (int[]){3,3}
671
#elif 1 // 9/7 MN
672
#define N1 4
673
#define SHIFT1 4
674
#define COEFFS1 (int[]){1,-9,-9,1}
675
#define N2 2
676
#define SHIFT2 2
677
#define COEFFS2 (int[]){1,1}
678
#define N3 0
679
#define SHIFT3 1
680
#define COEFFS3 NULL
681
#define N4 0
682
#define SHIFT4 1
683
#define COEFFS4 NULL
684
#else // 13/7 CRF
685
#define N1 4
686
#define SHIFT1 4
687
#define COEFFS1 (int[]){1,-9,-9,1}
688
#define N2 4
689
#define SHIFT2 4
690
#define COEFFS2 (int[]){-1,5,5,-1}
691
#define N3 0
692
#define SHIFT3 1
693
#define COEFFS3 NULL
694
#define N4 0
695
#define SHIFT4 1
696
#define COEFFS4 NULL
697
#endif
698
static void horizontal_decomposeX(int *b, int width){
699
    int temp[width];
700
    const int width2= width>>1;
701
    const int w2= (width+1)>>1;
702
    int A1,A2,A3,A4, x;
703
704
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
705
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
706
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
707
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
708
    
709
    for(x=0; x<width2; x++){
710
        temp[x   ]= b[2*x    ];
711
        temp[x+w2]= b[2*x + 1];
712
    }
713
    if(width&1)
714
        temp[x   ]= b[2*x    ];
715
    memcpy(b, temp, width*sizeof(int));
716
}
717
718
static void horizontal_composeX(int *b, int width){
719
    int temp[width];
720
    const int width2= width>>1;
721
    int A1,A2,A3,A4, x;
722
    const int w2= (width+1)>>1;
723
724
    memcpy(temp, b, width*sizeof(int));
725
    for(x=0; x<width2; x++){
726
        b[2*x    ]= temp[x   ];
727
        b[2*x + 1]= temp[x+w2];
728
    }
729
    if(width&1)
730
        b[2*x    ]= temp[x   ];
731
732
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
733
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
734
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
735
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
736
}
737
738
static void spatial_decomposeX(int *buffer, int width, int height, int stride){
739
    int x, y;
740
  
741
    for(y=0; y<height; y++){
742
        for(x=0; x<width; x++){
743
            buffer[y*stride + x] *= SCALEX;
744
        }
745
    }
746
747
    for(y=0; y<height; y++){
748
        horizontal_decomposeX(buffer + y*stride, width);
749
    }
750
    
751
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
752
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
753
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
754
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
755
}
756
757
static void spatial_composeX(int *buffer, int width, int height, int stride){
758
    int x, y;
759
  
760
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
761
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
762
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
763
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
764
765
    for(y=0; y<height; y++){
766
        horizontal_composeX(buffer + y*stride, width);
767
    }
768
769
    for(y=0; y<height; y++){
770
        for(x=0; x<width; x++){
771
            buffer[y*stride + x] /= SCALEX;
772
        }
773
    }
774
}
775
776
static void horizontal_decompose53i(int *b, int width){
777
    int temp[width];
778
    const int width2= width>>1;
779
    int A1,A2,A3,A4, x;
780
    const int w2= (width+1)>>1;
781
782
    for(x=0; x<width2; x++){
783
        temp[x   ]= b[2*x    ];
784
        temp[x+w2]= b[2*x + 1];
785
    }
786
    if(width&1)
787
        temp[x   ]= b[2*x    ];
788
#if 0
789
    A2= temp[1       ];
790
    A4= temp[0       ];
791
    A1= temp[0+width2];
792
    A1 -= (A2 + A4)>>1;
793
    A4 += (A1 + 1)>>1;
794
    b[0+width2] = A1;
795
    b[0       ] = A4;
796
    for(x=1; x+1<width2; x+=2){
797
        A3= temp[x+width2];
798
        A4= temp[x+1     ];
799
        A3 -= (A2 + A4)>>1;
800
        A2 += (A1 + A3 + 2)>>2;
801
        b[x+width2] = A3;
802
        b[x       ] = A2;
803

804
        A1= temp[x+1+width2];
805
        A2= temp[x+2       ];
806
        A1 -= (A2 + A4)>>1;
807
        A4 += (A1 + A3 + 2)>>2;
808
        b[x+1+width2] = A1;
809
        b[x+1       ] = A4;
810
    }
811
    A3= temp[width-1];
812
    A3 -= A2;
813
    A2 += (A1 + A3 + 2)>>2;
814
    b[width -1] = A3;
815
    b[width2-1] = A2;
816
#else        
817
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
818
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
819
#endif
820
}
821
822
static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
823
    int i;
824
    
825
    for(i=0; i<width; i++){
826
        b1[i] -= (b0[i] + b2[i])>>1;
827
    }
828
}
829
830
static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
831
    int i;
832
    
833
    for(i=0; i<width; i++){
834
        b1[i] += (b0[i] + b2[i] + 2)>>2;
835
    }
836
}
837
838
static void spatial_decompose53i(int *buffer, int width, int height, int stride){
839
    int x, y;
840
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
841
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
842
  
843
    for(y=-2; y<height; y+=2){
844
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
845
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
846
847
{START_TIMER
848
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
849
        if(y+2 < height) horizontal_decompose53i(b3, width);
850
STOP_TIMER("horizontal_decompose53i")}
851
        
852
{START_TIMER
853
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
854
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
855
STOP_TIMER("vertical_decompose53i*")}
856
        
857
        b0=b2;
858
        b1=b3;
859
    }
860
}
861
862
#define lift5 lift
863
#if 1
864
#define W_AM 3
865
#define W_AO 0
866
#define W_AS 1
867
868
#define W_BM 1
869
#define W_BO 8
870
#define W_BS 4
871
872
#undef lift5
873
#define W_CM 9999
874
#define W_CO 2
875
#define W_CS 2
876
877
#define W_DM 15
878
#define W_DO 16
879
#define W_DS 5
880
#elif 0
881
#define W_AM 55
882
#define W_AO 16
883
#define W_AS 5
884
885
#define W_BM 3
886
#define W_BO 32
887
#define W_BS 6
888
889
#define W_CM 127
890
#define W_CO 64
891
#define W_CS 7
892
893
#define W_DM 7
894
#define W_DO 8
895
#define W_DS 4
896
#elif 0
897
#define W_AM 97
898
#define W_AO 32
899
#define W_AS 6
900
901
#define W_BM 63
902
#define W_BO 512
903
#define W_BS 10
904
905
#define W_CM 13
906
#define W_CO 8
907
#define W_CS 4
908
909
#define W_DM 15
910
#define W_DO 16
911
#define W_DS 5
912
913
#else
914
915
#define W_AM 203
916
#define W_AO 64
917
#define W_AS 7
918
919
#define W_BM 217
920
#define W_BO 2048
921
#define W_BS 12
922
923
#define W_CM 113
924
#define W_CO 64
925
#define W_CS 7
926
927
#define W_DM 227
928
#define W_DO 128
929
#define W_DS 9
930
#endif
931
static void horizontal_decompose97i(int *b, int width){
932
    int temp[width];
933
    const int w2= (width+1)>>1;
934
935
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
936
    lift (temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
937
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
938
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
939
}
940
941
942
static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
943
    int i;
944
    
945
    for(i=0; i<width; i++){
946
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
947
    }
948
}
949
950
static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
951
    int i;
952
    
953
    for(i=0; i<width; i++){
954
#ifdef lift5
955
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
956
#else
957
        int r= 3*(b0[i] + b2[i]);
958
        r+= r>>4;
959
        r+= r>>8;
960
        b1[i] += (r+W_CO)>>W_CS;
961
#endif
962
    }
963
}
964
965
static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
966
    int i;
967
    
968
    for(i=0; i<width; i++){
969
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
970
    }
971
}
972
973
static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
974
    int i;
975
    
976
    for(i=0; i<width; i++){
977
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
978
    }
979
}
980
981
static void spatial_decompose97i(int *buffer, int width, int height, int stride){
982
    int x, y;
983
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
984
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
985
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
986
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
987
  
988
    for(y=-4; y<height; y+=2){
989
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
990
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
991
992
{START_TIMER
993
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
994
        if(y+4 < height) horizontal_decompose97i(b5, width);
995
if(width>400){
996
STOP_TIMER("horizontal_decompose97i")
997
}}
998
        
999
{START_TIMER
1000
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1001
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1002
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1003
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1004
1005
if(width>400){
1006
STOP_TIMER("vertical_decompose97i")
1007
}}
1008
        
1009
        b0=b2;
1010
        b1=b3;
1011
        b2=b4;
1012
        b3=b5;
1013
    }
1014
}
1015
1016
static void spatial_dwt(SnowContext *s, int *buffer, int width, int height, int stride){
1017
    int level;
1018
    
1019
    for(level=0; level<s->spatial_decomposition_count; level++){
1020
        switch(s->spatial_decomposition_type){
1021
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1022
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1023
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1024
        }
1025
    }
1026
}
1027
1028
static void horizontal_compose53i(int *b, int width){
1029
    int temp[width];
1030
    const int width2= width>>1;
1031
    const int w2= (width+1)>>1;
1032
    int A1,A2,A3,A4, x;
1033
1034
#if 0
1035
    A2= temp[1       ];
1036
    A4= temp[0       ];
1037
    A1= temp[0+width2];
1038
    A1 -= (A2 + A4)>>1;
1039
    A4 += (A1 + 1)>>1;
1040
    b[0+width2] = A1;
1041
    b[0       ] = A4;
1042
    for(x=1; x+1<width2; x+=2){
1043
        A3= temp[x+width2];
1044
        A4= temp[x+1     ];
1045
        A3 -= (A2 + A4)>>1;
1046
        A2 += (A1 + A3 + 2)>>2;
1047
        b[x+width2] = A3;
1048
        b[x       ] = A2;
1049

1050
        A1= temp[x+1+width2];
1051
        A2= temp[x+2       ];
1052
        A1 -= (A2 + A4)>>1;
1053
        A4 += (A1 + A3 + 2)>>2;
1054
        b[x+1+width2] = A1;
1055
        b[x+1       ] = A4;
1056
    }
1057
    A3= temp[width-1];
1058
    A3 -= A2;
1059
    A2 += (A1 + A3 + 2)>>2;
1060
    b[width -1] = A3;
1061
    b[width2-1] = A2;
1062
#else   
1063
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1064
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1065
#endif
1066
    for(x=0; x<width2; x++){
1067
        b[2*x    ]= temp[x   ];
1068
        b[2*x + 1]= temp[x+w2];
1069
    }
1070
    if(width&1)
1071
        b[2*x    ]= temp[x   ];
1072
}
1073
1074
static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
1075
    int i;
1076
    
1077
    for(i=0; i<width; i++){
1078
        b1[i] += (b0[i] + b2[i])>>1;
1079
    }
1080
}
1081
1082
static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
1083
    int i;
1084
    
1085
    for(i=0; i<width; i++){
1086
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1087
    }
1088
}
1089
1090
static void spatial_compose53i(int *buffer, int width, int height, int stride){
1091
    int x, y;
1092
    DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1093
    DWTELEM *b1= buffer + mirror(-1  , height-1)*stride;
1094
  
1095
    for(y=-1; y<=height; y+=2){
1096
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1097
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1098
1099
{START_TIMER
1100
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1101
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1102
STOP_TIMER("vertical_compose53i*")}
1103
1104
{START_TIMER
1105
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1106
        if(b0 <= b2) horizontal_compose53i(b1, width);
1107
STOP_TIMER("horizontal_compose53i")}
1108
1109
        b0=b2;
1110
        b1=b3;
1111
    }
1112
}   
1113
1114
 
1115
static void horizontal_compose97i(int *b, int width){
1116
    int temp[width];
1117
    const int w2= (width+1)>>1;
1118
1119
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1120
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1121
    lift (b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1122
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1123
}
1124
1125
static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1126
    int i;
1127
    
1128
    for(i=0; i<width; i++){
1129
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1130
    }
1131
}
1132
1133
static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1134
    int i;
1135
    
1136
    for(i=0; i<width; i++){
1137
#ifdef lift5
1138
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1139
#else
1140
        int r= 3*(b0[i] + b2[i]);
1141
        r+= r>>4;
1142
        r+= r>>8;
1143
        b1[i] -= (r+W_CO)>>W_CS;
1144
#endif
1145
    }
1146
}
1147
1148
static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1149
    int i;
1150
    
1151
    for(i=0; i<width; i++){
1152
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1153
    }
1154
}
1155
1156
static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1157
    int i;
1158
    
1159
    for(i=0; i<width; i++){
1160
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1161
    }
1162
}
1163
1164
static void spatial_compose97i(int *buffer, int width, int height, int stride){
1165
    int x, y;
1166
    DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1167
    DWTELEM *b1= buffer + mirror(-3  , height-1)*stride;
1168
    DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1169
    DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1170
1171
    for(y=-3; y<=height; y+=2){
1172
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1173
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1174
1175
        if(stride == width && y+4 < height && 0){ 
1176
            int x;
1177
            for(x=0; x<width/2; x++)
1178
                b5[x] += 64*2;
1179
            for(; x<width; x++)
1180
                b5[x] += 169*2;
1181
        }
1182
        
1183
{START_TIMER
1184
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1185
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1186
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1187
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1188
if(width>400){
1189
STOP_TIMER("vertical_compose97i")}}
1190
1191
{START_TIMER
1192
        if(y-1>=  0) horizontal_compose97i(b0, width);
1193
        if(b0 <= b2) horizontal_compose97i(b1, width);
1194
if(width>400 && b0 <= b2){
1195
STOP_TIMER("horizontal_compose97i")}}
1196
        
1197
        b0=b2;
1198
        b1=b3;
1199
        b2=b4;
1200
        b3=b5;
1201
    }
1202
}
1203
1204
static void spatial_idwt(SnowContext *s, int *buffer, int width, int height, int stride){
1205
    int level;
1206
1207
    for(level=s->spatial_decomposition_count-1; level>=0; level--){
1208
        switch(s->spatial_decomposition_type){
1209
        case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1210
        case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1211
        case 2: spatial_composeX  (buffer, width>>level, height>>level, stride<<level); break;
1212
        }
1213
    }
1214
}
1215
1216
static const int hilbert[16][2]={
1217
    {0,0}, {1,0}, {1,1}, {0,1},
1218
    {0,2}, {0,3}, {1,3}, {1,2},
1219
    {2,2}, {2,3}, {3,3}, {3,2},
1220
    {3,1}, {2,1}, {2,0}, {3,0},
1221
};
1222
#if 0
1223
-o o-
1224
 | |
1225
 o-o
1226
 
1227
-o-o o-o-
1228
   | | 
1229
 o-o o-o
1230
 |     |
1231
 o o-o o
1232
 | | | |
1233
 o-o o-o
1234
 
1235
 0112122312232334122323342334
1236
 0123456789ABCDEF0123456789AB
1237
 RLLRMRRLLRRMRLLMLRRLMLLRRLLM
1238
 
1239
 4  B  F 14 1B
1240
 4 11 15 20 27
1241
 
1242
-o o-o-o o-o-o o-
1243
 | |   | |   | |
1244
 o-o o-o o-o o-o
1245
     |     |
1246
 o-o o-o o-o o-o
1247
 | |   | |   | |
1248
 o o-o-o o-o-o o
1249
 |             |
1250
 o-o o-o-o-o o-o
1251
   | |     | | 
1252
 o-o o-o o-o o-o
1253
 |     | |     |
1254
 o o-o o o o-o o
1255
 | | | | | | | |
1256
 o-o o-o o-o o-o
1257

1258
#endif
1259
1260
#define SVI(a, i, x, y) \
1261
{\
1262
    a[i][0]= x;\
1263
    a[i][1]= y;\
1264
    i++;\
1265
}
1266
1267
static int sig_cmp(const void *a, const void *b){
1268
    const int16_t* da = (const int16_t *) a;
1269
    const int16_t* db = (const int16_t *) b;
1270
    
1271
    if(da[1] != db[1]) return da[1] - db[1];
1272
    else               return da[0] - db[0];
1273
}
1274
1275
1276
static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1277
    const int level= b->level;
1278
    const int w= b->width;
1279
    const int h= b->height;
1280
    int x, y;
1281 a8d73e56 Michael Niedermayer
    
1282
#if 0
1283 791e7b83 Michael Niedermayer
    if(orientation==3 && parent && 0){
1284
        int16_t candidate[w*h][2];
1285
        uint8_t state[w*h];
1286
        int16_t boarder[3][w*h*4][2];
1287
        int16_t significant[w*h][2];
1288
        int candidate_count=0;
1289
        int boarder_count[3]={0,0,0};
1290
        int significant_count=0;
1291
        int rle_pos=0;
1292
        int v, last_v;
1293
        int primary= orientation==1;
1294
        
1295
        memset(candidate, 0, sizeof(candidate));
1296
        memset(state, 0, sizeof(state));
1297
        memset(boarder, 0, sizeof(boarder));
1298
        
1299
        for(y=0; y<h; y++){
1300
            for(x=0; x<w; x++){
1301
                if(parent[(x>>1) + (y>>1)*2*stride])
1302
                    SVI(candidate, candidate_count, x, y)
1303
            }
1304
        }
1305

1306
        for(;;){
1307
            while(candidate_count && !boarder_count[0] && !boarder_count[1] && !boarder_count[2]){
1308
                candidate_count--;
1309
                x= candidate[ candidate_count][0];
1310
                y= candidate[ candidate_count][1];
1311
                if(state[x + y*w])
1312
                    continue;
1313
                state[x + y*w]= 1;
1314
                v= !!src[x + y*stride];
1315
                put_cabac(&s->c, &b->state[0][0], v);
1316
                if(v){
1317
                    SVI(significant, significant_count, x,y)
1318
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1319
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1320
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1321
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1322
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1323
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1324
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1325
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1326
                }
1327
            }
1328
            while(!boarder_count[0] && !boarder_count[1] && !boarder_count[2] && rle_pos < w*h){
1329
                int run=0;
1330
                for(; rle_pos < w*h;){
1331
                    x= rle_pos % w; //FIXME speed
1332
                    y= rle_pos / w;
1333
                    rle_pos++;
1334
                    if(state[x + y*w])
1335
                        continue;
1336
                    state[x + y*w]= 1;
1337
                    v= !!src[x + y*stride];
1338
                    if(v){
1339
                        put_symbol(&s->c, b->state[1], run, 0);
1340
                        SVI(significant, significant_count, x,y)
1341
                        if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1342
                        if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1343
                        if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1344
                        if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1345
                        if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1346
                        if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1347
                        if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1348
                        if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1349
                        break;
1350
//FIXME                note only right & down can be boarders
1351
                    }
1352
                    run++;
1353
                }
1354
            }
1355
            if(!boarder_count[0] && !boarder_count[1] && !boarder_count[2])
1356
                break;
1357
            
1358
            while(boarder_count[0] || boarder_count[1] || boarder_count[2]){
1359
                int index;
1360
                
1361
                if     (boarder_count[  primary]) index=  primary;
1362
                else if(boarder_count[1-primary]) index=1-primary;
1363
                else                              index=2;
1364
                
1365
                boarder_count[index]--;
1366
                x= boarder[index][ boarder_count[index] ][0];
1367
                y= boarder[index][ boarder_count[index] ][1];
1368
                if(state[x + y*w]) //FIXME maybe check earlier
1369
                    continue;
1370
                state[x + y*w]= 1;
1371
                v= !!src[x + y*stride];
1372
                put_cabac(&s->c, &b->state[0][index+1], v);
1373
                if(v){
1374
                    SVI(significant, significant_count, x,y)
1375
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1376
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1377
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1378
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1379
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1380
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1381
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1382
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1383
                }
1384
            }
1385
        }
1386
        //FIXME sort significant coeffs maybe
1387
        if(1){
1388
            qsort(significant, significant_count, sizeof(int16_t[2]), sig_cmp);
1389
        }
1390
        
1391
        last_v=1;
1392
        while(significant_count){
1393
            int context= 3 + quant7[last_v&0xFF]; //use significance of suroundings
1394
            significant_count--;
1395
            x= significant[significant_count][0];//FIXME try opposit direction
1396
            y= significant[significant_count][1];
1397
            v= src[x + y*stride];
1398
            put_symbol(&s->c, b->state[context + 2], v, 1); //FIXME try to avoid first bit, try this with the old code too!!
1399
            last_v= v;
1400
        }
1401
    }
1402
#endif
1403
    if(1){
1404
        int run=0;
1405 a8d73e56 Michael Niedermayer
        int runs[w*h];
1406 791e7b83 Michael Niedermayer
        int run_index=0;
1407
                
1408
        for(y=0; y<h; y++){
1409
            for(x=0; x<w; x++){
1410 78486403 Michael Niedermayer
                int v, p=0;
1411 6b2f6646 Michael Niedermayer
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1412 a8d73e56 Michael Niedermayer
                v= src[x + y*stride];
1413 791e7b83 Michael Niedermayer
1414
                if(y){
1415 a8d73e56 Michael Niedermayer
                    t= src[x + (y-1)*stride];
1416 791e7b83 Michael Niedermayer
                    if(x){
1417 a8d73e56 Michael Niedermayer
                        lt= src[x - 1 + (y-1)*stride];
1418 791e7b83 Michael Niedermayer
                    }
1419
                    if(x + 1 < w){
1420 a8d73e56 Michael Niedermayer
                        rt= src[x + 1 + (y-1)*stride];
1421 791e7b83 Michael Niedermayer
                    }
1422
                }
1423
                if(x){
1424 a8d73e56 Michael Niedermayer
                    l= src[x - 1 + y*stride];
1425 6b2f6646 Michael Niedermayer
                    /*if(x > 1){
1426
                        if(orientation==1) ll= src[y + (x-2)*stride];
1427
                        else               ll= src[x - 2 + y*stride];
1428 791e7b83 Michael Niedermayer
                    }*/
1429
                }
1430 78486403 Michael Niedermayer
                if(parent){
1431 a8d73e56 Michael Niedermayer
                    int px= x>>1;
1432
                    int py= y>>1;
1433 78486403 Michael Niedermayer
                    if(px<b->parent->width && py<b->parent->height) 
1434
                        p= parent[px + py*2*stride];
1435
                }
1436
                if(!(/*ll|*/l|lt|t|rt|p)){
1437 791e7b83 Michael Niedermayer
                    if(v){
1438
                        runs[run_index++]= run;
1439
                        run=0;
1440
                    }else{
1441
                        run++;
1442
                    }
1443
                }
1444
            }
1445
        }
1446
        runs[run_index++]= run;
1447
        run_index=0;
1448
        run= runs[run_index++];
1449
1450 6b2f6646 Michael Niedermayer
        put_symbol(&s->c, b->state[1], run, 0);
1451 791e7b83 Michael Niedermayer
        
1452
        for(y=0; y<h; y++){
1453
            for(x=0; x<w; x++){
1454 78486403 Michael Niedermayer
                int v, p=0;
1455 6b2f6646 Michael Niedermayer
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1456 a8d73e56 Michael Niedermayer
                v= src[x + y*stride];
1457 791e7b83 Michael Niedermayer
1458
                if(y){
1459 a8d73e56 Michael Niedermayer
                    t= src[x + (y-1)*stride];
1460 791e7b83 Michael Niedermayer
                    if(x){
1461 a8d73e56 Michael Niedermayer
                        lt= src[x - 1 + (y-1)*stride];
1462 791e7b83 Michael Niedermayer
                    }
1463
                    if(x + 1 < w){
1464 a8d73e56 Michael Niedermayer
                        rt= src[x + 1 + (y-1)*stride];
1465 791e7b83 Michael Niedermayer
                    }
1466
                }
1467
                if(x){
1468 a8d73e56 Michael Niedermayer
                    l= src[x - 1 + y*stride];
1469 6b2f6646 Michael Niedermayer
                    /*if(x > 1){
1470
                        if(orientation==1) ll= src[y + (x-2)*stride];
1471
                        else               ll= src[x - 2 + y*stride];
1472 791e7b83 Michael Niedermayer
                    }*/
1473
                }
1474 78486403 Michael Niedermayer
                if(parent){
1475 a8d73e56 Michael Niedermayer
                    int px= x>>1;
1476
                    int py= y>>1;
1477 78486403 Michael Niedermayer
                    if(px<b->parent->width && py<b->parent->height) 
1478
                        p= parent[px + py*2*stride];
1479
                }
1480
                if(/*ll|*/l|lt|t|rt|p){
1481
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1482 6b2f6646 Michael Niedermayer
1483
                    put_cabac(&s->c, &b->state[0][context], !!v);
1484 791e7b83 Michael Niedermayer
                }else{
1485
                    if(!run){
1486
                        run= runs[run_index++];
1487 6b2f6646 Michael Niedermayer
                        put_symbol(&s->c, b->state[1], run, 0);
1488 791e7b83 Michael Niedermayer
                        assert(v);
1489
                    }else{
1490
                        run--;
1491
                        assert(!v);
1492
                    }
1493
                }
1494
                if(v){
1495 78486403 Michael Niedermayer
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1496 6b2f6646 Michael Niedermayer
1497
                    put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
1498 791e7b83 Michael Niedermayer
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1499
                }
1500
            }
1501
        }
1502
        return;
1503
    }
1504
}
1505
1506 a8d73e56 Michael Niedermayer
static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1507 791e7b83 Michael Niedermayer
    const int level= b->level;
1508
    const int w= b->width;
1509
    const int h= b->height;
1510
    int x,y;
1511
1512
    START_TIMER
1513
    
1514
    if(1){
1515
        int run;
1516
                
1517
        for(y=0; y<b->height; y++)
1518
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1519
1520 6b2f6646 Michael Niedermayer
        run= get_symbol(&s->c, b->state[1], 0);
1521 791e7b83 Michael Niedermayer
        for(y=0; y<h; y++){
1522
            for(x=0; x<w; x++){
1523 78486403 Michael Niedermayer
                int v, p=0;
1524 6b2f6646 Michael Niedermayer
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1525 791e7b83 Michael Niedermayer
1526
                if(y){
1527 a8d73e56 Michael Niedermayer
                    t= src[x + (y-1)*stride];
1528 791e7b83 Michael Niedermayer
                    if(x){
1529 a8d73e56 Michael Niedermayer
                        lt= src[x - 1 + (y-1)*stride];
1530 791e7b83 Michael Niedermayer
                    }
1531
                    if(x + 1 < w){
1532 a8d73e56 Michael Niedermayer
                        rt= src[x + 1 + (y-1)*stride];
1533 791e7b83 Michael Niedermayer
                    }
1534
                }
1535
                if(x){
1536 a8d73e56 Michael Niedermayer
                    l= src[x - 1 + y*stride];
1537 6b2f6646 Michael Niedermayer
                    /*if(x > 1){
1538
                        if(orientation==1) ll= src[y + (x-2)*stride];
1539
                        else               ll= src[x - 2 + y*stride];
1540 791e7b83 Michael Niedermayer
                    }*/
1541
                }
1542 78486403 Michael Niedermayer
                if(parent){
1543 a8d73e56 Michael Niedermayer
                    int px= x>>1;
1544
                    int py= y>>1;
1545 78486403 Michael Niedermayer
                    if(px<b->parent->width && py<b->parent->height) 
1546
                        p= parent[px + py*2*stride];
1547
                }
1548
                if(/*ll|*/l|lt|t|rt|p){
1549
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1550 6b2f6646 Michael Niedermayer
1551
                    v=get_cabac(&s->c, &b->state[0][context]);
1552 791e7b83 Michael Niedermayer
                }else{
1553
                    if(!run){
1554 6b2f6646 Michael Niedermayer
                        run= get_symbol(&s->c, b->state[1], 0);
1555 791e7b83 Michael Niedermayer
                        //FIXME optimize this here
1556
                        //FIXME try to store a more naive run
1557
                        v=1;
1558
                    }else{
1559
                        run--;
1560
                        v=0;
1561
                    }
1562
                }
1563
                if(v){
1564 78486403 Michael Niedermayer
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1565 6b2f6646 Michael Niedermayer
                    v= get_symbol(&s->c, b->state[context + 2], 0) + 1;
1566 791e7b83 Michael Niedermayer
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
1567
                        v= -v;
1568 a8d73e56 Michael Niedermayer
                    src[x + y*stride]= v;
1569 791e7b83 Michael Niedermayer
                }
1570
            }
1571
        }
1572
        if(level+1 == s->spatial_decomposition_count){
1573
            STOP_TIMER("decode_subband")
1574
        }
1575
        
1576
        return;
1577
    }
1578
}
1579
1580
static void reset_contexts(SnowContext *s){
1581
    int plane_index, level, orientation;
1582
1583
    for(plane_index=0; plane_index<2; plane_index++){
1584
        for(level=0; level<s->spatial_decomposition_count; level++){
1585
            for(orientation=level ? 1:0; orientation<4; orientation++){
1586
                memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
1587
            }
1588
        }
1589
    }
1590
    memset(s->mb_band.state, 0, sizeof(s->mb_band.state));
1591
    memset(s->mv_band[0].state, 0, sizeof(s->mv_band[0].state));
1592
    memset(s->mv_band[1].state, 0, sizeof(s->mv_band[1].state));
1593
    memset(s->header_state, 0, sizeof(s->header_state));
1594
}
1595
1596
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1597
    int x, y;
1598
1599
    for(y=0; y < b_h+5; y++){
1600
        for(x=0; x < b_w; x++){
1601
            int a0= src[x     + y*stride];
1602
            int a1= src[x + 1 + y*stride];
1603
            int a2= src[x + 2 + y*stride];
1604
            int a3= src[x + 3 + y*stride];
1605
            int a4= src[x + 4 + y*stride];
1606
            int a5= src[x + 5 + y*stride];
1607
//            int am= 9*(a1+a2) - (a0+a3);
1608
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1609
//            int am= 18*(a2+a3) - 2*(a1+a4);
1610
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1611
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1612
1613
//            if(b_w==16) am= 8*(a1+a2);
1614
1615
            if(dx<8) tmp[x + y*stride]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
1616
            else     tmp[x + y*stride]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
1617
1618
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
1619
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
1620
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
1621
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1622
        }
1623
    }
1624
    for(y=0; y < b_h; y++){
1625
        for(x=0; x < b_w; x++){
1626
            int a0= tmp[x +  y     *stride];
1627
            int a1= tmp[x + (y + 1)*stride];
1628
            int a2= tmp[x + (y + 2)*stride];
1629
            int a3= tmp[x + (y + 3)*stride];
1630
            int a4= tmp[x + (y + 4)*stride];
1631
            int a5= tmp[x + (y + 5)*stride];
1632
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1633
//            int am= 18*(a2+a3) - 2*(a1+a4);
1634
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1635
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1636
            
1637
//            if(b_w==16) am= 8*(a1+a2);
1638
1639
            if(dy<8) dst[x + y*stride]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
1640
            else     dst[x + y*stride]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
1641
1642
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
1643
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
1644
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
1645
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
1646
        }
1647
    }
1648
}
1649
1650
#define mcb(dx,dy,b_w)\
1651
static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
1652
    uint8_t tmp[stride*(b_w+5)];\
1653
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
1654
}
1655
1656
mcb( 0, 0,16)
1657
mcb( 4, 0,16)
1658
mcb( 8, 0,16)
1659
mcb(12, 0,16)
1660
mcb( 0, 4,16)
1661
mcb( 4, 4,16)
1662
mcb( 8, 4,16)
1663
mcb(12, 4,16)
1664
mcb( 0, 8,16)
1665
mcb( 4, 8,16)
1666
mcb( 8, 8,16)
1667
mcb(12, 8,16)
1668
mcb( 0,12,16)
1669
mcb( 4,12,16)
1670
mcb( 8,12,16)
1671
mcb(12,12,16)
1672
1673
#define mca(dx,dy,b_w)\
1674
static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
1675
    uint8_t tmp[stride*(b_w+5)];\
1676
    assert(h==b_w);\
1677
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
1678
}
1679
1680
mca( 0, 0,16)
1681
mca( 8, 0,16)
1682
mca( 0, 8,16)
1683
mca( 8, 8,16)
1684
1685
static void add_xblock(DWTELEM *dst, uint8_t *src, uint8_t *obmc, int s_x, int s_y, int b_w, int b_h, int mv_x, int mv_y, int w, int h, int dst_stride, int src_stride, int obmc_stride, int mb_type, int add){
1686
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME move to context to gurantee alignment
1687
    int x,y;
1688
1689
    if(s_x<0){
1690
        obmc -= s_x;
1691
        b_w += s_x;
1692
        s_x=0;
1693
    }else if(s_x + b_w > w){
1694
        b_w = w - s_x;
1695
    }
1696
    if(s_y<0){
1697
        obmc -= s_y*obmc_stride;
1698
        b_h += s_y;
1699
        s_y=0;
1700
    }else if(s_y + b_h> h){
1701
        b_h = h - s_y;
1702
    }
1703
1704
    dst += s_x + s_y*dst_stride;
1705
    
1706
    if(mb_type==1){
1707
        src += s_x + s_y*src_stride;
1708
        for(y=0; y < b_h; y++){
1709
            for(x=0; x < b_w; x++){
1710
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
1711
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
1712
            }
1713
        }
1714
    }else{
1715
        int dx= mv_x&15;
1716
        int dy= mv_y&15;
1717
//        int dxy= (mv_x&1) + 2*(mv_y&1);
1718
1719
        s_x += (mv_x>>4) - 2;
1720
        s_y += (mv_y>>4) - 2;
1721
        src += s_x + s_y*src_stride;
1722
        //use dsputil
1723
    
1724
        if(   (unsigned)s_x >= w - b_w - 4
1725
           || (unsigned)s_y >= h - b_h - 4){
1726
            ff_emulated_edge_mc(tmp + 32, src, src_stride, b_w+5, b_h+5, s_x, s_y, w, h);
1727
            src= tmp + 32;
1728
        }
1729
1730
        if(mb_type==0){
1731
            mc_block(tmp, src, tmp + 64+8, src_stride, b_w, b_h, dx, dy);
1732
        }else{
1733
            int sum=0;
1734
            for(y=0; y < b_h; y++){
1735
                for(x=0; x < b_w; x++){
1736
                    sum += src[x+  y*src_stride];
1737
                }
1738
            }
1739
            sum= (sum + b_h*b_w/2) / (b_h*b_w);
1740
            for(y=0; y < b_h; y++){
1741
                for(x=0; x < b_w; x++){
1742
                    tmp[x + y*src_stride]= sum; 
1743
                }
1744
            }
1745
        }
1746
1747
        for(y=0; y < b_h; y++){
1748
            for(x=0; x < b_w; x++){
1749
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
1750
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
1751
            }
1752
        }
1753
    }
1754
}
1755
1756
static void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
1757
    Plane *p= &s->plane[plane_index];
1758
    const int mb_w= s->mb_band.width;
1759
    const int mb_h= s->mb_band.height;
1760
    const int mb_stride= s->mb_band.stride;
1761
    int x, y, mb_x, mb_y;
1762
    int scale      = plane_index ?  s->mv_scale : 2*s->mv_scale;
1763
    int block_w    = plane_index ?  8 : 16;
1764
    uint8_t *obmc  = plane_index ? obmc16 : obmc32;
1765
    int obmc_stride= plane_index ? 16 : 32;
1766
    int ref_stride= s->last_picture.linesize[plane_index];
1767
    uint8_t *ref  = s->last_picture.data[plane_index];
1768
    int w= p->width;
1769
    int h= p->height;
1770
    
1771
if(s->avctx->debug&512){
1772
    for(y=0; y<h; y++){
1773
        for(x=0; x<w; x++){
1774
            if(add) buf[x + y*w]+= 128*256;
1775
            else    buf[x + y*w]-= 128*256;
1776
        }
1777
    }
1778
    
1779
    return;
1780
}
1781
    for(mb_y=-1; mb_y<=mb_h; mb_y++){
1782
        for(mb_x=-1; mb_x<=mb_w; mb_x++){
1783
            int index= clip(mb_x, 0, mb_w-1) + clip(mb_y, 0, mb_h-1)*mb_stride;
1784
1785
            add_xblock(buf, ref, obmc, 
1786
                       block_w*mb_x - block_w/2, 
1787
                       block_w*mb_y - block_w/2,
1788
                       2*block_w, 2*block_w,
1789
                       s->mv_band[0].buf[index]*scale, s->mv_band[1].buf[index]*scale,
1790
                       w, h,
1791
                       w, ref_stride, obmc_stride, 
1792
                       s->mb_band.buf[index], add);
1793
1794
        }
1795
    }
1796
}
1797
1798
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
1799
    const int level= b->level;
1800
    const int w= b->width;
1801
    const int h= b->height;
1802
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
1803
    const int qmul= qexp[qlog&7]<<(qlog>>3);
1804
    int x,y;
1805
1806
    assert(QROOT==8);
1807
1808
    bias= bias ? 0 : (3*qmul)>>3;
1809
    
1810
    if(!bias){
1811
        for(y=0; y<h; y++){
1812
            for(x=0; x<w; x++){
1813
                int i= src[x + y*stride]; 
1814
                //FIXME use threshold
1815
                //FIXME optimize
1816
                //FIXME bias
1817
                if(i>=0){
1818
                    i<<= QEXPSHIFT;
1819
                    i/= qmul;
1820
                    src[x + y*stride]=  i;
1821
                }else{
1822
                    i= -i;
1823
                    i<<= QEXPSHIFT;
1824
                    i/= qmul;
1825
                    src[x + y*stride]= -i;
1826
                }
1827
            }
1828
        }
1829
    }else{
1830
        for(y=0; y<h; y++){
1831
            for(x=0; x<w; x++){
1832
                int i= src[x + y*stride]; 
1833
                
1834
                //FIXME use threshold
1835
                //FIXME optimize
1836
                //FIXME bias
1837
                if(i>=0){
1838
                    i<<= QEXPSHIFT;
1839
                    i= (i + bias) / qmul;
1840
                    src[x + y*stride]=  i;
1841
                }else{
1842
                    i= -i;
1843
                    i<<= QEXPSHIFT;
1844
                    i= (i + bias) / qmul;
1845
                    src[x + y*stride]= -i;
1846
                }
1847
            }
1848
        }
1849
    }
1850
}
1851
1852
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
1853
    const int level= b->level;
1854
    const int w= b->width;
1855
    const int h= b->height;
1856
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
1857
    const int qmul= qexp[qlog&7]<<(qlog>>3);
1858
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1859
    int x,y;
1860
    
1861
    assert(QROOT==8);
1862
1863
    for(y=0; y<h; y++){
1864
        for(x=0; x<w; x++){
1865
            int i= src[x + y*stride];
1866
            if(i<0){
1867
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1868
            }else if(i>0){
1869
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
1870
            }
1871
        }
1872
    }
1873
}
1874
1875
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
1876
    const int w= b->width;
1877
    const int h= b->height;
1878
    int x,y;
1879
    
1880
    for(y=h-1; y>=0; y--){
1881
        for(x=w-1; x>=0; x--){
1882
            int i= x + y*stride;
1883
            
1884
            if(x){
1885
                if(use_median){
1886
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
1887
                    else  src[i] -= src[i - 1];
1888
                }else{
1889
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
1890
                    else  src[i] -= src[i - 1];
1891
                }
1892
            }else{
1893
                if(y) src[i] -= src[i - stride];
1894
            }
1895
        }
1896
    }
1897
}
1898
1899
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
1900
    const int w= b->width;
1901
    const int h= b->height;
1902
    int x,y;
1903
    
1904
    for(y=0; y<h; y++){
1905
        for(x=0; x<w; x++){
1906
            int i= x + y*stride;
1907
            
1908
            if(x){
1909
                if(use_median){
1910
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
1911
                    else  src[i] += src[i - 1];
1912
                }else{
1913
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
1914
                    else  src[i] += src[i - 1];
1915
                }
1916
            }else{
1917
                if(y) src[i] += src[i - stride];
1918
            }
1919
        }
1920
    }
1921
}
1922
1923
static void encode_header(SnowContext *s){
1924
    int plane_index, level, orientation;
1925
1926
    put_cabac(&s->c, s->header_state, s->keyframe); // state clearing stuff?
1927
    if(s->keyframe){
1928
        put_symbol(&s->c, s->header_state, s->version, 0);
1929
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
1930
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
1931
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
1932
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
1933
        put_symbol(&s->c, s->header_state, s->b_width, 0);
1934
        put_symbol(&s->c, s->header_state, s->b_height, 0);
1935
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
1936
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
1937
        put_cabac(&s->c, s->header_state, s->spatial_scalability);
1938
//        put_cabac(&s->c, s->header_state, s->rate_scalability);
1939
1940
        for(plane_index=0; plane_index<2; plane_index++){
1941
            for(level=0; level<s->spatial_decomposition_count; level++){
1942
                for(orientation=level ? 1:0; orientation<4; orientation++){
1943
                    if(orientation==2) continue;
1944
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
1945
                }
1946
            }
1947
        }
1948
    }
1949
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
1950
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
1951
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
1952
    put_symbol(&s->c, s->header_state, s->qbias, 1);
1953
}
1954
1955
static int decode_header(SnowContext *s){
1956
    int plane_index, level, orientation;
1957
1958
    s->keyframe= get_cabac(&s->c, s->header_state);
1959
    if(s->keyframe){
1960
        s->version= get_symbol(&s->c, s->header_state, 0);
1961
        if(s->version>0){
1962
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
1963
            return -1;
1964
        }
1965
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1966
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1967
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1968
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1969
        s->b_width= get_symbol(&s->c, s->header_state, 0);
1970
        s->b_height= get_symbol(&s->c, s->header_state, 0);
1971
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1972
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1973
        s->spatial_scalability= get_cabac(&s->c, s->header_state);
1974
//        s->rate_scalability= get_cabac(&s->c, s->header_state);
1975
1976
        for(plane_index=0; plane_index<3; plane_index++){
1977
            for(level=0; level<s->spatial_decomposition_count; level++){
1978
                for(orientation=level ? 1:0; orientation<4; orientation++){
1979
                    int q;
1980
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1981
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1982
                    else                    q= get_symbol(&s->c, s->header_state, 1);
1983
                    s->plane[plane_index].band[level][orientation].qlog= q;
1984
                }
1985
            }
1986
        }
1987
    }
1988
    
1989
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1990
    if(s->spatial_decomposition_type > 2){
1991
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1992
        return -1;
1993
    }
1994
    
1995
    s->qlog= get_symbol(&s->c, s->header_state, 1);
1996
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
1997
    s->qbias= get_symbol(&s->c, s->header_state, 1);
1998
1999
    return 0;
2000
}
2001
2002
static int common_init(AVCodecContext *avctx){
2003
    SnowContext *s = avctx->priv_data;
2004
    int width, height;
2005
    int level, orientation, plane_index, dec;
2006
2007
    s->avctx= avctx;
2008
        
2009
    dsputil_init(&s->dsp, avctx);
2010
2011
#define mcf(dx,dy)\
2012
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
2013
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2014
        mc_block ## dx ## dy;
2015
2016
    mcf( 0, 0)
2017
    mcf( 4, 0)
2018
    mcf( 8, 0)
2019
    mcf(12, 0)
2020
    mcf( 0, 4)
2021
    mcf( 4, 4)
2022
    mcf( 8, 4)
2023
    mcf(12, 4)
2024
    mcf( 0, 8)
2025
    mcf( 4, 8)
2026
    mcf( 8, 8)
2027
    mcf(12, 8)
2028
    mcf( 0,12)
2029
    mcf( 4,12)
2030
    mcf( 8,12)
2031
    mcf(12,12)
2032
2033
#define mcfh(dx,dy)\
2034
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
2035
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2036
        mc_block_hpel ## dx ## dy;
2037
2038
    mcfh(0, 0)
2039
    mcfh(8, 0)
2040
    mcfh(0, 8)
2041
    mcfh(8, 8)
2042
        
2043
    dec= s->spatial_decomposition_count= 5;
2044
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2045
    
2046
    s->chroma_h_shift= 1; //FIXME XXX
2047
    s->chroma_v_shift= 1;
2048
    
2049
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2050
    
2051
    s->b_width = (s->avctx->width +(1<<dec)-1)>>dec;
2052
    s->b_height= (s->avctx->height+(1<<dec)-1)>>dec;
2053
    
2054
    s->spatial_dwt_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
2055
    s->pred_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
2056
    
2057
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2058
    
2059
    for(plane_index=0; plane_index<3; plane_index++){    
2060
        int w= s->avctx->width;
2061
        int h= s->avctx->height;
2062
2063
        if(plane_index){
2064
            w>>= s->chroma_h_shift;
2065
            h>>= s->chroma_v_shift;
2066
        }
2067
        s->plane[plane_index].width = w;
2068
        s->plane[plane_index].height= h;
2069
av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
2070
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2071
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2072
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2073
                
2074
                b->buf= s->spatial_dwt_buffer;
2075
                b->level= level;
2076
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2077
                b->width = (w + !(orientation&1))>>1;
2078
                b->height= (h + !(orientation>1))>>1;
2079
                
2080
                if(orientation&1) b->buf += (w+1)>>1;
2081
                if(orientation>1) b->buf += b->stride>>1;
2082
                
2083
                if(level)
2084
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
2085
            }
2086
            w= (w+1)>>1;
2087
            h= (h+1)>>1;
2088
        }
2089
    }
2090
    
2091
    //FIXME init_subband() ?
2092
    s->mb_band.stride= s->mv_band[0].stride= s->mv_band[1].stride=
2093
    s->mb_band.width = s->mv_band[0].width = s->mv_band[1].width = (s->avctx->width + 15)>>4;
2094
    s->mb_band.height= s->mv_band[0].height= s->mv_band[1].height= (s->avctx->height+ 15)>>4;
2095
    s->mb_band   .buf= av_mallocz(s->mb_band   .stride * s->mb_band   .height*sizeof(DWTELEM));
2096
    s->mv_band[0].buf= av_mallocz(s->mv_band[0].stride * s->mv_band[0].height*sizeof(DWTELEM));
2097
    s->mv_band[1].buf= av_mallocz(s->mv_band[1].stride * s->mv_band[1].height*sizeof(DWTELEM));
2098
2099
    reset_contexts(s);
2100
/*    
2101
    width= s->width= avctx->width;
2102
    height= s->height= avctx->height;
2103
    
2104
    assert(width && height);
2105
*/
2106
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2107
    
2108
    return 0;
2109
}
2110
2111
2112
static void calculate_vissual_weight(SnowContext *s, Plane *p){
2113
    int width = p->width;
2114
    int height= p->height;
2115
    int i, level, orientation, x, y;
2116
2117
    for(level=0; level<s->spatial_decomposition_count; level++){
2118
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2119
            SubBand *b= &p->band[level][orientation];
2120
            DWTELEM *buf= b->buf;
2121
            int64_t error=0;
2122
            
2123
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2124
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
2125
            spatial_idwt(s, s->spatial_dwt_buffer, width, height, width);
2126
            for(y=0; y<height; y++){
2127
                for(x=0; x<width; x++){
2128
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
2129
                    error += d*d;
2130
                }
2131
            }
2132
2133
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
2134
            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
2135
        }
2136
    }
2137
}
2138
2139
static int encode_init(AVCodecContext *avctx)
2140
{
2141
    SnowContext *s = avctx->priv_data;
2142
    int i;
2143
    int level, orientation, plane_index;
2144
2145
    common_init(avctx);
2146
 
2147
    s->version=0;
2148
    
2149
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2150
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2151
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2152
    s->mb_type        = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int16_t));
2153
    s->mb_mean        = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int8_t ));
2154
    s->dummy          = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int32_t));
2155
    h263_encode_init(&s->m); //mv_penalty
2156
2157
    for(plane_index=0; plane_index<3; plane_index++){
2158
        calculate_vissual_weight(s, &s->plane[plane_index]);
2159
    }
2160
    
2161
    
2162
    avctx->coded_frame= &s->current_picture;
2163
    switch(avctx->pix_fmt){
2164
//    case PIX_FMT_YUV444P:
2165
//    case PIX_FMT_YUV422P:
2166
    case PIX_FMT_YUV420P:
2167
    case PIX_FMT_GRAY8:
2168
//    case PIX_FMT_YUV411P:
2169
//    case PIX_FMT_YUV410P:
2170
        s->colorspace_type= 0;
2171
        break;
2172
/*    case PIX_FMT_RGBA32:
2173
        s->colorspace= 1;
2174
        break;*/
2175
    default:
2176
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2177
        return -1;
2178
    }
2179
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2180
    s->chroma_h_shift= 1;
2181
    s->chroma_v_shift= 1;
2182
    return 0;
2183
}
2184
2185
static int frame_start(SnowContext *s){
2186
   AVFrame tmp;
2187
2188
   if(s->keyframe)
2189
        reset_contexts(s);
2190
 
2191
    tmp= s->last_picture;
2192
    s->last_picture= s->current_picture;
2193
    s->current_picture= tmp;
2194
    
2195
    s->current_picture.reference= 1;
2196
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2197
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2198
        return -1;
2199
    }
2200
    
2201
    return 0;
2202
}
2203
2204
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2205
    SnowContext *s = avctx->priv_data;
2206
    CABACContext * const c= &s->c;
2207
    AVFrame *pict = data;
2208
    const int width= s->avctx->width;
2209
    const int height= s->avctx->height;
2210
    int used_count= 0;
2211
    int log2_threshold, level, orientation, plane_index, i;
2212
2213
    if(avctx->strict_std_compliance >= 0){
2214
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2215
               "use vstrict=-1 to use it anyway\n");
2216
        return -1;
2217
    }
2218
        
2219
    ff_init_cabac_encoder(c, buf, buf_size);
2220
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2221
    
2222
    s->input_picture = *pict;
2223
2224
    memset(s->header_state, 0, sizeof(s->header_state));
2225
2226
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2227
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2228
    
2229 e071139a Michael Niedermayer
    s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2230 791e7b83 Michael Niedermayer
    //<64 >60
2231
    s->qlog += 61;
2232
2233
    for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
2234
        s->mb_band.buf[i]= s->keyframe;
2235
    }
2236
    
2237
    frame_start(s);
2238
2239
    if(pict->pict_type == P_TYPE){
2240
        int block_width = (width +15)>>4;
2241
        int block_height= (height+15)>>4;
2242
        int stride= s->current_picture.linesize[0];
2243
        uint8_t *src_plane= s->input_picture.data[0];
2244
        int src_stride= s->input_picture.linesize[0];
2245
        int x,y;
2246
        
2247
        assert(s->current_picture.data[0]);
2248
        assert(s->last_picture.data[0]);
2249
     
2250
        s->m.avctx= s->avctx;
2251
        s->m.current_picture.data[0]= s->current_picture.data[0];
2252
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
2253
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
2254
        s->m.current_picture_ptr= &s->m.current_picture;
2255
        s->m.   last_picture_ptr= &s->m.   last_picture;
2256
        s->m.linesize=
2257
        s->m.   last_picture.linesize[0]=
2258
        s->m.    new_picture.linesize[0]=
2259
        s->m.current_picture.linesize[0]= stride;
2260
        s->m.width = width;
2261
        s->m.height= height;
2262
        s->m.mb_width = block_width;
2263
        s->m.mb_height= block_height;
2264
        s->m.mb_stride=   s->m.mb_width+1;
2265
        s->m.b8_stride= 2*s->m.mb_width+1;
2266
        s->m.f_code=1;
2267
        s->m.pict_type= pict->pict_type;
2268
        s->m.me_method= s->avctx->me_method;
2269
        s->m.me.scene_change_score=0;
2270
        s->m.flags= s->avctx->flags;
2271
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2272
        s->m.out_format= FMT_H263;
2273
        s->m.unrestricted_mv= 1;
2274
2275
        s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
2276
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
2277
        s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
2278
2279
        if(!s->motion_val8){
2280
            s->motion_val8 = av_mallocz(s->m.b8_stride*block_height*2*2*sizeof(int16_t));
2281
            s->motion_val16= av_mallocz(s->m.mb_stride*block_height*2*sizeof(int16_t));
2282
        }
2283
        
2284
        s->m.mb_type= s->mb_type;
2285
        
2286
        //dummies, to avoid segfaults
2287
        s->m.current_picture.mb_mean  = s->mb_mean;
2288
        s->m.current_picture.mb_var   = (int16_t*)s->dummy;
2289
        s->m.current_picture.mc_mb_var= (int16_t*)s->dummy;
2290
        s->m.current_picture.mb_type  = s->dummy;
2291
        
2292
        s->m.current_picture.motion_val[0]= s->motion_val8;
2293
        s->m.p_mv_table= s->motion_val16;
2294
        s->m.dsp= s->dsp; //move
2295
        ff_init_me(&s->m);
2296
    
2297
        
2298
        s->m.me.pre_pass=1;
2299
        s->m.me.dia_size= s->avctx->pre_dia_size;
2300
        s->m.first_slice_line=1;
2301
        for(y= block_height-1; y >= 0; y--) {
2302
            uint8_t src[stride*16];
2303
2304
            s->m.new_picture.data[0]= src - y*16*stride; //ugly
2305
            s->m.mb_y= y;
2306
            for(i=0; i<16 && i + 16*y<height; i++){
2307
                memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
2308
                for(x=width; x<16*block_width; x++)
2309
                    src[i*stride+x]= src[i*stride+x-1];
2310
            }
2311
            for(; i<16 && i + 16*y<16*block_height; i++)
2312
                memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
2313
2314
            for(x=block_width-1; x >=0 ;x--) {
2315
                s->m.mb_x= x;
2316
                ff_init_block_index(&s->m);
2317
                ff_update_block_index(&s->m);
2318
                ff_pre_estimate_p_frame_motion(&s->m, x, y);
2319
            }
2320
            s->m.first_slice_line=0;
2321
        }        
2322
        s->m.me.pre_pass=0;
2323
        
2324
        
2325
        s->m.me.dia_size= s->avctx->dia_size;
2326
        s->m.first_slice_line=1;
2327
        for (y = 0; y < block_height; y++) {
2328
            uint8_t src[stride*16];
2329
2330
            s->m.new_picture.data[0]= src - y*16*stride; //ugly
2331
            s->m.mb_y= y;
2332
            
2333
            assert(width <= stride);
2334
            assert(width <= 16*block_width);
2335
    
2336
            for(i=0; i<16 && i + 16*y<height; i++){
2337
                memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
2338
                for(x=width; x<16*block_width; x++)
2339
                    src[i*stride+x]= src[i*stride+x-1];
2340
            }
2341
            for(; i<16 && i + 16*y<16*block_height; i++)
2342
                memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
2343
    
2344
            for (x = 0; x < block_width; x++) {
2345
                int mb_xy= x + y*(s->mb_band.stride);
2346
                s->m.mb_x= x;
2347
                ff_init_block_index(&s->m);
2348
                ff_update_block_index(&s->m);
2349
                
2350
                ff_estimate_p_frame_motion(&s->m, x, y);
2351
                
2352
                s->mb_band   .buf[mb_xy]= (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER)
2353
                 ? 0 : 2;
2354
                s->mv_band[0].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][0];
2355
                s->mv_band[1].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][1];
2356
                
2357
                if(s->mb_band   .buf[x + y*(s->mb_band.stride)]==2 && 0){
2358
                    int dc0=128, dc1=128, dc, dc2, dir;
2359
                    int offset= (s->avctx->flags & CODEC_FLAG_QPEL) ? 64 : 32;
2360
                    
2361
                    dc       =s->mb_mean[x +  y   *s->m.mb_stride    ];
2362
                    if(x) dc0=s->mb_mean[x +  y   *s->m.mb_stride - 1];
2363
                    if(y) dc1=s->mb_mean[x + (y-1)*s->m.mb_stride    ];
2364
                    dc2= (dc0+dc1)>>1;
2365
#if 0
2366
                    if     (ABS(dc0 - dc) < ABS(dc1 - dc) && ABS(dc0 - dc) < ABS(dc2 - dc))
2367
                        dir= 1;
2368
                    else if(ABS(dc0 - dc) >=ABS(dc1 - dc) && ABS(dc1 - dc) < ABS(dc2 - dc))
2369
                        dir=-1;
2370
                    else
2371
                        dir=0;
2372
#endif                    
2373
                    if(ABS(dc0 - dc) < ABS(dc1 - dc) && x){
2374
                        s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + y*(s->mb_band.stride)-1] - offset;
2375
                        s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + y*(s->mb_band.stride)-1];
2376
                        s->mb_mean[x +  y   *s->m.mb_stride    ]= dc0;
2377
                    }else if(y){
2378
                        s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + (y-1)*(s->mb_band.stride)];
2379
                        s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + (y-1)*(s->mb_band.stride)] - offset;
2380
                        s->mb_mean[x +  y   *s->m.mb_stride    ]= dc1;
2381
                    }
2382
                }
2383
//                s->mb_band   .buf[x + y*(s->mb_band.stride)]=1; //FIXME intra only test
2384
            }
2385
            s->m.first_slice_line=0;
2386
        }
2387
        assert(s->m.pict_type == P_TYPE);
2388
        if(s->m.me.scene_change_score > s->avctx->scenechange_threshold){
2389
            s->m.pict_type= 
2390
            pict->pict_type =I_TYPE;
2391
            for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
2392
                s->mb_band.buf[i]= 1;
2393
                s->mv_band[0].buf[i]=
2394
                s->mv_band[1].buf[i]= 0;
2395
            }
2396
    //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
2397
        }        
2398
    }
2399
        
2400
    s->m.first_slice_line=1;
2401
    
2402
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
2403
2404
    encode_header(s);
2405
    
2406
    decorrelate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 0, 1);
2407
    decorrelate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 0, 1);
2408
    decorrelate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 0 ,1);
2409
    encode_subband(s, &s->mb_band   , s->mb_band   .buf, NULL, s->mb_band   .stride, 0);
2410
    encode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
2411
    encode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
2412
    
2413
//FIXME avoid this
2414
    correlate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 1, 1);
2415
    correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
2416
    correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
2417
    
2418
    for(plane_index=0; plane_index<3; plane_index++){
2419
        Plane *p= &s->plane[plane_index];
2420
        int w= p->width;
2421
        int h= p->height;
2422
        int x, y;
2423
        int bits= put_bits_count(&s->c.pb);
2424
2425
        //FIXME optimize
2426
#if QPRED
2427
        memset(s->pred_buffer, 0, sizeof(DWTELEM)*w*h);
2428
        predict_plane(s, s->pred_buffer, plane_index, 1);
2429
        spatial_dwt(s, s->pred_buffer, w, h, w);
2430
        for(level=0; level<s->spatial_decomposition_count; level++){
2431
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2432
                SubBand *b= &p->band[level][orientation];
2433
                int delta= ((int)s->pred_buffer - (int)s->spatial_dwt_buffer)/sizeof(DWTELEM);
2434
                
2435
                quantize  (s, b, b->buf + delta, b->stride, s->qbias);
2436
                dequantize(s, b, b->buf + delta, b->stride);
2437
            }
2438
        }
2439
        for(y=0; y<h; y++){
2440
            for(x=0; x<w; x++){
2441
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
2442
            }
2443
        }
2444
        spatial_dwt(s, s->spatial_dwt_buffer, w, h, w);
2445
        for(y=0; y<h; y++){
2446
            for(x=0; x<w; x++){
2447
                s->spatial_dwt_buffer[y*w + x]-= s->pred_buffer[y*w + x];
2448
            }
2449
        }
2450
#else
2451
     if(pict->data[plane_index]) //FIXME gray hack
2452
        for(y=0; y<h; y++){
2453
            for(x=0; x<w; x++){
2454
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
2455
            }
2456
        }
2457
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
2458
        spatial_dwt(s, s->spatial_dwt_buffer, w, h, w);
2459
#endif
2460
 
2461
        for(level=0; level<s->spatial_decomposition_count; level++){
2462
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2463
                SubBand *b= &p->band[level][orientation];
2464
                
2465
                quantize(s, b, b->buf, b->stride, s->qbias);
2466
                if(orientation==0)
2467
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
2468
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2469
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
2470
                if(orientation==0)
2471
                    correlate(s, b, b->buf, b->stride, 1, 0);
2472
            }
2473
        }
2474
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
2475
2476
        for(level=0; level<s->spatial_decomposition_count; level++){
2477
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2478
                SubBand *b= &p->band[level][orientation];
2479
2480
                dequantize(s, b, b->buf, b->stride);
2481
            }
2482
        }
2483
        
2484
#if QPRED
2485
        for(y=0; y<h; y++){
2486
            for(x=0; x<w; x++){
2487
                s->spatial_dwt_buffer[y*w + x]+= s->pred_buffer[y*w + x];
2488
            }
2489
        }
2490
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
2491
#else
2492
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
2493
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2494
#endif
2495
        //FIXME optimize
2496
        for(y=0; y<h; y++){
2497
            for(x=0; x<w; x++){
2498
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2499
                if(v&(~255)) v= ~(v>>31);
2500
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2501
            }
2502
        }
2503
        if(s->avctx->flags&CODEC_FLAG_PSNR){
2504
            int64_t error= 0;
2505
            
2506
    if(pict->data[plane_index]) //FIXME gray hack
2507
            for(y=0; y<h; y++){
2508
                for(x=0; x<w; x++){
2509
                    int d= s->spatial_dwt_buffer[y*w + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]*256;
2510
                    error += d*d;
2511
                }
2512
            }
2513
            error= (error + 128*256)>>16;
2514
            s->avctx->error[plane_index] += error;
2515
            s->avctx->error[3] += error;
2516
        }
2517
    }
2518
2519
    if(s->last_picture.data[0])
2520
        avctx->release_buffer(avctx, &s->last_picture);
2521
2522
    emms_c();
2523
    
2524
    return put_cabac_terminate(c, 1);
2525
}
2526
2527
static void common_end(SnowContext *s){
2528
    av_freep(&s->spatial_dwt_buffer);
2529
    av_freep(&s->mb_band.buf);
2530
    av_freep(&s->mv_band[0].buf);
2531
    av_freep(&s->mv_band[1].buf);
2532
2533
    av_freep(&s->m.me.scratchpad);    
2534
    av_freep(&s->m.me.map);
2535
    av_freep(&s->m.me.score_map);
2536
    av_freep(&s->mb_type);
2537
    av_freep(&s->mb_mean);
2538
    av_freep(&s->dummy);
2539
    av_freep(&s->motion_val8);
2540
    av_freep(&s->motion_val16);
2541
}
2542
2543
static int encode_end(AVCodecContext *avctx)
2544
{
2545
    SnowContext *s = avctx->priv_data;
2546
2547
    common_end(s);
2548
2549
    return 0;
2550
}
2551
2552
static int decode_init(AVCodecContext *avctx)
2553
{
2554
//    SnowContext *s = avctx->priv_data;
2555
2556
    common_init(avctx);
2557
    
2558
    return 0;
2559
}
2560
2561
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
2562
    SnowContext *s = avctx->priv_data;
2563
    CABACContext * const c= &s->c;
2564
    const int width= s->avctx->width;
2565
    const int height= s->avctx->height;
2566
    int bytes_read;
2567
    AVFrame *picture = data;
2568
    int log2_threshold, level, orientation, plane_index;
2569
    
2570
2571
    /* no supplementary picture */
2572
    if (buf_size == 0)
2573
        return 0;
2574
2575
    ff_init_cabac_decoder(c, buf, buf_size);
2576
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2577
2578
    memset(s->header_state, 0, sizeof(s->header_state));
2579
2580
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2581
    decode_header(s);
2582
2583
    frame_start(s);
2584
    //keyframe flag dupliaction mess FIXME
2585
    if(avctx->debug&FF_DEBUG_PICT_INFO)
2586
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2587
    
2588
    decode_subband(s, &s->mb_band   , s->mb_band   .buf, NULL, s->mb_band   .stride, 0);
2589
    decode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
2590
    decode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
2591
    correlate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 1, 1);
2592
    correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
2593
    correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
2594
2595
    for(plane_index=0; plane_index<3; plane_index++){
2596
        Plane *p= &s->plane[plane_index];
2597
        int w= p->width;
2598
        int h= p->height;
2599
        int x, y;
2600
        
2601
if(s->avctx->debug&2048){
2602
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2603
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2604
2605
        for(y=0; y<h; y++){
2606
            for(x=0; x<w; x++){
2607
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2608
                if(v&(~255)) v= ~(v>>31);
2609
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2610
            }
2611
        }
2612
}
2613
        for(level=0; level<s->spatial_decomposition_count; level++){
2614
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2615
                SubBand *b= &p->band[level][orientation];
2616
2617
                decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2618
                if(orientation==0)
2619
                    correlate(s, b, b->buf, b->stride, 1, 0);
2620
            }
2621
        }
2622
if(!(s->avctx->debug&1024))
2623
        for(level=0; level<s->spatial_decomposition_count; level++){
2624
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2625
                SubBand *b= &p->band[level][orientation];
2626
2627
                dequantize(s, b, b->buf, b->stride);
2628
            }
2629
        }
2630
2631
#if QPRED
2632
        memset(s->pred_buffer, 0, sizeof(DWTELEM)*w*h);
2633
        predict_plane(s, s->pred_buffer, plane_index, 1);
2634
        spatial_dwt(s, s->pred_buffer, w, h, w);
2635
        for(level=0; level<s->spatial_decomposition_count; level++){
2636
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2637
                SubBand *b= &p->band[level][orientation];
2638
                int delta= ((int)s->pred_buffer - (int)s->spatial_dwt_buffer)/sizeof(DWTELEM);
2639
                
2640
                quantize  (s, b, b->buf + delta, b->stride, s->qbias);
2641
                dequantize(s, b, b->buf + delta, b->stride);
2642
            }
2643
        }
2644
        for(y=0; y<h; y++){
2645
            for(x=0; x<w; x++){
2646
                s->spatial_dwt_buffer[y*w + x]+= s->pred_buffer[y*w + x];
2647
            }
2648
        }
2649
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
2650
#else
2651
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
2652
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2653
#endif
2654
2655
        //FIXME optimize
2656
        for(y=0; y<h; y++){
2657
            for(x=0; x<w; x++){
2658
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2659
                if(v&(~255)) v= ~(v>>31);
2660
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2661
            }
2662
        }
2663
    }
2664
            
2665
    emms_c();
2666
2667
    if(s->last_picture.data[0])
2668
        avctx->release_buffer(avctx, &s->last_picture);
2669
2670
if(!(s->avctx->debug&2048))        
2671
    *picture= s->current_picture;
2672
else
2673
    *picture= s->mconly_picture;
2674
    
2675
    *data_size = sizeof(AVFrame);
2676
    
2677
    bytes_read= get_cabac_terminate(c);
2678
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
2679
2680
    return bytes_read;
2681
}
2682
2683
static int decode_end(AVCodecContext *avctx)
2684
{
2685
    SnowContext *s = avctx->priv_data;
2686
2687
    common_end(s);
2688
2689
    return 0;
2690
}
2691
2692
AVCodec snow_decoder = {
2693
    "snow",
2694
    CODEC_TYPE_VIDEO,
2695
    CODEC_ID_SNOW,
2696
    sizeof(SnowContext),
2697
    decode_init,
2698
    NULL,
2699
    decode_end,
2700
    decode_frame,
2701
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2702
    NULL
2703
};
2704
2705
AVCodec snow_encoder = {
2706
    "snow",
2707
    CODEC_TYPE_VIDEO,
2708
    CODEC_ID_SNOW,
2709
    sizeof(SnowContext),
2710
    encode_init,
2711
    encode_frame,
2712
    encode_end,
2713
};
2714
2715
2716
#if 0
2717
#undef malloc
2718
#undef free
2719
#undef printf
2720

2721
int main(){
2722
    int width=256;
2723
    int height=256;
2724
    int buffer[2][width*height];
2725
    SnowContext s;
2726
    int i;
2727
    s.spatial_decomposition_count=6;
2728
    s.spatial_decomposition_type=1;
2729
    
2730
    printf("testing 5/3 DWT\n");
2731
    for(i=0; i<width*height; i++)
2732
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
2733
    
2734
    spatial_dwt(&s, buffer[0], width, height, width);
2735
    spatial_idwt(&s, buffer[0], width, height, width);
2736
    
2737
    for(i=0; i<width*height; i++)
2738
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
2739

2740
    printf("testing 9/7 DWT\n");
2741
    s.spatial_decomposition_type=0;
2742
    for(i=0; i<width*height; i++)
2743
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
2744
    
2745
    spatial_dwt(&s, buffer[0], width, height, width);
2746
    spatial_idwt(&s, buffer[0], width, height, width);
2747
    
2748
    for(i=0; i<width*height; i++)
2749
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
2750
        
2751
    printf("testing AC coder\n");
2752
    memset(s.header_state, 0, sizeof(s.header_state));
2753
    ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
2754
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2755
        
2756
    for(i=-256; i<256; i++){
2757
START_TIMER
2758
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
2759
STOP_TIMER("put_symbol")
2760
    }
2761
    put_cabac_terminate(&s.c, 1);
2762

2763
    memset(s.header_state, 0, sizeof(s.header_state));
2764
    ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
2765
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2766
    
2767
    for(i=-256; i<256; i++){
2768
        int j;
2769
START_TIMER
2770
        j= get_symbol(&s.c, s.header_state, 1);
2771
STOP_TIMER("get_symbol")
2772
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
2773
    }
2774
{
2775
int level, orientation, x, y;
2776
int64_t errors[8][4];
2777
int64_t g=0;
2778

2779
    memset(errors, 0, sizeof(errors));
2780
    s.spatial_decomposition_count=3;
2781
    s.spatial_decomposition_type=0;
2782
    for(level=0; level<s.spatial_decomposition_count; level++){
2783
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2784
            int w= width  >> (s.spatial_decomposition_count-level);
2785
            int h= height >> (s.spatial_decomposition_count-level);
2786
            int stride= width  << (s.spatial_decomposition_count-level);
2787
            DWTELEM *buf= buffer[0];
2788
            int64_t error=0;
2789

2790
            if(orientation&1) buf+=w;
2791
            if(orientation>1) buf+=stride>>1;
2792
            
2793
            memset(buffer[0], 0, sizeof(int)*width*height);
2794
            buf[w/2 + h/2*stride]= 256*256;
2795
            spatial_idwt(&s, buffer[0], width, height, width);
2796
            for(y=0; y<height; y++){
2797
                for(x=0; x<width; x++){
2798
                    int64_t d= buffer[0][x + y*width];
2799
                    error += d*d;
2800
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
2801
                }
2802
                if(ABS(height/2-y)<9 && level==2) printf("\n");
2803
            }
2804
            error= (int)(sqrt(error)+0.5);
2805
            errors[level][orientation]= error;
2806
            if(g) g=ff_gcd(g, error);
2807
            else g= error;
2808
        }
2809
    }
2810
    printf("static int const visual_weight[][4]={\n");
2811
    for(level=0; level<s.spatial_decomposition_count; level++){
2812
        printf("  {");
2813
        for(orientation=0; orientation<4; orientation++){
2814
            printf("%8lld,", errors[level][orientation]/g);
2815
        }
2816
        printf("},\n");
2817
    }
2818
    printf("};\n");
2819
    {
2820
            int level=2;
2821
            int orientation=3;
2822
            int w= width  >> (s.spatial_decomposition_count-level);
2823
            int h= height >> (s.spatial_decomposition_count-level);
2824
            int stride= width  << (s.spatial_decomposition_count-level);
2825
            DWTELEM *buf= buffer[0];
2826
            int64_t error=0;
2827

2828
            buf+=w;
2829
            buf+=stride>>1;
2830
            
2831
            memset(buffer[0], 0, sizeof(int)*width*height);
2832
#if 1
2833
            for(y=0; y<height; y++){
2834
                for(x=0; x<width; x++){
2835
                    int tab[4]={0,2,3,1};
2836
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
2837
                }
2838
            }
2839
            spatial_dwt(&s, buffer[0], width, height, width);
2840
#else
2841
            for(y=0; y<h; y++){
2842
                for(x=0; x<w; x++){
2843
                    buf[x + y*stride  ]=169;
2844
                    buf[x + y*stride-w]=64;
2845
                }
2846
            }
2847
            spatial_idwt(&s, buffer[0], width, height, width);
2848
#endif
2849
            for(y=0; y<height; y++){
2850
                for(x=0; x<width; x++){
2851
                    int64_t d= buffer[0][x + y*width];
2852
                    error += d*d;
2853
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
2854
                }
2855
                if(ABS(height/2-y)<9) printf("\n");
2856
            }
2857
    }
2858
2859
}
2860
    return 0;
2861
}
2862
#endif