Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ a8d73e56

History | View | Annotate | Download (98.6 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22
#include "cabac.h"
23

    
24
#include "mpegvideo.h"
25

    
26
#undef NDEBUG
27
#include <assert.h>
28

    
29
#define MAX_DECOMPOSITIONS 8
30
#define MAX_PLANES 4
31
#define DWTELEM int
32
#define QROOT 8 
33

    
34
static const int8_t quant3[256]={
35
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
51
};
52
static const int8_t quant3b[256]={
53
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
};
70
static const int8_t quant5[256]={
71
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
72
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
80
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
87
};
88
static const int8_t quant7[256]={
89
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
92
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
93
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
98
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
105
};
106
static const int8_t quant9[256]={
107
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
109
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
116
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
123
};
124
static const int8_t quant11[256]={
125
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
126
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
128
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
134
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
141
};
142
static const int8_t quant13[256]={
143
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
144
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
147
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
152
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
159
};
160

    
161
#define OBMC_MAX 64
162
#if 0 //64*cubic
163
static const uint8_t obmc32[1024]={
164
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
165
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
166
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
167
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
168
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
169
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
170
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
171
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
172
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
173
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
174
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
175
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
176
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
177
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
178
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
179
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
180
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
182
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
183
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
184
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
185
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
186
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
187
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
188
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
189
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
190
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
191
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
192
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
193
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
194
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
195
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
196
//error:0.000022
197
};
198
static const uint8_t obmc16[256]={
199
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
200
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
201
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
202
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
203
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
204
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
205
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
206
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
207
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
209
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
210
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
211
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
212
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
213
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
214
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
215
//error:0.000033
216
};
217
#elif 1 // 64*linear
218
static const uint8_t obmc32[1024]={
219
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
220
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
221
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
222
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
223
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
224
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
225
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
226
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
227
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
228
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
229
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
230
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
231
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
232
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
233
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
234
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
235
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
237
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
238
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
239
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
240
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
241
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
242
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
243
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
244
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
245
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
246
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
247
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
248
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
249
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
250
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
251
 //error:0.000020
252
};
253
static const uint8_t obmc16[256]={
254
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
255
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
256
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
257
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
258
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
259
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
260
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
261
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
262
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
264
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
265
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
266
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
267
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
268
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
269
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
270
//error:0.000015
271
};
272
#else //64*cos
273
static const uint8_t obmc32[1024]={
274
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
275
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
276
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
277
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
278
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
279
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
280
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
281
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
282
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
283
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
284
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
285
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
286
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
287
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
288
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
289
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
290
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
292
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
293
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
294
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
295
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
296
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
297
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
298
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
299
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
300
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
301
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
302
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
303
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
304
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
305
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
306
//error:0.000022
307
};
308
static const uint8_t obmc16[256]={
309
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
310
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
311
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
312
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
313
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
314
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
315
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
316
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
317
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
319
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
320
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
321
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
322
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
323
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
324
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
325
//error:0.000022
326
};
327
#endif
328

    
329

    
330
typedef struct SubBand{
331
    int level;
332
    int stride;
333
    int width;
334
    int height;
335
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
336
    DWTELEM *buf;
337
    struct SubBand *parent;
338
    uint8_t state[/*7*2*/ 7 + 512][32];
339
}SubBand;
340

    
341
typedef struct Plane{
342
    int width;
343
    int height;
344
    SubBand band[MAX_DECOMPOSITIONS][4];
345
}Plane;
346

    
347
typedef struct SnowContext{
348
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
349

    
350
    AVCodecContext *avctx;
351
    CABACContext c;
352
    DSPContext dsp;
353
    AVFrame input_picture;
354
    AVFrame current_picture;
355
    AVFrame last_picture;
356
    AVFrame mconly_picture;
357
//     uint8_t q_context[16];
358
    uint8_t header_state[32];
359
    int keyframe;
360
    int version;
361
    int spatial_decomposition_type;
362
    int temporal_decomposition_type;
363
    int spatial_decomposition_count;
364
    int temporal_decomposition_count;
365
    DWTELEM *spatial_dwt_buffer;
366
    DWTELEM *pred_buffer;
367
    int colorspace_type;
368
    int chroma_h_shift;
369
    int chroma_v_shift;
370
    int spatial_scalability;
371
    int qlog;
372
    int mv_scale;
373
    int qbias;
374
#define QBIAS_SHIFT 3
375
    int b_width; //FIXME remove?
376
    int b_height; //FIXME remove?
377
    Plane plane[MAX_PLANES];
378
    SubBand mb_band;
379
    SubBand mv_band[2];
380
    
381
    uint16_t *mb_type;
382
    uint8_t *mb_mean;
383
    uint32_t *dummy;
384
    int16_t (*motion_val8)[2];
385
    int16_t (*motion_val16)[2];
386
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
387
}SnowContext;
388

    
389
#define QEXPSHIFT 7 //FIXME try to change this to 0
390
static const uint8_t qexp[8]={
391
    128, 140, 152, 166, 181, 197, 215, 235
392
//   64,  70,  76,  83,  91,  99, 108, 117
393
//   32,  35,  38,  41,  45,  49,  54,  59
394
//   16,  17,  19,  21,  23,  25,  27,  29
395
//    8,   9,  10,  10,  11,  12,  13,  15
396
};
397

    
398
static inline int mirror(int v, int m){
399
    if     (v<0) return -v;
400
    else if(v>m) return 2*m-v;
401
    else         return v;
402
}
403

    
404
static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
405
    int i;
406

    
407
    if(v){
408
        const int a= ABS(v);
409
        const int e= av_log2(a);
410
#if 1
411
        const int el= FFMIN(e, 10);   
412
        put_cabac(c, state+0, 0);
413

    
414
        for(i=0; i<el; i++){
415
            put_cabac(c, state+1+i, 1);  //1..10
416
        }
417
        for(; i<e; i++){
418
            put_cabac(c, state+1+9, 1);  //1..10
419
        }
420
        put_cabac(c, state+1+FFMIN(i,9), 0);
421

    
422
        for(i=e-1; i>=el; i--){
423
            put_cabac(c, state+22+9, (a>>i)&1); //22..31
424
        }
425
        for(; i>=0; i--){
426
            put_cabac(c, state+22+i, (a>>i)&1); //22..31
427
        }
428

    
429
        if(is_signed)
430
            put_cabac(c, state+11 + el, v < 0); //11..21
431
#else
432
        
433
        put_cabac(c, state+0, 0);
434
        if(e<=9){
435
            for(i=0; i<e; i++){
436
                put_cabac(c, state+1+i, 1);  //1..10
437
            }
438
            put_cabac(c, state+1+i, 0);
439

    
440
            for(i=e-1; i>=0; i--){
441
                put_cabac(c, state+22+i, (a>>i)&1); //22..31
442
            }
443

    
444
            if(is_signed)
445
                put_cabac(c, state+11 + e, v < 0); //11..21
446
        }else{
447
            for(i=0; i<e; i++){
448
                put_cabac(c, state+1+FFMIN(i,9), 1);  //1..10
449
            }
450
            put_cabac(c, state+1+FFMIN(i,9), 0);
451

    
452
            for(i=e-1; i>=0; i--){
453
                put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
454
            }
455

    
456
            if(is_signed)
457
                put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
458
        }
459
#endif
460
    }else{
461
        put_cabac(c, state+0, 1);
462
    }
463
}
464

    
465
static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
466
    if(get_cabac(c, state+0))
467
        return 0;
468
    else{
469
        int i, e, a, el;
470
 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
471
        for(e=0; e<10; e++){ 
472
            if(get_cabac(c, state + 1 + e)==0) // 1..10
473
                break;
474
        }
475
        el= e;
476
 
477
        if(e==10){
478
            while(get_cabac(c, state + 1 + 9)) //10
479
                e++;
480
        }
481
        a= 1;
482
        for(i=e-1; i>=el; i--){
483
            a += a + get_cabac(c, state+22+9); //31
484
        }
485
        for(; i>=0; i--){
486
            a += a + get_cabac(c, state+22+i); //22..31
487
        }
488

    
489
        if(is_signed && get_cabac(c, state+11 + el)) //11..21
490
            return -a;
491
        else
492
            return a;
493
    }
494
}
495

    
496
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
497
    const int mirror_left= !highpass;
498
    const int mirror_right= (width&1) ^ highpass;
499
    const int w= (width>>1) - 1 + (highpass & width);
500
    int i;
501

    
502
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
503
    if(mirror_left){
504
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
505
        dst += dst_step;
506
        src += src_step;
507
    }
508
    
509
    for(i=0; i<w; i++){
510
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
511
    }
512
    
513
    if(mirror_right){
514
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
515
    }
516
}
517

    
518
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
519
    const int mirror_left= !highpass;
520
    const int mirror_right= (width&1) ^ highpass;
521
    const int w= (width>>1) - 1 + (highpass & width);
522
    int i;
523

    
524
    if(mirror_left){
525
        int r= 3*2*ref[0];
526
        r += r>>4;
527
        r += r>>8;
528
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
529
        dst += dst_step;
530
        src += src_step;
531
    }
532
    
533
    for(i=0; i<w; i++){
534
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
535
        r += r>>4;
536
        r += r>>8;
537
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
538
    }
539
    
540
    if(mirror_right){
541
        int r= 3*2*ref[w*ref_step];
542
        r += r>>4;
543
        r += r>>8;
544
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
545
    }
546
}
547

    
548

    
549
static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
550
    int x, i;
551
    
552
    for(x=start; x<width; x+=2){
553
        int64_t sum=0;
554

    
555
        for(i=0; i<n; i++){
556
            int x2= x + 2*i - n + 1;
557
            if     (x2<     0) x2= -x2;
558
            else if(x2>=width) x2= 2*width-x2-2;
559
            sum += coeffs[i]*(int64_t)dst[x2];
560
        }
561
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
562
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
563
    }
564
}
565

    
566
static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
567
    int x, y, i;
568
    for(y=start; y<height; y+=2){
569
        for(x=0; x<width; x++){
570
            int64_t sum=0;
571
    
572
            for(i=0; i<n; i++){
573
                int y2= y + 2*i - n + 1;
574
                if     (y2<      0) y2= -y2;
575
                else if(y2>=height) y2= 2*height-y2-2;
576
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
577
            }
578
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
579
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
580
        }
581
    }
582
}
583

    
584
#define SCALEX 1
585
#define LX0 0
586
#define LX1 1
587

    
588
#if 0 // more accurate 9/7
589
#define N1 2
590
#define SHIFT1 14
591
#define COEFFS1 (int[]){-25987,-25987}
592
#define N2 2
593
#define SHIFT2 19
594
#define COEFFS2 (int[]){-27777,-27777}
595
#define N3 2
596
#define SHIFT3 15
597
#define COEFFS3 (int[]){28931,28931}
598
#define N4 2
599
#define SHIFT4 15
600
#define COEFFS4 (int[]){14533,14533}
601
#elif 1 // 13/7 CRF
602
#define N1 4
603
#define SHIFT1 4
604
#define COEFFS1 (int[]){1,-9,-9,1}
605
#define N2 4
606
#define SHIFT2 4
607
#define COEFFS2 (int[]){-1,5,5,-1}
608
#define N3 0
609
#define SHIFT3 1
610
#define COEFFS3 NULL
611
#define N4 0
612
#define SHIFT4 1
613
#define COEFFS4 NULL
614
#elif 1 // 3/5
615
#define LX0 1
616
#define LX1 0
617
#define SCALEX 0.5
618
#define N1 2
619
#define SHIFT1 1
620
#define COEFFS1 (int[]){1,1}
621
#define N2 2
622
#define SHIFT2 2
623
#define COEFFS2 (int[]){-1,-1}
624
#define N3 0
625
#define SHIFT3 0
626
#define COEFFS3 NULL
627
#define N4 0
628
#define SHIFT4 0
629
#define COEFFS4 NULL
630
#elif 1 // 11/5 
631
#define N1 0
632
#define SHIFT1 1
633
#define COEFFS1 NULL
634
#define N2 2
635
#define SHIFT2 2
636
#define COEFFS2 (int[]){-1,-1}
637
#define N3 2
638
#define SHIFT3 0
639
#define COEFFS3 (int[]){-1,-1}
640
#define N4 4
641
#define SHIFT4 7
642
#define COEFFS4 (int[]){-5,29,29,-5}
643
#define SCALEX 4
644
#elif 1 // 9/7 CDF
645
#define N1 2
646
#define SHIFT1 7
647
#define COEFFS1 (int[]){-203,-203}
648
#define N2 2
649
#define SHIFT2 12
650
#define COEFFS2 (int[]){-217,-217}
651
#define N3 2
652
#define SHIFT3 7
653
#define COEFFS3 (int[]){113,113}
654
#define N4 2
655
#define SHIFT4 9
656
#define COEFFS4 (int[]){227,227}
657
#define SCALEX 1
658
#elif 1 // 7/5 CDF
659
#define N1 0
660
#define SHIFT1 1
661
#define COEFFS1 NULL
662
#define N2 2
663
#define SHIFT2 2
664
#define COEFFS2 (int[]){-1,-1}
665
#define N3 2
666
#define SHIFT3 0
667
#define COEFFS3 (int[]){-1,-1}
668
#define N4 2
669
#define SHIFT4 4
670
#define COEFFS4 (int[]){3,3}
671
#elif 1 // 9/7 MN
672
#define N1 4
673
#define SHIFT1 4
674
#define COEFFS1 (int[]){1,-9,-9,1}
675
#define N2 2
676
#define SHIFT2 2
677
#define COEFFS2 (int[]){1,1}
678
#define N3 0
679
#define SHIFT3 1
680
#define COEFFS3 NULL
681
#define N4 0
682
#define SHIFT4 1
683
#define COEFFS4 NULL
684
#else // 13/7 CRF
685
#define N1 4
686
#define SHIFT1 4
687
#define COEFFS1 (int[]){1,-9,-9,1}
688
#define N2 4
689
#define SHIFT2 4
690
#define COEFFS2 (int[]){-1,5,5,-1}
691
#define N3 0
692
#define SHIFT3 1
693
#define COEFFS3 NULL
694
#define N4 0
695
#define SHIFT4 1
696
#define COEFFS4 NULL
697
#endif
698
static void horizontal_decomposeX(int *b, int width){
699
    int temp[width];
700
    const int width2= width>>1;
701
    const int w2= (width+1)>>1;
702
    int A1,A2,A3,A4, x;
703

    
704
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
705
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
706
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
707
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
708
    
709
    for(x=0; x<width2; x++){
710
        temp[x   ]= b[2*x    ];
711
        temp[x+w2]= b[2*x + 1];
712
    }
713
    if(width&1)
714
        temp[x   ]= b[2*x    ];
715
    memcpy(b, temp, width*sizeof(int));
716
}
717

    
718
static void horizontal_composeX(int *b, int width){
719
    int temp[width];
720
    const int width2= width>>1;
721
    int A1,A2,A3,A4, x;
722
    const int w2= (width+1)>>1;
723

    
724
    memcpy(temp, b, width*sizeof(int));
725
    for(x=0; x<width2; x++){
726
        b[2*x    ]= temp[x   ];
727
        b[2*x + 1]= temp[x+w2];
728
    }
729
    if(width&1)
730
        b[2*x    ]= temp[x   ];
731

    
732
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
733
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
734
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
735
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
736
}
737

    
738
static void spatial_decomposeX(int *buffer, int width, int height, int stride){
739
    int x, y;
740
  
741
    for(y=0; y<height; y++){
742
        for(x=0; x<width; x++){
743
            buffer[y*stride + x] *= SCALEX;
744
        }
745
    }
746

    
747
    for(y=0; y<height; y++){
748
        horizontal_decomposeX(buffer + y*stride, width);
749
    }
750
    
751
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
752
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
753
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
754
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
755
}
756

    
757
static void spatial_composeX(int *buffer, int width, int height, int stride){
758
    int x, y;
759
  
760
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
761
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
762
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
763
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
764

    
765
    for(y=0; y<height; y++){
766
        horizontal_composeX(buffer + y*stride, width);
767
    }
768

    
769
    for(y=0; y<height; y++){
770
        for(x=0; x<width; x++){
771
            buffer[y*stride + x] /= SCALEX;
772
        }
773
    }
774
}
775

    
776
static void horizontal_decompose53i(int *b, int width){
777
    int temp[width];
778
    const int width2= width>>1;
779
    int A1,A2,A3,A4, x;
780
    const int w2= (width+1)>>1;
781

    
782
    for(x=0; x<width2; x++){
783
        temp[x   ]= b[2*x    ];
784
        temp[x+w2]= b[2*x + 1];
785
    }
786
    if(width&1)
787
        temp[x   ]= b[2*x    ];
788
#if 0
789
    A2= temp[1       ];
790
    A4= temp[0       ];
791
    A1= temp[0+width2];
792
    A1 -= (A2 + A4)>>1;
793
    A4 += (A1 + 1)>>1;
794
    b[0+width2] = A1;
795
    b[0       ] = A4;
796
    for(x=1; x+1<width2; x+=2){
797
        A3= temp[x+width2];
798
        A4= temp[x+1     ];
799
        A3 -= (A2 + A4)>>1;
800
        A2 += (A1 + A3 + 2)>>2;
801
        b[x+width2] = A3;
802
        b[x       ] = A2;
803

804
        A1= temp[x+1+width2];
805
        A2= temp[x+2       ];
806
        A1 -= (A2 + A4)>>1;
807
        A4 += (A1 + A3 + 2)>>2;
808
        b[x+1+width2] = A1;
809
        b[x+1       ] = A4;
810
    }
811
    A3= temp[width-1];
812
    A3 -= A2;
813
    A2 += (A1 + A3 + 2)>>2;
814
    b[width -1] = A3;
815
    b[width2-1] = A2;
816
#else        
817
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
818
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
819
#endif
820
}
821

    
822
static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
823
    int i;
824
    
825
    for(i=0; i<width; i++){
826
        b1[i] -= (b0[i] + b2[i])>>1;
827
    }
828
}
829

    
830
static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
831
    int i;
832
    
833
    for(i=0; i<width; i++){
834
        b1[i] += (b0[i] + b2[i] + 2)>>2;
835
    }
836
}
837

    
838
static void spatial_decompose53i(int *buffer, int width, int height, int stride){
839
    int x, y;
840
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
841
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
842
  
843
    for(y=-2; y<height; y+=2){
844
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
845
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
846

    
847
{START_TIMER
848
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
849
        if(y+2 < height) horizontal_decompose53i(b3, width);
850
STOP_TIMER("horizontal_decompose53i")}
851
        
852
{START_TIMER
853
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
854
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
855
STOP_TIMER("vertical_decompose53i*")}
856
        
857
        b0=b2;
858
        b1=b3;
859
    }
860
}
861

    
862
#define lift5 lift
863
#if 1
864
#define W_AM 3
865
#define W_AO 0
866
#define W_AS 1
867

    
868
#define W_BM 1
869
#define W_BO 8
870
#define W_BS 4
871

    
872
#undef lift5
873
#define W_CM 9999
874
#define W_CO 2
875
#define W_CS 2
876

    
877
#define W_DM 15
878
#define W_DO 16
879
#define W_DS 5
880
#elif 0
881
#define W_AM 55
882
#define W_AO 16
883
#define W_AS 5
884

    
885
#define W_BM 3
886
#define W_BO 32
887
#define W_BS 6
888

    
889
#define W_CM 127
890
#define W_CO 64
891
#define W_CS 7
892

    
893
#define W_DM 7
894
#define W_DO 8
895
#define W_DS 4
896
#elif 0
897
#define W_AM 97
898
#define W_AO 32
899
#define W_AS 6
900

    
901
#define W_BM 63
902
#define W_BO 512
903
#define W_BS 10
904

    
905
#define W_CM 13
906
#define W_CO 8
907
#define W_CS 4
908

    
909
#define W_DM 15
910
#define W_DO 16
911
#define W_DS 5
912

    
913
#else
914

    
915
#define W_AM 203
916
#define W_AO 64
917
#define W_AS 7
918

    
919
#define W_BM 217
920
#define W_BO 2048
921
#define W_BS 12
922

    
923
#define W_CM 113
924
#define W_CO 64
925
#define W_CS 7
926

    
927
#define W_DM 227
928
#define W_DO 128
929
#define W_DS 9
930
#endif
931
static void horizontal_decompose97i(int *b, int width){
932
    int temp[width];
933
    const int w2= (width+1)>>1;
934

    
935
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
936
    lift (temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
937
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
938
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
939
}
940

    
941

    
942
static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
943
    int i;
944
    
945
    for(i=0; i<width; i++){
946
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
947
    }
948
}
949

    
950
static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
951
    int i;
952
    
953
    for(i=0; i<width; i++){
954
#ifdef lift5
955
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
956
#else
957
        int r= 3*(b0[i] + b2[i]);
958
        r+= r>>4;
959
        r+= r>>8;
960
        b1[i] += (r+W_CO)>>W_CS;
961
#endif
962
    }
963
}
964

    
965
static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
966
    int i;
967
    
968
    for(i=0; i<width; i++){
969
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
970
    }
971
}
972

    
973
static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
974
    int i;
975
    
976
    for(i=0; i<width; i++){
977
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
978
    }
979
}
980

    
981
static void spatial_decompose97i(int *buffer, int width, int height, int stride){
982
    int x, y;
983
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
984
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
985
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
986
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
987
  
988
    for(y=-4; y<height; y+=2){
989
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
990
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
991

    
992
{START_TIMER
993
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
994
        if(y+4 < height) horizontal_decompose97i(b5, width);
995
if(width>400){
996
STOP_TIMER("horizontal_decompose97i")
997
}}
998
        
999
{START_TIMER
1000
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1001
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1002
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1003
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1004

    
1005
if(width>400){
1006
STOP_TIMER("vertical_decompose97i")
1007
}}
1008
        
1009
        b0=b2;
1010
        b1=b3;
1011
        b2=b4;
1012
        b3=b5;
1013
    }
1014
}
1015

    
1016
static void spatial_dwt(SnowContext *s, int *buffer, int width, int height, int stride){
1017
    int level;
1018
    
1019
    for(level=0; level<s->spatial_decomposition_count; level++){
1020
        switch(s->spatial_decomposition_type){
1021
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1022
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1023
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1024
        }
1025
    }
1026
}
1027

    
1028
static void horizontal_compose53i(int *b, int width){
1029
    int temp[width];
1030
    const int width2= width>>1;
1031
    const int w2= (width+1)>>1;
1032
    int A1,A2,A3,A4, x;
1033

    
1034
#if 0
1035
    A2= temp[1       ];
1036
    A4= temp[0       ];
1037
    A1= temp[0+width2];
1038
    A1 -= (A2 + A4)>>1;
1039
    A4 += (A1 + 1)>>1;
1040
    b[0+width2] = A1;
1041
    b[0       ] = A4;
1042
    for(x=1; x+1<width2; x+=2){
1043
        A3= temp[x+width2];
1044
        A4= temp[x+1     ];
1045
        A3 -= (A2 + A4)>>1;
1046
        A2 += (A1 + A3 + 2)>>2;
1047
        b[x+width2] = A3;
1048
        b[x       ] = A2;
1049

1050
        A1= temp[x+1+width2];
1051
        A2= temp[x+2       ];
1052
        A1 -= (A2 + A4)>>1;
1053
        A4 += (A1 + A3 + 2)>>2;
1054
        b[x+1+width2] = A1;
1055
        b[x+1       ] = A4;
1056
    }
1057
    A3= temp[width-1];
1058
    A3 -= A2;
1059
    A2 += (A1 + A3 + 2)>>2;
1060
    b[width -1] = A3;
1061
    b[width2-1] = A2;
1062
#else   
1063
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1064
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1065
#endif
1066
    for(x=0; x<width2; x++){
1067
        b[2*x    ]= temp[x   ];
1068
        b[2*x + 1]= temp[x+w2];
1069
    }
1070
    if(width&1)
1071
        b[2*x    ]= temp[x   ];
1072
}
1073

    
1074
static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
1075
    int i;
1076
    
1077
    for(i=0; i<width; i++){
1078
        b1[i] += (b0[i] + b2[i])>>1;
1079
    }
1080
}
1081

    
1082
static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
1083
    int i;
1084
    
1085
    for(i=0; i<width; i++){
1086
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1087
    }
1088
}
1089

    
1090
static void spatial_compose53i(int *buffer, int width, int height, int stride){
1091
    int x, y;
1092
    DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1093
    DWTELEM *b1= buffer + mirror(-1  , height-1)*stride;
1094
  
1095
    for(y=-1; y<=height; y+=2){
1096
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1097
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1098

    
1099
{START_TIMER
1100
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1101
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1102
STOP_TIMER("vertical_compose53i*")}
1103

    
1104
{START_TIMER
1105
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1106
        if(b0 <= b2) horizontal_compose53i(b1, width);
1107
STOP_TIMER("horizontal_compose53i")}
1108

    
1109
        b0=b2;
1110
        b1=b3;
1111
    }
1112
}   
1113

    
1114
 
1115
static void horizontal_compose97i(int *b, int width){
1116
    int temp[width];
1117
    const int w2= (width+1)>>1;
1118

    
1119
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1120
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1121
    lift (b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1122
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1123
}
1124

    
1125
static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1126
    int i;
1127
    
1128
    for(i=0; i<width; i++){
1129
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1130
    }
1131
}
1132

    
1133
static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1134
    int i;
1135
    
1136
    for(i=0; i<width; i++){
1137
#ifdef lift5
1138
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1139
#else
1140
        int r= 3*(b0[i] + b2[i]);
1141
        r+= r>>4;
1142
        r+= r>>8;
1143
        b1[i] -= (r+W_CO)>>W_CS;
1144
#endif
1145
    }
1146
}
1147

    
1148
static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1149
    int i;
1150
    
1151
    for(i=0; i<width; i++){
1152
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1153
    }
1154
}
1155

    
1156
static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1157
    int i;
1158
    
1159
    for(i=0; i<width; i++){
1160
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1161
    }
1162
}
1163

    
1164
static void spatial_compose97i(int *buffer, int width, int height, int stride){
1165
    int x, y;
1166
    DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1167
    DWTELEM *b1= buffer + mirror(-3  , height-1)*stride;
1168
    DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1169
    DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1170

    
1171
    for(y=-3; y<=height; y+=2){
1172
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1173
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1174

    
1175
        if(stride == width && y+4 < height && 0){ 
1176
            int x;
1177
            for(x=0; x<width/2; x++)
1178
                b5[x] += 64*2;
1179
            for(; x<width; x++)
1180
                b5[x] += 169*2;
1181
        }
1182
        
1183
{START_TIMER
1184
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1185
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1186
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1187
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1188
if(width>400){
1189
STOP_TIMER("vertical_compose97i")}}
1190

    
1191
{START_TIMER
1192
        if(y-1>=  0) horizontal_compose97i(b0, width);
1193
        if(b0 <= b2) horizontal_compose97i(b1, width);
1194
if(width>400 && b0 <= b2){
1195
STOP_TIMER("horizontal_compose97i")}}
1196
        
1197
        b0=b2;
1198
        b1=b3;
1199
        b2=b4;
1200
        b3=b5;
1201
    }
1202
}
1203

    
1204
static void spatial_idwt(SnowContext *s, int *buffer, int width, int height, int stride){
1205
    int level;
1206

    
1207
    for(level=s->spatial_decomposition_count-1; level>=0; level--){
1208
        switch(s->spatial_decomposition_type){
1209
        case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1210
        case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1211
        case 2: spatial_composeX  (buffer, width>>level, height>>level, stride<<level); break;
1212
        }
1213
    }
1214
}
1215

    
1216
static const int hilbert[16][2]={
1217
    {0,0}, {1,0}, {1,1}, {0,1},
1218
    {0,2}, {0,3}, {1,3}, {1,2},
1219
    {2,2}, {2,3}, {3,3}, {3,2},
1220
    {3,1}, {2,1}, {2,0}, {3,0},
1221
};
1222
#if 0
1223
-o o-
1224
 | |
1225
 o-o
1226
 
1227
-o-o o-o-
1228
   | | 
1229
 o-o o-o
1230
 |     |
1231
 o o-o o
1232
 | | | |
1233
 o-o o-o
1234
 
1235
 0112122312232334122323342334
1236
 0123456789ABCDEF0123456789AB
1237
 RLLRMRRLLRRMRLLMLRRLMLLRRLLM
1238
 
1239
 4  B  F 14 1B
1240
 4 11 15 20 27
1241
 
1242
-o o-o-o o-o-o o-
1243
 | |   | |   | |
1244
 o-o o-o o-o o-o
1245
     |     |
1246
 o-o o-o o-o o-o
1247
 | |   | |   | |
1248
 o o-o-o o-o-o o
1249
 |             |
1250
 o-o o-o-o-o o-o
1251
   | |     | | 
1252
 o-o o-o o-o o-o
1253
 |     | |     |
1254
 o o-o o o o-o o
1255
 | | | | | | | |
1256
 o-o o-o o-o o-o
1257

1258
#endif
1259

    
1260
#define SVI(a, i, x, y) \
1261
{\
1262
    a[i][0]= x;\
1263
    a[i][1]= y;\
1264
    i++;\
1265
}
1266

    
1267
static int sig_cmp(const void *a, const void *b){
1268
    const int16_t* da = (const int16_t *) a;
1269
    const int16_t* db = (const int16_t *) b;
1270
    
1271
    if(da[1] != db[1]) return da[1] - db[1];
1272
    else               return da[0] - db[0];
1273
}
1274

    
1275

    
1276
static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1277
    const int level= b->level;
1278
    const int w= b->width;
1279
    const int h= b->height;
1280
    int x, y;
1281
    
1282
#if 0
1283
    if(orientation==3 && parent && 0){
1284
        int16_t candidate[w*h][2];
1285
        uint8_t state[w*h];
1286
        int16_t boarder[3][w*h*4][2];
1287
        int16_t significant[w*h][2];
1288
        int candidate_count=0;
1289
        int boarder_count[3]={0,0,0};
1290
        int significant_count=0;
1291
        int rle_pos=0;
1292
        int v, last_v;
1293
        int primary= orientation==1;
1294
        
1295
        memset(candidate, 0, sizeof(candidate));
1296
        memset(state, 0, sizeof(state));
1297
        memset(boarder, 0, sizeof(boarder));
1298
        
1299
        for(y=0; y<h; y++){
1300
            for(x=0; x<w; x++){
1301
                if(parent[(x>>1) + (y>>1)*2*stride])
1302
                    SVI(candidate, candidate_count, x, y)
1303
            }
1304
        }
1305

1306
        for(;;){
1307
            while(candidate_count && !boarder_count[0] && !boarder_count[1] && !boarder_count[2]){
1308
                candidate_count--;
1309
                x= candidate[ candidate_count][0];
1310
                y= candidate[ candidate_count][1];
1311
                if(state[x + y*w])
1312
                    continue;
1313
                state[x + y*w]= 1;
1314
                v= !!src[x + y*stride];
1315
                put_cabac(&s->c, &b->state[0][0], v);
1316
                if(v){
1317
                    SVI(significant, significant_count, x,y)
1318
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1319
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1320
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1321
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1322
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1323
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1324
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1325
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1326
                }
1327
            }
1328
            while(!boarder_count[0] && !boarder_count[1] && !boarder_count[2] && rle_pos < w*h){
1329
                int run=0;
1330
                for(; rle_pos < w*h;){
1331
                    x= rle_pos % w; //FIXME speed
1332
                    y= rle_pos / w;
1333
                    rle_pos++;
1334
                    if(state[x + y*w])
1335
                        continue;
1336
                    state[x + y*w]= 1;
1337
                    v= !!src[x + y*stride];
1338
                    if(v){
1339
                        put_symbol(&s->c, b->state[1], run, 0);
1340
                        SVI(significant, significant_count, x,y)
1341
                        if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1342
                        if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1343
                        if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1344
                        if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1345
                        if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1346
                        if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1347
                        if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1348
                        if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1349
                        break;
1350
//FIXME                note only right & down can be boarders
1351
                    }
1352
                    run++;
1353
                }
1354
            }
1355
            if(!boarder_count[0] && !boarder_count[1] && !boarder_count[2])
1356
                break;
1357
            
1358
            while(boarder_count[0] || boarder_count[1] || boarder_count[2]){
1359
                int index;
1360
                
1361
                if     (boarder_count[  primary]) index=  primary;
1362
                else if(boarder_count[1-primary]) index=1-primary;
1363
                else                              index=2;
1364
                
1365
                boarder_count[index]--;
1366
                x= boarder[index][ boarder_count[index] ][0];
1367
                y= boarder[index][ boarder_count[index] ][1];
1368
                if(state[x + y*w]) //FIXME maybe check earlier
1369
                    continue;
1370
                state[x + y*w]= 1;
1371
                v= !!src[x + y*stride];
1372
                put_cabac(&s->c, &b->state[0][index+1], v);
1373
                if(v){
1374
                    SVI(significant, significant_count, x,y)
1375
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1376
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1377
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1378
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1379
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1380
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1381
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1382
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1383
                }
1384
            }
1385
        }
1386
        //FIXME sort significant coeffs maybe
1387
        if(1){
1388
            qsort(significant, significant_count, sizeof(int16_t[2]), sig_cmp);
1389
        }
1390
        
1391
        last_v=1;
1392
        while(significant_count){
1393
            int context= 3 + quant7[last_v&0xFF]; //use significance of suroundings
1394
            significant_count--;
1395
            x= significant[significant_count][0];//FIXME try opposit direction
1396
            y= significant[significant_count][1];
1397
            v= src[x + y*stride];
1398
            put_symbol(&s->c, b->state[context + 2], v, 1); //FIXME try to avoid first bit, try this with the old code too!!
1399
            last_v= v;
1400
        }
1401
    }
1402
#endif
1403
    if(1){
1404
        int run=0;
1405
        int runs[w*h];
1406
        int run_index=0;
1407
                
1408
        for(y=0; y<h; y++){
1409
            for(x=0; x<w; x++){
1410
                int v, p=0;
1411
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1412
                v= src[x + y*stride];
1413

    
1414
                if(y){
1415
                    t= src[x + (y-1)*stride];
1416
                    if(x){
1417
                        lt= src[x - 1 + (y-1)*stride];
1418
                    }
1419
                    if(x + 1 < w){
1420
                        rt= src[x + 1 + (y-1)*stride];
1421
                    }
1422
                }
1423
                if(x){
1424
                    l= src[x - 1 + y*stride];
1425
                    /*if(x > 1){
1426
                        if(orientation==1) ll= src[y + (x-2)*stride];
1427
                        else               ll= src[x - 2 + y*stride];
1428
                    }*/
1429
                }
1430
                if(parent){
1431
                    int px= x>>1;
1432
                    int py= y>>1;
1433
                    if(px<b->parent->width && py<b->parent->height) 
1434
                        p= parent[px + py*2*stride];
1435
                }
1436
                if(!(/*ll|*/l|lt|t|rt|p)){
1437
                    if(v){
1438
                        runs[run_index++]= run;
1439
                        run=0;
1440
                    }else{
1441
                        run++;
1442
                    }
1443
                }
1444
            }
1445
        }
1446
        runs[run_index++]= run;
1447
        run_index=0;
1448
        run= runs[run_index++];
1449

    
1450
        put_symbol(&s->c, b->state[1], run, 0);
1451
        
1452
        for(y=0; y<h; y++){
1453
            for(x=0; x<w; x++){
1454
                int v, p=0;
1455
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1456
                v= src[x + y*stride];
1457

    
1458
                if(y){
1459
                    t= src[x + (y-1)*stride];
1460
                    if(x){
1461
                        lt= src[x - 1 + (y-1)*stride];
1462
                    }
1463
                    if(x + 1 < w){
1464
                        rt= src[x + 1 + (y-1)*stride];
1465
                    }
1466
                }
1467
                if(x){
1468
                    l= src[x - 1 + y*stride];
1469
                    /*if(x > 1){
1470
                        if(orientation==1) ll= src[y + (x-2)*stride];
1471
                        else               ll= src[x - 2 + y*stride];
1472
                    }*/
1473
                }
1474
                if(parent){
1475
                    int px= x>>1;
1476
                    int py= y>>1;
1477
                    if(px<b->parent->width && py<b->parent->height) 
1478
                        p= parent[px + py*2*stride];
1479
                }
1480
                if(/*ll|*/l|lt|t|rt|p){
1481
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1482

    
1483
                    put_cabac(&s->c, &b->state[0][context], !!v);
1484
                }else{
1485
                    if(!run){
1486
                        run= runs[run_index++];
1487
                        put_symbol(&s->c, b->state[1], run, 0);
1488
                        assert(v);
1489
                    }else{
1490
                        run--;
1491
                        assert(!v);
1492
                    }
1493
                }
1494
                if(v){
1495
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1496

    
1497
                    put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
1498
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1499
                }
1500
            }
1501
        }
1502
        return;
1503
    }
1504
}
1505

    
1506
static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1507
    const int level= b->level;
1508
    const int w= b->width;
1509
    const int h= b->height;
1510
    int x,y;
1511

    
1512
    START_TIMER
1513
    
1514
    if(1){
1515
        int run;
1516
                
1517
        for(y=0; y<b->height; y++)
1518
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1519

    
1520
        run= get_symbol(&s->c, b->state[1], 0);
1521
        for(y=0; y<h; y++){
1522
            for(x=0; x<w; x++){
1523
                int v, p=0;
1524
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1525

    
1526
                if(y){
1527
                    t= src[x + (y-1)*stride];
1528
                    if(x){
1529
                        lt= src[x - 1 + (y-1)*stride];
1530
                    }
1531
                    if(x + 1 < w){
1532
                        rt= src[x + 1 + (y-1)*stride];
1533
                    }
1534
                }
1535
                if(x){
1536
                    l= src[x - 1 + y*stride];
1537
                    /*if(x > 1){
1538
                        if(orientation==1) ll= src[y + (x-2)*stride];
1539
                        else               ll= src[x - 2 + y*stride];
1540
                    }*/
1541
                }
1542
                if(parent){
1543
                    int px= x>>1;
1544
                    int py= y>>1;
1545
                    if(px<b->parent->width && py<b->parent->height) 
1546
                        p= parent[px + py*2*stride];
1547
                }
1548
                if(/*ll|*/l|lt|t|rt|p){
1549
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1550

    
1551
                    v=get_cabac(&s->c, &b->state[0][context]);
1552
                }else{
1553
                    if(!run){
1554
                        run= get_symbol(&s->c, b->state[1], 0);
1555
                        //FIXME optimize this here
1556
                        //FIXME try to store a more naive run
1557
                        v=1;
1558
                    }else{
1559
                        run--;
1560
                        v=0;
1561
                    }
1562
                }
1563
                if(v){
1564
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1565
                    v= get_symbol(&s->c, b->state[context + 2], 0) + 1;
1566
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
1567
                        v= -v;
1568
                    src[x + y*stride]= v;
1569
                }
1570
            }
1571
        }
1572
        if(level+1 == s->spatial_decomposition_count){
1573
            STOP_TIMER("decode_subband")
1574
        }
1575
        
1576
        return;
1577
    }
1578
}
1579

    
1580
static void reset_contexts(SnowContext *s){
1581
    int plane_index, level, orientation;
1582

    
1583
    for(plane_index=0; plane_index<2; plane_index++){
1584
        for(level=0; level<s->spatial_decomposition_count; level++){
1585
            for(orientation=level ? 1:0; orientation<4; orientation++){
1586
                memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
1587
            }
1588
        }
1589
    }
1590
    memset(s->mb_band.state, 0, sizeof(s->mb_band.state));
1591
    memset(s->mv_band[0].state, 0, sizeof(s->mv_band[0].state));
1592
    memset(s->mv_band[1].state, 0, sizeof(s->mv_band[1].state));
1593
    memset(s->header_state, 0, sizeof(s->header_state));
1594
}
1595

    
1596
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1597
    int x, y;
1598

    
1599
    for(y=0; y < b_h+5; y++){
1600
        for(x=0; x < b_w; x++){
1601
            int a0= src[x     + y*stride];
1602
            int a1= src[x + 1 + y*stride];
1603
            int a2= src[x + 2 + y*stride];
1604
            int a3= src[x + 3 + y*stride];
1605
            int a4= src[x + 4 + y*stride];
1606
            int a5= src[x + 5 + y*stride];
1607
//            int am= 9*(a1+a2) - (a0+a3);
1608
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1609
//            int am= 18*(a2+a3) - 2*(a1+a4);
1610
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1611
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1612

    
1613
//            if(b_w==16) am= 8*(a1+a2);
1614

    
1615
            if(dx<8) tmp[x + y*stride]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
1616
            else     tmp[x + y*stride]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
1617

    
1618
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
1619
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
1620
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
1621
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1622
        }
1623
    }
1624
    for(y=0; y < b_h; y++){
1625
        for(x=0; x < b_w; x++){
1626
            int a0= tmp[x +  y     *stride];
1627
            int a1= tmp[x + (y + 1)*stride];
1628
            int a2= tmp[x + (y + 2)*stride];
1629
            int a3= tmp[x + (y + 3)*stride];
1630
            int a4= tmp[x + (y + 4)*stride];
1631
            int a5= tmp[x + (y + 5)*stride];
1632
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1633
//            int am= 18*(a2+a3) - 2*(a1+a4);
1634
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1635
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1636
            
1637
//            if(b_w==16) am= 8*(a1+a2);
1638

    
1639
            if(dy<8) dst[x + y*stride]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
1640
            else     dst[x + y*stride]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
1641

    
1642
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
1643
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
1644
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
1645
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
1646
        }
1647
    }
1648
}
1649

    
1650
#define mcb(dx,dy,b_w)\
1651
static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
1652
    uint8_t tmp[stride*(b_w+5)];\
1653
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
1654
}
1655

    
1656
mcb( 0, 0,16)
1657
mcb( 4, 0,16)
1658
mcb( 8, 0,16)
1659
mcb(12, 0,16)
1660
mcb( 0, 4,16)
1661
mcb( 4, 4,16)
1662
mcb( 8, 4,16)
1663
mcb(12, 4,16)
1664
mcb( 0, 8,16)
1665
mcb( 4, 8,16)
1666
mcb( 8, 8,16)
1667
mcb(12, 8,16)
1668
mcb( 0,12,16)
1669
mcb( 4,12,16)
1670
mcb( 8,12,16)
1671
mcb(12,12,16)
1672

    
1673
#define mca(dx,dy,b_w)\
1674
static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
1675
    uint8_t tmp[stride*(b_w+5)];\
1676
    assert(h==b_w);\
1677
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
1678
}
1679

    
1680
mca( 0, 0,16)
1681
mca( 8, 0,16)
1682
mca( 0, 8,16)
1683
mca( 8, 8,16)
1684

    
1685
static void add_xblock(DWTELEM *dst, uint8_t *src, uint8_t *obmc, int s_x, int s_y, int b_w, int b_h, int mv_x, int mv_y, int w, int h, int dst_stride, int src_stride, int obmc_stride, int mb_type, int add){
1686
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME move to context to gurantee alignment
1687
    int x,y;
1688

    
1689
    if(s_x<0){
1690
        obmc -= s_x;
1691
        b_w += s_x;
1692
        s_x=0;
1693
    }else if(s_x + b_w > w){
1694
        b_w = w - s_x;
1695
    }
1696
    if(s_y<0){
1697
        obmc -= s_y*obmc_stride;
1698
        b_h += s_y;
1699
        s_y=0;
1700
    }else if(s_y + b_h> h){
1701
        b_h = h - s_y;
1702
    }
1703

    
1704
    dst += s_x + s_y*dst_stride;
1705
    
1706
    if(mb_type==1){
1707
        src += s_x + s_y*src_stride;
1708
        for(y=0; y < b_h; y++){
1709
            for(x=0; x < b_w; x++){
1710
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
1711
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
1712
            }
1713
        }
1714
    }else{
1715
        int dx= mv_x&15;
1716
        int dy= mv_y&15;
1717
//        int dxy= (mv_x&1) + 2*(mv_y&1);
1718

    
1719
        s_x += (mv_x>>4) - 2;
1720
        s_y += (mv_y>>4) - 2;
1721
        src += s_x + s_y*src_stride;
1722
        //use dsputil
1723
    
1724
        if(   (unsigned)s_x >= w - b_w - 4
1725
           || (unsigned)s_y >= h - b_h - 4){
1726
            ff_emulated_edge_mc(tmp + 32, src, src_stride, b_w+5, b_h+5, s_x, s_y, w, h);
1727
            src= tmp + 32;
1728
        }
1729

    
1730
        if(mb_type==0){
1731
            mc_block(tmp, src, tmp + 64+8, src_stride, b_w, b_h, dx, dy);
1732
        }else{
1733
            int sum=0;
1734
            for(y=0; y < b_h; y++){
1735
                for(x=0; x < b_w; x++){
1736
                    sum += src[x+  y*src_stride];
1737
                }
1738
            }
1739
            sum= (sum + b_h*b_w/2) / (b_h*b_w);
1740
            for(y=0; y < b_h; y++){
1741
                for(x=0; x < b_w; x++){
1742
                    tmp[x + y*src_stride]= sum; 
1743
                }
1744
            }
1745
        }
1746

    
1747
        for(y=0; y < b_h; y++){
1748
            for(x=0; x < b_w; x++){
1749
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
1750
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
1751
            }
1752
        }
1753
    }
1754
}
1755

    
1756
static void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
1757
    Plane *p= &s->plane[plane_index];
1758
    const int mb_w= s->mb_band.width;
1759
    const int mb_h= s->mb_band.height;
1760
    const int mb_stride= s->mb_band.stride;
1761
    int x, y, mb_x, mb_y;
1762
    int scale      = plane_index ?  s->mv_scale : 2*s->mv_scale;
1763
    int block_w    = plane_index ?  8 : 16;
1764
    uint8_t *obmc  = plane_index ? obmc16 : obmc32;
1765
    int obmc_stride= plane_index ? 16 : 32;
1766
    int ref_stride= s->last_picture.linesize[plane_index];
1767
    uint8_t *ref  = s->last_picture.data[plane_index];
1768
    int w= p->width;
1769
    int h= p->height;
1770
    
1771
if(s->avctx->debug&512){
1772
    for(y=0; y<h; y++){
1773
        for(x=0; x<w; x++){
1774
            if(add) buf[x + y*w]+= 128*256;
1775
            else    buf[x + y*w]-= 128*256;
1776
        }
1777
    }
1778
    
1779
    return;
1780
}
1781
    for(mb_y=-1; mb_y<=mb_h; mb_y++){
1782
        for(mb_x=-1; mb_x<=mb_w; mb_x++){
1783
            int index= clip(mb_x, 0, mb_w-1) + clip(mb_y, 0, mb_h-1)*mb_stride;
1784

    
1785
            add_xblock(buf, ref, obmc, 
1786
                       block_w*mb_x - block_w/2, 
1787
                       block_w*mb_y - block_w/2,
1788
                       2*block_w, 2*block_w,
1789
                       s->mv_band[0].buf[index]*scale, s->mv_band[1].buf[index]*scale,
1790
                       w, h,
1791
                       w, ref_stride, obmc_stride, 
1792
                       s->mb_band.buf[index], add);
1793

    
1794
        }
1795
    }
1796
}
1797

    
1798
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
1799
    const int level= b->level;
1800
    const int w= b->width;
1801
    const int h= b->height;
1802
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
1803
    const int qmul= qexp[qlog&7]<<(qlog>>3);
1804
    int x,y;
1805

    
1806
    assert(QROOT==8);
1807

    
1808
    bias= bias ? 0 : (3*qmul)>>3;
1809
    
1810
    if(!bias){
1811
        for(y=0; y<h; y++){
1812
            for(x=0; x<w; x++){
1813
                int i= src[x + y*stride]; 
1814
                //FIXME use threshold
1815
                //FIXME optimize
1816
                //FIXME bias
1817
                if(i>=0){
1818
                    i<<= QEXPSHIFT;
1819
                    i/= qmul;
1820
                    src[x + y*stride]=  i;
1821
                }else{
1822
                    i= -i;
1823
                    i<<= QEXPSHIFT;
1824
                    i/= qmul;
1825
                    src[x + y*stride]= -i;
1826
                }
1827
            }
1828
        }
1829
    }else{
1830
        for(y=0; y<h; y++){
1831
            for(x=0; x<w; x++){
1832
                int i= src[x + y*stride]; 
1833
                
1834
                //FIXME use threshold
1835
                //FIXME optimize
1836
                //FIXME bias
1837
                if(i>=0){
1838
                    i<<= QEXPSHIFT;
1839
                    i= (i + bias) / qmul;
1840
                    src[x + y*stride]=  i;
1841
                }else{
1842
                    i= -i;
1843
                    i<<= QEXPSHIFT;
1844
                    i= (i + bias) / qmul;
1845
                    src[x + y*stride]= -i;
1846
                }
1847
            }
1848
        }
1849
    }
1850
}
1851

    
1852
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
1853
    const int level= b->level;
1854
    const int w= b->width;
1855
    const int h= b->height;
1856
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
1857
    const int qmul= qexp[qlog&7]<<(qlog>>3);
1858
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1859
    int x,y;
1860
    
1861
    assert(QROOT==8);
1862

    
1863
    for(y=0; y<h; y++){
1864
        for(x=0; x<w; x++){
1865
            int i= src[x + y*stride];
1866
            if(i<0){
1867
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1868
            }else if(i>0){
1869
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
1870
            }
1871
        }
1872
    }
1873
}
1874

    
1875
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
1876
    const int w= b->width;
1877
    const int h= b->height;
1878
    int x,y;
1879
    
1880
    for(y=h-1; y>=0; y--){
1881
        for(x=w-1; x>=0; x--){
1882
            int i= x + y*stride;
1883
            
1884
            if(x){
1885
                if(use_median){
1886
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
1887
                    else  src[i] -= src[i - 1];
1888
                }else{
1889
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
1890
                    else  src[i] -= src[i - 1];
1891
                }
1892
            }else{
1893
                if(y) src[i] -= src[i - stride];
1894
            }
1895
        }
1896
    }
1897
}
1898

    
1899
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
1900
    const int w= b->width;
1901
    const int h= b->height;
1902
    int x,y;
1903
    
1904
    for(y=0; y<h; y++){
1905
        for(x=0; x<w; x++){
1906
            int i= x + y*stride;
1907
            
1908
            if(x){
1909
                if(use_median){
1910
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
1911
                    else  src[i] += src[i - 1];
1912
                }else{
1913
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
1914
                    else  src[i] += src[i - 1];
1915
                }
1916
            }else{
1917
                if(y) src[i] += src[i - stride];
1918
            }
1919
        }
1920
    }
1921
}
1922

    
1923
static void encode_header(SnowContext *s){
1924
    int plane_index, level, orientation;
1925

    
1926
    put_cabac(&s->c, s->header_state, s->keyframe); // state clearing stuff?
1927
    if(s->keyframe){
1928
        put_symbol(&s->c, s->header_state, s->version, 0);
1929
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
1930
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
1931
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
1932
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
1933
        put_symbol(&s->c, s->header_state, s->b_width, 0);
1934
        put_symbol(&s->c, s->header_state, s->b_height, 0);
1935
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
1936
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
1937
        put_cabac(&s->c, s->header_state, s->spatial_scalability);
1938
//        put_cabac(&s->c, s->header_state, s->rate_scalability);
1939

    
1940
        for(plane_index=0; plane_index<2; plane_index++){
1941
            for(level=0; level<s->spatial_decomposition_count; level++){
1942
                for(orientation=level ? 1:0; orientation<4; orientation++){
1943
                    if(orientation==2) continue;
1944
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
1945
                }
1946
            }
1947
        }
1948
    }
1949
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
1950
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
1951
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
1952
    put_symbol(&s->c, s->header_state, s->qbias, 1);
1953
}
1954

    
1955
static int decode_header(SnowContext *s){
1956
    int plane_index, level, orientation;
1957

    
1958
    s->keyframe= get_cabac(&s->c, s->header_state);
1959
    if(s->keyframe){
1960
        s->version= get_symbol(&s->c, s->header_state, 0);
1961
        if(s->version>0){
1962
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
1963
            return -1;
1964
        }
1965
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1966
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1967
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1968
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1969
        s->b_width= get_symbol(&s->c, s->header_state, 0);
1970
        s->b_height= get_symbol(&s->c, s->header_state, 0);
1971
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1972
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1973
        s->spatial_scalability= get_cabac(&s->c, s->header_state);
1974
//        s->rate_scalability= get_cabac(&s->c, s->header_state);
1975

    
1976
        for(plane_index=0; plane_index<3; plane_index++){
1977
            for(level=0; level<s->spatial_decomposition_count; level++){
1978
                for(orientation=level ? 1:0; orientation<4; orientation++){
1979
                    int q;
1980
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1981
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1982
                    else                    q= get_symbol(&s->c, s->header_state, 1);
1983
                    s->plane[plane_index].band[level][orientation].qlog= q;
1984
                }
1985
            }
1986
        }
1987
    }
1988
    
1989
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1990
    if(s->spatial_decomposition_type > 2){
1991
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1992
        return -1;
1993
    }
1994
    
1995
    s->qlog= get_symbol(&s->c, s->header_state, 1);
1996
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
1997
    s->qbias= get_symbol(&s->c, s->header_state, 1);
1998

    
1999
    return 0;
2000
}
2001

    
2002
static int common_init(AVCodecContext *avctx){
2003
    SnowContext *s = avctx->priv_data;
2004
    int width, height;
2005
    int level, orientation, plane_index, dec;
2006

    
2007
    s->avctx= avctx;
2008
        
2009
    dsputil_init(&s->dsp, avctx);
2010

    
2011
#define mcf(dx,dy)\
2012
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
2013
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2014
        mc_block ## dx ## dy;
2015

    
2016
    mcf( 0, 0)
2017
    mcf( 4, 0)
2018
    mcf( 8, 0)
2019
    mcf(12, 0)
2020
    mcf( 0, 4)
2021
    mcf( 4, 4)
2022
    mcf( 8, 4)
2023
    mcf(12, 4)
2024
    mcf( 0, 8)
2025
    mcf( 4, 8)
2026
    mcf( 8, 8)
2027
    mcf(12, 8)
2028
    mcf( 0,12)
2029
    mcf( 4,12)
2030
    mcf( 8,12)
2031
    mcf(12,12)
2032

    
2033
#define mcfh(dx,dy)\
2034
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
2035
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2036
        mc_block_hpel ## dx ## dy;
2037

    
2038
    mcfh(0, 0)
2039
    mcfh(8, 0)
2040
    mcfh(0, 8)
2041
    mcfh(8, 8)
2042
        
2043
    dec= s->spatial_decomposition_count= 5;
2044
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2045
    
2046
    s->chroma_h_shift= 1; //FIXME XXX
2047
    s->chroma_v_shift= 1;
2048
    
2049
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2050
    
2051
    s->b_width = (s->avctx->width +(1<<dec)-1)>>dec;
2052
    s->b_height= (s->avctx->height+(1<<dec)-1)>>dec;
2053
    
2054
    s->spatial_dwt_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
2055
    s->pred_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
2056
    
2057
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2058
    
2059
    for(plane_index=0; plane_index<3; plane_index++){    
2060
        int w= s->avctx->width;
2061
        int h= s->avctx->height;
2062

    
2063
        if(plane_index){
2064
            w>>= s->chroma_h_shift;
2065
            h>>= s->chroma_v_shift;
2066
        }
2067
        s->plane[plane_index].width = w;
2068
        s->plane[plane_index].height= h;
2069
av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
2070
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2071
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2072
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2073
                
2074
                b->buf= s->spatial_dwt_buffer;
2075
                b->level= level;
2076
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2077
                b->width = (w + !(orientation&1))>>1;
2078
                b->height= (h + !(orientation>1))>>1;
2079
                
2080
                if(orientation&1) b->buf += (w+1)>>1;
2081
                if(orientation>1) b->buf += b->stride>>1;
2082
                
2083
                if(level)
2084
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
2085
            }
2086
            w= (w+1)>>1;
2087
            h= (h+1)>>1;
2088
        }
2089
    }
2090
    
2091
    //FIXME init_subband() ?
2092
    s->mb_band.stride= s->mv_band[0].stride= s->mv_band[1].stride=
2093
    s->mb_band.width = s->mv_band[0].width = s->mv_band[1].width = (s->avctx->width + 15)>>4;
2094
    s->mb_band.height= s->mv_band[0].height= s->mv_band[1].height= (s->avctx->height+ 15)>>4;
2095
    s->mb_band   .buf= av_mallocz(s->mb_band   .stride * s->mb_band   .height*sizeof(DWTELEM));
2096
    s->mv_band[0].buf= av_mallocz(s->mv_band[0].stride * s->mv_band[0].height*sizeof(DWTELEM));
2097
    s->mv_band[1].buf= av_mallocz(s->mv_band[1].stride * s->mv_band[1].height*sizeof(DWTELEM));
2098

    
2099
    reset_contexts(s);
2100
/*    
2101
    width= s->width= avctx->width;
2102
    height= s->height= avctx->height;
2103
    
2104
    assert(width && height);
2105
*/
2106
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2107
    
2108
    return 0;
2109
}
2110

    
2111

    
2112
static void calculate_vissual_weight(SnowContext *s, Plane *p){
2113
    int width = p->width;
2114
    int height= p->height;
2115
    int i, level, orientation, x, y;
2116

    
2117
    for(level=0; level<s->spatial_decomposition_count; level++){
2118
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2119
            SubBand *b= &p->band[level][orientation];
2120
            DWTELEM *buf= b->buf;
2121
            int64_t error=0;
2122
            
2123
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2124
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
2125
            spatial_idwt(s, s->spatial_dwt_buffer, width, height, width);
2126
            for(y=0; y<height; y++){
2127
                for(x=0; x<width; x++){
2128
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
2129
                    error += d*d;
2130
                }
2131
            }
2132

    
2133
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
2134
            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
2135
        }
2136
    }
2137
}
2138

    
2139
static int encode_init(AVCodecContext *avctx)
2140
{
2141
    SnowContext *s = avctx->priv_data;
2142
    int i;
2143
    int level, orientation, plane_index;
2144

    
2145
    common_init(avctx);
2146
 
2147
    s->version=0;
2148
    
2149
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2150
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2151
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2152
    s->mb_type        = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int16_t));
2153
    s->mb_mean        = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int8_t ));
2154
    s->dummy          = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int32_t));
2155
    h263_encode_init(&s->m); //mv_penalty
2156

    
2157
    for(plane_index=0; plane_index<3; plane_index++){
2158
        calculate_vissual_weight(s, &s->plane[plane_index]);
2159
    }
2160
    
2161
    
2162
    avctx->coded_frame= &s->current_picture;
2163
    switch(avctx->pix_fmt){
2164
//    case PIX_FMT_YUV444P:
2165
//    case PIX_FMT_YUV422P:
2166
    case PIX_FMT_YUV420P:
2167
    case PIX_FMT_GRAY8:
2168
//    case PIX_FMT_YUV411P:
2169
//    case PIX_FMT_YUV410P:
2170
        s->colorspace_type= 0;
2171
        break;
2172
/*    case PIX_FMT_RGBA32:
2173
        s->colorspace= 1;
2174
        break;*/
2175
    default:
2176
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2177
        return -1;
2178
    }
2179
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2180
    s->chroma_h_shift= 1;
2181
    s->chroma_v_shift= 1;
2182
    return 0;
2183
}
2184

    
2185
static int frame_start(SnowContext *s){
2186
   AVFrame tmp;
2187

    
2188
   if(s->keyframe)
2189
        reset_contexts(s);
2190
 
2191
    tmp= s->last_picture;
2192
    s->last_picture= s->current_picture;
2193
    s->current_picture= tmp;
2194
    
2195
    s->current_picture.reference= 1;
2196
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2197
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2198
        return -1;
2199
    }
2200
    
2201
    return 0;
2202
}
2203

    
2204
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2205
    SnowContext *s = avctx->priv_data;
2206
    CABACContext * const c= &s->c;
2207
    AVFrame *pict = data;
2208
    const int width= s->avctx->width;
2209
    const int height= s->avctx->height;
2210
    int used_count= 0;
2211
    int log2_threshold, level, orientation, plane_index, i;
2212

    
2213
    if(avctx->strict_std_compliance >= 0){
2214
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2215
               "use vstrict=-1 to use it anyway\n");
2216
        return -1;
2217
    }
2218
        
2219
    ff_init_cabac_encoder(c, buf, buf_size);
2220
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2221
    
2222
    s->input_picture = *pict;
2223

    
2224
    memset(s->header_state, 0, sizeof(s->header_state));
2225

    
2226
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2227
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2228
    
2229
    s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2230
    //<64 >60
2231
    s->qlog += 61;
2232

    
2233
    for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
2234
        s->mb_band.buf[i]= s->keyframe;
2235
    }
2236
    
2237
    frame_start(s);
2238

    
2239
    if(pict->pict_type == P_TYPE){
2240
        int block_width = (width +15)>>4;
2241
        int block_height= (height+15)>>4;
2242
        int stride= s->current_picture.linesize[0];
2243
        uint8_t *src_plane= s->input_picture.data[0];
2244
        int src_stride= s->input_picture.linesize[0];
2245
        int x,y;
2246
        
2247
        assert(s->current_picture.data[0]);
2248
        assert(s->last_picture.data[0]);
2249
     
2250
        s->m.avctx= s->avctx;
2251
        s->m.current_picture.data[0]= s->current_picture.data[0];
2252
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
2253
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
2254
        s->m.current_picture_ptr= &s->m.current_picture;
2255
        s->m.   last_picture_ptr= &s->m.   last_picture;
2256
        s->m.linesize=
2257
        s->m.   last_picture.linesize[0]=
2258
        s->m.    new_picture.linesize[0]=
2259
        s->m.current_picture.linesize[0]= stride;
2260
        s->m.width = width;
2261
        s->m.height= height;
2262
        s->m.mb_width = block_width;
2263
        s->m.mb_height= block_height;
2264
        s->m.mb_stride=   s->m.mb_width+1;
2265
        s->m.b8_stride= 2*s->m.mb_width+1;
2266
        s->m.f_code=1;
2267
        s->m.pict_type= pict->pict_type;
2268
        s->m.me_method= s->avctx->me_method;
2269
        s->m.me.scene_change_score=0;
2270
        s->m.flags= s->avctx->flags;
2271
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2272
        s->m.out_format= FMT_H263;
2273
        s->m.unrestricted_mv= 1;
2274

    
2275
        s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
2276
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
2277
        s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
2278

    
2279
        if(!s->motion_val8){
2280
            s->motion_val8 = av_mallocz(s->m.b8_stride*block_height*2*2*sizeof(int16_t));
2281
            s->motion_val16= av_mallocz(s->m.mb_stride*block_height*2*sizeof(int16_t));
2282
        }
2283
        
2284
        s->m.mb_type= s->mb_type;
2285
        
2286
        //dummies, to avoid segfaults
2287
        s->m.current_picture.mb_mean  = s->mb_mean;
2288
        s->m.current_picture.mb_var   = (int16_t*)s->dummy;
2289
        s->m.current_picture.mc_mb_var= (int16_t*)s->dummy;
2290
        s->m.current_picture.mb_type  = s->dummy;
2291
        
2292
        s->m.current_picture.motion_val[0]= s->motion_val8;
2293
        s->m.p_mv_table= s->motion_val16;
2294
        s->m.dsp= s->dsp; //move
2295
        ff_init_me(&s->m);
2296
    
2297
        
2298
        s->m.me.pre_pass=1;
2299
        s->m.me.dia_size= s->avctx->pre_dia_size;
2300
        s->m.first_slice_line=1;
2301
        for(y= block_height-1; y >= 0; y--) {
2302
            uint8_t src[stride*16];
2303

    
2304
            s->m.new_picture.data[0]= src - y*16*stride; //ugly
2305
            s->m.mb_y= y;
2306
            for(i=0; i<16 && i + 16*y<height; i++){
2307
                memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
2308
                for(x=width; x<16*block_width; x++)
2309
                    src[i*stride+x]= src[i*stride+x-1];
2310
            }
2311
            for(; i<16 && i + 16*y<16*block_height; i++)
2312
                memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
2313

    
2314
            for(x=block_width-1; x >=0 ;x--) {
2315
                s->m.mb_x= x;
2316
                ff_init_block_index(&s->m);
2317
                ff_update_block_index(&s->m);
2318
                ff_pre_estimate_p_frame_motion(&s->m, x, y);
2319
            }
2320
            s->m.first_slice_line=0;
2321
        }        
2322
        s->m.me.pre_pass=0;
2323
        
2324
        
2325
        s->m.me.dia_size= s->avctx->dia_size;
2326
        s->m.first_slice_line=1;
2327
        for (y = 0; y < block_height; y++) {
2328
            uint8_t src[stride*16];
2329

    
2330
            s->m.new_picture.data[0]= src - y*16*stride; //ugly
2331
            s->m.mb_y= y;
2332
            
2333
            assert(width <= stride);
2334
            assert(width <= 16*block_width);
2335
    
2336
            for(i=0; i<16 && i + 16*y<height; i++){
2337
                memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
2338
                for(x=width; x<16*block_width; x++)
2339
                    src[i*stride+x]= src[i*stride+x-1];
2340
            }
2341
            for(; i<16 && i + 16*y<16*block_height; i++)
2342
                memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
2343
    
2344
            for (x = 0; x < block_width; x++) {
2345
                int mb_xy= x + y*(s->mb_band.stride);
2346
                s->m.mb_x= x;
2347
                ff_init_block_index(&s->m);
2348
                ff_update_block_index(&s->m);
2349
                
2350
                ff_estimate_p_frame_motion(&s->m, x, y);
2351
                
2352
                s->mb_band   .buf[mb_xy]= (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER)
2353
                 ? 0 : 2;
2354
                s->mv_band[0].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][0];
2355
                s->mv_band[1].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][1];
2356
                
2357
                if(s->mb_band   .buf[x + y*(s->mb_band.stride)]==2 && 0){
2358
                    int dc0=128, dc1=128, dc, dc2, dir;
2359
                    int offset= (s->avctx->flags & CODEC_FLAG_QPEL) ? 64 : 32;
2360
                    
2361
                    dc       =s->mb_mean[x +  y   *s->m.mb_stride    ];
2362
                    if(x) dc0=s->mb_mean[x +  y   *s->m.mb_stride - 1];
2363
                    if(y) dc1=s->mb_mean[x + (y-1)*s->m.mb_stride    ];
2364
                    dc2= (dc0+dc1)>>1;
2365
#if 0
2366
                    if     (ABS(dc0 - dc) < ABS(dc1 - dc) && ABS(dc0 - dc) < ABS(dc2 - dc))
2367
                        dir= 1;
2368
                    else if(ABS(dc0 - dc) >=ABS(dc1 - dc) && ABS(dc1 - dc) < ABS(dc2 - dc))
2369
                        dir=-1;
2370
                    else
2371
                        dir=0;
2372
#endif                    
2373
                    if(ABS(dc0 - dc) < ABS(dc1 - dc) && x){
2374
                        s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + y*(s->mb_band.stride)-1] - offset;
2375
                        s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + y*(s->mb_band.stride)-1];
2376
                        s->mb_mean[x +  y   *s->m.mb_stride    ]= dc0;
2377
                    }else if(y){
2378
                        s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + (y-1)*(s->mb_band.stride)];
2379
                        s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + (y-1)*(s->mb_band.stride)] - offset;
2380
                        s->mb_mean[x +  y   *s->m.mb_stride    ]= dc1;
2381
                    }
2382
                }
2383
//                s->mb_band   .buf[x + y*(s->mb_band.stride)]=1; //FIXME intra only test
2384
            }
2385
            s->m.first_slice_line=0;
2386
        }
2387
        assert(s->m.pict_type == P_TYPE);
2388
        if(s->m.me.scene_change_score > s->avctx->scenechange_threshold){
2389
            s->m.pict_type= 
2390
            pict->pict_type =I_TYPE;
2391
            for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
2392
                s->mb_band.buf[i]= 1;
2393
                s->mv_band[0].buf[i]=
2394
                s->mv_band[1].buf[i]= 0;
2395
            }
2396
    //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
2397
        }        
2398
    }
2399
        
2400
    s->m.first_slice_line=1;
2401
    
2402
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
2403

    
2404
    encode_header(s);
2405
    
2406
    decorrelate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 0, 1);
2407
    decorrelate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 0, 1);
2408
    decorrelate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 0 ,1);
2409
    encode_subband(s, &s->mb_band   , s->mb_band   .buf, NULL, s->mb_band   .stride, 0);
2410
    encode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
2411
    encode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
2412
    
2413
//FIXME avoid this
2414
    correlate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 1, 1);
2415
    correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
2416
    correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
2417
    
2418
    for(plane_index=0; plane_index<3; plane_index++){
2419
        Plane *p= &s->plane[plane_index];
2420
        int w= p->width;
2421
        int h= p->height;
2422
        int x, y;
2423
        int bits= put_bits_count(&s->c.pb);
2424

    
2425
        //FIXME optimize
2426
#if QPRED
2427
        memset(s->pred_buffer, 0, sizeof(DWTELEM)*w*h);
2428
        predict_plane(s, s->pred_buffer, plane_index, 1);
2429
        spatial_dwt(s, s->pred_buffer, w, h, w);
2430
        for(level=0; level<s->spatial_decomposition_count; level++){
2431
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2432
                SubBand *b= &p->band[level][orientation];
2433
                int delta= ((int)s->pred_buffer - (int)s->spatial_dwt_buffer)/sizeof(DWTELEM);
2434
                
2435
                quantize  (s, b, b->buf + delta, b->stride, s->qbias);
2436
                dequantize(s, b, b->buf + delta, b->stride);
2437
            }
2438
        }
2439
        for(y=0; y<h; y++){
2440
            for(x=0; x<w; x++){
2441
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
2442
            }
2443
        }
2444
        spatial_dwt(s, s->spatial_dwt_buffer, w, h, w);
2445
        for(y=0; y<h; y++){
2446
            for(x=0; x<w; x++){
2447
                s->spatial_dwt_buffer[y*w + x]-= s->pred_buffer[y*w + x];
2448
            }
2449
        }
2450
#else
2451
     if(pict->data[plane_index]) //FIXME gray hack
2452
        for(y=0; y<h; y++){
2453
            for(x=0; x<w; x++){
2454
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
2455
            }
2456
        }
2457
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
2458
        spatial_dwt(s, s->spatial_dwt_buffer, w, h, w);
2459
#endif
2460
 
2461
        for(level=0; level<s->spatial_decomposition_count; level++){
2462
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2463
                SubBand *b= &p->band[level][orientation];
2464
                
2465
                quantize(s, b, b->buf, b->stride, s->qbias);
2466
                if(orientation==0)
2467
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
2468
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2469
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
2470
                if(orientation==0)
2471
                    correlate(s, b, b->buf, b->stride, 1, 0);
2472
            }
2473
        }
2474
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
2475

    
2476
        for(level=0; level<s->spatial_decomposition_count; level++){
2477
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2478
                SubBand *b= &p->band[level][orientation];
2479

    
2480
                dequantize(s, b, b->buf, b->stride);
2481
            }
2482
        }
2483
        
2484
#if QPRED
2485
        for(y=0; y<h; y++){
2486
            for(x=0; x<w; x++){
2487
                s->spatial_dwt_buffer[y*w + x]+= s->pred_buffer[y*w + x];
2488
            }
2489
        }
2490
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
2491
#else
2492
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
2493
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2494
#endif
2495
        //FIXME optimize
2496
        for(y=0; y<h; y++){
2497
            for(x=0; x<w; x++){
2498
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2499
                if(v&(~255)) v= ~(v>>31);
2500
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2501
            }
2502
        }
2503
        if(s->avctx->flags&CODEC_FLAG_PSNR){
2504
            int64_t error= 0;
2505
            
2506
    if(pict->data[plane_index]) //FIXME gray hack
2507
            for(y=0; y<h; y++){
2508
                for(x=0; x<w; x++){
2509
                    int d= s->spatial_dwt_buffer[y*w + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]*256;
2510
                    error += d*d;
2511
                }
2512
            }
2513
            error= (error + 128*256)>>16;
2514
            s->avctx->error[plane_index] += error;
2515
            s->avctx->error[3] += error;
2516
        }
2517
    }
2518

    
2519
    if(s->last_picture.data[0])
2520
        avctx->release_buffer(avctx, &s->last_picture);
2521

    
2522
    emms_c();
2523
    
2524
    return put_cabac_terminate(c, 1);
2525
}
2526

    
2527
static void common_end(SnowContext *s){
2528
    av_freep(&s->spatial_dwt_buffer);
2529
    av_freep(&s->mb_band.buf);
2530
    av_freep(&s->mv_band[0].buf);
2531
    av_freep(&s->mv_band[1].buf);
2532

    
2533
    av_freep(&s->m.me.scratchpad);    
2534
    av_freep(&s->m.me.map);
2535
    av_freep(&s->m.me.score_map);
2536
    av_freep(&s->mb_type);
2537
    av_freep(&s->mb_mean);
2538
    av_freep(&s->dummy);
2539
    av_freep(&s->motion_val8);
2540
    av_freep(&s->motion_val16);
2541
}
2542

    
2543
static int encode_end(AVCodecContext *avctx)
2544
{
2545
    SnowContext *s = avctx->priv_data;
2546

    
2547
    common_end(s);
2548

    
2549
    return 0;
2550
}
2551

    
2552
static int decode_init(AVCodecContext *avctx)
2553
{
2554
//    SnowContext *s = avctx->priv_data;
2555

    
2556
    common_init(avctx);
2557
    
2558
    return 0;
2559
}
2560

    
2561
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
2562
    SnowContext *s = avctx->priv_data;
2563
    CABACContext * const c= &s->c;
2564
    const int width= s->avctx->width;
2565
    const int height= s->avctx->height;
2566
    int bytes_read;
2567
    AVFrame *picture = data;
2568
    int log2_threshold, level, orientation, plane_index;
2569
    
2570

    
2571
    /* no supplementary picture */
2572
    if (buf_size == 0)
2573
        return 0;
2574

    
2575
    ff_init_cabac_decoder(c, buf, buf_size);
2576
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2577

    
2578
    memset(s->header_state, 0, sizeof(s->header_state));
2579

    
2580
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2581
    decode_header(s);
2582

    
2583
    frame_start(s);
2584
    //keyframe flag dupliaction mess FIXME
2585
    if(avctx->debug&FF_DEBUG_PICT_INFO)
2586
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2587
    
2588
    decode_subband(s, &s->mb_band   , s->mb_band   .buf, NULL, s->mb_band   .stride, 0);
2589
    decode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
2590
    decode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
2591
    correlate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 1, 1);
2592
    correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
2593
    correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
2594

    
2595
    for(plane_index=0; plane_index<3; plane_index++){
2596
        Plane *p= &s->plane[plane_index];
2597
        int w= p->width;
2598
        int h= p->height;
2599
        int x, y;
2600
        
2601
if(s->avctx->debug&2048){
2602
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2603
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2604

    
2605
        for(y=0; y<h; y++){
2606
            for(x=0; x<w; x++){
2607
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2608
                if(v&(~255)) v= ~(v>>31);
2609
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2610
            }
2611
        }
2612
}
2613
        for(level=0; level<s->spatial_decomposition_count; level++){
2614
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2615
                SubBand *b= &p->band[level][orientation];
2616

    
2617
                decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2618
                if(orientation==0)
2619
                    correlate(s, b, b->buf, b->stride, 1, 0);
2620
            }
2621
        }
2622
if(!(s->avctx->debug&1024))
2623
        for(level=0; level<s->spatial_decomposition_count; level++){
2624
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2625
                SubBand *b= &p->band[level][orientation];
2626

    
2627
                dequantize(s, b, b->buf, b->stride);
2628
            }
2629
        }
2630

    
2631
#if QPRED
2632
        memset(s->pred_buffer, 0, sizeof(DWTELEM)*w*h);
2633
        predict_plane(s, s->pred_buffer, plane_index, 1);
2634
        spatial_dwt(s, s->pred_buffer, w, h, w);
2635
        for(level=0; level<s->spatial_decomposition_count; level++){
2636
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2637
                SubBand *b= &p->band[level][orientation];
2638
                int delta= ((int)s->pred_buffer - (int)s->spatial_dwt_buffer)/sizeof(DWTELEM);
2639
                
2640
                quantize  (s, b, b->buf + delta, b->stride, s->qbias);
2641
                dequantize(s, b, b->buf + delta, b->stride);
2642
            }
2643
        }
2644
        for(y=0; y<h; y++){
2645
            for(x=0; x<w; x++){
2646
                s->spatial_dwt_buffer[y*w + x]+= s->pred_buffer[y*w + x];
2647
            }
2648
        }
2649
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
2650
#else
2651
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
2652
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2653
#endif
2654

    
2655
        //FIXME optimize
2656
        for(y=0; y<h; y++){
2657
            for(x=0; x<w; x++){
2658
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2659
                if(v&(~255)) v= ~(v>>31);
2660
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2661
            }
2662
        }
2663
    }
2664
            
2665
    emms_c();
2666

    
2667
    if(s->last_picture.data[0])
2668
        avctx->release_buffer(avctx, &s->last_picture);
2669

    
2670
if(!(s->avctx->debug&2048))        
2671
    *picture= s->current_picture;
2672
else
2673
    *picture= s->mconly_picture;
2674
    
2675
    *data_size = sizeof(AVFrame);
2676
    
2677
    bytes_read= get_cabac_terminate(c);
2678
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
2679

    
2680
    return bytes_read;
2681
}
2682

    
2683
static int decode_end(AVCodecContext *avctx)
2684
{
2685
    SnowContext *s = avctx->priv_data;
2686

    
2687
    common_end(s);
2688

    
2689
    return 0;
2690
}
2691

    
2692
AVCodec snow_decoder = {
2693
    "snow",
2694
    CODEC_TYPE_VIDEO,
2695
    CODEC_ID_SNOW,
2696
    sizeof(SnowContext),
2697
    decode_init,
2698
    NULL,
2699
    decode_end,
2700
    decode_frame,
2701
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2702
    NULL
2703
};
2704

    
2705
AVCodec snow_encoder = {
2706
    "snow",
2707
    CODEC_TYPE_VIDEO,
2708
    CODEC_ID_SNOW,
2709
    sizeof(SnowContext),
2710
    encode_init,
2711
    encode_frame,
2712
    encode_end,
2713
};
2714

    
2715

    
2716
#if 0
2717
#undef malloc
2718
#undef free
2719
#undef printf
2720

2721
int main(){
2722
    int width=256;
2723
    int height=256;
2724
    int buffer[2][width*height];
2725
    SnowContext s;
2726
    int i;
2727
    s.spatial_decomposition_count=6;
2728
    s.spatial_decomposition_type=1;
2729
    
2730
    printf("testing 5/3 DWT\n");
2731
    for(i=0; i<width*height; i++)
2732
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
2733
    
2734
    spatial_dwt(&s, buffer[0], width, height, width);
2735
    spatial_idwt(&s, buffer[0], width, height, width);
2736
    
2737
    for(i=0; i<width*height; i++)
2738
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
2739

2740
    printf("testing 9/7 DWT\n");
2741
    s.spatial_decomposition_type=0;
2742
    for(i=0; i<width*height; i++)
2743
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
2744
    
2745
    spatial_dwt(&s, buffer[0], width, height, width);
2746
    spatial_idwt(&s, buffer[0], width, height, width);
2747
    
2748
    for(i=0; i<width*height; i++)
2749
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
2750
        
2751
    printf("testing AC coder\n");
2752
    memset(s.header_state, 0, sizeof(s.header_state));
2753
    ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
2754
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2755
        
2756
    for(i=-256; i<256; i++){
2757
START_TIMER
2758
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
2759
STOP_TIMER("put_symbol")
2760
    }
2761
    put_cabac_terminate(&s.c, 1);
2762

2763
    memset(s.header_state, 0, sizeof(s.header_state));
2764
    ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
2765
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2766
    
2767
    for(i=-256; i<256; i++){
2768
        int j;
2769
START_TIMER
2770
        j= get_symbol(&s.c, s.header_state, 1);
2771
STOP_TIMER("get_symbol")
2772
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
2773
    }
2774
{
2775
int level, orientation, x, y;
2776
int64_t errors[8][4];
2777
int64_t g=0;
2778

2779
    memset(errors, 0, sizeof(errors));
2780
    s.spatial_decomposition_count=3;
2781
    s.spatial_decomposition_type=0;
2782
    for(level=0; level<s.spatial_decomposition_count; level++){
2783
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2784
            int w= width  >> (s.spatial_decomposition_count-level);
2785
            int h= height >> (s.spatial_decomposition_count-level);
2786
            int stride= width  << (s.spatial_decomposition_count-level);
2787
            DWTELEM *buf= buffer[0];
2788
            int64_t error=0;
2789

2790
            if(orientation&1) buf+=w;
2791
            if(orientation>1) buf+=stride>>1;
2792
            
2793
            memset(buffer[0], 0, sizeof(int)*width*height);
2794
            buf[w/2 + h/2*stride]= 256*256;
2795
            spatial_idwt(&s, buffer[0], width, height, width);
2796
            for(y=0; y<height; y++){
2797
                for(x=0; x<width; x++){
2798
                    int64_t d= buffer[0][x + y*width];
2799
                    error += d*d;
2800
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
2801
                }
2802
                if(ABS(height/2-y)<9 && level==2) printf("\n");
2803
            }
2804
            error= (int)(sqrt(error)+0.5);
2805
            errors[level][orientation]= error;
2806
            if(g) g=ff_gcd(g, error);
2807
            else g= error;
2808
        }
2809
    }
2810
    printf("static int const visual_weight[][4]={\n");
2811
    for(level=0; level<s.spatial_decomposition_count; level++){
2812
        printf("  {");
2813
        for(orientation=0; orientation<4; orientation++){
2814
            printf("%8lld,", errors[level][orientation]/g);
2815
        }
2816
        printf("},\n");
2817
    }
2818
    printf("};\n");
2819
    {
2820
            int level=2;
2821
            int orientation=3;
2822
            int w= width  >> (s.spatial_decomposition_count-level);
2823
            int h= height >> (s.spatial_decomposition_count-level);
2824
            int stride= width  << (s.spatial_decomposition_count-level);
2825
            DWTELEM *buf= buffer[0];
2826
            int64_t error=0;
2827

2828
            buf+=w;
2829
            buf+=stride>>1;
2830
            
2831
            memset(buffer[0], 0, sizeof(int)*width*height);
2832
#if 1
2833
            for(y=0; y<height; y++){
2834
                for(x=0; x<width; x++){
2835
                    int tab[4]={0,2,3,1};
2836
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
2837
                }
2838
            }
2839
            spatial_dwt(&s, buffer[0], width, height, width);
2840
#else
2841
            for(y=0; y<h; y++){
2842
                for(x=0; x<w; x++){
2843
                    buf[x + y*stride  ]=169;
2844
                    buf[x + y*stride-w]=64;
2845
                }
2846
            }
2847
            spatial_idwt(&s, buffer[0], width, height, width);
2848
#endif
2849
            for(y=0; y<height; y++){
2850
                for(x=0; x<width; x++){
2851
                    int64_t d= buffer[0][x + y*width];
2852
                    error += d*d;
2853
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
2854
                }
2855
                if(ABS(height/2-y)<9) printf("\n");
2856
            }
2857
    }
2858

    
2859
}
2860
    return 0;
2861
}
2862
#endif
2863