Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ f5a71928

History | View | Annotate | Download (133 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22

    
23
#include "rangecoder.h"
24
#define MID_STATE 128
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QSHIFT 5
35
#define QROOT (1<<QSHIFT)
36
#define LOSSLESS_QLOG -128
37
#define FRAC_BITS 8
38

    
39
static const int8_t quant3[256]={
40
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56
};
57
static const int8_t quant3b[256]={
58
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74
};
75
static const int8_t quant3bA[256]={
76
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92
};
93
static const int8_t quant5[256]={
94
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110
};
111
static const int8_t quant7[256]={
112
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128
};
129
static const int8_t quant9[256]={
130
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146
};
147
static const int8_t quant11[256]={
148
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164
};
165
static const int8_t quant13[256]={
166
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182
};
183

    
184
#define LOG2_OBMC_MAX 6
185
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
186
#if 0 //64*cubic
187
static const uint8_t obmc32[1024]={
188
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220
//error:0.000022
221
};
222
static const uint8_t obmc16[256]={
223
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239
//error:0.000033
240
};
241
#elif 1 // 64*linear
242
static const uint8_t obmc32[1024]={
243
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275
 //error:0.000020
276
};
277
static const uint8_t obmc16[256]={
278
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294
//error:0.000015
295
};
296
#else //64*cos
297
static const uint8_t obmc32[1024]={
298
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330
//error:0.000022
331
};
332
static const uint8_t obmc16[256]={
333
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349
//error:0.000022
350
};
351
#endif
352

    
353
//linear *64
354
static const uint8_t obmc8[64]={
355
 1, 3, 5, 7, 7, 5, 3, 1,
356
 3, 9,15,21,21,15, 9, 3,
357
 5,15,25,35,35,25,15, 5,
358
 7,21,35,49,49,35,21, 7,
359
 7,21,35,49,49,35,21, 7,
360
 5,15,25,35,35,25,15, 5,
361
 3, 9,15,21,21,15, 9, 3,
362
 1, 3, 5, 7, 7, 5, 3, 1,
363
//error:0.000000
364
};
365

    
366
//linear *64
367
static const uint8_t obmc4[16]={
368
 4,12,12, 4,
369
12,36,36,12,
370
12,36,36,12,
371
 4,12,12, 4,
372
//error:0.000000
373
};
374

    
375
static const uint8_t *obmc_tab[4]={
376
    obmc32, obmc16, obmc8, obmc4
377
};
378

    
379
typedef struct BlockNode{
380
    int16_t mx;
381
    int16_t my;
382
    uint8_t color[3];
383
    uint8_t type;
384
//#define TYPE_SPLIT    1
385
#define BLOCK_INTRA   1
386
//#define TYPE_NOCOLOR  4
387
    uint8_t level; //FIXME merge into type?
388
}BlockNode;
389

    
390
#define LOG2_MB_SIZE 4
391
#define MB_SIZE (1<<LOG2_MB_SIZE)
392

    
393
typedef struct x_and_coeff{
394
    int16_t x;
395
    uint16_t coeff;
396
} x_and_coeff;
397

    
398
typedef struct SubBand{
399
    int level;
400
    int stride;
401
    int width;
402
    int height;
403
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
404
    DWTELEM *buf;
405
    int buf_x_offset;
406
    int buf_y_offset;
407
    int stride_line; ///< Stride measured in lines, not pixels.
408
    x_and_coeff * x_coeff;
409
    struct SubBand *parent;
410
    uint8_t state[/*7*2*/ 7 + 512][32];
411
}SubBand;
412

    
413
typedef struct Plane{
414
    int width;
415
    int height;
416
    SubBand band[MAX_DECOMPOSITIONS][4];
417
}Plane;
418

    
419
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
420
typedef struct {
421
    DWTELEM * * line; ///< For use by idwt and predict_slices.
422
    DWTELEM * * data_stack; ///< Used for internal purposes.
423
    int data_stack_top;
424
    int line_count;
425
    int line_width;
426
    int data_count;
427
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
428
} slice_buffer;
429

    
430
typedef struct SnowContext{
431
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
432

    
433
    AVCodecContext *avctx;
434
    RangeCoder c;
435
    DSPContext dsp;
436
    AVFrame input_picture;
437
    AVFrame current_picture;
438
    AVFrame last_picture;
439
    AVFrame mconly_picture;
440
//     uint8_t q_context[16];
441
    uint8_t header_state[32];
442
    uint8_t block_state[128 + 32*128];
443
    int keyframe;
444
    int always_reset;
445
    int version;
446
    int spatial_decomposition_type;
447
    int temporal_decomposition_type;
448
    int spatial_decomposition_count;
449
    int temporal_decomposition_count;
450
    DWTELEM *spatial_dwt_buffer;
451
    int colorspace_type;
452
    int chroma_h_shift;
453
    int chroma_v_shift;
454
    int spatial_scalability;
455
    int qlog;
456
    int lambda;
457
    int lambda2;
458
    int mv_scale;
459
    int qbias;
460
#define QBIAS_SHIFT 3
461
    int b_width;
462
    int b_height;
463
    int block_max_depth;
464
    Plane plane[MAX_PLANES];
465
    BlockNode *block;
466
    slice_buffer sb;
467

    
468
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
469
}SnowContext;
470

    
471
typedef struct {
472
    DWTELEM *b0;
473
    DWTELEM *b1;
474
    DWTELEM *b2;
475
    DWTELEM *b3;
476
    int y;
477
} dwt_compose_t;
478

    
479
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
480
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
481

    
482
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
483
{
484
    int i;
485
  
486
    buf->base_buffer = base_buffer;
487
    buf->line_count = line_count;
488
    buf->line_width = line_width;
489
    buf->data_count = max_allocated_lines;
490
    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
491
    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
492
  
493
    for (i = 0; i < max_allocated_lines; i++)
494
    {
495
      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
496
    }
497
    
498
    buf->data_stack_top = max_allocated_lines - 1;
499
}
500

    
501
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
502
{
503
    int i;
504
    int offset;
505
    DWTELEM * buffer;
506
  
507
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);  
508
  
509
    assert(buf->data_stack_top >= 0);
510
//  assert(!buf->line[line]);
511
    if (buf->line[line])
512
        return buf->line[line];
513
    
514
    offset = buf->line_width * line;
515
    buffer = buf->data_stack[buf->data_stack_top];
516
    buf->data_stack_top--;
517
    buf->line[line] = buffer;
518
  
519
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
520
  
521
    return buffer;
522
}
523

    
524
static void slice_buffer_release(slice_buffer * buf, int line)
525
{
526
    int i;
527
    int offset;
528
    DWTELEM * buffer;
529

    
530
    assert(line >= 0 && line < buf->line_count);
531
    assert(buf->line[line]);
532

    
533
    offset = buf->line_width * line;
534
    buffer = buf->line[line];
535
    buf->data_stack_top++;
536
    buf->data_stack[buf->data_stack_top] = buffer;
537
    buf->line[line] = NULL;
538
  
539
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
540
}
541

    
542
static void slice_buffer_flush(slice_buffer * buf)
543
{
544
    int i;
545
    for (i = 0; i < buf->line_count; i++)
546
    {
547
        if (buf->line[i])
548
        {
549
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
550
            slice_buffer_release(buf, i);
551
        }
552
    }
553
}
554

    
555
static void slice_buffer_destroy(slice_buffer * buf)
556
{
557
    int i;
558
    slice_buffer_flush(buf);
559
  
560
    for (i = buf->data_count - 1; i >= 0; i--)
561
    {
562
        assert(buf->data_stack[i]);
563
        av_free(buf->data_stack[i]);
564
    }
565
    assert(buf->data_stack);
566
    av_free(buf->data_stack);
567
    assert(buf->line);
568
    av_free(buf->line);
569
}
570

    
571
#ifdef        __sgi
572
// Avoid a name clash on SGI IRIX
573
#undef        qexp
574
#endif
575
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
576
static uint8_t qexp[QROOT];
577

    
578
static inline int mirror(int v, int m){
579
    if     (v<0) return -v;
580
    else if(v>m) return 2*m-v;
581
    else         return v;
582
}
583

    
584
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
585
    int i;
586

    
587
    if(v){
588
        const int a= ABS(v);
589
        const int e= av_log2(a);
590
#if 1
591
        const int el= FFMIN(e, 10);   
592
        put_rac(c, state+0, 0);
593

    
594
        for(i=0; i<el; i++){
595
            put_rac(c, state+1+i, 1);  //1..10
596
        }
597
        for(; i<e; i++){
598
            put_rac(c, state+1+9, 1);  //1..10
599
        }
600
        put_rac(c, state+1+FFMIN(i,9), 0);
601

    
602
        for(i=e-1; i>=el; i--){
603
            put_rac(c, state+22+9, (a>>i)&1); //22..31
604
        }
605
        for(; i>=0; i--){
606
            put_rac(c, state+22+i, (a>>i)&1); //22..31
607
        }
608

    
609
        if(is_signed)
610
            put_rac(c, state+11 + el, v < 0); //11..21
611
#else
612
        
613
        put_rac(c, state+0, 0);
614
        if(e<=9){
615
            for(i=0; i<e; i++){
616
                put_rac(c, state+1+i, 1);  //1..10
617
            }
618
            put_rac(c, state+1+i, 0);
619

    
620
            for(i=e-1; i>=0; i--){
621
                put_rac(c, state+22+i, (a>>i)&1); //22..31
622
            }
623

    
624
            if(is_signed)
625
                put_rac(c, state+11 + e, v < 0); //11..21
626
        }else{
627
            for(i=0; i<e; i++){
628
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
629
            }
630
            put_rac(c, state+1+FFMIN(i,9), 0);
631

    
632
            for(i=e-1; i>=0; i--){
633
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
634
            }
635

    
636
            if(is_signed)
637
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
638
        }
639
#endif
640
    }else{
641
        put_rac(c, state+0, 1);
642
    }
643
}
644

    
645
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
646
    if(get_rac(c, state+0))
647
        return 0;
648
    else{
649
        int i, e, a;
650
        e= 0;
651
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
652
            e++;
653
        }
654

    
655
        a= 1;
656
        for(i=e-1; i>=0; i--){
657
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
658
        }
659

    
660
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
661
            return -a;
662
        else
663
            return a;
664
    }
665
}
666

    
667
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
668
    int i;
669
    int r= log2>=0 ? 1<<log2 : 1;
670

    
671
    assert(v>=0);
672
    assert(log2>=-4);
673

    
674
    while(v >= r){
675
        put_rac(c, state+4+log2, 1);
676
        v -= r;
677
        log2++;
678
        if(log2>0) r+=r;
679
    }
680
    put_rac(c, state+4+log2, 0);
681
    
682
    for(i=log2-1; i>=0; i--){
683
        put_rac(c, state+31-i, (v>>i)&1);
684
    }
685
}
686

    
687
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
688
    int i;
689
    int r= log2>=0 ? 1<<log2 : 1;
690
    int v=0;
691

    
692
    assert(log2>=-4);
693

    
694
    while(get_rac(c, state+4+log2)){
695
        v+= r;
696
        log2++;
697
        if(log2>0) r+=r;
698
    }
699
    
700
    for(i=log2-1; i>=0; i--){
701
        v+= get_rac(c, state+31-i)<<i;
702
    }
703

    
704
    return v;
705
}
706

    
707
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
708
    const int mirror_left= !highpass;
709
    const int mirror_right= (width&1) ^ highpass;
710
    const int w= (width>>1) - 1 + (highpass & width);
711
    int i;
712

    
713
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
714
    if(mirror_left){
715
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
716
        dst += dst_step;
717
        src += src_step;
718
    }
719
    
720
    for(i=0; i<w; i++){
721
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
722
    }
723
    
724
    if(mirror_right){
725
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
726
    }
727
}
728

    
729
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
730
    const int mirror_left= !highpass;
731
    const int mirror_right= (width&1) ^ highpass;
732
    const int w= (width>>1) - 1 + (highpass & width);
733
    int i;
734

    
735
    if(mirror_left){
736
        int r= 3*2*ref[0];
737
        r += r>>4;
738
        r += r>>8;
739
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
740
        dst += dst_step;
741
        src += src_step;
742
    }
743
    
744
    for(i=0; i<w; i++){
745
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
746
        r += r>>4;
747
        r += r>>8;
748
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
749
    }
750
    
751
    if(mirror_right){
752
        int r= 3*2*ref[w*ref_step];
753
        r += r>>4;
754
        r += r>>8;
755
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
756
    }
757
}
758

    
759
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
760
    const int mirror_left= !highpass;
761
    const int mirror_right= (width&1) ^ highpass;
762
    const int w= (width>>1) - 1 + (highpass & width);
763
    int i;
764

    
765
    assert(shift == 4);
766
#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
767
    if(mirror_left){
768
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
769
        dst += dst_step;
770
        src += src_step;
771
    }
772
    
773
    for(i=0; i<w; i++){
774
        dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
775
    }
776
    
777
    if(mirror_right){
778
        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
779
    }
780
}
781

    
782

    
783
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
784
    int x, i;
785
    
786
    for(x=start; x<width; x+=2){
787
        int64_t sum=0;
788

    
789
        for(i=0; i<n; i++){
790
            int x2= x + 2*i - n + 1;
791
            if     (x2<     0) x2= -x2;
792
            else if(x2>=width) x2= 2*width-x2-2;
793
            sum += coeffs[i]*(int64_t)dst[x2];
794
        }
795
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
796
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
797
    }
798
}
799

    
800
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
801
    int x, y, i;
802
    for(y=start; y<height; y+=2){
803
        for(x=0; x<width; x++){
804
            int64_t sum=0;
805
    
806
            for(i=0; i<n; i++){
807
                int y2= y + 2*i - n + 1;
808
                if     (y2<      0) y2= -y2;
809
                else if(y2>=height) y2= 2*height-y2-2;
810
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
811
            }
812
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
813
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
814
        }
815
    }
816
}
817

    
818
#define SCALEX 1
819
#define LX0 0
820
#define LX1 1
821

    
822
#if 0 // more accurate 9/7
823
#define N1 2
824
#define SHIFT1 14
825
#define COEFFS1 (int[]){-25987,-25987}
826
#define N2 2
827
#define SHIFT2 19
828
#define COEFFS2 (int[]){-27777,-27777}
829
#define N3 2
830
#define SHIFT3 15
831
#define COEFFS3 (int[]){28931,28931}
832
#define N4 2
833
#define SHIFT4 15
834
#define COEFFS4 (int[]){14533,14533}
835
#elif 1 // 13/7 CRF
836
#define N1 4
837
#define SHIFT1 4
838
#define COEFFS1 (int[]){1,-9,-9,1}
839
#define N2 4
840
#define SHIFT2 4
841
#define COEFFS2 (int[]){-1,5,5,-1}
842
#define N3 0
843
#define SHIFT3 1
844
#define COEFFS3 NULL
845
#define N4 0
846
#define SHIFT4 1
847
#define COEFFS4 NULL
848
#elif 1 // 3/5
849
#define LX0 1
850
#define LX1 0
851
#define SCALEX 0.5
852
#define N1 2
853
#define SHIFT1 1
854
#define COEFFS1 (int[]){1,1}
855
#define N2 2
856
#define SHIFT2 2
857
#define COEFFS2 (int[]){-1,-1}
858
#define N3 0
859
#define SHIFT3 0
860
#define COEFFS3 NULL
861
#define N4 0
862
#define SHIFT4 0
863
#define COEFFS4 NULL
864
#elif 1 // 11/5 
865
#define N1 0
866
#define SHIFT1 1
867
#define COEFFS1 NULL
868
#define N2 2
869
#define SHIFT2 2
870
#define COEFFS2 (int[]){-1,-1}
871
#define N3 2
872
#define SHIFT3 0
873
#define COEFFS3 (int[]){-1,-1}
874
#define N4 4
875
#define SHIFT4 7
876
#define COEFFS4 (int[]){-5,29,29,-5}
877
#define SCALEX 4
878
#elif 1 // 9/7 CDF
879
#define N1 2
880
#define SHIFT1 7
881
#define COEFFS1 (int[]){-203,-203}
882
#define N2 2
883
#define SHIFT2 12
884
#define COEFFS2 (int[]){-217,-217}
885
#define N3 2
886
#define SHIFT3 7
887
#define COEFFS3 (int[]){113,113}
888
#define N4 2
889
#define SHIFT4 9
890
#define COEFFS4 (int[]){227,227}
891
#define SCALEX 1
892
#elif 1 // 7/5 CDF
893
#define N1 0
894
#define SHIFT1 1
895
#define COEFFS1 NULL
896
#define N2 2
897
#define SHIFT2 2
898
#define COEFFS2 (int[]){-1,-1}
899
#define N3 2
900
#define SHIFT3 0
901
#define COEFFS3 (int[]){-1,-1}
902
#define N4 2
903
#define SHIFT4 4
904
#define COEFFS4 (int[]){3,3}
905
#elif 1 // 9/7 MN
906
#define N1 4
907
#define SHIFT1 4
908
#define COEFFS1 (int[]){1,-9,-9,1}
909
#define N2 2
910
#define SHIFT2 2
911
#define COEFFS2 (int[]){1,1}
912
#define N3 0
913
#define SHIFT3 1
914
#define COEFFS3 NULL
915
#define N4 0
916
#define SHIFT4 1
917
#define COEFFS4 NULL
918
#else // 13/7 CRF
919
#define N1 4
920
#define SHIFT1 4
921
#define COEFFS1 (int[]){1,-9,-9,1}
922
#define N2 4
923
#define SHIFT2 4
924
#define COEFFS2 (int[]){-1,5,5,-1}
925
#define N3 0
926
#define SHIFT3 1
927
#define COEFFS3 NULL
928
#define N4 0
929
#define SHIFT4 1
930
#define COEFFS4 NULL
931
#endif
932
static void horizontal_decomposeX(DWTELEM *b, int width){
933
    DWTELEM temp[width];
934
    const int width2= width>>1;
935
    const int w2= (width+1)>>1;
936
    int A1,A2,A3,A4, x;
937

    
938
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
939
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
940
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
941
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
942
    
943
    for(x=0; x<width2; x++){
944
        temp[x   ]= b[2*x    ];
945
        temp[x+w2]= b[2*x + 1];
946
    }
947
    if(width&1)
948
        temp[x   ]= b[2*x    ];
949
    memcpy(b, temp, width*sizeof(int));
950
}
951

    
952
static void horizontal_composeX(DWTELEM *b, int width){
953
    DWTELEM temp[width];
954
    const int width2= width>>1;
955
    int A1,A2,A3,A4, x;
956
    const int w2= (width+1)>>1;
957

    
958
    memcpy(temp, b, width*sizeof(int));
959
    for(x=0; x<width2; x++){
960
        b[2*x    ]= temp[x   ];
961
        b[2*x + 1]= temp[x+w2];
962
    }
963
    if(width&1)
964
        b[2*x    ]= temp[x   ];
965

    
966
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
967
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
968
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
969
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
970
}
971

    
972
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
973
    int x, y;
974
  
975
    for(y=0; y<height; y++){
976
        for(x=0; x<width; x++){
977
            buffer[y*stride + x] *= SCALEX;
978
        }
979
    }
980

    
981
    for(y=0; y<height; y++){
982
        horizontal_decomposeX(buffer + y*stride, width);
983
    }
984
    
985
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
986
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
987
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
988
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
989
}
990

    
991
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
992
    int x, y;
993
  
994
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
995
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
996
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
997
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
998

    
999
    for(y=0; y<height; y++){
1000
        horizontal_composeX(buffer + y*stride, width);
1001
    }
1002

    
1003
    for(y=0; y<height; y++){
1004
        for(x=0; x<width; x++){
1005
            buffer[y*stride + x] /= SCALEX;
1006
        }
1007
    }
1008
}
1009

    
1010
static void horizontal_decompose53i(DWTELEM *b, int width){
1011
    DWTELEM temp[width];
1012
    const int width2= width>>1;
1013
    int A1,A2,A3,A4, x;
1014
    const int w2= (width+1)>>1;
1015

    
1016
    for(x=0; x<width2; x++){
1017
        temp[x   ]= b[2*x    ];
1018
        temp[x+w2]= b[2*x + 1];
1019
    }
1020
    if(width&1)
1021
        temp[x   ]= b[2*x    ];
1022
#if 0
1023
    A2= temp[1       ];
1024
    A4= temp[0       ];
1025
    A1= temp[0+width2];
1026
    A1 -= (A2 + A4)>>1;
1027
    A4 += (A1 + 1)>>1;
1028
    b[0+width2] = A1;
1029
    b[0       ] = A4;
1030
    for(x=1; x+1<width2; x+=2){
1031
        A3= temp[x+width2];
1032
        A4= temp[x+1     ];
1033
        A3 -= (A2 + A4)>>1;
1034
        A2 += (A1 + A3 + 2)>>2;
1035
        b[x+width2] = A3;
1036
        b[x       ] = A2;
1037

1038
        A1= temp[x+1+width2];
1039
        A2= temp[x+2       ];
1040
        A1 -= (A2 + A4)>>1;
1041
        A4 += (A1 + A3 + 2)>>2;
1042
        b[x+1+width2] = A1;
1043
        b[x+1       ] = A4;
1044
    }
1045
    A3= temp[width-1];
1046
    A3 -= A2;
1047
    A2 += (A1 + A3 + 2)>>2;
1048
    b[width -1] = A3;
1049
    b[width2-1] = A2;
1050
#else        
1051
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1052
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
1053
#endif
1054
}
1055

    
1056
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1057
    int i;
1058
    
1059
    for(i=0; i<width; i++){
1060
        b1[i] -= (b0[i] + b2[i])>>1;
1061
    }
1062
}
1063

    
1064
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1065
    int i;
1066
    
1067
    for(i=0; i<width; i++){
1068
        b1[i] += (b0[i] + b2[i] + 2)>>2;
1069
    }
1070
}
1071

    
1072
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1073
    int y;
1074
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1075
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
1076
  
1077
    for(y=-2; y<height; y+=2){
1078
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1079
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1080

    
1081
{START_TIMER
1082
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
1083
        if(y+2 < height) horizontal_decompose53i(b3, width);
1084
STOP_TIMER("horizontal_decompose53i")}
1085
        
1086
{START_TIMER
1087
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1088
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1089
STOP_TIMER("vertical_decompose53i*")}
1090
        
1091
        b0=b2;
1092
        b1=b3;
1093
    }
1094
}
1095

    
1096
#define liftS lift
1097
#define lift5 lift
1098
#if 1
1099
#define W_AM 3
1100
#define W_AO 0
1101
#define W_AS 1
1102

    
1103
#undef liftS
1104
#define W_BM 1
1105
#define W_BO 8
1106
#define W_BS 4
1107

    
1108
#define W_CM 1
1109
#define W_CO 0
1110
#define W_CS 0
1111

    
1112
#define W_DM 3
1113
#define W_DO 4
1114
#define W_DS 3
1115
#elif 0
1116
#define W_AM 55
1117
#define W_AO 16
1118
#define W_AS 5
1119

    
1120
#define W_BM 3
1121
#define W_BO 32
1122
#define W_BS 6
1123

    
1124
#define W_CM 127
1125
#define W_CO 64
1126
#define W_CS 7
1127

    
1128
#define W_DM 7
1129
#define W_DO 8
1130
#define W_DS 4
1131
#elif 0
1132
#define W_AM 97
1133
#define W_AO 32
1134
#define W_AS 6
1135

    
1136
#define W_BM 63
1137
#define W_BO 512
1138
#define W_BS 10
1139

    
1140
#define W_CM 13
1141
#define W_CO 8
1142
#define W_CS 4
1143

    
1144
#define W_DM 15
1145
#define W_DO 16
1146
#define W_DS 5
1147

    
1148
#else
1149

    
1150
#define W_AM 203
1151
#define W_AO 64
1152
#define W_AS 7
1153

    
1154
#define W_BM 217
1155
#define W_BO 2048
1156
#define W_BS 12
1157

    
1158
#define W_CM 113
1159
#define W_CO 64
1160
#define W_CS 7
1161

    
1162
#define W_DM 227
1163
#define W_DO 128
1164
#define W_DS 9
1165
#endif
1166
static void horizontal_decompose97i(DWTELEM *b, int width){
1167
    DWTELEM temp[width];
1168
    const int w2= (width+1)>>1;
1169

    
1170
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1171
    liftS(temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1172
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1173
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1174
}
1175

    
1176

    
1177
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1178
    int i;
1179
    
1180
    for(i=0; i<width; i++){
1181
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1182
    }
1183
}
1184

    
1185
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1186
    int i;
1187
    
1188
    for(i=0; i<width; i++){
1189
#ifdef lift5
1190
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1191
#else
1192
        int r= 3*(b0[i] + b2[i]);
1193
        r+= r>>4;
1194
        r+= r>>8;
1195
        b1[i] += (r+W_CO)>>W_CS;
1196
#endif
1197
    }
1198
}
1199

    
1200
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1201
    int i;
1202
    
1203
    for(i=0; i<width; i++){
1204
#ifdef liftS
1205
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1206
#else
1207
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1208
#endif
1209
    }
1210
}
1211

    
1212
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1213
    int i;
1214
    
1215
    for(i=0; i<width; i++){
1216
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1217
    }
1218
}
1219

    
1220
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1221
    int y;
1222
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1223
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1224
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1225
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1226
  
1227
    for(y=-4; y<height; y+=2){
1228
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1229
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1230

    
1231
{START_TIMER
1232
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1233
        if(y+4 < height) horizontal_decompose97i(b5, width);
1234
if(width>400){
1235
STOP_TIMER("horizontal_decompose97i")
1236
}}
1237
        
1238
{START_TIMER
1239
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1240
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1241
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1242
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1243

    
1244
if(width>400){
1245
STOP_TIMER("vertical_decompose97i")
1246
}}
1247
        
1248
        b0=b2;
1249
        b1=b3;
1250
        b2=b4;
1251
        b3=b5;
1252
    }
1253
}
1254

    
1255
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1256
    int level;
1257
    
1258
    for(level=0; level<decomposition_count; level++){
1259
        switch(type){
1260
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1261
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1262
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1263
        }
1264
    }
1265
}
1266

    
1267
static void horizontal_compose53i(DWTELEM *b, int width){
1268
    DWTELEM temp[width];
1269
    const int width2= width>>1;
1270
    const int w2= (width+1)>>1;
1271
    int A1,A2,A3,A4, x;
1272

    
1273
#if 0
1274
    A2= temp[1       ];
1275
    A4= temp[0       ];
1276
    A1= temp[0+width2];
1277
    A1 -= (A2 + A4)>>1;
1278
    A4 += (A1 + 1)>>1;
1279
    b[0+width2] = A1;
1280
    b[0       ] = A4;
1281
    for(x=1; x+1<width2; x+=2){
1282
        A3= temp[x+width2];
1283
        A4= temp[x+1     ];
1284
        A3 -= (A2 + A4)>>1;
1285
        A2 += (A1 + A3 + 2)>>2;
1286
        b[x+width2] = A3;
1287
        b[x       ] = A2;
1288

1289
        A1= temp[x+1+width2];
1290
        A2= temp[x+2       ];
1291
        A1 -= (A2 + A4)>>1;
1292
        A4 += (A1 + A3 + 2)>>2;
1293
        b[x+1+width2] = A1;
1294
        b[x+1       ] = A4;
1295
    }
1296
    A3= temp[width-1];
1297
    A3 -= A2;
1298
    A2 += (A1 + A3 + 2)>>2;
1299
    b[width -1] = A3;
1300
    b[width2-1] = A2;
1301
#else   
1302
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1303
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1304
#endif
1305
    for(x=0; x<width2; x++){
1306
        b[2*x    ]= temp[x   ];
1307
        b[2*x + 1]= temp[x+w2];
1308
    }
1309
    if(width&1)
1310
        b[2*x    ]= temp[x   ];
1311
}
1312

    
1313
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1314
    int i;
1315
    
1316
    for(i=0; i<width; i++){
1317
        b1[i] += (b0[i] + b2[i])>>1;
1318
    }
1319
}
1320

    
1321
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1322
    int i;
1323
    
1324
    for(i=0; i<width; i++){
1325
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1326
    }
1327
}
1328

    
1329
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1330
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1331
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1332
    cs->y = -1;
1333
}
1334

    
1335
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1336
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1337
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1338
    cs->y = -1;
1339
}
1340

    
1341
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1342
    int y= cs->y;
1343
    int mirror0 = mirror(y-1, height-1);
1344
    int mirror1 = mirror(y  , height-1);
1345
    int mirror2 = mirror(y+1, height-1);
1346
    int mirror3 = mirror(y+2, height-1);
1347
    
1348
    DWTELEM *b0= cs->b0;
1349
    DWTELEM *b1= cs->b1;
1350
    DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1351
    DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1352

    
1353
{START_TIMER
1354
        if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1355
        if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1356
STOP_TIMER("vertical_compose53i*")}
1357

    
1358
{START_TIMER
1359
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1360
        if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1361
STOP_TIMER("horizontal_compose53i")}
1362

    
1363
    cs->b0 = b2;
1364
    cs->b1 = b3;
1365
    cs->y += 2;
1366
}
1367

    
1368
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1369
    int y= cs->y;
1370
    DWTELEM *b0= cs->b0;
1371
    DWTELEM *b1= cs->b1;
1372
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1373
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1374

    
1375
{START_TIMER
1376
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1377
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1378
STOP_TIMER("vertical_compose53i*")}
1379

    
1380
{START_TIMER
1381
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1382
        if(b0 <= b2) horizontal_compose53i(b1, width);
1383
STOP_TIMER("horizontal_compose53i")}
1384

    
1385
    cs->b0 = b2;
1386
    cs->b1 = b3;
1387
    cs->y += 2;
1388
}
1389

    
1390
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1391
    dwt_compose_t cs;
1392
    spatial_compose53i_init(&cs, buffer, height, stride);
1393
    while(cs.y <= height)
1394
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1395
}   
1396

    
1397
 
1398
static void horizontal_compose97i(DWTELEM *b, int width){
1399
    DWTELEM temp[width];
1400
    const int w2= (width+1)>>1;
1401

    
1402
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1403
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1404
    liftS(b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1405
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1406
}
1407

    
1408
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1409
    int i;
1410
    
1411
    for(i=0; i<width; i++){
1412
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1413
    }
1414
}
1415

    
1416
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1417
    int i;
1418
    
1419
    for(i=0; i<width; i++){
1420
#ifdef lift5
1421
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1422
#else
1423
        int r= 3*(b0[i] + b2[i]);
1424
        r+= r>>4;
1425
        r+= r>>8;
1426
        b1[i] -= (r+W_CO)>>W_CS;
1427
#endif
1428
    }
1429
}
1430

    
1431
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1432
    int i;
1433
    
1434
    for(i=0; i<width; i++){
1435
#ifdef liftS
1436
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1437
#else
1438
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1439
#endif
1440
    }
1441
}
1442

    
1443
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1444
    int i;
1445
    
1446
    for(i=0; i<width; i++){
1447
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1448
    }
1449
}
1450

    
1451
static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1452
    int i;
1453
    
1454
    for(i=0; i<width; i++){
1455
        int r;
1456
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1457
#ifdef lift5
1458
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1459
#else
1460
        r= 3*(b2[i] + b4[i]);
1461
        r+= r>>4;
1462
        r+= r>>8;
1463
        b3[i] -= (r+W_CO)>>W_CS;
1464
#endif
1465
#ifdef liftS
1466
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1467
#else
1468
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1469
#endif
1470
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1471
    }
1472
}
1473

    
1474
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1475
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1476
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1477
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1478
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1479
    cs->y = -3;
1480
}
1481

    
1482
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1483
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1484
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1485
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1486
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1487
    cs->y = -3;
1488
}
1489

    
1490
static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1491
    int y = cs->y;
1492
    
1493
    int mirror0 = mirror(y - 1, height - 1);
1494
    int mirror1 = mirror(y + 0, height - 1);
1495
    int mirror2 = mirror(y + 1, height - 1);
1496
    int mirror3 = mirror(y + 2, height - 1);
1497
    int mirror4 = mirror(y + 3, height - 1);
1498
    int mirror5 = mirror(y + 4, height - 1);
1499
    DWTELEM *b0= cs->b0;
1500
    DWTELEM *b1= cs->b1;
1501
    DWTELEM *b2= cs->b2;
1502
    DWTELEM *b3= cs->b3;
1503
    DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1504
    DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1505
        
1506
{START_TIMER
1507
    if(y>0 && y+4<height){
1508
        vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1509
    }else{
1510
        if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1511
        if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1512
        if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1513
        if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1514
    }
1515
if(width>400){
1516
STOP_TIMER("vertical_compose97i")}}
1517

    
1518
{START_TIMER
1519
        if(y-1>=  0) horizontal_compose97i(b0, width);
1520
        if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1521
if(width>400 && mirror0 <= mirror2){
1522
STOP_TIMER("horizontal_compose97i")}}
1523

    
1524
    cs->b0=b2;
1525
    cs->b1=b3;
1526
    cs->b2=b4;
1527
    cs->b3=b5;
1528
    cs->y += 2;
1529
}
1530

    
1531
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1532
    int y = cs->y;
1533
    DWTELEM *b0= cs->b0;
1534
    DWTELEM *b1= cs->b1;
1535
    DWTELEM *b2= cs->b2;
1536
    DWTELEM *b3= cs->b3;
1537
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1538
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1539

    
1540
        if(stride == width && y+4 < height && 0){ 
1541
            int x;
1542
            for(x=0; x<width/2; x++)
1543
                b5[x] += 64*2;
1544
            for(; x<width; x++)
1545
                b5[x] += 169*2;
1546
        }
1547
        
1548
{START_TIMER
1549
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1550
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1551
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1552
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1553
if(width>400){
1554
STOP_TIMER("vertical_compose97i")}}
1555

    
1556
{START_TIMER
1557
        if(y-1>=  0) horizontal_compose97i(b0, width);
1558
        if(b0 <= b2) horizontal_compose97i(b1, width);
1559
if(width>400 && b0 <= b2){
1560
STOP_TIMER("horizontal_compose97i")}}
1561

    
1562
    cs->b0=b2;
1563
    cs->b1=b3;
1564
    cs->b2=b4;
1565
    cs->b3=b5;
1566
    cs->y += 2;
1567
}
1568

    
1569
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1570
    dwt_compose_t cs;
1571
    spatial_compose97i_init(&cs, buffer, height, stride);
1572
    while(cs.y <= height)
1573
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1574
}
1575

    
1576
void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1577
    int level;
1578
    for(level=decomposition_count-1; level>=0; level--){
1579
        switch(type){
1580
        case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1581
        case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1582
        /* not slicified yet */
1583
        case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1584
          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1585
        }
1586
    }
1587
}
1588

    
1589
void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1590
    int level;
1591
    for(level=decomposition_count-1; level>=0; level--){
1592
        switch(type){
1593
        case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1594
        case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1595
        /* not slicified yet */
1596
        case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1597
        }
1598
    }
1599
}
1600

    
1601
void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1602
    const int support = type==1 ? 3 : 5;
1603
    int level;
1604
    if(type==2) return;
1605

    
1606
    for(level=decomposition_count-1; level>=0; level--){
1607
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1608
            switch(type){
1609
            case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1610
                    break;
1611
            case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1612
                    break;
1613
            case 2: break;
1614
            }
1615
        }
1616
    }
1617
}
1618

    
1619
void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1620
    const int support = type==1 ? 3 : 5;
1621
    int level;
1622
    if(type==2) return;
1623

    
1624
    for(level=decomposition_count-1; level>=0; level--){
1625
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1626
            switch(type){
1627
            case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1628
                    break;
1629
            case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1630
                    break;
1631
            case 2: break;
1632
            }
1633
        }
1634
    }
1635
}
1636

    
1637
void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1638
    if(type==2){
1639
        int level;
1640
        for(level=decomposition_count-1; level>=0; level--)
1641
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
1642
    }else{
1643
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1644
        int y;
1645
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1646
        for(y=0; y<height; y+=4)
1647
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1648
    }
1649
}
1650

    
1651
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1652
    const int w= b->width;
1653
    const int h= b->height;
1654
    int x, y;
1655

    
1656
    if(1){
1657
        int run=0;
1658
        int runs[w*h];
1659
        int run_index=0;
1660
                
1661
        for(y=0; y<h; y++){
1662
            for(x=0; x<w; x++){
1663
                int v, p=0;
1664
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1665
                v= src[x + y*stride];
1666

    
1667
                if(y){
1668
                    t= src[x + (y-1)*stride];
1669
                    if(x){
1670
                        lt= src[x - 1 + (y-1)*stride];
1671
                    }
1672
                    if(x + 1 < w){
1673
                        rt= src[x + 1 + (y-1)*stride];
1674
                    }
1675
                }
1676
                if(x){
1677
                    l= src[x - 1 + y*stride];
1678
                    /*if(x > 1){
1679
                        if(orientation==1) ll= src[y + (x-2)*stride];
1680
                        else               ll= src[x - 2 + y*stride];
1681
                    }*/
1682
                }
1683
                if(parent){
1684
                    int px= x>>1;
1685
                    int py= y>>1;
1686
                    if(px<b->parent->width && py<b->parent->height) 
1687
                        p= parent[px + py*2*stride];
1688
                }
1689
                if(!(/*ll|*/l|lt|t|rt|p)){
1690
                    if(v){
1691
                        runs[run_index++]= run;
1692
                        run=0;
1693
                    }else{
1694
                        run++;
1695
                    }
1696
                }
1697
            }
1698
        }
1699
        runs[run_index++]= run;
1700
        run_index=0;
1701
        run= runs[run_index++];
1702

    
1703
        put_symbol2(&s->c, b->state[1], run, 3);
1704
        
1705
        for(y=0; y<h; y++){
1706
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1707
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1708
                return -1;
1709
            }
1710
            for(x=0; x<w; x++){
1711
                int v, p=0;
1712
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1713
                v= src[x + y*stride];
1714

    
1715
                if(y){
1716
                    t= src[x + (y-1)*stride];
1717
                    if(x){
1718
                        lt= src[x - 1 + (y-1)*stride];
1719
                    }
1720
                    if(x + 1 < w){
1721
                        rt= src[x + 1 + (y-1)*stride];
1722
                    }
1723
                }
1724
                if(x){
1725
                    l= src[x - 1 + y*stride];
1726
                    /*if(x > 1){
1727
                        if(orientation==1) ll= src[y + (x-2)*stride];
1728
                        else               ll= src[x - 2 + y*stride];
1729
                    }*/
1730
                }
1731
                if(parent){
1732
                    int px= x>>1;
1733
                    int py= y>>1;
1734
                    if(px<b->parent->width && py<b->parent->height) 
1735
                        p= parent[px + py*2*stride];
1736
                }
1737
                if(/*ll|*/l|lt|t|rt|p){
1738
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1739

    
1740
                    put_rac(&s->c, &b->state[0][context], !!v);
1741
                }else{
1742
                    if(!run){
1743
                        run= runs[run_index++];
1744

    
1745
                        put_symbol2(&s->c, b->state[1], run, 3);
1746
                        assert(v);
1747
                    }else{
1748
                        run--;
1749
                        assert(!v);
1750
                    }
1751
                }
1752
                if(v){
1753
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1754
                    int l2= 2*ABS(l) + (l<0);
1755
                    int t2= 2*ABS(t) + (t<0);
1756

    
1757
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1758
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1759
                }
1760
            }
1761
        }
1762
    }
1763
    return 0;
1764
}
1765

    
1766
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1767
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1768
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1769
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1770
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1771
}
1772

    
1773
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1774
    const int w= b->width;
1775
    const int h= b->height;
1776
    int x,y;
1777
    
1778
    if(1){
1779
        int run;
1780
        int index=0;
1781
        int prev_index=-1;
1782
        int prev2_index=0;
1783
        int parent_index= 0;
1784
        int prev_parent_index= 0;
1785

    
1786
        run= get_symbol2(&s->c, b->state[1], 3);
1787
        for(y=0; y<h; y++){
1788
            int v=0;
1789
            int lt=0, t=0, rt=0;
1790

    
1791
            if(y && b->x_coeff[prev_index].x == 0){
1792
                rt= b->x_coeff[prev_index].coeff;
1793
            }
1794
            for(x=0; x<w; x++){
1795
                int p=0;
1796
                const int l= v;
1797
                
1798
                lt= t; t= rt;
1799

    
1800
                if(y){
1801
                    if(b->x_coeff[prev_index].x <= x)
1802
                        prev_index++;
1803
                    if(b->x_coeff[prev_index].x == x + 1)
1804
                        rt= b->x_coeff[prev_index].coeff;
1805
                    else
1806
                        rt=0;
1807
                }
1808
                if(parent){
1809
                    if(x>>1 > parent->x_coeff[parent_index].x){
1810
                        parent_index++;
1811
                    }
1812
                    if(x>>1 == parent->x_coeff[parent_index].x){
1813
                        p= parent->x_coeff[parent_index].coeff;
1814
                    }
1815
                }
1816
                if(/*ll|*/l|lt|t|rt|p){
1817
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1818

    
1819
                    v=get_rac(&s->c, &b->state[0][context]);
1820
                }else{
1821
                    if(!run){
1822
                        run= get_symbol2(&s->c, b->state[1], 3);
1823
                        v=1;
1824
                    }else{
1825
                        run--;
1826
                        v=0;
1827

    
1828
                        if(y && parent){
1829
                            int max_run;
1830

    
1831
                            max_run= FFMIN(run, b->x_coeff[prev_index].x - x - 2);
1832
                            max_run= FFMIN(max_run, 2*parent->x_coeff[parent_index].x - x - 1);
1833
                            x+= max_run;
1834
                            run-= max_run;
1835
                        }
1836
                    }
1837
                }
1838
                if(v){
1839
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1840
                    v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1841
                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1842
                       
1843
                    b->x_coeff[index].x=x;
1844
                    b->x_coeff[index++].coeff= v;
1845
                }
1846
            }
1847
            b->x_coeff[index++].x= w+1; //end marker
1848
            prev_index= prev2_index;
1849
            prev2_index= index;
1850
            
1851
            if(parent){
1852
                if(y&1){
1853
                    while(parent->x_coeff[parent_index].x != parent->width+1)
1854
                        parent_index++;
1855
                    parent_index++;
1856
                    prev_parent_index= parent_index;
1857
                }else{
1858
                    parent_index= prev_parent_index;
1859
                }
1860
            }
1861
        }
1862

    
1863
        b->x_coeff[index++].x= w+1; //end marker
1864
    }
1865
}
1866

    
1867
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1868
    const int w= b->width;
1869
    int x,y;
1870
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1871
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1872
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1873
    int new_index = 0;
1874
    
1875
    START_TIMER
1876

    
1877
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1878
        qadd= 0;
1879
        qmul= 1<<QEXPSHIFT;
1880
    }
1881

    
1882
    /* If we are on the second or later slice, restore our index. */
1883
    if (start_y != 0)
1884
        new_index = save_state[0];
1885

    
1886
        
1887
    for(y=start_y; y<h; y++){
1888
        int x = 0;
1889
        int v;
1890
        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1891
        memset(line, 0, b->width*sizeof(DWTELEM));
1892
        v = b->x_coeff[new_index].coeff;
1893
        x = b->x_coeff[new_index++].x;
1894
        while(x < w)
1895
        {
1896
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1897
            register int u= -(v&1);
1898
            line[x] = (t^u) - u;
1899

    
1900
            v = b->x_coeff[new_index].coeff;
1901
            x = b->x_coeff[new_index++].x;
1902
        }
1903
    }
1904
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1905
        STOP_TIMER("decode_subband")
1906
    }
1907
        
1908
    /* Save our variables for the next slice. */
1909
    save_state[0] = new_index;
1910
        
1911
    return;
1912
}
1913

    
1914
static void reset_contexts(SnowContext *s){
1915
    int plane_index, level, orientation;
1916

    
1917
    for(plane_index=0; plane_index<3; plane_index++){
1918
        for(level=0; level<s->spatial_decomposition_count; level++){
1919
            for(orientation=level ? 1:0; orientation<4; orientation++){
1920
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1921
            }
1922
        }
1923
    }
1924
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1925
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1926
}
1927

    
1928
static int alloc_blocks(SnowContext *s){
1929
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1930
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1931
    
1932
    s->b_width = w;
1933
    s->b_height= h;
1934
    
1935
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1936
    return 0;
1937
}
1938

    
1939
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1940
    uint8_t *bytestream= d->bytestream;
1941
    uint8_t *bytestream_start= d->bytestream_start;
1942
    *d= *s;
1943
    d->bytestream= bytestream;
1944
    d->bytestream_start= bytestream_start;
1945
}
1946

    
1947
//near copy & paste from dsputil, FIXME
1948
static int pix_sum(uint8_t * pix, int line_size, int w)
1949
{
1950
    int s, i, j;
1951

    
1952
    s = 0;
1953
    for (i = 0; i < w; i++) {
1954
        for (j = 0; j < w; j++) {
1955
            s += pix[0];
1956
            pix ++;
1957
        }
1958
        pix += line_size - w;
1959
    }
1960
    return s;
1961
}
1962

    
1963
//near copy & paste from dsputil, FIXME
1964
static int pix_norm1(uint8_t * pix, int line_size, int w)
1965
{
1966
    int s, i, j;
1967
    uint32_t *sq = squareTbl + 256;
1968

    
1969
    s = 0;
1970
    for (i = 0; i < w; i++) {
1971
        for (j = 0; j < w; j ++) {
1972
            s += sq[pix[0]];
1973
            pix ++;
1974
        }
1975
        pix += line_size - w;
1976
    }
1977
    return s;
1978
}
1979

    
1980
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1981
    const int w= s->b_width << s->block_max_depth;
1982
    const int rem_depth= s->block_max_depth - level;
1983
    const int index= (x + y*w) << rem_depth;
1984
    const int block_w= 1<<rem_depth;
1985
    BlockNode block;
1986
    int i,j;
1987
    
1988
    block.color[0]= l;
1989
    block.color[1]= cb;
1990
    block.color[2]= cr;
1991
    block.mx= mx;
1992
    block.my= my;
1993
    block.type= type;
1994
    block.level= level;
1995

    
1996
    for(j=0; j<block_w; j++){
1997
        for(i=0; i<block_w; i++){
1998
            s->block[index + i + j*w]= block;
1999
        }
2000
    }
2001
}
2002

    
2003
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2004
    const int offset[3]= {
2005
          y*c->  stride + x,
2006
        ((y*c->uvstride + x)>>1),
2007
        ((y*c->uvstride + x)>>1),
2008
    };
2009
    int i;
2010
    for(i=0; i<3; i++){
2011
        c->src[0][i]= src [i];
2012
        c->ref[0][i]= ref [i] + offset[i];
2013
    }
2014
    assert(!ref_index);
2015
}
2016

    
2017
//FIXME copy&paste
2018
#define P_LEFT P[1]
2019
#define P_TOP P[2]
2020
#define P_TOPRIGHT P[3]
2021
#define P_MEDIAN P[4]
2022
#define P_MV1 P[9]
2023
#define FLAG_QPEL   1 //must be 1
2024

    
2025
static int encode_q_branch(SnowContext *s, int level, int x, int y){
2026
    uint8_t p_buffer[1024];
2027
    uint8_t i_buffer[1024];
2028
    uint8_t p_state[sizeof(s->block_state)];
2029
    uint8_t i_state[sizeof(s->block_state)];
2030
    RangeCoder pc, ic;
2031
    uint8_t *pbbak= s->c.bytestream;
2032
    uint8_t *pbbak_start= s->c.bytestream_start;
2033
    int score, score2, iscore, i_len, p_len, block_s, sum;
2034
    const int w= s->b_width  << s->block_max_depth;
2035
    const int h= s->b_height << s->block_max_depth;
2036
    const int rem_depth= s->block_max_depth - level;
2037
    const int index= (x + y*w) << rem_depth;
2038
    const int block_w= 1<<(LOG2_MB_SIZE - level);
2039
    static BlockNode null_block= { //FIXME add border maybe
2040
        .color= {128,128,128},
2041
        .mx= 0,
2042
        .my= 0,
2043
        .type= 0,
2044
        .level= 0,
2045
    };
2046
    int trx= (x+1)<<rem_depth;
2047
    int try= (y+1)<<rem_depth;
2048
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2049
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2050
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2051
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2052
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2053
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2054
    int pl = left->color[0];
2055
    int pcb= left->color[1];
2056
    int pcr= left->color[2];
2057
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2058
    int pmy= mid_pred(left->my, top->my, tr->my);
2059
    int mx=0, my=0;
2060
    int l,cr,cb, i;
2061
    const int stride= s->current_picture.linesize[0];
2062
    const int uvstride= s->current_picture.linesize[1];
2063
    const int instride= s->input_picture.linesize[0];
2064
    const int uvinstride= s->input_picture.linesize[1];
2065
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
2066
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2067
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2068
    uint8_t current_mb[3][stride*block_w];
2069
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2070
    int P[10][2];
2071
    int16_t last_mv[3][2];
2072
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2073
    const int shift= 1+qpel;
2074
    MotionEstContext *c= &s->m.me;
2075
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2076
    int my_context= av_log2(2*ABS(left->my - top->my));
2077
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2078

    
2079
    assert(sizeof(s->block_state) >= 256);
2080
    if(s->keyframe){
2081
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2082
        return 0;
2083
    }
2084

    
2085
    //FIXME optimize
2086
    for(i=0; i<block_w; i++)
2087
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
2088
    for(i=0; i<block_w>>1; i++)
2089
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2090
    for(i=0; i<block_w>>1; i++)
2091
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2092

    
2093
//    clip predictors / edge ?
2094

    
2095
    P_LEFT[0]= left->mx;
2096
    P_LEFT[1]= left->my;
2097
    P_TOP [0]= top->mx;
2098
    P_TOP [1]= top->my;
2099
    P_TOPRIGHT[0]= tr->mx;
2100
    P_TOPRIGHT[1]= tr->my;
2101
    
2102
    last_mv[0][0]= s->block[index].mx;
2103
    last_mv[0][1]= s->block[index].my;
2104
    last_mv[1][0]= right->mx;
2105
    last_mv[1][1]= right->my;
2106
    last_mv[2][0]= bottom->mx;
2107
    last_mv[2][1]= bottom->my;
2108
    
2109
    s->m.mb_stride=2;
2110
    s->m.mb_x= 
2111
    s->m.mb_y= 0;
2112
    s->m.me.skip= 0;
2113

    
2114
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2115
    
2116
    assert(s->m.me.  stride ==   stride);
2117
    assert(s->m.me.uvstride == uvstride);
2118
    
2119
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2120
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2121
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2122
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2123
    
2124
    c->xmin = - x*block_w - 16+2;
2125
    c->ymin = - y*block_w - 16+2;
2126
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2127
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2128

    
2129
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2130
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
2131
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2132
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2133
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2134
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2135
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2136

    
2137
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2138
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2139

    
2140
    if (!y) {
2141
        c->pred_x= P_LEFT[0];
2142
        c->pred_y= P_LEFT[1];
2143
    } else {
2144
        c->pred_x = P_MEDIAN[0];
2145
        c->pred_y = P_MEDIAN[1];
2146
    }
2147

    
2148
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
2149
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2150

    
2151
    assert(mx >= c->xmin);
2152
    assert(mx <= c->xmax);
2153
    assert(my >= c->ymin);
2154
    assert(my <= c->ymax);
2155
    
2156
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2157
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2158
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2159
                             
2160
  //  subpel search
2161
    pc= s->c;
2162
    pc.bytestream_start=
2163
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2164
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2165

    
2166
    if(level!=s->block_max_depth)
2167
        put_rac(&pc, &p_state[4 + s_context], 1);
2168
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2169
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2170
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2171
    p_len= pc.bytestream - pc.bytestream_start;
2172
    score += (s->lambda2*(p_len*8
2173
              + (pc.outstanding_count - s->c.outstanding_count)*8
2174
              + (-av_log2(pc.range)    + av_log2(s->c.range))
2175
             ))>>FF_LAMBDA_SHIFT;
2176

    
2177
    block_s= block_w*block_w;
2178
    sum = pix_sum(&current_mb[0][0], stride, block_w);
2179
    l= (sum + block_s/2)/block_s;
2180
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2181
    
2182
    block_s= block_w*block_w>>2;
2183
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2184
    cb= (sum + block_s/2)/block_s;
2185
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2186
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2187
    cr= (sum + block_s/2)/block_s;
2188
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2189

    
2190
    ic= s->c;
2191
    ic.bytestream_start=
2192
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2193
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2194
    if(level!=s->block_max_depth)
2195
        put_rac(&ic, &i_state[4 + s_context], 1);
2196
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2197
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2198
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2199
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2200
    i_len= ic.bytestream - ic.bytestream_start;
2201
    iscore += (s->lambda2*(i_len*8
2202
              + (ic.outstanding_count - s->c.outstanding_count)*8
2203
              + (-av_log2(ic.range)    + av_log2(s->c.range))
2204
             ))>>FF_LAMBDA_SHIFT;
2205

    
2206
//    assert(score==256*256*256*64-1);
2207
    assert(iscore < 255*255*256 + s->lambda2*10);
2208
    assert(iscore >= 0);
2209
    assert(l>=0 && l<=255);
2210
    assert(pl>=0 && pl<=255);
2211

    
2212
    if(level==0){
2213
        int varc= iscore >> 8;
2214
        int vard= score >> 8;
2215
        if (vard <= 64 || vard < varc)
2216
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2217
        else
2218
            c->scene_change_score+= s->m.qscale;
2219
    }
2220
        
2221
    if(level!=s->block_max_depth){
2222
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2223
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2224
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2225
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2226
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2227
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2228
    
2229
        if(score2 < score && score2 < iscore)
2230
            return score2;
2231
    }
2232
    
2233
    if(iscore < score){
2234
        memcpy(pbbak, i_buffer, i_len);
2235
        s->c= ic;
2236
        s->c.bytestream_start= pbbak_start;
2237
        s->c.bytestream= pbbak + i_len;
2238
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2239
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2240
        return iscore;
2241
    }else{
2242
        memcpy(pbbak, p_buffer, p_len);
2243
        s->c= pc;
2244
        s->c.bytestream_start= pbbak_start;
2245
        s->c.bytestream= pbbak + p_len;
2246
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2247
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2248
        return score;
2249
    }
2250
}
2251

    
2252
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2253
    const int w= s->b_width << s->block_max_depth;
2254
    const int rem_depth= s->block_max_depth - level;
2255
    const int index= (x + y*w) << rem_depth;
2256
    static BlockNode null_block= { //FIXME add border maybe
2257
        .color= {128,128,128},
2258
        .mx= 0,
2259
        .my= 0,
2260
        .type= 0,
2261
        .level= 0,
2262
    };
2263
    int trx= (x+1)<<rem_depth;
2264
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2265
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2266
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2267
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2268
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2269
    
2270
    if(s->keyframe){
2271
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2272
        return;
2273
    }
2274

    
2275
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2276
        int type;
2277
        int l = left->color[0];
2278
        int cb= left->color[1];
2279
        int cr= left->color[2];
2280
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2281
        int my= mid_pred(left->my, top->my, tr->my);
2282
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2283
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2284
        
2285
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2286

    
2287
        if(type){
2288
            l += get_symbol(&s->c, &s->block_state[32], 1);
2289
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2290
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2291
        }else{
2292
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2293
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2294
        }
2295
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2296
    }else{
2297
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2298
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2299
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2300
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2301
    }
2302
}
2303

    
2304
static void encode_blocks(SnowContext *s){
2305
    int x, y;
2306
    int w= s->b_width;
2307
    int h= s->b_height;
2308

    
2309
    for(y=0; y<h; y++){
2310
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2311
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2312
            return;
2313
        }
2314
        for(x=0; x<w; x++){
2315
            encode_q_branch(s, 0, x, y);
2316
        }
2317
    }
2318
}
2319

    
2320
static void decode_blocks(SnowContext *s){
2321
    int x, y;
2322
    int w= s->b_width;
2323
    int h= s->b_height;
2324

    
2325
    for(y=0; y<h; y++){
2326
        for(x=0; x<w; x++){
2327
            decode_q_branch(s, 0, x, y);
2328
        }
2329
    }
2330
}
2331

    
2332
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2333
    int x, y;
2334
START_TIMER
2335
    for(y=0; y < b_h+5; y++){
2336
        for(x=0; x < b_w; x++){
2337
            int a0= src[x    ];
2338
            int a1= src[x + 1];
2339
            int a2= src[x + 2];
2340
            int a3= src[x + 3];
2341
            int a4= src[x + 4];
2342
            int a5= src[x + 5];
2343
//            int am= 9*(a1+a2) - (a0+a3);
2344
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2345
//            int am= 18*(a2+a3) - 2*(a1+a4);
2346
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2347
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2348

    
2349
//            if(b_w==16) am= 8*(a1+a2);
2350

    
2351
            if(dx<8) tmp[x]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2352
            else     tmp[x]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2353

    
2354
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2355
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2356
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2357
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2358
        }
2359
        tmp += stride;
2360
        src += stride;
2361
    }
2362
    tmp -= (b_h+5)*stride;
2363
    
2364
    for(y=0; y < b_h; y++){
2365
        for(x=0; x < b_w; x++){
2366
            int a0= tmp[x + 0*stride];
2367
            int a1= tmp[x + 1*stride];
2368
            int a2= tmp[x + 2*stride];
2369
            int a3= tmp[x + 3*stride];
2370
            int a4= tmp[x + 4*stride];
2371
            int a5= tmp[x + 5*stride];
2372
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2373
//            int am= 18*(a2+a3) - 2*(a1+a4);
2374
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2375
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2376
            
2377
//            if(b_w==16) am= 8*(a1+a2);
2378

    
2379
            if(dy<8) dst[x]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2380
            else     dst[x]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2381

    
2382
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2383
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2384
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2385
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2386
        }
2387
        dst += stride;
2388
        tmp += stride;
2389
    }
2390
STOP_TIMER("mc_block")
2391
}
2392

    
2393
#define mca(dx,dy,b_w)\
2394
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2395
    uint8_t tmp[stride*(b_w+5)];\
2396
    assert(h==b_w);\
2397
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2398
}
2399

    
2400
mca( 0, 0,16)
2401
mca( 8, 0,16)
2402
mca( 0, 8,16)
2403
mca( 8, 8,16)
2404
mca( 0, 0,8)
2405
mca( 8, 0,8)
2406
mca( 0, 8,8)
2407
mca( 8, 8,8)
2408

    
2409
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2410
    if(block->type){
2411
        int x, y;
2412
        const int color= block->color[plane_index];
2413
        for(y=0; y < b_h; y++){
2414
            for(x=0; x < b_w; x++){
2415
                dst[x + y*stride]= color;
2416
            }
2417
        }
2418
    }else{
2419
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2420
        int mx= block->mx*scale;
2421
        int my= block->my*scale;
2422
        const int dx= mx&15;
2423
        const int dy= my&15;
2424
        sx += (mx>>4) - 2;
2425
        sy += (my>>4) - 2;
2426
        src += sx + sy*stride;
2427
        if(   (unsigned)sx >= w - b_w - 4
2428
           || (unsigned)sy >= h - b_h - 4){
2429
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2430
            src= tmp + MB_SIZE;
2431
        }
2432
        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2433
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2434
        else
2435
            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2436
    }
2437
}
2438

    
2439
static always_inline int same_block(BlockNode *a, BlockNode *b){
2440
    return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2441
}
2442

    
2443
//FIXME name clenup (b_w, block_w, b_width stuff)
2444
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2445
    DWTELEM * dst = NULL;
2446
    const int b_width = s->b_width  << s->block_max_depth;
2447
    const int b_height= s->b_height << s->block_max_depth;
2448
    const int b_stride= b_width;
2449
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2450
    BlockNode *rt= lt+1;
2451
    BlockNode *lb= lt+b_stride;
2452
    BlockNode *rb= lb+1;
2453
    uint8_t *block[4]; 
2454
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2455
    int x,y;
2456

    
2457
    if(b_x<0){
2458
        lt= rt;
2459
        lb= rb;
2460
    }else if(b_x + 1 >= b_width){
2461
        rt= lt;
2462
        rb= lb;
2463
    }
2464
    if(b_y<0){
2465
        lt= lb;
2466
        rt= rb;
2467
    }else if(b_y + 1 >= b_height){
2468
        lb= lt;
2469
        rb= rt;
2470
    }
2471
        
2472
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2473
        obmc -= src_x;
2474
        b_w += src_x;
2475
        src_x=0;
2476
    }else if(src_x + b_w > w){
2477
        b_w = w - src_x;
2478
    }
2479
    if(src_y<0){
2480
        obmc -= src_y*obmc_stride;
2481
        b_h += src_y;
2482
        src_y=0;
2483
    }else if(src_y + b_h> h){
2484
        b_h = h - src_y;
2485
    }
2486
    
2487
    if(b_w<=0 || b_h<=0) return;
2488

    
2489
assert(src_stride > 7*MB_SIZE);
2490
//    old_dst += src_x + src_y*dst_stride;
2491
    dst8+= src_x + src_y*src_stride;
2492
//    src += src_x + src_y*src_stride;
2493

    
2494
    block[0]= tmp+3*MB_SIZE;
2495
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2496

    
2497
    if(same_block(lt, rt)){
2498
        block[1]= block[0];
2499
    }else{
2500
        block[1]= tmp + 4*MB_SIZE;
2501
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2502
    }
2503
        
2504
    if(same_block(lt, lb)){
2505
        block[2]= block[0];
2506
    }else if(same_block(rt, lb)){
2507
        block[2]= block[1];
2508
    }else{
2509
        block[2]= tmp+5*MB_SIZE;
2510
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2511
    }
2512

    
2513
    if(same_block(lt, rb) ){
2514
        block[3]= block[0];
2515
    }else if(same_block(rt, rb)){
2516
        block[3]= block[1];
2517
    }else if(same_block(lb, rb)){
2518
        block[3]= block[2];
2519
    }else{
2520
        block[3]= tmp+6*MB_SIZE;
2521
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2522
    }
2523
#if 0
2524
    for(y=0; y<b_h; y++){
2525
        for(x=0; x<b_w; x++){
2526
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2527
            if(add) dst[x + y*dst_stride] += v;
2528
            else    dst[x + y*dst_stride] -= v;
2529
        }
2530
    }
2531
    for(y=0; y<b_h; y++){
2532
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2533
        for(x=0; x<b_w; x++){
2534
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2535
            if(add) dst[x + y*dst_stride] += v;
2536
            else    dst[x + y*dst_stride] -= v;
2537
        }
2538
    }
2539
    for(y=0; y<b_h; y++){
2540
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2541
        for(x=0; x<b_w; x++){
2542
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2543
            if(add) dst[x + y*dst_stride] += v;
2544
            else    dst[x + y*dst_stride] -= v;
2545
        }
2546
    }
2547
    for(y=0; y<b_h; y++){
2548
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2549
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2550
        for(x=0; x<b_w; x++){
2551
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2552
            if(add) dst[x + y*dst_stride] += v;
2553
            else    dst[x + y*dst_stride] -= v;
2554
        }
2555
    }
2556
#else
2557
{
2558

    
2559
    START_TIMER
2560
    
2561
    int block_index = 0;
2562
    for(y=0; y<b_h; y++){
2563
        //FIXME ugly missue of obmc_stride
2564
        uint8_t *obmc1= obmc + y*obmc_stride;
2565
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2566
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2567
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2568
        dst = slice_buffer_get_line(sb, src_y + y);
2569
        for(x=0; x<b_w; x++){
2570
            int v=   obmc1[x] * block[3][x + y*src_stride]
2571
                    +obmc2[x] * block[2][x + y*src_stride]
2572
                    +obmc3[x] * block[1][x + y*src_stride]
2573
                    +obmc4[x] * block[0][x + y*src_stride];
2574

    
2575
            v <<= 8 - LOG2_OBMC_MAX;
2576
            if(FRAC_BITS != 8){
2577
                v += 1<<(7 - FRAC_BITS);
2578
                v >>= 8 - FRAC_BITS;
2579
            }
2580
            if(add){
2581
//                v += old_dst[x + y*dst_stride];
2582
                v += dst[x + src_x];
2583
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2584
                if(v&(~255)) v= ~(v>>31);
2585
                dst8[x + y*src_stride] = v;
2586
            }else{
2587
//                old_dst[x + y*dst_stride] -= v;
2588
                dst[x + src_x] -= v;
2589
            }
2590
        }
2591
    }
2592
        STOP_TIMER("Inner add y block")
2593
}
2594
#endif
2595
}
2596

    
2597
//FIXME name clenup (b_w, block_w, b_width stuff)
2598
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2599
    const int b_width = s->b_width  << s->block_max_depth;
2600
    const int b_height= s->b_height << s->block_max_depth;
2601
    const int b_stride= b_width;
2602
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2603
    BlockNode *rt= lt+1;
2604
    BlockNode *lb= lt+b_stride;
2605
    BlockNode *rb= lb+1;
2606
    uint8_t *block[4]; 
2607
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2608
    int x,y;
2609

    
2610
    if(b_x<0){
2611
        lt= rt;
2612
        lb= rb;
2613
    }else if(b_x + 1 >= b_width){
2614
        rt= lt;
2615
        rb= lb;
2616
    }
2617
    if(b_y<0){
2618
        lt= lb;
2619
        rt= rb;
2620
    }else if(b_y + 1 >= b_height){
2621
        lb= lt;
2622
        rb= rt;
2623
    }
2624
        
2625
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2626
        obmc -= src_x;
2627
        b_w += src_x;
2628
        src_x=0;
2629
    }else if(src_x + b_w > w){
2630
        b_w = w - src_x;
2631
    }
2632
    if(src_y<0){
2633
        obmc -= src_y*obmc_stride;
2634
        b_h += src_y;
2635
        src_y=0;
2636
    }else if(src_y + b_h> h){
2637
        b_h = h - src_y;
2638
    }
2639
    
2640
    if(b_w<=0 || b_h<=0) return;
2641

    
2642
assert(src_stride > 7*MB_SIZE);
2643
    dst += src_x + src_y*dst_stride;
2644
    dst8+= src_x + src_y*src_stride;
2645
//    src += src_x + src_y*src_stride;
2646

    
2647
    block[0]= tmp+3*MB_SIZE;
2648
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2649

    
2650
    if(same_block(lt, rt)){
2651
        block[1]= block[0];
2652
    }else{
2653
        block[1]= tmp + 4*MB_SIZE;
2654
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2655
    }
2656
        
2657
    if(same_block(lt, lb)){
2658
        block[2]= block[0];
2659
    }else if(same_block(rt, lb)){
2660
        block[2]= block[1];
2661
    }else{
2662
        block[2]= tmp+5*MB_SIZE;
2663
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2664
    }
2665

    
2666
    if(same_block(lt, rb) ){
2667
        block[3]= block[0];
2668
    }else if(same_block(rt, rb)){
2669
        block[3]= block[1];
2670
    }else if(same_block(lb, rb)){
2671
        block[3]= block[2];
2672
    }else{
2673
        block[3]= tmp+6*MB_SIZE;
2674
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2675
    }
2676
#if 0
2677
    for(y=0; y<b_h; y++){
2678
        for(x=0; x<b_w; x++){
2679
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2680
            if(add) dst[x + y*dst_stride] += v;
2681
            else    dst[x + y*dst_stride] -= v;
2682
        }
2683
    }
2684
    for(y=0; y<b_h; y++){
2685
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2686
        for(x=0; x<b_w; x++){
2687
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2688
            if(add) dst[x + y*dst_stride] += v;
2689
            else    dst[x + y*dst_stride] -= v;
2690
        }
2691
    }
2692
    for(y=0; y<b_h; y++){
2693
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2694
        for(x=0; x<b_w; x++){
2695
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2696
            if(add) dst[x + y*dst_stride] += v;
2697
            else    dst[x + y*dst_stride] -= v;
2698
        }
2699
    }
2700
    for(y=0; y<b_h; y++){
2701
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2702
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2703
        for(x=0; x<b_w; x++){
2704
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2705
            if(add) dst[x + y*dst_stride] += v;
2706
            else    dst[x + y*dst_stride] -= v;
2707
        }
2708
    }
2709
#else
2710
    for(y=0; y<b_h; y++){
2711
        //FIXME ugly missue of obmc_stride
2712
        uint8_t *obmc1= obmc + y*obmc_stride;
2713
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2714
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2715
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2716
        for(x=0; x<b_w; x++){
2717
            int v=   obmc1[x] * block[3][x + y*src_stride]
2718
                    +obmc2[x] * block[2][x + y*src_stride]
2719
                    +obmc3[x] * block[1][x + y*src_stride]
2720
                    +obmc4[x] * block[0][x + y*src_stride];
2721
            
2722
            v <<= 8 - LOG2_OBMC_MAX;
2723
            if(FRAC_BITS != 8){
2724
                v += 1<<(7 - FRAC_BITS);
2725
                v >>= 8 - FRAC_BITS;
2726
            }
2727
            if(add){
2728
                v += dst[x + y*dst_stride];
2729
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2730
                if(v&(~255)) v= ~(v>>31);
2731
                dst8[x + y*src_stride] = v;
2732
            }else{
2733
                dst[x + y*dst_stride] -= v;
2734
            }
2735
        }
2736
    }
2737
#endif
2738
}
2739

    
2740
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2741
    Plane *p= &s->plane[plane_index];
2742
    const int mb_w= s->b_width  << s->block_max_depth;
2743
    const int mb_h= s->b_height << s->block_max_depth;
2744
    int x, y, mb_x;
2745
    int block_size = MB_SIZE >> s->block_max_depth;
2746
    int block_w    = plane_index ? block_size/2 : block_size;
2747
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2748
    int obmc_stride= plane_index ? block_size : 2*block_size;
2749
    int ref_stride= s->current_picture.linesize[plane_index];
2750
    uint8_t *ref  = s->last_picture.data[plane_index];
2751
    uint8_t *dst8= s->current_picture.data[plane_index];
2752
    int w= p->width;
2753
    int h= p->height;
2754
    START_TIMER
2755
    
2756
    if(s->keyframe || (s->avctx->debug&512)){
2757
        if(mb_y==mb_h)
2758
            return;
2759

    
2760
        if(add){
2761
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++)
2762
            {
2763
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2764
                DWTELEM * line = sb->line[y];
2765
                for(x=0; x<w; x++)
2766
                {
2767
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2768
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2769
                    v >>= FRAC_BITS;
2770
                    if(v&(~255)) v= ~(v>>31);
2771
                    dst8[x + y*ref_stride]= v;
2772
                }
2773
            }
2774
        }else{
2775
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++)
2776
            {
2777
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2778
                DWTELEM * line = sb->line[y];
2779
                for(x=0; x<w; x++)
2780
                {
2781
                    line[x] -= 128 << FRAC_BITS;
2782
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2783
                }
2784
            }
2785
        }
2786

    
2787
        return;
2788
    }
2789
    
2790
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2791
            START_TIMER
2792

    
2793
            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, 
2794
                       block_w*mb_x - block_w/2,
2795
                       block_w*mb_y - block_w/2,
2796
                       block_w, block_w,
2797
                       w, h,
2798
                       w, ref_stride, obmc_stride,
2799
                       mb_x - 1, mb_y - 1,
2800
                       add, plane_index);
2801
            
2802
            STOP_TIMER("add_yblock")
2803
        }
2804
    
2805
    STOP_TIMER("predict_slice")
2806
}
2807

    
2808
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2809
    Plane *p= &s->plane[plane_index];
2810
    const int mb_w= s->b_width  << s->block_max_depth;
2811
    const int mb_h= s->b_height << s->block_max_depth;
2812
    int x, y, mb_x;
2813
    int block_size = MB_SIZE >> s->block_max_depth;
2814
    int block_w    = plane_index ? block_size/2 : block_size;
2815
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2816
    int obmc_stride= plane_index ? block_size : 2*block_size;
2817
    int ref_stride= s->current_picture.linesize[plane_index];
2818
    uint8_t *ref  = s->last_picture.data[plane_index];
2819
    uint8_t *dst8= s->current_picture.data[plane_index];
2820
    int w= p->width;
2821
    int h= p->height;
2822
    START_TIMER
2823
    
2824
    if(s->keyframe || (s->avctx->debug&512)){
2825
        if(mb_y==mb_h)
2826
            return;
2827

    
2828
        if(add){
2829
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
2830
                for(x=0; x<w; x++){
2831
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2832
                    v >>= FRAC_BITS;
2833
                    if(v&(~255)) v= ~(v>>31);
2834
                    dst8[x + y*ref_stride]= v;
2835
                }
2836
            }
2837
        }else{
2838
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
2839
                for(x=0; x<w; x++){
2840
                    buf[x + y*w]-= 128<<FRAC_BITS;
2841
                }
2842
            }
2843
        }
2844

    
2845
        return;
2846
    }
2847
    
2848
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2849
            START_TIMER
2850

    
2851
            add_yblock(s, buf, dst8, ref, obmc, 
2852
                       block_w*mb_x - block_w/2,
2853
                       block_w*mb_y - block_w/2,
2854
                       block_w, block_w,
2855
                       w, h,
2856
                       w, ref_stride, obmc_stride,
2857
                       mb_x - 1, mb_y - 1,
2858
                       add, plane_index);
2859
            
2860
            STOP_TIMER("add_yblock")
2861
        }
2862
    
2863
    STOP_TIMER("predict_slice")
2864
}
2865

    
2866
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2867
    const int mb_h= s->b_height << s->block_max_depth;
2868
    int mb_y;
2869
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2870
        predict_slice(s, buf, plane_index, add, mb_y);
2871
}
2872

    
2873
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2874
    const int level= b->level;
2875
    const int w= b->width;
2876
    const int h= b->height;
2877
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2878
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2879
    int x,y, thres1, thres2;
2880
    START_TIMER
2881

    
2882
    if(s->qlog == LOSSLESS_QLOG) return;
2883
 
2884
    bias= bias ? 0 : (3*qmul)>>3;
2885
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2886
    thres2= 2*thres1;
2887
    
2888
    if(!bias){
2889
        for(y=0; y<h; y++){
2890
            for(x=0; x<w; x++){
2891
                int i= src[x + y*stride];
2892
                
2893
                if((unsigned)(i+thres1) > thres2){
2894
                    if(i>=0){
2895
                        i<<= QEXPSHIFT;
2896
                        i/= qmul; //FIXME optimize
2897
                        src[x + y*stride]=  i;
2898
                    }else{
2899
                        i= -i;
2900
                        i<<= QEXPSHIFT;
2901
                        i/= qmul; //FIXME optimize
2902
                        src[x + y*stride]= -i;
2903
                    }
2904
                }else
2905
                    src[x + y*stride]= 0;
2906
            }
2907
        }
2908
    }else{
2909
        for(y=0; y<h; y++){
2910
            for(x=0; x<w; x++){
2911
                int i= src[x + y*stride]; 
2912
                
2913
                if((unsigned)(i+thres1) > thres2){
2914
                    if(i>=0){
2915
                        i<<= QEXPSHIFT;
2916
                        i= (i + bias) / qmul; //FIXME optimize
2917
                        src[x + y*stride]=  i;
2918
                    }else{
2919
                        i= -i;
2920
                        i<<= QEXPSHIFT;
2921
                        i= (i + bias) / qmul; //FIXME optimize
2922
                        src[x + y*stride]= -i;
2923
                    }
2924
                }else
2925
                    src[x + y*stride]= 0;
2926
            }
2927
        }
2928
    }
2929
    if(level+1 == s->spatial_decomposition_count){
2930
//        STOP_TIMER("quantize")
2931
    }
2932
}
2933

    
2934
static void dequantize_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride){
2935
    const int w= b->width;
2936
    const int h= b->height;
2937
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2938
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2939
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2940
    int x,y;
2941
    START_TIMER
2942
    
2943
    if(s->qlog == LOSSLESS_QLOG) return;
2944
    
2945
    for(y=0; y<h; y++){
2946
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2947
        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2948
        for(x=0; x<w; x++){
2949
            int i= line[x];
2950
            if(i<0){
2951
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2952
            }else if(i>0){
2953
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2954
            }
2955
        }
2956
    }
2957
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2958
        STOP_TIMER("dquant")
2959
    }
2960
}
2961

    
2962
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2963
    const int w= b->width;
2964
    const int h= b->height;
2965
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2966
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2967
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2968
    int x,y;
2969
    START_TIMER
2970
    
2971
    if(s->qlog == LOSSLESS_QLOG) return;
2972
    
2973
    for(y=0; y<h; y++){
2974
        for(x=0; x<w; x++){
2975
            int i= src[x + y*stride];
2976
            if(i<0){
2977
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2978
            }else if(i>0){
2979
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2980
            }
2981
        }
2982
    }
2983
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2984
        STOP_TIMER("dquant")
2985
    }
2986
}
2987

    
2988
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2989
    const int w= b->width;
2990
    const int h= b->height;
2991
    int x,y;
2992
    
2993
    for(y=h-1; y>=0; y--){
2994
        for(x=w-1; x>=0; x--){
2995
            int i= x + y*stride;
2996
            
2997
            if(x){
2998
                if(use_median){
2999
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3000
                    else  src[i] -= src[i - 1];
3001
                }else{
3002
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3003
                    else  src[i] -= src[i - 1];
3004
                }
3005
            }else{
3006
                if(y) src[i] -= src[i - stride];
3007
            }
3008
        }
3009
    }
3010
}
3011

    
3012
static void correlate_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3013
    const int w= b->width;
3014
    const int h= b->height;
3015
    int x,y;
3016
    
3017
//    START_TIMER
3018
    
3019
    DWTELEM * line;
3020
    DWTELEM * prev;
3021
    
3022
    for(y=0; y<h; y++){
3023
        prev = line;
3024
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3025
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3026
        for(x=0; x<w; x++){
3027
            if(x){
3028
                if(use_median){
3029
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3030
                    else  line[x] += line[x - 1];
3031
                }else{
3032
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3033
                    else  line[x] += line[x - 1];
3034
                }
3035
            }else{
3036
                if(y) line[x] += prev[x];
3037
            }
3038
        }
3039
    }
3040
    
3041
//    STOP_TIMER("correlate")
3042
}
3043

    
3044
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3045
    const int w= b->width;
3046
    const int h= b->height;
3047
    int x,y;
3048
    
3049
    for(y=0; y<h; y++){
3050
        for(x=0; x<w; x++){
3051
            int i= x + y*stride;
3052
            
3053
            if(x){
3054
                if(use_median){
3055
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3056
                    else  src[i] += src[i - 1];
3057
                }else{
3058
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3059
                    else  src[i] += src[i - 1];
3060
                }
3061
            }else{
3062
                if(y) src[i] += src[i - stride];
3063
            }
3064
        }
3065
    }
3066
}
3067

    
3068
static void encode_header(SnowContext *s){
3069
    int plane_index, level, orientation;
3070
    uint8_t kstate[32]; 
3071
    
3072
    memset(kstate, MID_STATE, sizeof(kstate));   
3073

    
3074
    put_rac(&s->c, kstate, s->keyframe);
3075
    if(s->keyframe || s->always_reset)
3076
        reset_contexts(s);
3077
    if(s->keyframe){
3078
        put_symbol(&s->c, s->header_state, s->version, 0);
3079
        put_rac(&s->c, s->header_state, s->always_reset);
3080
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3081
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3082
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3083
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3084
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3085
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3086
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3087
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3088

    
3089
        for(plane_index=0; plane_index<2; plane_index++){
3090
            for(level=0; level<s->spatial_decomposition_count; level++){
3091
                for(orientation=level ? 1:0; orientation<4; orientation++){
3092
                    if(orientation==2) continue;
3093
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3094
                }
3095
            }
3096
        }
3097
    }
3098
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3099
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
3100
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
3101
    put_symbol(&s->c, s->header_state, s->qbias, 1);
3102
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3103
}
3104

    
3105
static int decode_header(SnowContext *s){
3106
    int plane_index, level, orientation;
3107
    uint8_t kstate[32];
3108

    
3109
    memset(kstate, MID_STATE, sizeof(kstate));   
3110

    
3111
    s->keyframe= get_rac(&s->c, kstate);
3112
    if(s->keyframe || s->always_reset)
3113
        reset_contexts(s);
3114
    if(s->keyframe){
3115
        s->version= get_symbol(&s->c, s->header_state, 0);
3116
        if(s->version>0){
3117
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3118
            return -1;
3119
        }
3120
        s->always_reset= get_rac(&s->c, s->header_state);
3121
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3122
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3123
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3124
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3125
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3126
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3127
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3128
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3129

    
3130
        for(plane_index=0; plane_index<3; plane_index++){
3131
            for(level=0; level<s->spatial_decomposition_count; level++){
3132
                for(orientation=level ? 1:0; orientation<4; orientation++){
3133
                    int q;
3134
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3135
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3136
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3137
                    s->plane[plane_index].band[level][orientation].qlog= q;
3138
                }
3139
            }
3140
        }
3141
    }
3142
    
3143
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3144
    if(s->spatial_decomposition_type > 2){
3145
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3146
        return -1;
3147
    }
3148
    
3149
    s->qlog= get_symbol(&s->c, s->header_state, 1);
3150
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3151
    s->qbias= get_symbol(&s->c, s->header_state, 1);
3152
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3153

    
3154
    return 0;
3155
}
3156

    
3157
static void init_qexp(){
3158
    int i;
3159
    double v=128;
3160

    
3161
    for(i=0; i<QROOT; i++){
3162
        qexp[i]= lrintf(v);
3163
        v *= pow(2, 1.0 / QROOT); 
3164
    }
3165
}
3166

    
3167
static int common_init(AVCodecContext *avctx){
3168
    SnowContext *s = avctx->priv_data;
3169
    int width, height;
3170
    int level, orientation, plane_index, dec;
3171

    
3172
    s->avctx= avctx;
3173
        
3174
    dsputil_init(&s->dsp, avctx);
3175

    
3176
#define mcf(dx,dy)\
3177
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3178
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3179
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3180
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3181
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3182
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3183

    
3184
    mcf( 0, 0)
3185
    mcf( 4, 0)
3186
    mcf( 8, 0)
3187
    mcf(12, 0)
3188
    mcf( 0, 4)
3189
    mcf( 4, 4)
3190
    mcf( 8, 4)
3191
    mcf(12, 4)
3192
    mcf( 0, 8)
3193
    mcf( 4, 8)
3194
    mcf( 8, 8)
3195
    mcf(12, 8)
3196
    mcf( 0,12)
3197
    mcf( 4,12)
3198
    mcf( 8,12)
3199
    mcf(12,12)
3200

    
3201
#define mcfh(dx,dy)\
3202
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3203
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3204
        mc_block_hpel ## dx ## dy ## 16;\
3205
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3206
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3207
        mc_block_hpel ## dx ## dy ## 8;
3208

    
3209
    mcfh(0, 0)
3210
    mcfh(8, 0)
3211
    mcfh(0, 8)
3212
    mcfh(8, 8)
3213

    
3214
    if(!qexp[0])
3215
        init_qexp();
3216

    
3217
    dec= s->spatial_decomposition_count= 5;
3218
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3219
    
3220
    s->chroma_h_shift= 1; //FIXME XXX
3221
    s->chroma_v_shift= 1;
3222
    
3223
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3224
    
3225
    width= s->avctx->width;
3226
    height= s->avctx->height;
3227

    
3228
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3229
    
3230
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3231
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3232
    
3233
    for(plane_index=0; plane_index<3; plane_index++){    
3234
        int w= s->avctx->width;
3235
        int h= s->avctx->height;
3236

    
3237
        if(plane_index){
3238
            w>>= s->chroma_h_shift;
3239
            h>>= s->chroma_v_shift;
3240
        }
3241
        s->plane[plane_index].width = w;
3242
        s->plane[plane_index].height= h;
3243
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3244
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3245
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3246
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3247
                
3248
                b->buf= s->spatial_dwt_buffer;
3249
                b->level= level;
3250
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3251
                b->width = (w + !(orientation&1))>>1;
3252
                b->height= (h + !(orientation>1))>>1;
3253
                
3254
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3255
                b->buf_x_offset = 0;
3256
                b->buf_y_offset = 0;
3257
                
3258
                if(orientation&1){
3259
                    b->buf += (w+1)>>1;
3260
                    b->buf_x_offset = (w+1)>>1;
3261
                }
3262
                if(orientation>1){
3263
                    b->buf += b->stride>>1;
3264
                    b->buf_y_offset = b->stride_line >> 1;
3265
                }
3266
                
3267
                if(level)
3268
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3269
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3270
            }
3271
            w= (w+1)>>1;
3272
            h= (h+1)>>1;
3273
        }
3274
    }
3275
    
3276
    reset_contexts(s);
3277
/*    
3278
    width= s->width= avctx->width;
3279
    height= s->height= avctx->height;
3280
    
3281
    assert(width && height);
3282
*/
3283
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3284
    
3285
    return 0;
3286
}
3287

    
3288

    
3289
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3290
    int width = p->width;
3291
    int height= p->height;
3292
    int level, orientation, x, y;
3293

    
3294
    for(level=0; level<s->spatial_decomposition_count; level++){
3295
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3296
            SubBand *b= &p->band[level][orientation];
3297
            DWTELEM *buf= b->buf;
3298
            int64_t error=0;
3299
            
3300
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3301
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
3302
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3303
            for(y=0; y<height; y++){
3304
                for(x=0; x<width; x++){
3305
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
3306
                    error += d*d;
3307
                }
3308
            }
3309

    
3310
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3311
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3312
        }
3313
    }
3314
}
3315

    
3316
static int encode_init(AVCodecContext *avctx)
3317
{
3318
    SnowContext *s = avctx->priv_data;
3319
    int plane_index;
3320

    
3321
    if(avctx->strict_std_compliance >= 0){
3322
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
3323
               "use vstrict=-1 / -strict -1 to use it anyway\n");
3324
        return -1;
3325
    }
3326
 
3327
    common_init(avctx);
3328
    alloc_blocks(s);
3329
 
3330
    s->version=0;
3331
    
3332
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3333
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3334
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3335
    h263_encode_init(&s->m); //mv_penalty
3336

    
3337
    for(plane_index=0; plane_index<3; plane_index++){
3338
        calculate_vissual_weight(s, &s->plane[plane_index]);
3339
    }
3340
    
3341
    
3342
    avctx->coded_frame= &s->current_picture;
3343
    switch(avctx->pix_fmt){
3344
//    case PIX_FMT_YUV444P:
3345
//    case PIX_FMT_YUV422P:
3346
    case PIX_FMT_YUV420P:
3347
    case PIX_FMT_GRAY8:
3348
//    case PIX_FMT_YUV411P:
3349
//    case PIX_FMT_YUV410P:
3350
        s->colorspace_type= 0;
3351
        break;
3352
/*    case PIX_FMT_RGBA32:
3353
        s->colorspace= 1;
3354
        break;*/
3355
    default:
3356
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3357
        return -1;
3358
    }
3359
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3360
    s->chroma_h_shift= 1;
3361
    s->chroma_v_shift= 1;
3362
    return 0;
3363
}
3364

    
3365
static int frame_start(SnowContext *s){
3366
   AVFrame tmp;
3367
   int w= s->avctx->width; //FIXME round up to x16 ?
3368
   int h= s->avctx->height;
3369

    
3370
    if(s->current_picture.data[0]){
3371
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
3372
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3373
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3374
    }
3375

    
3376
    tmp= s->last_picture;
3377
    s->last_picture= s->current_picture;
3378
    s->current_picture= tmp;
3379
    
3380
    s->current_picture.reference= 1;
3381
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3382
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3383
        return -1;
3384
    }
3385
    
3386
    return 0;
3387
}
3388

    
3389
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3390
    SnowContext *s = avctx->priv_data;
3391
    RangeCoder * const c= &s->c;
3392
    AVFrame *pict = data;
3393
    const int width= s->avctx->width;
3394
    const int height= s->avctx->height;
3395
    int level, orientation, plane_index;
3396

    
3397
    ff_init_range_encoder(c, buf, buf_size);
3398
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3399
    
3400
    s->input_picture = *pict;
3401

    
3402
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3403
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3404
    
3405
    if(pict->quality){
3406
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3407
        //<64 >60
3408
        s->qlog += 61*QROOT/8;
3409
    }else{
3410
        s->qlog= LOSSLESS_QLOG;
3411
    }
3412

    
3413
    frame_start(s);
3414
    s->current_picture.key_frame= s->keyframe;
3415

    
3416
    if(pict->pict_type == P_TYPE){
3417
        int block_width = (width +15)>>4;
3418
        int block_height= (height+15)>>4;
3419
        int stride= s->current_picture.linesize[0];
3420
        
3421
        assert(s->current_picture.data[0]);
3422
        assert(s->last_picture.data[0]);
3423
     
3424
        s->m.avctx= s->avctx;
3425
        s->m.current_picture.data[0]= s->current_picture.data[0];
3426
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
3427
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3428
        s->m.current_picture_ptr= &s->m.current_picture;
3429
        s->m.   last_picture_ptr= &s->m.   last_picture;
3430
        s->m.linesize=
3431
        s->m.   last_picture.linesize[0]=
3432
        s->m.    new_picture.linesize[0]=
3433
        s->m.current_picture.linesize[0]= stride;
3434
        s->m.uvlinesize= s->current_picture.linesize[1];
3435
        s->m.width = width;
3436
        s->m.height= height;
3437
        s->m.mb_width = block_width;
3438
        s->m.mb_height= block_height;
3439
        s->m.mb_stride=   s->m.mb_width+1;
3440
        s->m.b8_stride= 2*s->m.mb_width+1;
3441
        s->m.f_code=1;
3442
        s->m.pict_type= pict->pict_type;
3443
        s->m.me_method= s->avctx->me_method;
3444
        s->m.me.scene_change_score=0;
3445
        s->m.flags= s->avctx->flags;
3446
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3447
        s->m.out_format= FMT_H263;
3448
        s->m.unrestricted_mv= 1;
3449

    
3450
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3451
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3452
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3453

    
3454
        s->m.dsp= s->dsp; //move
3455
        ff_init_me(&s->m);
3456
    }
3457
    
3458
redo_frame:
3459
        
3460
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3461

    
3462
    encode_header(s);
3463
    encode_blocks(s);
3464
      
3465
    for(plane_index=0; plane_index<3; plane_index++){
3466
        Plane *p= &s->plane[plane_index];
3467
        int w= p->width;
3468
        int h= p->height;
3469
        int x, y;
3470
//        int bits= put_bits_count(&s->c.pb);
3471

    
3472
        //FIXME optimize
3473
     if(pict->data[plane_index]) //FIXME gray hack
3474
        for(y=0; y<h; y++){
3475
            for(x=0; x<w; x++){
3476
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3477
            }
3478
        }
3479
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3480
        
3481
        if(   plane_index==0 
3482
           && pict->pict_type == P_TYPE 
3483
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3484
            ff_init_range_encoder(c, buf, buf_size);
3485
            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3486
            pict->pict_type= FF_I_TYPE;
3487
            s->keyframe=1;
3488
            reset_contexts(s);
3489
            goto redo_frame;
3490
        }
3491
        
3492
        if(s->qlog == LOSSLESS_QLOG){
3493
            for(y=0; y<h; y++){
3494
                for(x=0; x<w; x++){
3495
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3496
                }
3497
            }
3498
        }
3499
 
3500
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3501

    
3502
        for(level=0; level<s->spatial_decomposition_count; level++){
3503
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3504
                SubBand *b= &p->band[level][orientation];
3505
                
3506
                quantize(s, b, b->buf, b->stride, s->qbias);
3507
                if(orientation==0)
3508
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3509
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3510
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
3511
                if(orientation==0)
3512
                    correlate(s, b, b->buf, b->stride, 1, 0);
3513
            }
3514
        }
3515
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3516

    
3517
        for(level=0; level<s->spatial_decomposition_count; level++){
3518
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3519
                SubBand *b= &p->band[level][orientation];
3520

    
3521
                dequantize(s, b, b->buf, b->stride);
3522
            }
3523
        }
3524

    
3525
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3526
        if(s->qlog == LOSSLESS_QLOG){
3527
            for(y=0; y<h; y++){
3528
                for(x=0; x<w; x++){
3529
                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
3530
                }
3531
            }
3532
        }
3533
{START_TIMER
3534
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3535
STOP_TIMER("pred-conv")}
3536
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3537
            int64_t error= 0;
3538
            
3539
    if(pict->data[plane_index]) //FIXME gray hack
3540
            for(y=0; y<h; y++){
3541
                for(x=0; x<w; x++){
3542
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3543
                    error += d*d;
3544
                }
3545
            }
3546
            s->avctx->error[plane_index] += error;
3547
            s->current_picture.error[plane_index] = error;
3548
        }
3549
    }
3550

    
3551
    if(s->last_picture.data[0])
3552
        avctx->release_buffer(avctx, &s->last_picture);
3553

    
3554
    emms_c();
3555
    
3556
    return ff_rac_terminate(c);
3557
}
3558

    
3559
static void common_end(SnowContext *s){
3560
    int plane_index, level, orientation;
3561

    
3562
    av_freep(&s->spatial_dwt_buffer);
3563

    
3564
    av_freep(&s->m.me.scratchpad);    
3565
    av_freep(&s->m.me.map);
3566
    av_freep(&s->m.me.score_map);
3567
 
3568
    av_freep(&s->block);
3569

    
3570
    for(plane_index=0; plane_index<3; plane_index++){    
3571
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3572
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3573
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3574
                
3575
                av_freep(&b->x_coeff);
3576
            }
3577
        }
3578
    }
3579
}
3580

    
3581
static int encode_end(AVCodecContext *avctx)
3582
{
3583
    SnowContext *s = avctx->priv_data;
3584

    
3585
    common_end(s);
3586

    
3587
    return 0;
3588
}
3589

    
3590
static int decode_init(AVCodecContext *avctx)
3591
{
3592
    SnowContext *s = avctx->priv_data;
3593
    int block_size;
3594

    
3595
    common_init(avctx);
3596
    
3597
    block_size = MB_SIZE >> s->block_max_depth;
3598
    /* FIXME block_size * 2 is determined empirically. block_size * 1.5 is definitely needed, but I (Robert) cannot figure out why more than that is needed. Perhaps there is a bug, or perhaps I overlooked some demands that are placed on the buffer. */
3599
    /* FIXME The formula is WRONG. For height > 480, the buffer will overflow. */
3600
    /* FIXME For now, I will use a full frame of lines. Fortunately, this should not materially effect cache performance because lines are allocated using a stack, so if in fact only 50 out of 496 lines are needed at a time, the other 446 will sit allocated but never accessed. */
3601
//    slice_buffer_init(s->plane[0].sb, s->plane[0].height, (block_size * 2) + (s->spatial_decomposition_count * s->spatial_decomposition_count), s->plane[0].width, s->spatial_dwt_buffer);
3602
    slice_buffer_init(&s->sb, s->plane[0].height, s->plane[0].height, s->plane[0].width, s->spatial_dwt_buffer);
3603
    
3604
    return 0;
3605
}
3606

    
3607
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3608
    SnowContext *s = avctx->priv_data;
3609
    RangeCoder * const c= &s->c;
3610
    int bytes_read;
3611
    AVFrame *picture = data;
3612
    int level, orientation, plane_index;
3613

    
3614
    ff_init_range_decoder(c, buf, buf_size);
3615
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3616

    
3617
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3618
    decode_header(s);
3619
    if(!s->block) alloc_blocks(s);
3620

    
3621
    frame_start(s);
3622
    //keyframe flag dupliaction mess FIXME
3623
    if(avctx->debug&FF_DEBUG_PICT_INFO)
3624
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3625
    
3626
    decode_blocks(s);
3627

    
3628
    for(plane_index=0; plane_index<3; plane_index++){
3629
        Plane *p= &s->plane[plane_index];
3630
        int w= p->width;
3631
        int h= p->height;
3632
        int x, y;
3633
        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
3634
        SubBand * correlate_band;
3635
        
3636
if(s->avctx->debug&2048){
3637
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3638
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3639

    
3640
        for(y=0; y<h; y++){
3641
            for(x=0; x<w; x++){
3642
                int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
3643
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3644
            }
3645
        }
3646
}
3647

    
3648
{   START_TIMER
3649
    for(level=0; level<s->spatial_decomposition_count; level++){
3650
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3651
            SubBand *b= &p->band[level][orientation];
3652
            unpack_coeffs(s, b, b->parent, orientation);
3653
        }
3654
    }
3655
    STOP_TIMER("unpack coeffs");
3656
}
3657
        
3658
        /* Handle level 0, orientation 0 specially. It is particularly resistant to slicing but fortunately quite small, so process it in one pass. */
3659
        correlate_band = &p->band[0][0];
3660
        decode_subband_slice_buffered(s, correlate_band, &s->sb, 0, correlate_band->height, decode_state[0][0]);
3661
        correlate_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0);
3662
        dequantize_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride);
3663

    
3664
{START_TIMER
3665
    const int mb_h= s->b_height << s->block_max_depth;
3666
    const int block_size = MB_SIZE >> s->block_max_depth;
3667
    const int block_w    = plane_index ? block_size/2 : block_size;
3668
    int mb_y;
3669
    dwt_compose_t cs[MAX_DECOMPOSITIONS];
3670
    int yd=0, yq=0;
3671
    int y;
3672
    int end_y;
3673

    
3674
    ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
3675
    for(mb_y=0; mb_y<=mb_h; mb_y++){
3676
        
3677
        const int slice_starty = block_w*mb_y;
3678
        const int slice_h = block_w*(mb_y+1);
3679

    
3680
        {        
3681
        START_TIMER
3682
        for(level=0; level<s->spatial_decomposition_count; level++){
3683
            for(orientation=level ? 1 : 1; orientation<4; orientation++){
3684
                SubBand *b= &p->band[level][orientation];
3685
                int start_y;
3686
                int end_y;
3687
                int our_mb_start = mb_y;
3688
                int our_mb_end = (mb_y + 1);
3689
                start_y = FFMIN(b->height, (mb_y ? ((block_w * our_mb_start - 4) >> (s->spatial_decomposition_count - level)) + 5 : 0));
3690
                end_y = FFMIN(b->height, (((block_w * our_mb_end - 4) >> (s->spatial_decomposition_count - level)) + 5));
3691
                    
3692
                if (start_y != end_y)
3693
                    decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
3694
            }
3695
        }
3696
        STOP_TIMER("decode_subband_slice");
3697
        }
3698
        
3699
{   START_TIMER
3700
        for(; yd<slice_h; yd+=4){
3701
            ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
3702
        }
3703
    STOP_TIMER("idwt slice");}
3704

    
3705
        
3706
        if(s->qlog == LOSSLESS_QLOG){
3707
            for(; yq<slice_h && yq<h; yq++){
3708
                DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
3709
                for(x=0; x<w; x++){
3710
                    line[x] <<= FRAC_BITS;
3711
                }
3712
            }
3713
        }
3714

    
3715
        predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
3716
        
3717
        /* Nasty hack based empirically on how predict_slice_buffered() hits the buffer. */
3718
        /* FIXME If possible, make predict_slice fit into the slice. As of now, it works on some previous lines (up to slice_height / 2) if the condition on the next line is false. */
3719
        if (s->keyframe || (s->avctx->debug&512)){
3720
            y = FFMIN(p->height, slice_starty);
3721
            end_y = FFMIN(p->height, slice_h);
3722
        }
3723
        else{
3724
            y = FFMAX(0, FFMIN(p->height, slice_starty - (block_w >> 1)));
3725
            end_y = FFMAX(0, FFMIN(p->height, slice_h - (block_w >> 1)));
3726
        }
3727
        while(y < end_y)
3728
            slice_buffer_release(&s->sb, y++);
3729
    }
3730
    
3731
    slice_buffer_flush(&s->sb);
3732
    
3733
STOP_TIMER("idwt + predict_slices")}
3734
    }
3735
            
3736
    emms_c();
3737

    
3738
    if(s->last_picture.data[0])
3739
        avctx->release_buffer(avctx, &s->last_picture);
3740

    
3741
if(!(s->avctx->debug&2048))        
3742
    *picture= s->current_picture;
3743
else
3744
    *picture= s->mconly_picture;
3745
    
3746
    *data_size = sizeof(AVFrame);
3747
    
3748
    bytes_read= c->bytestream - c->bytestream_start;
3749
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
3750

    
3751
    return bytes_read;
3752
}
3753

    
3754
static int decode_end(AVCodecContext *avctx)
3755
{
3756
    SnowContext *s = avctx->priv_data;
3757

    
3758
    slice_buffer_destroy(&s->sb);
3759
    
3760
    common_end(s);
3761

    
3762
    return 0;
3763
}
3764

    
3765
AVCodec snow_decoder = {
3766
    "snow",
3767
    CODEC_TYPE_VIDEO,
3768
    CODEC_ID_SNOW,
3769
    sizeof(SnowContext),
3770
    decode_init,
3771
    NULL,
3772
    decode_end,
3773
    decode_frame,
3774
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3775
    NULL
3776
};
3777

    
3778
#ifdef CONFIG_ENCODERS
3779
AVCodec snow_encoder = {
3780
    "snow",
3781
    CODEC_TYPE_VIDEO,
3782
    CODEC_ID_SNOW,
3783
    sizeof(SnowContext),
3784
    encode_init,
3785
    encode_frame,
3786
    encode_end,
3787
};
3788
#endif
3789

    
3790

    
3791
#if 0
3792
#undef malloc
3793
#undef free
3794
#undef printf
3795

3796
int main(){
3797
    int width=256;
3798
    int height=256;
3799
    int buffer[2][width*height];
3800
    SnowContext s;
3801
    int i;
3802
    s.spatial_decomposition_count=6;
3803
    s.spatial_decomposition_type=1;
3804
    
3805
    printf("testing 5/3 DWT\n");
3806
    for(i=0; i<width*height; i++)
3807
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3808
    
3809
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3810
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3811
    
3812
    for(i=0; i<width*height; i++)
3813
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3814

3815
    printf("testing 9/7 DWT\n");
3816
    s.spatial_decomposition_type=0;
3817
    for(i=0; i<width*height; i++)
3818
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3819
    
3820
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3821
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3822
    
3823
    for(i=0; i<width*height; i++)
3824
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3825
        
3826
    printf("testing AC coder\n");
3827
    memset(s.header_state, 0, sizeof(s.header_state));
3828
    ff_init_range_encoder(&s.c, buffer[0], 256*256);
3829
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3830
        
3831
    for(i=-256; i<256; i++){
3832
START_TIMER
3833
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3834
STOP_TIMER("put_symbol")
3835
    }
3836
    ff_rac_terminate(&s.c);
3837

3838
    memset(s.header_state, 0, sizeof(s.header_state));
3839
    ff_init_range_decoder(&s.c, buffer[0], 256*256);
3840
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3841
    
3842
    for(i=-256; i<256; i++){
3843
        int j;
3844
START_TIMER
3845
        j= get_symbol(&s.c, s.header_state, 1);
3846
STOP_TIMER("get_symbol")
3847
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3848
    }
3849
{
3850
int level, orientation, x, y;
3851
int64_t errors[8][4];
3852
int64_t g=0;
3853

3854
    memset(errors, 0, sizeof(errors));
3855
    s.spatial_decomposition_count=3;
3856
    s.spatial_decomposition_type=0;
3857
    for(level=0; level<s.spatial_decomposition_count; level++){
3858
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3859
            int w= width  >> (s.spatial_decomposition_count-level);
3860
            int h= height >> (s.spatial_decomposition_count-level);
3861
            int stride= width  << (s.spatial_decomposition_count-level);
3862
            DWTELEM *buf= buffer[0];
3863
            int64_t error=0;
3864

3865
            if(orientation&1) buf+=w;
3866
            if(orientation>1) buf+=stride>>1;
3867
            
3868
            memset(buffer[0], 0, sizeof(int)*width*height);
3869
            buf[w/2 + h/2*stride]= 256*256;
3870
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3871
            for(y=0; y<height; y++){
3872
                for(x=0; x<width; x++){
3873
                    int64_t d= buffer[0][x + y*width];
3874
                    error += d*d;
3875
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3876
                }
3877
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3878
            }
3879
            error= (int)(sqrt(error)+0.5);
3880
            errors[level][orientation]= error;
3881
            if(g) g=ff_gcd(g, error);
3882
            else g= error;
3883
        }
3884
    }
3885
    printf("static int const visual_weight[][4]={\n");
3886
    for(level=0; level<s.spatial_decomposition_count; level++){
3887
        printf("  {");
3888
        for(orientation=0; orientation<4; orientation++){
3889
            printf("%8lld,", errors[level][orientation]/g);
3890
        }
3891
        printf("},\n");
3892
    }
3893
    printf("};\n");
3894
    {
3895
            int level=2;
3896
            int orientation=3;
3897
            int w= width  >> (s.spatial_decomposition_count-level);
3898
            int h= height >> (s.spatial_decomposition_count-level);
3899
            int stride= width  << (s.spatial_decomposition_count-level);
3900
            DWTELEM *buf= buffer[0];
3901
            int64_t error=0;
3902

3903
            buf+=w;
3904
            buf+=stride>>1;
3905
            
3906
            memset(buffer[0], 0, sizeof(int)*width*height);
3907
#if 1
3908
            for(y=0; y<height; y++){
3909
                for(x=0; x<width; x++){
3910
                    int tab[4]={0,2,3,1};
3911
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3912
                }
3913
            }
3914
            ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3915
#else
3916
            for(y=0; y<h; y++){
3917
                for(x=0; x<w; x++){
3918
                    buf[x + y*stride  ]=169;
3919
                    buf[x + y*stride-w]=64;
3920
                }
3921
            }
3922
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3923
#endif
3924
            for(y=0; y<height; y++){
3925
                for(x=0; x<width; x++){
3926
                    int64_t d= buffer[0][x + y*width];
3927
                    error += d*d;
3928
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3929
                }
3930
                if(ABS(height/2-y)<9) printf("\n");
3931
            }
3932
    }
3933

    
3934
}
3935
    return 0;
3936
}
3937
#endif
3938