Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 91aa4e33

History | View | Annotate | Download (135 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22

    
23
#include "rangecoder.h"
24
#define MID_STATE 128
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QSHIFT 5
35
#define QROOT (1<<QSHIFT)
36
#define LOSSLESS_QLOG -128
37
#define FRAC_BITS 8
38

    
39
static const int8_t quant3[256]={
40
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56
};
57
static const int8_t quant3b[256]={
58
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74
};
75
static const int8_t quant3bA[256]={
76
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92
};
93
static const int8_t quant5[256]={
94
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110
};
111
static const int8_t quant7[256]={
112
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128
};
129
static const int8_t quant9[256]={
130
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146
};
147
static const int8_t quant11[256]={
148
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164
};
165
static const int8_t quant13[256]={
166
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182
};
183

    
184
#define LOG2_OBMC_MAX 6
185
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
186
#if 0 //64*cubic
187
static const uint8_t obmc32[1024]={
188
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220
//error:0.000022
221
};
222
static const uint8_t obmc16[256]={
223
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239
//error:0.000033
240
};
241
#elif 1 // 64*linear
242
static const uint8_t obmc32[1024]={
243
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275
 //error:0.000020
276
};
277
static const uint8_t obmc16[256]={
278
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294
//error:0.000015
295
};
296
#else //64*cos
297
static const uint8_t obmc32[1024]={
298
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330
//error:0.000022
331
};
332
static const uint8_t obmc16[256]={
333
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349
//error:0.000022
350
};
351
#endif
352

    
353
//linear *64
354
static const uint8_t obmc8[64]={
355
 1, 3, 5, 7, 7, 5, 3, 1,
356
 3, 9,15,21,21,15, 9, 3,
357
 5,15,25,35,35,25,15, 5,
358
 7,21,35,49,49,35,21, 7,
359
 7,21,35,49,49,35,21, 7,
360
 5,15,25,35,35,25,15, 5,
361
 3, 9,15,21,21,15, 9, 3,
362
 1, 3, 5, 7, 7, 5, 3, 1,
363
//error:0.000000
364
};
365

    
366
//linear *64
367
static const uint8_t obmc4[16]={
368
 4,12,12, 4,
369
12,36,36,12,
370
12,36,36,12,
371
 4,12,12, 4,
372
//error:0.000000
373
};
374

    
375
static const uint8_t *obmc_tab[4]={
376
    obmc32, obmc16, obmc8, obmc4
377
};
378

    
379
typedef struct BlockNode{
380
    int16_t mx;
381
    int16_t my;
382
    uint8_t color[3];
383
    uint8_t type;
384
//#define TYPE_SPLIT    1
385
#define BLOCK_INTRA   1
386
//#define TYPE_NOCOLOR  4
387
    uint8_t level; //FIXME merge into type?
388
}BlockNode;
389

    
390
#define LOG2_MB_SIZE 4
391
#define MB_SIZE (1<<LOG2_MB_SIZE)
392

    
393
typedef struct x_and_coeff{
394
    int16_t x;
395
    uint16_t coeff;
396
} x_and_coeff;
397

    
398
typedef struct SubBand{
399
    int level;
400
    int stride;
401
    int width;
402
    int height;
403
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
404
    DWTELEM *buf;
405
    int buf_x_offset;
406
    int buf_y_offset;
407
    int stride_line; ///< Stride measured in lines, not pixels.
408
    x_and_coeff * x_coeff;
409
    struct SubBand *parent;
410
    uint8_t state[/*7*2*/ 7 + 512][32];
411
}SubBand;
412

    
413
typedef struct Plane{
414
    int width;
415
    int height;
416
    SubBand band[MAX_DECOMPOSITIONS][4];
417
}Plane;
418

    
419
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
420
typedef struct {
421
    DWTELEM * * line; ///< For use by idwt and predict_slices.
422
    DWTELEM * * data_stack; ///< Used for internal purposes.
423
    int data_stack_top;
424
    int line_count;
425
    int line_width;
426
    int data_count;
427
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
428
} slice_buffer;
429

    
430
typedef struct SnowContext{
431
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
432

    
433
    AVCodecContext *avctx;
434
    RangeCoder c;
435
    DSPContext dsp;
436
    AVFrame input_picture;
437
    AVFrame current_picture;
438
    AVFrame last_picture;
439
    AVFrame mconly_picture;
440
//     uint8_t q_context[16];
441
    uint8_t header_state[32];
442
    uint8_t block_state[128 + 32*128];
443
    int keyframe;
444
    int always_reset;
445
    int version;
446
    int spatial_decomposition_type;
447
    int temporal_decomposition_type;
448
    int spatial_decomposition_count;
449
    int temporal_decomposition_count;
450
    DWTELEM *spatial_dwt_buffer;
451
    int colorspace_type;
452
    int chroma_h_shift;
453
    int chroma_v_shift;
454
    int spatial_scalability;
455
    int qlog;
456
    int lambda;
457
    int lambda2;
458
    int mv_scale;
459
    int qbias;
460
#define QBIAS_SHIFT 3
461
    int b_width;
462
    int b_height;
463
    int block_max_depth;
464
    Plane plane[MAX_PLANES];
465
    BlockNode *block;
466
    slice_buffer sb;
467

    
468
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
469
}SnowContext;
470

    
471
typedef struct {
472
    DWTELEM *b0;
473
    DWTELEM *b1;
474
    DWTELEM *b2;
475
    DWTELEM *b3;
476
    int y;
477
} dwt_compose_t;
478

    
479
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
480
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
481

    
482
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
483
{
484
    int i;
485
  
486
    buf->base_buffer = base_buffer;
487
    buf->line_count = line_count;
488
    buf->line_width = line_width;
489
    buf->data_count = max_allocated_lines;
490
    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
491
    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
492
  
493
    for (i = 0; i < max_allocated_lines; i++)
494
    {
495
      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
496
    }
497
    
498
    buf->data_stack_top = max_allocated_lines - 1;
499
}
500

    
501
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
502
{
503
    int offset;
504
    DWTELEM * buffer;
505
  
506
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);  
507
  
508
    assert(buf->data_stack_top >= 0);
509
//  assert(!buf->line[line]);
510
    if (buf->line[line])
511
        return buf->line[line];
512
    
513
    offset = buf->line_width * line;
514
    buffer = buf->data_stack[buf->data_stack_top];
515
    buf->data_stack_top--;
516
    buf->line[line] = buffer;
517
  
518
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
519
  
520
    return buffer;
521
}
522

    
523
static void slice_buffer_release(slice_buffer * buf, int line)
524
{
525
    int offset;
526
    DWTELEM * buffer;
527

    
528
    assert(line >= 0 && line < buf->line_count);
529
    assert(buf->line[line]);
530

    
531
    offset = buf->line_width * line;
532
    buffer = buf->line[line];
533
    buf->data_stack_top++;
534
    buf->data_stack[buf->data_stack_top] = buffer;
535
    buf->line[line] = NULL;
536
  
537
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
538
}
539

    
540
static void slice_buffer_flush(slice_buffer * buf)
541
{
542
    int i;
543
    for (i = 0; i < buf->line_count; i++)
544
    {
545
        if (buf->line[i])
546
        {
547
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
548
            slice_buffer_release(buf, i);
549
        }
550
    }
551
}
552

    
553
static void slice_buffer_destroy(slice_buffer * buf)
554
{
555
    int i;
556
    slice_buffer_flush(buf);
557
  
558
    for (i = buf->data_count - 1; i >= 0; i--)
559
    {
560
        assert(buf->data_stack[i]);
561
        av_free(buf->data_stack[i]);
562
    }
563
    assert(buf->data_stack);
564
    av_free(buf->data_stack);
565
    assert(buf->line);
566
    av_free(buf->line);
567
}
568

    
569
#ifdef        __sgi
570
// Avoid a name clash on SGI IRIX
571
#undef        qexp
572
#endif
573
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
574
static uint8_t qexp[QROOT];
575

    
576
static inline int mirror(int v, int m){
577
    if     (v<0) return -v;
578
    else if(v>m) return 2*m-v;
579
    else         return v;
580
}
581

    
582
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
583
    int i;
584

    
585
    if(v){
586
        const int a= ABS(v);
587
        const int e= av_log2(a);
588
#if 1
589
        const int el= FFMIN(e, 10);   
590
        put_rac(c, state+0, 0);
591

    
592
        for(i=0; i<el; i++){
593
            put_rac(c, state+1+i, 1);  //1..10
594
        }
595
        for(; i<e; i++){
596
            put_rac(c, state+1+9, 1);  //1..10
597
        }
598
        put_rac(c, state+1+FFMIN(i,9), 0);
599

    
600
        for(i=e-1; i>=el; i--){
601
            put_rac(c, state+22+9, (a>>i)&1); //22..31
602
        }
603
        for(; i>=0; i--){
604
            put_rac(c, state+22+i, (a>>i)&1); //22..31
605
        }
606

    
607
        if(is_signed)
608
            put_rac(c, state+11 + el, v < 0); //11..21
609
#else
610
        
611
        put_rac(c, state+0, 0);
612
        if(e<=9){
613
            for(i=0; i<e; i++){
614
                put_rac(c, state+1+i, 1);  //1..10
615
            }
616
            put_rac(c, state+1+i, 0);
617

    
618
            for(i=e-1; i>=0; i--){
619
                put_rac(c, state+22+i, (a>>i)&1); //22..31
620
            }
621

    
622
            if(is_signed)
623
                put_rac(c, state+11 + e, v < 0); //11..21
624
        }else{
625
            for(i=0; i<e; i++){
626
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
627
            }
628
            put_rac(c, state+1+FFMIN(i,9), 0);
629

    
630
            for(i=e-1; i>=0; i--){
631
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
632
            }
633

    
634
            if(is_signed)
635
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
636
        }
637
#endif
638
    }else{
639
        put_rac(c, state+0, 1);
640
    }
641
}
642

    
643
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
644
    if(get_rac(c, state+0))
645
        return 0;
646
    else{
647
        int i, e, a;
648
        e= 0;
649
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
650
            e++;
651
        }
652

    
653
        a= 1;
654
        for(i=e-1; i>=0; i--){
655
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
656
        }
657

    
658
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
659
            return -a;
660
        else
661
            return a;
662
    }
663
}
664

    
665
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
666
    int i;
667
    int r= log2>=0 ? 1<<log2 : 1;
668

    
669
    assert(v>=0);
670
    assert(log2>=-4);
671

    
672
    while(v >= r){
673
        put_rac(c, state+4+log2, 1);
674
        v -= r;
675
        log2++;
676
        if(log2>0) r+=r;
677
    }
678
    put_rac(c, state+4+log2, 0);
679
    
680
    for(i=log2-1; i>=0; i--){
681
        put_rac(c, state+31-i, (v>>i)&1);
682
    }
683
}
684

    
685
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
686
    int i;
687
    int r= log2>=0 ? 1<<log2 : 1;
688
    int v=0;
689

    
690
    assert(log2>=-4);
691

    
692
    while(get_rac(c, state+4+log2)){
693
        v+= r;
694
        log2++;
695
        if(log2>0) r+=r;
696
    }
697
    
698
    for(i=log2-1; i>=0; i--){
699
        v+= get_rac(c, state+31-i)<<i;
700
    }
701

    
702
    return v;
703
}
704

    
705
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
706
    const int mirror_left= !highpass;
707
    const int mirror_right= (width&1) ^ highpass;
708
    const int w= (width>>1) - 1 + (highpass & width);
709
    int i;
710

    
711
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
712
    if(mirror_left){
713
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
714
        dst += dst_step;
715
        src += src_step;
716
    }
717
    
718
    for(i=0; i<w; i++){
719
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
720
    }
721
    
722
    if(mirror_right){
723
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
724
    }
725
}
726

    
727
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
728
    const int mirror_left= !highpass;
729
    const int mirror_right= (width&1) ^ highpass;
730
    const int w= (width>>1) - 1 + (highpass & width);
731
    int i;
732

    
733
    if(mirror_left){
734
        int r= 3*2*ref[0];
735
        r += r>>4;
736
        r += r>>8;
737
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
738
        dst += dst_step;
739
        src += src_step;
740
    }
741
    
742
    for(i=0; i<w; i++){
743
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
744
        r += r>>4;
745
        r += r>>8;
746
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
747
    }
748
    
749
    if(mirror_right){
750
        int r= 3*2*ref[w*ref_step];
751
        r += r>>4;
752
        r += r>>8;
753
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
754
    }
755
}
756

    
757
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
758
    const int mirror_left= !highpass;
759
    const int mirror_right= (width&1) ^ highpass;
760
    const int w= (width>>1) - 1 + (highpass & width);
761
    int i;
762

    
763
    assert(shift == 4);
764
#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
765
    if(mirror_left){
766
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
767
        dst += dst_step;
768
        src += src_step;
769
    }
770
    
771
    for(i=0; i<w; i++){
772
        dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
773
    }
774
    
775
    if(mirror_right){
776
        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
777
    }
778
}
779

    
780

    
781
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
782
    int x, i;
783
    
784
    for(x=start; x<width; x+=2){
785
        int64_t sum=0;
786

    
787
        for(i=0; i<n; i++){
788
            int x2= x + 2*i - n + 1;
789
            if     (x2<     0) x2= -x2;
790
            else if(x2>=width) x2= 2*width-x2-2;
791
            sum += coeffs[i]*(int64_t)dst[x2];
792
        }
793
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
794
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
795
    }
796
}
797

    
798
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
799
    int x, y, i;
800
    for(y=start; y<height; y+=2){
801
        for(x=0; x<width; x++){
802
            int64_t sum=0;
803
    
804
            for(i=0; i<n; i++){
805
                int y2= y + 2*i - n + 1;
806
                if     (y2<      0) y2= -y2;
807
                else if(y2>=height) y2= 2*height-y2-2;
808
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
809
            }
810
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
811
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
812
        }
813
    }
814
}
815

    
816
#define SCALEX 1
817
#define LX0 0
818
#define LX1 1
819

    
820
#if 0 // more accurate 9/7
821
#define N1 2
822
#define SHIFT1 14
823
#define COEFFS1 (int[]){-25987,-25987}
824
#define N2 2
825
#define SHIFT2 19
826
#define COEFFS2 (int[]){-27777,-27777}
827
#define N3 2
828
#define SHIFT3 15
829
#define COEFFS3 (int[]){28931,28931}
830
#define N4 2
831
#define SHIFT4 15
832
#define COEFFS4 (int[]){14533,14533}
833
#elif 1 // 13/7 CRF
834
#define N1 4
835
#define SHIFT1 4
836
#define COEFFS1 (int[]){1,-9,-9,1}
837
#define N2 4
838
#define SHIFT2 4
839
#define COEFFS2 (int[]){-1,5,5,-1}
840
#define N3 0
841
#define SHIFT3 1
842
#define COEFFS3 NULL
843
#define N4 0
844
#define SHIFT4 1
845
#define COEFFS4 NULL
846
#elif 1 // 3/5
847
#define LX0 1
848
#define LX1 0
849
#define SCALEX 0.5
850
#define N1 2
851
#define SHIFT1 1
852
#define COEFFS1 (int[]){1,1}
853
#define N2 2
854
#define SHIFT2 2
855
#define COEFFS2 (int[]){-1,-1}
856
#define N3 0
857
#define SHIFT3 0
858
#define COEFFS3 NULL
859
#define N4 0
860
#define SHIFT4 0
861
#define COEFFS4 NULL
862
#elif 1 // 11/5 
863
#define N1 0
864
#define SHIFT1 1
865
#define COEFFS1 NULL
866
#define N2 2
867
#define SHIFT2 2
868
#define COEFFS2 (int[]){-1,-1}
869
#define N3 2
870
#define SHIFT3 0
871
#define COEFFS3 (int[]){-1,-1}
872
#define N4 4
873
#define SHIFT4 7
874
#define COEFFS4 (int[]){-5,29,29,-5}
875
#define SCALEX 4
876
#elif 1 // 9/7 CDF
877
#define N1 2
878
#define SHIFT1 7
879
#define COEFFS1 (int[]){-203,-203}
880
#define N2 2
881
#define SHIFT2 12
882
#define COEFFS2 (int[]){-217,-217}
883
#define N3 2
884
#define SHIFT3 7
885
#define COEFFS3 (int[]){113,113}
886
#define N4 2
887
#define SHIFT4 9
888
#define COEFFS4 (int[]){227,227}
889
#define SCALEX 1
890
#elif 1 // 7/5 CDF
891
#define N1 0
892
#define SHIFT1 1
893
#define COEFFS1 NULL
894
#define N2 2
895
#define SHIFT2 2
896
#define COEFFS2 (int[]){-1,-1}
897
#define N3 2
898
#define SHIFT3 0
899
#define COEFFS3 (int[]){-1,-1}
900
#define N4 2
901
#define SHIFT4 4
902
#define COEFFS4 (int[]){3,3}
903
#elif 1 // 9/7 MN
904
#define N1 4
905
#define SHIFT1 4
906
#define COEFFS1 (int[]){1,-9,-9,1}
907
#define N2 2
908
#define SHIFT2 2
909
#define COEFFS2 (int[]){1,1}
910
#define N3 0
911
#define SHIFT3 1
912
#define COEFFS3 NULL
913
#define N4 0
914
#define SHIFT4 1
915
#define COEFFS4 NULL
916
#else // 13/7 CRF
917
#define N1 4
918
#define SHIFT1 4
919
#define COEFFS1 (int[]){1,-9,-9,1}
920
#define N2 4
921
#define SHIFT2 4
922
#define COEFFS2 (int[]){-1,5,5,-1}
923
#define N3 0
924
#define SHIFT3 1
925
#define COEFFS3 NULL
926
#define N4 0
927
#define SHIFT4 1
928
#define COEFFS4 NULL
929
#endif
930
static void horizontal_decomposeX(DWTELEM *b, int width){
931
    DWTELEM temp[width];
932
    const int width2= width>>1;
933
    const int w2= (width+1)>>1;
934
    int x;
935

    
936
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
937
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
938
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
939
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
940
    
941
    for(x=0; x<width2; x++){
942
        temp[x   ]= b[2*x    ];
943
        temp[x+w2]= b[2*x + 1];
944
    }
945
    if(width&1)
946
        temp[x   ]= b[2*x    ];
947
    memcpy(b, temp, width*sizeof(int));
948
}
949

    
950
static void horizontal_composeX(DWTELEM *b, int width){
951
    DWTELEM temp[width];
952
    const int width2= width>>1;
953
    int x;
954
    const int w2= (width+1)>>1;
955

    
956
    memcpy(temp, b, width*sizeof(int));
957
    for(x=0; x<width2; x++){
958
        b[2*x    ]= temp[x   ];
959
        b[2*x + 1]= temp[x+w2];
960
    }
961
    if(width&1)
962
        b[2*x    ]= temp[x   ];
963

    
964
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
965
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
966
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
967
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
968
}
969

    
970
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
971
    int x, y;
972
  
973
    for(y=0; y<height; y++){
974
        for(x=0; x<width; x++){
975
            buffer[y*stride + x] *= SCALEX;
976
        }
977
    }
978

    
979
    for(y=0; y<height; y++){
980
        horizontal_decomposeX(buffer + y*stride, width);
981
    }
982
    
983
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
984
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
985
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
986
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
987
}
988

    
989
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
990
    int x, y;
991
  
992
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
993
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
994
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
995
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
996

    
997
    for(y=0; y<height; y++){
998
        horizontal_composeX(buffer + y*stride, width);
999
    }
1000

    
1001
    for(y=0; y<height; y++){
1002
        for(x=0; x<width; x++){
1003
            buffer[y*stride + x] /= SCALEX;
1004
        }
1005
    }
1006
}
1007

    
1008
static void horizontal_decompose53i(DWTELEM *b, int width){
1009
    DWTELEM temp[width];
1010
    const int width2= width>>1;
1011
    int x;
1012
    const int w2= (width+1)>>1;
1013

    
1014
    for(x=0; x<width2; x++){
1015
        temp[x   ]= b[2*x    ];
1016
        temp[x+w2]= b[2*x + 1];
1017
    }
1018
    if(width&1)
1019
        temp[x   ]= b[2*x    ];
1020
#if 0
1021
    {
1022
    int A1,A2,A3,A4;
1023
    A2= temp[1       ];
1024
    A4= temp[0       ];
1025
    A1= temp[0+width2];
1026
    A1 -= (A2 + A4)>>1;
1027
    A4 += (A1 + 1)>>1;
1028
    b[0+width2] = A1;
1029
    b[0       ] = A4;
1030
    for(x=1; x+1<width2; x+=2){
1031
        A3= temp[x+width2];
1032
        A4= temp[x+1     ];
1033
        A3 -= (A2 + A4)>>1;
1034
        A2 += (A1 + A3 + 2)>>2;
1035
        b[x+width2] = A3;
1036
        b[x       ] = A2;
1037

1038
        A1= temp[x+1+width2];
1039
        A2= temp[x+2       ];
1040
        A1 -= (A2 + A4)>>1;
1041
        A4 += (A1 + A3 + 2)>>2;
1042
        b[x+1+width2] = A1;
1043
        b[x+1       ] = A4;
1044
    }
1045
    A3= temp[width-1];
1046
    A3 -= A2;
1047
    A2 += (A1 + A3 + 2)>>2;
1048
    b[width -1] = A3;
1049
    b[width2-1] = A2;
1050
    }
1051
#else        
1052
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1053
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
1054
#endif
1055
}
1056

    
1057
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1058
    int i;
1059
    
1060
    for(i=0; i<width; i++){
1061
        b1[i] -= (b0[i] + b2[i])>>1;
1062
    }
1063
}
1064

    
1065
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1066
    int i;
1067
    
1068
    for(i=0; i<width; i++){
1069
        b1[i] += (b0[i] + b2[i] + 2)>>2;
1070
    }
1071
}
1072

    
1073
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1074
    int y;
1075
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1076
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
1077
  
1078
    for(y=-2; y<height; y+=2){
1079
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1080
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1081

    
1082
{START_TIMER
1083
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
1084
        if(y+2 < height) horizontal_decompose53i(b3, width);
1085
STOP_TIMER("horizontal_decompose53i")}
1086
        
1087
{START_TIMER
1088
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1089
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1090
STOP_TIMER("vertical_decompose53i*")}
1091
        
1092
        b0=b2;
1093
        b1=b3;
1094
    }
1095
}
1096

    
1097
#define liftS lift
1098
#define lift5 lift
1099
#if 1
1100
#define W_AM 3
1101
#define W_AO 0
1102
#define W_AS 1
1103

    
1104
#undef liftS
1105
#define W_BM 1
1106
#define W_BO 8
1107
#define W_BS 4
1108

    
1109
#define W_CM 1
1110
#define W_CO 0
1111
#define W_CS 0
1112

    
1113
#define W_DM 3
1114
#define W_DO 4
1115
#define W_DS 3
1116
#elif 0
1117
#define W_AM 55
1118
#define W_AO 16
1119
#define W_AS 5
1120

    
1121
#define W_BM 3
1122
#define W_BO 32
1123
#define W_BS 6
1124

    
1125
#define W_CM 127
1126
#define W_CO 64
1127
#define W_CS 7
1128

    
1129
#define W_DM 7
1130
#define W_DO 8
1131
#define W_DS 4
1132
#elif 0
1133
#define W_AM 97
1134
#define W_AO 32
1135
#define W_AS 6
1136

    
1137
#define W_BM 63
1138
#define W_BO 512
1139
#define W_BS 10
1140

    
1141
#define W_CM 13
1142
#define W_CO 8
1143
#define W_CS 4
1144

    
1145
#define W_DM 15
1146
#define W_DO 16
1147
#define W_DS 5
1148

    
1149
#else
1150

    
1151
#define W_AM 203
1152
#define W_AO 64
1153
#define W_AS 7
1154

    
1155
#define W_BM 217
1156
#define W_BO 2048
1157
#define W_BS 12
1158

    
1159
#define W_CM 113
1160
#define W_CO 64
1161
#define W_CS 7
1162

    
1163
#define W_DM 227
1164
#define W_DO 128
1165
#define W_DS 9
1166
#endif
1167
static void horizontal_decompose97i(DWTELEM *b, int width){
1168
    DWTELEM temp[width];
1169
    const int w2= (width+1)>>1;
1170

    
1171
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1172
    liftS(temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1173
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1174
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1175
}
1176

    
1177

    
1178
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1179
    int i;
1180
    
1181
    for(i=0; i<width; i++){
1182
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1183
    }
1184
}
1185

    
1186
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1187
    int i;
1188
    
1189
    for(i=0; i<width; i++){
1190
#ifdef lift5
1191
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1192
#else
1193
        int r= 3*(b0[i] + b2[i]);
1194
        r+= r>>4;
1195
        r+= r>>8;
1196
        b1[i] += (r+W_CO)>>W_CS;
1197
#endif
1198
    }
1199
}
1200

    
1201
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1202
    int i;
1203
    
1204
    for(i=0; i<width; i++){
1205
#ifdef liftS
1206
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1207
#else
1208
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1209
#endif
1210
    }
1211
}
1212

    
1213
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1214
    int i;
1215
    
1216
    for(i=0; i<width; i++){
1217
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1218
    }
1219
}
1220

    
1221
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1222
    int y;
1223
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1224
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1225
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1226
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1227
  
1228
    for(y=-4; y<height; y+=2){
1229
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1230
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1231

    
1232
{START_TIMER
1233
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1234
        if(y+4 < height) horizontal_decompose97i(b5, width);
1235
if(width>400){
1236
STOP_TIMER("horizontal_decompose97i")
1237
}}
1238
        
1239
{START_TIMER
1240
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1241
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1242
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1243
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1244

    
1245
if(width>400){
1246
STOP_TIMER("vertical_decompose97i")
1247
}}
1248
        
1249
        b0=b2;
1250
        b1=b3;
1251
        b2=b4;
1252
        b3=b5;
1253
    }
1254
}
1255

    
1256
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1257
    int level;
1258
    
1259
    for(level=0; level<decomposition_count; level++){
1260
        switch(type){
1261
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1262
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1263
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1264
        }
1265
    }
1266
}
1267

    
1268
static void horizontal_compose53i(DWTELEM *b, int width){
1269
    DWTELEM temp[width];
1270
    const int width2= width>>1;
1271
    const int w2= (width+1)>>1;
1272
    int x;
1273

    
1274
#if 0
1275
    int A1,A2,A3,A4;
1276
    A2= temp[1       ];
1277
    A4= temp[0       ];
1278
    A1= temp[0+width2];
1279
    A1 -= (A2 + A4)>>1;
1280
    A4 += (A1 + 1)>>1;
1281
    b[0+width2] = A1;
1282
    b[0       ] = A4;
1283
    for(x=1; x+1<width2; x+=2){
1284
        A3= temp[x+width2];
1285
        A4= temp[x+1     ];
1286
        A3 -= (A2 + A4)>>1;
1287
        A2 += (A1 + A3 + 2)>>2;
1288
        b[x+width2] = A3;
1289
        b[x       ] = A2;
1290

1291
        A1= temp[x+1+width2];
1292
        A2= temp[x+2       ];
1293
        A1 -= (A2 + A4)>>1;
1294
        A4 += (A1 + A3 + 2)>>2;
1295
        b[x+1+width2] = A1;
1296
        b[x+1       ] = A4;
1297
    }
1298
    A3= temp[width-1];
1299
    A3 -= A2;
1300
    A2 += (A1 + A3 + 2)>>2;
1301
    b[width -1] = A3;
1302
    b[width2-1] = A2;
1303
#else   
1304
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1305
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1306
#endif
1307
    for(x=0; x<width2; x++){
1308
        b[2*x    ]= temp[x   ];
1309
        b[2*x + 1]= temp[x+w2];
1310
    }
1311
    if(width&1)
1312
        b[2*x    ]= temp[x   ];
1313
}
1314

    
1315
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1316
    int i;
1317
    
1318
    for(i=0; i<width; i++){
1319
        b1[i] += (b0[i] + b2[i])>>1;
1320
    }
1321
}
1322

    
1323
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1324
    int i;
1325
    
1326
    for(i=0; i<width; i++){
1327
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1328
    }
1329
}
1330

    
1331
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1332
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1333
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1334
    cs->y = -1;
1335
}
1336

    
1337
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1338
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1339
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1340
    cs->y = -1;
1341
}
1342

    
1343
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1344
    int y= cs->y;
1345
    int mirror0 = mirror(y-1, height-1);
1346
    int mirror1 = mirror(y  , height-1);
1347
    int mirror2 = mirror(y+1, height-1);
1348
    int mirror3 = mirror(y+2, height-1);
1349
    
1350
    DWTELEM *b0= cs->b0;
1351
    DWTELEM *b1= cs->b1;
1352
    DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1353
    DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1354

    
1355
{START_TIMER
1356
        if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1357
        if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1358
STOP_TIMER("vertical_compose53i*")}
1359

    
1360
{START_TIMER
1361
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1362
        if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1363
STOP_TIMER("horizontal_compose53i")}
1364

    
1365
    cs->b0 = b2;
1366
    cs->b1 = b3;
1367
    cs->y += 2;
1368
}
1369

    
1370
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1371
    int y= cs->y;
1372
    DWTELEM *b0= cs->b0;
1373
    DWTELEM *b1= cs->b1;
1374
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1375
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1376

    
1377
{START_TIMER
1378
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1379
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1380
STOP_TIMER("vertical_compose53i*")}
1381

    
1382
{START_TIMER
1383
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1384
        if(b0 <= b2) horizontal_compose53i(b1, width);
1385
STOP_TIMER("horizontal_compose53i")}
1386

    
1387
    cs->b0 = b2;
1388
    cs->b1 = b3;
1389
    cs->y += 2;
1390
}
1391

    
1392
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1393
    dwt_compose_t cs;
1394
    spatial_compose53i_init(&cs, buffer, height, stride);
1395
    while(cs.y <= height)
1396
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1397
}   
1398

    
1399
 
1400
static void horizontal_compose97i(DWTELEM *b, int width){
1401
    DWTELEM temp[width];
1402
    const int w2= (width+1)>>1;
1403

    
1404
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1405
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1406
    liftS(b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1407
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1408
}
1409

    
1410
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1411
    int i;
1412
    
1413
    for(i=0; i<width; i++){
1414
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1415
    }
1416
}
1417

    
1418
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1419
    int i;
1420
    
1421
    for(i=0; i<width; i++){
1422
#ifdef lift5
1423
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1424
#else
1425
        int r= 3*(b0[i] + b2[i]);
1426
        r+= r>>4;
1427
        r+= r>>8;
1428
        b1[i] -= (r+W_CO)>>W_CS;
1429
#endif
1430
    }
1431
}
1432

    
1433
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1434
    int i;
1435
    
1436
    for(i=0; i<width; i++){
1437
#ifdef liftS
1438
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1439
#else
1440
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1441
#endif
1442
    }
1443
}
1444

    
1445
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1446
    int i;
1447
    
1448
    for(i=0; i<width; i++){
1449
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1450
    }
1451
}
1452

    
1453
static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1454
    int i;
1455
    
1456
    for(i=0; i<width; i++){
1457
#ifndef lift5
1458
        int r;
1459
#endif
1460
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1461
#ifdef lift5
1462
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1463
#else
1464
        r= 3*(b2[i] + b4[i]);
1465
        r+= r>>4;
1466
        r+= r>>8;
1467
        b3[i] -= (r+W_CO)>>W_CS;
1468
#endif
1469
#ifdef liftS
1470
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1471
#else
1472
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1473
#endif
1474
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1475
    }
1476
}
1477

    
1478
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1479
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1480
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1481
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1482
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1483
    cs->y = -3;
1484
}
1485

    
1486
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1487
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1488
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1489
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1490
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1491
    cs->y = -3;
1492
}
1493

    
1494
static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1495
    int y = cs->y;
1496
    
1497
    int mirror0 = mirror(y - 1, height - 1);
1498
    int mirror1 = mirror(y + 0, height - 1);
1499
    int mirror2 = mirror(y + 1, height - 1);
1500
    int mirror3 = mirror(y + 2, height - 1);
1501
    int mirror4 = mirror(y + 3, height - 1);
1502
    int mirror5 = mirror(y + 4, height - 1);
1503
    DWTELEM *b0= cs->b0;
1504
    DWTELEM *b1= cs->b1;
1505
    DWTELEM *b2= cs->b2;
1506
    DWTELEM *b3= cs->b3;
1507
    DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1508
    DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1509
        
1510
{START_TIMER
1511
    if(y>0 && y+4<height){
1512
        vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1513
    }else{
1514
        if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1515
        if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1516
        if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1517
        if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1518
    }
1519
if(width>400){
1520
STOP_TIMER("vertical_compose97i")}}
1521

    
1522
{START_TIMER
1523
        if(y-1>=  0) horizontal_compose97i(b0, width);
1524
        if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1525
if(width>400 && mirror0 <= mirror2){
1526
STOP_TIMER("horizontal_compose97i")}}
1527

    
1528
    cs->b0=b2;
1529
    cs->b1=b3;
1530
    cs->b2=b4;
1531
    cs->b3=b5;
1532
    cs->y += 2;
1533
}
1534

    
1535
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1536
    int y = cs->y;
1537
    DWTELEM *b0= cs->b0;
1538
    DWTELEM *b1= cs->b1;
1539
    DWTELEM *b2= cs->b2;
1540
    DWTELEM *b3= cs->b3;
1541
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1542
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1543

    
1544
        if(stride == width && y+4 < height && 0){ 
1545
            int x;
1546
            for(x=0; x<width/2; x++)
1547
                b5[x] += 64*2;
1548
            for(; x<width; x++)
1549
                b5[x] += 169*2;
1550
        }
1551
        
1552
{START_TIMER
1553
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1554
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1555
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1556
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1557
if(width>400){
1558
STOP_TIMER("vertical_compose97i")}}
1559

    
1560
{START_TIMER
1561
        if(y-1>=  0) horizontal_compose97i(b0, width);
1562
        if(b0 <= b2) horizontal_compose97i(b1, width);
1563
if(width>400 && b0 <= b2){
1564
STOP_TIMER("horizontal_compose97i")}}
1565

    
1566
    cs->b0=b2;
1567
    cs->b1=b3;
1568
    cs->b2=b4;
1569
    cs->b3=b5;
1570
    cs->y += 2;
1571
}
1572

    
1573
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1574
    dwt_compose_t cs;
1575
    spatial_compose97i_init(&cs, buffer, height, stride);
1576
    while(cs.y <= height)
1577
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1578
}
1579

    
1580
void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1581
    int level;
1582
    for(level=decomposition_count-1; level>=0; level--){
1583
        switch(type){
1584
        case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1585
        case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1586
        /* not slicified yet */
1587
        case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1588
          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1589
        }
1590
    }
1591
}
1592

    
1593
void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1594
    int level;
1595
    for(level=decomposition_count-1; level>=0; level--){
1596
        switch(type){
1597
        case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1598
        case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1599
        /* not slicified yet */
1600
        case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1601
        }
1602
    }
1603
}
1604

    
1605
void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1606
    const int support = type==1 ? 3 : 5;
1607
    int level;
1608
    if(type==2) return;
1609

    
1610
    for(level=decomposition_count-1; level>=0; level--){
1611
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1612
            switch(type){
1613
            case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1614
                    break;
1615
            case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1616
                    break;
1617
            case 2: break;
1618
            }
1619
        }
1620
    }
1621
}
1622

    
1623
void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1624
    const int support = type==1 ? 3 : 5;
1625
    int level;
1626
    if(type==2) return;
1627

    
1628
    for(level=decomposition_count-1; level>=0; level--){
1629
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1630
            switch(type){
1631
            case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1632
                    break;
1633
            case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1634
                    break;
1635
            case 2: break;
1636
            }
1637
        }
1638
    }
1639
}
1640

    
1641
void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1642
    if(type==2){
1643
        int level;
1644
        for(level=decomposition_count-1; level>=0; level--)
1645
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
1646
    }else{
1647
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1648
        int y;
1649
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1650
        for(y=0; y<height; y+=4)
1651
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1652
    }
1653
}
1654

    
1655
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1656
    const int w= b->width;
1657
    const int h= b->height;
1658
    int x, y;
1659

    
1660
    if(1){
1661
        int run=0;
1662
        int runs[w*h];
1663
        int run_index=0;
1664
        int max_index;
1665
                
1666
        for(y=0; y<h; y++){
1667
            for(x=0; x<w; x++){
1668
                int v, p=0;
1669
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1670
                v= src[x + y*stride];
1671

    
1672
                if(y){
1673
                    t= src[x + (y-1)*stride];
1674
                    if(x){
1675
                        lt= src[x - 1 + (y-1)*stride];
1676
                    }
1677
                    if(x + 1 < w){
1678
                        rt= src[x + 1 + (y-1)*stride];
1679
                    }
1680
                }
1681
                if(x){
1682
                    l= src[x - 1 + y*stride];
1683
                    /*if(x > 1){
1684
                        if(orientation==1) ll= src[y + (x-2)*stride];
1685
                        else               ll= src[x - 2 + y*stride];
1686
                    }*/
1687
                }
1688
                if(parent){
1689
                    int px= x>>1;
1690
                    int py= y>>1;
1691
                    if(px<b->parent->width && py<b->parent->height) 
1692
                        p= parent[px + py*2*stride];
1693
                }
1694
                if(!(/*ll|*/l|lt|t|rt|p)){
1695
                    if(v){
1696
                        runs[run_index++]= run;
1697
                        run=0;
1698
                    }else{
1699
                        run++;
1700
                    }
1701
                }
1702
            }
1703
        }
1704
        max_index= run_index;
1705
        runs[run_index++]= run;
1706
        run_index=0;
1707
        run= runs[run_index++];
1708

    
1709
        put_symbol2(&s->c, b->state[30], max_index, 0);
1710
        if(run_index <= max_index)
1711
            put_symbol2(&s->c, b->state[1], run, 3);
1712
        
1713
        for(y=0; y<h; y++){
1714
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1715
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1716
                return -1;
1717
            }
1718
            for(x=0; x<w; x++){
1719
                int v, p=0;
1720
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1721
                v= src[x + y*stride];
1722

    
1723
                if(y){
1724
                    t= src[x + (y-1)*stride];
1725
                    if(x){
1726
                        lt= src[x - 1 + (y-1)*stride];
1727
                    }
1728
                    if(x + 1 < w){
1729
                        rt= src[x + 1 + (y-1)*stride];
1730
                    }
1731
                }
1732
                if(x){
1733
                    l= src[x - 1 + y*stride];
1734
                    /*if(x > 1){
1735
                        if(orientation==1) ll= src[y + (x-2)*stride];
1736
                        else               ll= src[x - 2 + y*stride];
1737
                    }*/
1738
                }
1739
                if(parent){
1740
                    int px= x>>1;
1741
                    int py= y>>1;
1742
                    if(px<b->parent->width && py<b->parent->height) 
1743
                        p= parent[px + py*2*stride];
1744
                }
1745
                if(/*ll|*/l|lt|t|rt|p){
1746
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1747

    
1748
                    put_rac(&s->c, &b->state[0][context], !!v);
1749
                }else{
1750
                    if(!run){
1751
                        run= runs[run_index++];
1752

    
1753
                        if(run_index <= max_index)
1754
                            put_symbol2(&s->c, b->state[1], run, 3);
1755
                        assert(v);
1756
                    }else{
1757
                        run--;
1758
                        assert(!v);
1759
                    }
1760
                }
1761
                if(v){
1762
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1763
                    int l2= 2*ABS(l) + (l<0);
1764
                    int t2= 2*ABS(t) + (t<0);
1765

    
1766
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1767
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1768
                }
1769
            }
1770
        }
1771
    }
1772
    return 0;
1773
}
1774

    
1775
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1776
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1777
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1778
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1779
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1780
}
1781

    
1782
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1783
    const int w= b->width;
1784
    const int h= b->height;
1785
    int x,y;
1786
    
1787
    if(1){
1788
        int run, runs;
1789
        x_and_coeff *xc= b->x_coeff;
1790
        x_and_coeff *prev_xc= NULL;
1791
        x_and_coeff *prev2_xc= xc;
1792
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1793
        x_and_coeff *prev_parent_xc= parent_xc;
1794

    
1795
        runs= get_symbol2(&s->c, b->state[30], 0);
1796
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1797
        else           run= INT_MAX;
1798

    
1799
        for(y=0; y<h; y++){
1800
            int v=0;
1801
            int lt=0, t=0, rt=0;
1802

    
1803
            if(y && prev_xc->x == 0){
1804
                rt= prev_xc->coeff;
1805
            }
1806
            for(x=0; x<w; x++){
1807
                int p=0;
1808
                const int l= v;
1809
                
1810
                lt= t; t= rt;
1811

    
1812
                if(y){
1813
                    if(prev_xc->x <= x)
1814
                        prev_xc++;
1815
                    if(prev_xc->x == x + 1)
1816
                        rt= prev_xc->coeff;
1817
                    else
1818
                        rt=0;
1819
                }
1820
                if(parent_xc){
1821
                    if(x>>1 > parent_xc->x){
1822
                        parent_xc++;
1823
                    }
1824
                    if(x>>1 == parent_xc->x){
1825
                        p= parent_xc->coeff;
1826
                    }
1827
                }
1828
                if(/*ll|*/l|lt|t|rt|p){
1829
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1830

    
1831
                    v=get_rac(&s->c, &b->state[0][context]);
1832
                    if(v){
1833
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1834
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1835
                        
1836
                        xc->x=x;
1837
                        (xc++)->coeff= v;
1838
                    }
1839
                }else{
1840
                    if(!run){
1841
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1842
                        else           run= INT_MAX;
1843
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1844
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1845
                        
1846
                        xc->x=x;
1847
                        (xc++)->coeff= v;
1848
                    }else{
1849
                        int max_run;
1850
                        run--;
1851
                        v=0;
1852

    
1853
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1854
                        else  max_run= FFMIN(run, w-x-1);
1855
                        if(parent_xc)
1856
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1857
                        x+= max_run;
1858
                        run-= max_run;
1859
                    }
1860
                }
1861
            }
1862
            (xc++)->x= w+1; //end marker
1863
            prev_xc= prev2_xc;
1864
            prev2_xc= xc;
1865
            
1866
            if(parent_xc){
1867
                if(y&1){
1868
                    while(parent_xc->x != parent->width+1)
1869
                        parent_xc++;
1870
                    parent_xc++;
1871
                    prev_parent_xc= parent_xc;
1872
                }else{
1873
                    parent_xc= prev_parent_xc;
1874
                }
1875
            }
1876
        }
1877

    
1878
        (xc++)->x= w+1; //end marker
1879
    }
1880
}
1881

    
1882
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1883
    const int w= b->width;
1884
    int y;
1885
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1886
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1887
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1888
    int new_index = 0;
1889
    
1890
    START_TIMER
1891

    
1892
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1893
        qadd= 0;
1894
        qmul= 1<<QEXPSHIFT;
1895
    }
1896

    
1897
    /* If we are on the second or later slice, restore our index. */
1898
    if (start_y != 0)
1899
        new_index = save_state[0];
1900

    
1901
        
1902
    for(y=start_y; y<h; y++){
1903
        int x = 0;
1904
        int v;
1905
        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1906
        memset(line, 0, b->width*sizeof(DWTELEM));
1907
        v = b->x_coeff[new_index].coeff;
1908
        x = b->x_coeff[new_index++].x;
1909
        while(x < w)
1910
        {
1911
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1912
            register int u= -(v&1);
1913
            line[x] = (t^u) - u;
1914

    
1915
            v = b->x_coeff[new_index].coeff;
1916
            x = b->x_coeff[new_index++].x;
1917
        }
1918
    }
1919
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1920
        STOP_TIMER("decode_subband")
1921
    }
1922
        
1923
    /* Save our variables for the next slice. */
1924
    save_state[0] = new_index;
1925
        
1926
    return;
1927
}
1928

    
1929
static void reset_contexts(SnowContext *s){
1930
    int plane_index, level, orientation;
1931

    
1932
    for(plane_index=0; plane_index<3; plane_index++){
1933
        for(level=0; level<s->spatial_decomposition_count; level++){
1934
            for(orientation=level ? 1:0; orientation<4; orientation++){
1935
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1936
            }
1937
        }
1938
    }
1939
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1940
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1941
}
1942

    
1943
static int alloc_blocks(SnowContext *s){
1944
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1945
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1946
    
1947
    s->b_width = w;
1948
    s->b_height= h;
1949
    
1950
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1951
    return 0;
1952
}
1953

    
1954
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1955
    uint8_t *bytestream= d->bytestream;
1956
    uint8_t *bytestream_start= d->bytestream_start;
1957
    *d= *s;
1958
    d->bytestream= bytestream;
1959
    d->bytestream_start= bytestream_start;
1960
}
1961

    
1962
//near copy & paste from dsputil, FIXME
1963
static int pix_sum(uint8_t * pix, int line_size, int w)
1964
{
1965
    int s, i, j;
1966

    
1967
    s = 0;
1968
    for (i = 0; i < w; i++) {
1969
        for (j = 0; j < w; j++) {
1970
            s += pix[0];
1971
            pix ++;
1972
        }
1973
        pix += line_size - w;
1974
    }
1975
    return s;
1976
}
1977

    
1978
//near copy & paste from dsputil, FIXME
1979
static int pix_norm1(uint8_t * pix, int line_size, int w)
1980
{
1981
    int s, i, j;
1982
    uint32_t *sq = squareTbl + 256;
1983

    
1984
    s = 0;
1985
    for (i = 0; i < w; i++) {
1986
        for (j = 0; j < w; j ++) {
1987
            s += sq[pix[0]];
1988
            pix ++;
1989
        }
1990
        pix += line_size - w;
1991
    }
1992
    return s;
1993
}
1994

    
1995
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1996
    const int w= s->b_width << s->block_max_depth;
1997
    const int rem_depth= s->block_max_depth - level;
1998
    const int index= (x + y*w) << rem_depth;
1999
    const int block_w= 1<<rem_depth;
2000
    BlockNode block;
2001
    int i,j;
2002
    
2003
    block.color[0]= l;
2004
    block.color[1]= cb;
2005
    block.color[2]= cr;
2006
    block.mx= mx;
2007
    block.my= my;
2008
    block.type= type;
2009
    block.level= level;
2010

    
2011
    for(j=0; j<block_w; j++){
2012
        for(i=0; i<block_w; i++){
2013
            s->block[index + i + j*w]= block;
2014
        }
2015
    }
2016
}
2017

    
2018
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2019
    const int offset[3]= {
2020
          y*c->  stride + x,
2021
        ((y*c->uvstride + x)>>1),
2022
        ((y*c->uvstride + x)>>1),
2023
    };
2024
    int i;
2025
    for(i=0; i<3; i++){
2026
        c->src[0][i]= src [i];
2027
        c->ref[0][i]= ref [i] + offset[i];
2028
    }
2029
    assert(!ref_index);
2030
}
2031

    
2032
//FIXME copy&paste
2033
#define P_LEFT P[1]
2034
#define P_TOP P[2]
2035
#define P_TOPRIGHT P[3]
2036
#define P_MEDIAN P[4]
2037
#define P_MV1 P[9]
2038
#define FLAG_QPEL   1 //must be 1
2039

    
2040
static int encode_q_branch(SnowContext *s, int level, int x, int y){
2041
    uint8_t p_buffer[1024];
2042
    uint8_t i_buffer[1024];
2043
    uint8_t p_state[sizeof(s->block_state)];
2044
    uint8_t i_state[sizeof(s->block_state)];
2045
    RangeCoder pc, ic;
2046
    uint8_t *pbbak= s->c.bytestream;
2047
    uint8_t *pbbak_start= s->c.bytestream_start;
2048
    int score, score2, iscore, i_len, p_len, block_s, sum;
2049
    const int w= s->b_width  << s->block_max_depth;
2050
    const int h= s->b_height << s->block_max_depth;
2051
    const int rem_depth= s->block_max_depth - level;
2052
    const int index= (x + y*w) << rem_depth;
2053
    const int block_w= 1<<(LOG2_MB_SIZE - level);
2054
    static BlockNode null_block= { //FIXME add border maybe
2055
        .color= {128,128,128},
2056
        .mx= 0,
2057
        .my= 0,
2058
        .type= 0,
2059
        .level= 0,
2060
    };
2061
    int trx= (x+1)<<rem_depth;
2062
    int try= (y+1)<<rem_depth;
2063
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2064
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2065
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2066
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2067
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2068
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2069
    int pl = left->color[0];
2070
    int pcb= left->color[1];
2071
    int pcr= left->color[2];
2072
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2073
    int pmy= mid_pred(left->my, top->my, tr->my);
2074
    int mx=0, my=0;
2075
    int l,cr,cb, i;
2076
    const int stride= s->current_picture.linesize[0];
2077
    const int uvstride= s->current_picture.linesize[1];
2078
    const int instride= s->input_picture.linesize[0];
2079
    const int uvinstride= s->input_picture.linesize[1];
2080
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
2081
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2082
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2083
    uint8_t current_mb[3][stride*block_w];
2084
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2085
    int P[10][2];
2086
    int16_t last_mv[3][2];
2087
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2088
    const int shift= 1+qpel;
2089
    MotionEstContext *c= &s->m.me;
2090
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2091
    int my_context= av_log2(2*ABS(left->my - top->my));
2092
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2093

    
2094
    assert(sizeof(s->block_state) >= 256);
2095
    if(s->keyframe){
2096
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2097
        return 0;
2098
    }
2099

    
2100
    //FIXME optimize
2101
    for(i=0; i<block_w; i++)
2102
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
2103
    for(i=0; i<block_w>>1; i++)
2104
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2105
    for(i=0; i<block_w>>1; i++)
2106
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2107

    
2108
//    clip predictors / edge ?
2109

    
2110
    P_LEFT[0]= left->mx;
2111
    P_LEFT[1]= left->my;
2112
    P_TOP [0]= top->mx;
2113
    P_TOP [1]= top->my;
2114
    P_TOPRIGHT[0]= tr->mx;
2115
    P_TOPRIGHT[1]= tr->my;
2116
    
2117
    last_mv[0][0]= s->block[index].mx;
2118
    last_mv[0][1]= s->block[index].my;
2119
    last_mv[1][0]= right->mx;
2120
    last_mv[1][1]= right->my;
2121
    last_mv[2][0]= bottom->mx;
2122
    last_mv[2][1]= bottom->my;
2123
    
2124
    s->m.mb_stride=2;
2125
    s->m.mb_x= 
2126
    s->m.mb_y= 0;
2127
    s->m.me.skip= 0;
2128

    
2129
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2130
    
2131
    assert(s->m.me.  stride ==   stride);
2132
    assert(s->m.me.uvstride == uvstride);
2133
    
2134
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2135
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2136
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2137
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2138
    
2139
    c->xmin = - x*block_w - 16+2;
2140
    c->ymin = - y*block_w - 16+2;
2141
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2142
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2143

    
2144
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2145
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
2146
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2147
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2148
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2149
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2150
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2151

    
2152
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2153
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2154

    
2155
    if (!y) {
2156
        c->pred_x= P_LEFT[0];
2157
        c->pred_y= P_LEFT[1];
2158
    } else {
2159
        c->pred_x = P_MEDIAN[0];
2160
        c->pred_y = P_MEDIAN[1];
2161
    }
2162

    
2163
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
2164
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2165

    
2166
    assert(mx >= c->xmin);
2167
    assert(mx <= c->xmax);
2168
    assert(my >= c->ymin);
2169
    assert(my <= c->ymax);
2170
    
2171
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2172
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2173
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2174
                             
2175
  //  subpel search
2176
    pc= s->c;
2177
    pc.bytestream_start=
2178
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2179
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2180

    
2181
    if(level!=s->block_max_depth)
2182
        put_rac(&pc, &p_state[4 + s_context], 1);
2183
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2184
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2185
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2186
    p_len= pc.bytestream - pc.bytestream_start;
2187
    score += (s->lambda2*(p_len*8
2188
              + (pc.outstanding_count - s->c.outstanding_count)*8
2189
              + (-av_log2(pc.range)    + av_log2(s->c.range))
2190
             ))>>FF_LAMBDA_SHIFT;
2191

    
2192
    block_s= block_w*block_w;
2193
    sum = pix_sum(&current_mb[0][0], stride, block_w);
2194
    l= (sum + block_s/2)/block_s;
2195
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2196
    
2197
    block_s= block_w*block_w>>2;
2198
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2199
    cb= (sum + block_s/2)/block_s;
2200
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2201
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2202
    cr= (sum + block_s/2)/block_s;
2203
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2204

    
2205
    ic= s->c;
2206
    ic.bytestream_start=
2207
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2208
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2209
    if(level!=s->block_max_depth)
2210
        put_rac(&ic, &i_state[4 + s_context], 1);
2211
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2212
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2213
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2214
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2215
    i_len= ic.bytestream - ic.bytestream_start;
2216
    iscore += (s->lambda2*(i_len*8
2217
              + (ic.outstanding_count - s->c.outstanding_count)*8
2218
              + (-av_log2(ic.range)    + av_log2(s->c.range))
2219
             ))>>FF_LAMBDA_SHIFT;
2220

    
2221
//    assert(score==256*256*256*64-1);
2222
    assert(iscore < 255*255*256 + s->lambda2*10);
2223
    assert(iscore >= 0);
2224
    assert(l>=0 && l<=255);
2225
    assert(pl>=0 && pl<=255);
2226

    
2227
    if(level==0){
2228
        int varc= iscore >> 8;
2229
        int vard= score >> 8;
2230
        if (vard <= 64 || vard < varc)
2231
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2232
        else
2233
            c->scene_change_score+= s->m.qscale;
2234
    }
2235
        
2236
    if(level!=s->block_max_depth){
2237
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2238
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2239
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2240
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2241
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2242
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2243
    
2244
        if(score2 < score && score2 < iscore)
2245
            return score2;
2246
    }
2247
    
2248
    if(iscore < score){
2249
        memcpy(pbbak, i_buffer, i_len);
2250
        s->c= ic;
2251
        s->c.bytestream_start= pbbak_start;
2252
        s->c.bytestream= pbbak + i_len;
2253
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2254
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2255
        return iscore;
2256
    }else{
2257
        memcpy(pbbak, p_buffer, p_len);
2258
        s->c= pc;
2259
        s->c.bytestream_start= pbbak_start;
2260
        s->c.bytestream= pbbak + p_len;
2261
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2262
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2263
        return score;
2264
    }
2265
}
2266

    
2267
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2268
    const int w= s->b_width << s->block_max_depth;
2269
    const int rem_depth= s->block_max_depth - level;
2270
    const int index= (x + y*w) << rem_depth;
2271
    static BlockNode null_block= { //FIXME add border maybe
2272
        .color= {128,128,128},
2273
        .mx= 0,
2274
        .my= 0,
2275
        .type= 0,
2276
        .level= 0,
2277
    };
2278
    int trx= (x+1)<<rem_depth;
2279
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2280
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2281
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2282
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2283
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2284
    
2285
    if(s->keyframe){
2286
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2287
        return;
2288
    }
2289

    
2290
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2291
        int type;
2292
        int l = left->color[0];
2293
        int cb= left->color[1];
2294
        int cr= left->color[2];
2295
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2296
        int my= mid_pred(left->my, top->my, tr->my);
2297
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2298
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2299
        
2300
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2301

    
2302
        if(type){
2303
            l += get_symbol(&s->c, &s->block_state[32], 1);
2304
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2305
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2306
        }else{
2307
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2308
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2309
        }
2310
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2311
    }else{
2312
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2313
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2314
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2315
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2316
    }
2317
}
2318

    
2319
static void encode_blocks(SnowContext *s){
2320
    int x, y;
2321
    int w= s->b_width;
2322
    int h= s->b_height;
2323

    
2324
    for(y=0; y<h; y++){
2325
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2326
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2327
            return;
2328
        }
2329
        for(x=0; x<w; x++){
2330
            encode_q_branch(s, 0, x, y);
2331
        }
2332
    }
2333
}
2334

    
2335
static void decode_blocks(SnowContext *s){
2336
    int x, y;
2337
    int w= s->b_width;
2338
    int h= s->b_height;
2339

    
2340
    for(y=0; y<h; y++){
2341
        for(x=0; x<w; x++){
2342
            decode_q_branch(s, 0, x, y);
2343
        }
2344
    }
2345
}
2346

    
2347
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2348
    int x, y;
2349
START_TIMER
2350
    for(y=0; y < b_h+5; y++){
2351
        for(x=0; x < b_w; x++){
2352
            int a0= src[x    ];
2353
            int a1= src[x + 1];
2354
            int a2= src[x + 2];
2355
            int a3= src[x + 3];
2356
            int a4= src[x + 4];
2357
            int a5= src[x + 5];
2358
//            int am= 9*(a1+a2) - (a0+a3);
2359
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2360
//            int am= 18*(a2+a3) - 2*(a1+a4);
2361
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2362
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2363

    
2364
//            if(b_w==16) am= 8*(a1+a2);
2365

    
2366
            if(dx<8) am = (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2367
            else     am = (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2368
            
2369
            /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2370
            if(am&(~255)) am= ~(am>>31);
2371
            
2372
            tmp[x] = am;
2373

    
2374
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2375
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2376
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2377
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2378
        }
2379
        tmp += stride;
2380
        src += stride;
2381
    }
2382
    tmp -= (b_h+5)*stride;
2383
    
2384
    for(y=0; y < b_h; y++){
2385
        for(x=0; x < b_w; x++){
2386
            int a0= tmp[x + 0*stride];
2387
            int a1= tmp[x + 1*stride];
2388
            int a2= tmp[x + 2*stride];
2389
            int a3= tmp[x + 3*stride];
2390
            int a4= tmp[x + 4*stride];
2391
            int a5= tmp[x + 5*stride];
2392
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2393
//            int am= 18*(a2+a3) - 2*(a1+a4);
2394
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2395
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2396
            
2397
//            if(b_w==16) am= 8*(a1+a2);
2398

    
2399
            if(dy<8) am =  (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2400
            else     am = (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2401

    
2402
            if(am&(~255)) am= ~(am>>31);
2403
            
2404
            dst[x] = am;
2405
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2406
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2407
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2408
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2409
        }
2410
        dst += stride;
2411
        tmp += stride;
2412
    }
2413
STOP_TIMER("mc_block")
2414
}
2415

    
2416
#define mca(dx,dy,b_w)\
2417
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2418
    uint8_t tmp[stride*(b_w+5)];\
2419
    assert(h==b_w);\
2420
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2421
}
2422

    
2423
mca( 0, 0,16)
2424
mca( 8, 0,16)
2425
mca( 0, 8,16)
2426
mca( 8, 8,16)
2427
mca( 0, 0,8)
2428
mca( 8, 0,8)
2429
mca( 0, 8,8)
2430
mca( 8, 8,8)
2431

    
2432
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2433
    if(block->type){
2434
        int x, y;
2435
        const int color= block->color[plane_index];
2436
        for(y=0; y < b_h; y++){
2437
            for(x=0; x < b_w; x++){
2438
                dst[x + y*stride]= color;
2439
            }
2440
        }
2441
    }else{
2442
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2443
        int mx= block->mx*scale;
2444
        int my= block->my*scale;
2445
        const int dx= mx&15;
2446
        const int dy= my&15;
2447
        sx += (mx>>4) - 2;
2448
        sy += (my>>4) - 2;
2449
        src += sx + sy*stride;
2450
        if(   (unsigned)sx >= w - b_w - 4
2451
           || (unsigned)sy >= h - b_h - 4){
2452
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2453
            src= tmp + MB_SIZE;
2454
        }
2455
        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2456
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2457
        else
2458
            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2459
    }
2460
}
2461

    
2462
static always_inline int same_block(BlockNode *a, BlockNode *b){
2463
    return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2464
}
2465

    
2466
//FIXME name clenup (b_w, block_w, b_width stuff)
2467
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2468
    DWTELEM * dst = NULL;
2469
    const int b_width = s->b_width  << s->block_max_depth;
2470
    const int b_height= s->b_height << s->block_max_depth;
2471
    const int b_stride= b_width;
2472
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2473
    BlockNode *rt= lt+1;
2474
    BlockNode *lb= lt+b_stride;
2475
    BlockNode *rb= lb+1;
2476
    uint8_t *block[4]; 
2477
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2478
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2479
    uint8_t *ptmp;
2480
    int x,y;
2481

    
2482
    if(b_x<0){
2483
        lt= rt;
2484
        lb= rb;
2485
    }else if(b_x + 1 >= b_width){
2486
        rt= lt;
2487
        rb= lb;
2488
    }
2489
    if(b_y<0){
2490
        lt= lb;
2491
        rt= rb;
2492
    }else if(b_y + 1 >= b_height){
2493
        lb= lt;
2494
        rb= rt;
2495
    }
2496
        
2497
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2498
        obmc -= src_x;
2499
        b_w += src_x;
2500
        src_x=0;
2501
    }else if(src_x + b_w > w){
2502
        b_w = w - src_x;
2503
    }
2504
    if(src_y<0){
2505
        obmc -= src_y*obmc_stride;
2506
        b_h += src_y;
2507
        src_y=0;
2508
    }else if(src_y + b_h> h){
2509
        b_h = h - src_y;
2510
    }
2511
    
2512
    if(b_w<=0 || b_h<=0) return;
2513

    
2514
assert(src_stride > 2*MB_SIZE + 5);
2515
//    old_dst += src_x + src_y*dst_stride;
2516
    dst8+= src_x + src_y*src_stride;
2517
//    src += src_x + src_y*src_stride;
2518

    
2519
    ptmp= tmp + 3*tmp_step;
2520
    block[0]= ptmp;
2521
    ptmp+=tmp_step;
2522
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2523

    
2524
    if(same_block(lt, rt)){
2525
        block[1]= block[0];
2526
    }else{
2527
        block[1]= ptmp;
2528
        ptmp+=tmp_step;
2529
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2530
    }
2531
        
2532
    if(same_block(lt, lb)){
2533
        block[2]= block[0];
2534
    }else if(same_block(rt, lb)){
2535
        block[2]= block[1];
2536
    }else{
2537
        block[2]= ptmp;
2538
        ptmp+=tmp_step;
2539
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2540
    }
2541

    
2542
    if(same_block(lt, rb) ){
2543
        block[3]= block[0];
2544
    }else if(same_block(rt, rb)){
2545
        block[3]= block[1];
2546
    }else if(same_block(lb, rb)){
2547
        block[3]= block[2];
2548
    }else{
2549
        block[3]= ptmp;
2550
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2551
    }
2552
#if 0
2553
    for(y=0; y<b_h; y++){
2554
        for(x=0; x<b_w; x++){
2555
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2556
            if(add) dst[x + y*dst_stride] += v;
2557
            else    dst[x + y*dst_stride] -= v;
2558
        }
2559
    }
2560
    for(y=0; y<b_h; y++){
2561
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2562
        for(x=0; x<b_w; x++){
2563
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2564
            if(add) dst[x + y*dst_stride] += v;
2565
            else    dst[x + y*dst_stride] -= v;
2566
        }
2567
    }
2568
    for(y=0; y<b_h; y++){
2569
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2570
        for(x=0; x<b_w; x++){
2571
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2572
            if(add) dst[x + y*dst_stride] += v;
2573
            else    dst[x + y*dst_stride] -= v;
2574
        }
2575
    }
2576
    for(y=0; y<b_h; y++){
2577
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2578
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2579
        for(x=0; x<b_w; x++){
2580
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2581
            if(add) dst[x + y*dst_stride] += v;
2582
            else    dst[x + y*dst_stride] -= v;
2583
        }
2584
    }
2585
#else
2586
{
2587

    
2588
    START_TIMER
2589
    
2590
    for(y=0; y<b_h; y++){
2591
        //FIXME ugly missue of obmc_stride
2592
        uint8_t *obmc1= obmc + y*obmc_stride;
2593
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2594
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2595
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2596
        dst = slice_buffer_get_line(sb, src_y + y);
2597
        for(x=0; x<b_w; x++){
2598
            int v=   obmc1[x] * block[3][x + y*src_stride]
2599
                    +obmc2[x] * block[2][x + y*src_stride]
2600
                    +obmc3[x] * block[1][x + y*src_stride]
2601
                    +obmc4[x] * block[0][x + y*src_stride];
2602

    
2603
            v <<= 8 - LOG2_OBMC_MAX;
2604
            if(FRAC_BITS != 8){
2605
                v += 1<<(7 - FRAC_BITS);
2606
                v >>= 8 - FRAC_BITS;
2607
            }
2608
            if(add){
2609
//                v += old_dst[x + y*dst_stride];
2610
                v += dst[x + src_x];
2611
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2612
                if(v&(~255)) v= ~(v>>31);
2613
                dst8[x + y*src_stride] = v;
2614
            }else{
2615
//                old_dst[x + y*dst_stride] -= v;
2616
                dst[x + src_x] -= v;
2617
            }
2618
        }
2619
    }
2620
        STOP_TIMER("Inner add y block")
2621
}
2622
#endif
2623
}
2624

    
2625
//FIXME name clenup (b_w, block_w, b_width stuff)
2626
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2627
    const int b_width = s->b_width  << s->block_max_depth;
2628
    const int b_height= s->b_height << s->block_max_depth;
2629
    const int b_stride= b_width;
2630
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2631
    BlockNode *rt= lt+1;
2632
    BlockNode *lb= lt+b_stride;
2633
    BlockNode *rb= lb+1;
2634
    uint8_t *block[4]; 
2635
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2636
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2637
    uint8_t *ptmp;
2638
    int x,y;
2639

    
2640
    if(b_x<0){
2641
        lt= rt;
2642
        lb= rb;
2643
    }else if(b_x + 1 >= b_width){
2644
        rt= lt;
2645
        rb= lb;
2646
    }
2647
    if(b_y<0){
2648
        lt= lb;
2649
        rt= rb;
2650
    }else if(b_y + 1 >= b_height){
2651
        lb= lt;
2652
        rb= rt;
2653
    }
2654
        
2655
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2656
        obmc -= src_x;
2657
        b_w += src_x;
2658
        src_x=0;
2659
    }else if(src_x + b_w > w){
2660
        b_w = w - src_x;
2661
    }
2662
    if(src_y<0){
2663
        obmc -= src_y*obmc_stride;
2664
        b_h += src_y;
2665
        src_y=0;
2666
    }else if(src_y + b_h> h){
2667
        b_h = h - src_y;
2668
    }
2669
    
2670
    if(b_w<=0 || b_h<=0) return;
2671

    
2672
assert(src_stride > 2*MB_SIZE + 5);
2673
    dst += src_x + src_y*dst_stride;
2674
    dst8+= src_x + src_y*src_stride;
2675
//    src += src_x + src_y*src_stride;
2676

    
2677
    ptmp= tmp + 3*tmp_step;
2678
    block[0]= ptmp;
2679
    ptmp+=tmp_step;
2680
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2681

    
2682
    if(same_block(lt, rt)){
2683
        block[1]= block[0];
2684
    }else{
2685
        block[1]= ptmp;
2686
        ptmp+=tmp_step;
2687
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2688
    }
2689
        
2690
    if(same_block(lt, lb)){
2691
        block[2]= block[0];
2692
    }else if(same_block(rt, lb)){
2693
        block[2]= block[1];
2694
    }else{
2695
        block[2]= ptmp;
2696
        ptmp+=tmp_step;
2697
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2698
    }
2699

    
2700
    if(same_block(lt, rb) ){
2701
        block[3]= block[0];
2702
    }else if(same_block(rt, rb)){
2703
        block[3]= block[1];
2704
    }else if(same_block(lb, rb)){
2705
        block[3]= block[2];
2706
    }else{
2707
        block[3]= ptmp;
2708
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2709
    }
2710
#if 0
2711
    for(y=0; y<b_h; y++){
2712
        for(x=0; x<b_w; x++){
2713
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2714
            if(add) dst[x + y*dst_stride] += v;
2715
            else    dst[x + y*dst_stride] -= v;
2716
        }
2717
    }
2718
    for(y=0; y<b_h; y++){
2719
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2720
        for(x=0; x<b_w; x++){
2721
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2722
            if(add) dst[x + y*dst_stride] += v;
2723
            else    dst[x + y*dst_stride] -= v;
2724
        }
2725
    }
2726
    for(y=0; y<b_h; y++){
2727
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2728
        for(x=0; x<b_w; x++){
2729
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2730
            if(add) dst[x + y*dst_stride] += v;
2731
            else    dst[x + y*dst_stride] -= v;
2732
        }
2733
    }
2734
    for(y=0; y<b_h; y++){
2735
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2736
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2737
        for(x=0; x<b_w; x++){
2738
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2739
            if(add) dst[x + y*dst_stride] += v;
2740
            else    dst[x + y*dst_stride] -= v;
2741
        }
2742
    }
2743
#else
2744
    for(y=0; y<b_h; y++){
2745
        //FIXME ugly missue of obmc_stride
2746
        uint8_t *obmc1= obmc + y*obmc_stride;
2747
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2748
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2749
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2750
        for(x=0; x<b_w; x++){
2751
            int v=   obmc1[x] * block[3][x + y*src_stride]
2752
                    +obmc2[x] * block[2][x + y*src_stride]
2753
                    +obmc3[x] * block[1][x + y*src_stride]
2754
                    +obmc4[x] * block[0][x + y*src_stride];
2755
            
2756
            v <<= 8 - LOG2_OBMC_MAX;
2757
            if(FRAC_BITS != 8){
2758
                v += 1<<(7 - FRAC_BITS);
2759
                v >>= 8 - FRAC_BITS;
2760
            }
2761
            if(add){
2762
                v += dst[x + y*dst_stride];
2763
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2764
                if(v&(~255)) v= ~(v>>31);
2765
                dst8[x + y*src_stride] = v;
2766
            }else{
2767
                dst[x + y*dst_stride] -= v;
2768
            }
2769
        }
2770
    }
2771
#endif
2772
}
2773

    
2774
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2775
    Plane *p= &s->plane[plane_index];
2776
    const int mb_w= s->b_width  << s->block_max_depth;
2777
    const int mb_h= s->b_height << s->block_max_depth;
2778
    int x, y, mb_x;
2779
    int block_size = MB_SIZE >> s->block_max_depth;
2780
    int block_w    = plane_index ? block_size/2 : block_size;
2781
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2782
    int obmc_stride= plane_index ? block_size : 2*block_size;
2783
    int ref_stride= s->current_picture.linesize[plane_index];
2784
    uint8_t *ref  = s->last_picture.data[plane_index];
2785
    uint8_t *dst8= s->current_picture.data[plane_index];
2786
    int w= p->width;
2787
    int h= p->height;
2788
    START_TIMER
2789
    
2790
    if(s->keyframe || (s->avctx->debug&512)){
2791
        if(mb_y==mb_h)
2792
            return;
2793

    
2794
        if(add){
2795
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2796
            {
2797
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2798
                DWTELEM * line = sb->line[y];
2799
                for(x=0; x<w; x++)
2800
                {
2801
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2802
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2803
                    v >>= FRAC_BITS;
2804
                    if(v&(~255)) v= ~(v>>31);
2805
                    dst8[x + y*ref_stride]= v;
2806
                }
2807
            }
2808
        }else{
2809
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2810
            {
2811
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2812
                DWTELEM * line = sb->line[y];
2813
                for(x=0; x<w; x++)
2814
                {
2815
                    line[x] -= 128 << FRAC_BITS;
2816
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2817
                }
2818
            }
2819
        }
2820

    
2821
        return;
2822
    }
2823
    
2824
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2825
            START_TIMER
2826

    
2827
            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, 
2828
                       block_w*mb_x - block_w/2,
2829
                       block_w*mb_y - block_w/2,
2830
                       block_w, block_w,
2831
                       w, h,
2832
                       w, ref_stride, obmc_stride,
2833
                       mb_x - 1, mb_y - 1,
2834
                       add, plane_index);
2835
            
2836
            STOP_TIMER("add_yblock")
2837
        }
2838
    
2839
    STOP_TIMER("predict_slice")
2840
}
2841

    
2842
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2843
    Plane *p= &s->plane[plane_index];
2844
    const int mb_w= s->b_width  << s->block_max_depth;
2845
    const int mb_h= s->b_height << s->block_max_depth;
2846
    int x, y, mb_x;
2847
    int block_size = MB_SIZE >> s->block_max_depth;
2848
    int block_w    = plane_index ? block_size/2 : block_size;
2849
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2850
    int obmc_stride= plane_index ? block_size : 2*block_size;
2851
    int ref_stride= s->current_picture.linesize[plane_index];
2852
    uint8_t *ref  = s->last_picture.data[plane_index];
2853
    uint8_t *dst8= s->current_picture.data[plane_index];
2854
    int w= p->width;
2855
    int h= p->height;
2856
    START_TIMER
2857
    
2858
    if(s->keyframe || (s->avctx->debug&512)){
2859
        if(mb_y==mb_h)
2860
            return;
2861

    
2862
        if(add){
2863
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2864
                for(x=0; x<w; x++){
2865
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2866
                    v >>= FRAC_BITS;
2867
                    if(v&(~255)) v= ~(v>>31);
2868
                    dst8[x + y*ref_stride]= v;
2869
                }
2870
            }
2871
        }else{
2872
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2873
                for(x=0; x<w; x++){
2874
                    buf[x + y*w]-= 128<<FRAC_BITS;
2875
                }
2876
            }
2877
        }
2878

    
2879
        return;
2880
    }
2881
    
2882
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2883
            START_TIMER
2884

    
2885
            add_yblock(s, buf, dst8, ref, obmc, 
2886
                       block_w*mb_x - block_w/2,
2887
                       block_w*mb_y - block_w/2,
2888
                       block_w, block_w,
2889
                       w, h,
2890
                       w, ref_stride, obmc_stride,
2891
                       mb_x - 1, mb_y - 1,
2892
                       add, plane_index);
2893
            
2894
            STOP_TIMER("add_yblock")
2895
        }
2896
    
2897
    STOP_TIMER("predict_slice")
2898
}
2899

    
2900
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2901
    const int mb_h= s->b_height << s->block_max_depth;
2902
    int mb_y;
2903
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2904
        predict_slice(s, buf, plane_index, add, mb_y);
2905
}
2906

    
2907
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2908
    const int level= b->level;
2909
    const int w= b->width;
2910
    const int h= b->height;
2911
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2912
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2913
    int x,y, thres1, thres2;
2914
//    START_TIMER
2915

    
2916
    if(s->qlog == LOSSLESS_QLOG) return;
2917
 
2918
    bias= bias ? 0 : (3*qmul)>>3;
2919
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2920
    thres2= 2*thres1;
2921
    
2922
    if(!bias){
2923
        for(y=0; y<h; y++){
2924
            for(x=0; x<w; x++){
2925
                int i= src[x + y*stride];
2926
                
2927
                if((unsigned)(i+thres1) > thres2){
2928
                    if(i>=0){
2929
                        i<<= QEXPSHIFT;
2930
                        i/= qmul; //FIXME optimize
2931
                        src[x + y*stride]=  i;
2932
                    }else{
2933
                        i= -i;
2934
                        i<<= QEXPSHIFT;
2935
                        i/= qmul; //FIXME optimize
2936
                        src[x + y*stride]= -i;
2937
                    }
2938
                }else
2939
                    src[x + y*stride]= 0;
2940
            }
2941
        }
2942
    }else{
2943
        for(y=0; y<h; y++){
2944
            for(x=0; x<w; x++){
2945
                int i= src[x + y*stride]; 
2946
                
2947
                if((unsigned)(i+thres1) > thres2){
2948
                    if(i>=0){
2949
                        i<<= QEXPSHIFT;
2950
                        i= (i + bias) / qmul; //FIXME optimize
2951
                        src[x + y*stride]=  i;
2952
                    }else{
2953
                        i= -i;
2954
                        i<<= QEXPSHIFT;
2955
                        i= (i + bias) / qmul; //FIXME optimize
2956
                        src[x + y*stride]= -i;
2957
                    }
2958
                }else
2959
                    src[x + y*stride]= 0;
2960
            }
2961
        }
2962
    }
2963
    if(level+1 == s->spatial_decomposition_count){
2964
//        STOP_TIMER("quantize")
2965
    }
2966
}
2967

    
2968
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
2969
    const int w= b->width;
2970
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2971
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2972
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2973
    int x,y;
2974
    START_TIMER
2975
    
2976
    if(s->qlog == LOSSLESS_QLOG) return;
2977
    
2978
    for(y=start_y; y<end_y; y++){
2979
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2980
        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2981
        for(x=0; x<w; x++){
2982
            int i= line[x];
2983
            if(i<0){
2984
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2985
            }else if(i>0){
2986
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2987
            }
2988
        }
2989
    }
2990
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2991
        STOP_TIMER("dquant")
2992
    }
2993
}
2994

    
2995
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2996
    const int w= b->width;
2997
    const int h= b->height;
2998
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2999
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3000
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3001
    int x,y;
3002
    START_TIMER
3003
    
3004
    if(s->qlog == LOSSLESS_QLOG) return;
3005
    
3006
    for(y=0; y<h; y++){
3007
        for(x=0; x<w; x++){
3008
            int i= src[x + y*stride];
3009
            if(i<0){
3010
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3011
            }else if(i>0){
3012
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3013
            }
3014
        }
3015
    }
3016
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3017
        STOP_TIMER("dquant")
3018
    }
3019
}
3020

    
3021
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3022
    const int w= b->width;
3023
    const int h= b->height;
3024
    int x,y;
3025
    
3026
    for(y=h-1; y>=0; y--){
3027
        for(x=w-1; x>=0; x--){
3028
            int i= x + y*stride;
3029
            
3030
            if(x){
3031
                if(use_median){
3032
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3033
                    else  src[i] -= src[i - 1];
3034
                }else{
3035
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3036
                    else  src[i] -= src[i - 1];
3037
                }
3038
            }else{
3039
                if(y) src[i] -= src[i - stride];
3040
            }
3041
        }
3042
    }
3043
}
3044

    
3045
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3046
    const int w= b->width;
3047
    int x,y;
3048
    
3049
//    START_TIMER
3050
    
3051
    DWTELEM * line;
3052
    DWTELEM * prev;
3053
    
3054
    if (start_y != 0)
3055
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3056
    
3057
    for(y=start_y; y<end_y; y++){
3058
        prev = line;
3059
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3060
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3061
        for(x=0; x<w; x++){
3062
            if(x){
3063
                if(use_median){
3064
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3065
                    else  line[x] += line[x - 1];
3066
                }else{
3067
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3068
                    else  line[x] += line[x - 1];
3069
                }
3070
            }else{
3071
                if(y) line[x] += prev[x];
3072
            }
3073
        }
3074
    }
3075
    
3076
//    STOP_TIMER("correlate")
3077
}
3078

    
3079
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3080
    const int w= b->width;
3081
    const int h= b->height;
3082
    int x,y;
3083
    
3084
    for(y=0; y<h; y++){
3085
        for(x=0; x<w; x++){
3086
            int i= x + y*stride;
3087
            
3088
            if(x){
3089
                if(use_median){
3090
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3091
                    else  src[i] += src[i - 1];
3092
                }else{
3093
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3094
                    else  src[i] += src[i - 1];
3095
                }
3096
            }else{
3097
                if(y) src[i] += src[i - stride];
3098
            }
3099
        }
3100
    }
3101
}
3102

    
3103
static void encode_header(SnowContext *s){
3104
    int plane_index, level, orientation;
3105
    uint8_t kstate[32]; 
3106
    
3107
    memset(kstate, MID_STATE, sizeof(kstate));   
3108

    
3109
    put_rac(&s->c, kstate, s->keyframe);
3110
    if(s->keyframe || s->always_reset)
3111
        reset_contexts(s);
3112
    if(s->keyframe){
3113
        put_symbol(&s->c, s->header_state, s->version, 0);
3114
        put_rac(&s->c, s->header_state, s->always_reset);
3115
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3116
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3117
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3118
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3119
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3120
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3121
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3122
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3123

    
3124
        for(plane_index=0; plane_index<2; plane_index++){
3125
            for(level=0; level<s->spatial_decomposition_count; level++){
3126
                for(orientation=level ? 1:0; orientation<4; orientation++){
3127
                    if(orientation==2) continue;
3128
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3129
                }
3130
            }
3131
        }
3132
    }
3133
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3134
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
3135
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
3136
    put_symbol(&s->c, s->header_state, s->qbias, 1);
3137
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3138
}
3139

    
3140
static int decode_header(SnowContext *s){
3141
    int plane_index, level, orientation;
3142
    uint8_t kstate[32];
3143

    
3144
    memset(kstate, MID_STATE, sizeof(kstate));   
3145

    
3146
    s->keyframe= get_rac(&s->c, kstate);
3147
    if(s->keyframe || s->always_reset)
3148
        reset_contexts(s);
3149
    if(s->keyframe){
3150
        s->version= get_symbol(&s->c, s->header_state, 0);
3151
        if(s->version>0){
3152
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3153
            return -1;
3154
        }
3155
        s->always_reset= get_rac(&s->c, s->header_state);
3156
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3157
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3158
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3159
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3160
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3161
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3162
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3163
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3164

    
3165
        for(plane_index=0; plane_index<3; plane_index++){
3166
            for(level=0; level<s->spatial_decomposition_count; level++){
3167
                for(orientation=level ? 1:0; orientation<4; orientation++){
3168
                    int q;
3169
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3170
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3171
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3172
                    s->plane[plane_index].band[level][orientation].qlog= q;
3173
                }
3174
            }
3175
        }
3176
    }
3177
    
3178
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3179
    if(s->spatial_decomposition_type > 2){
3180
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3181
        return -1;
3182
    }
3183
    
3184
    s->qlog= get_symbol(&s->c, s->header_state, 1);
3185
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3186
    s->qbias= get_symbol(&s->c, s->header_state, 1);
3187
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3188

    
3189
    return 0;
3190
}
3191

    
3192
static void init_qexp(){
3193
    int i;
3194
    double v=128;
3195

    
3196
    for(i=0; i<QROOT; i++){
3197
        qexp[i]= lrintf(v);
3198
        v *= pow(2, 1.0 / QROOT); 
3199
    }
3200
}
3201

    
3202
static int common_init(AVCodecContext *avctx){
3203
    SnowContext *s = avctx->priv_data;
3204
    int width, height;
3205
    int level, orientation, plane_index, dec;
3206

    
3207
    s->avctx= avctx;
3208
        
3209
    dsputil_init(&s->dsp, avctx);
3210

    
3211
#define mcf(dx,dy)\
3212
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3213
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3214
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3215
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3216
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3217
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3218

    
3219
    mcf( 0, 0)
3220
    mcf( 4, 0)
3221
    mcf( 8, 0)
3222
    mcf(12, 0)
3223
    mcf( 0, 4)
3224
    mcf( 4, 4)
3225
    mcf( 8, 4)
3226
    mcf(12, 4)
3227
    mcf( 0, 8)
3228
    mcf( 4, 8)
3229
    mcf( 8, 8)
3230
    mcf(12, 8)
3231
    mcf( 0,12)
3232
    mcf( 4,12)
3233
    mcf( 8,12)
3234
    mcf(12,12)
3235

    
3236
#define mcfh(dx,dy)\
3237
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3238
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3239
        mc_block_hpel ## dx ## dy ## 16;\
3240
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3241
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3242
        mc_block_hpel ## dx ## dy ## 8;
3243

    
3244
    mcfh(0, 0)
3245
    mcfh(8, 0)
3246
    mcfh(0, 8)
3247
    mcfh(8, 8)
3248

    
3249
    if(!qexp[0])
3250
        init_qexp();
3251

    
3252
    dec= s->spatial_decomposition_count= 5;
3253
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3254
    
3255
    s->chroma_h_shift= 1; //FIXME XXX
3256
    s->chroma_v_shift= 1;
3257
    
3258
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3259
    
3260
    width= s->avctx->width;
3261
    height= s->avctx->height;
3262

    
3263
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3264
    
3265
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3266
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3267
    
3268
    for(plane_index=0; plane_index<3; plane_index++){    
3269
        int w= s->avctx->width;
3270
        int h= s->avctx->height;
3271

    
3272
        if(plane_index){
3273
            w>>= s->chroma_h_shift;
3274
            h>>= s->chroma_v_shift;
3275
        }
3276
        s->plane[plane_index].width = w;
3277
        s->plane[plane_index].height= h;
3278
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3279
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3280
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3281
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3282
                
3283
                b->buf= s->spatial_dwt_buffer;
3284
                b->level= level;
3285
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3286
                b->width = (w + !(orientation&1))>>1;
3287
                b->height= (h + !(orientation>1))>>1;
3288
                
3289
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3290
                b->buf_x_offset = 0;
3291
                b->buf_y_offset = 0;
3292
                
3293
                if(orientation&1){
3294
                    b->buf += (w+1)>>1;
3295
                    b->buf_x_offset = (w+1)>>1;
3296
                }
3297
                if(orientation>1){
3298
                    b->buf += b->stride>>1;
3299
                    b->buf_y_offset = b->stride_line >> 1;
3300
                }
3301
                
3302
                if(level)
3303
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3304
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3305
            }
3306
            w= (w+1)>>1;
3307
            h= (h+1)>>1;
3308
        }
3309
    }
3310
    
3311
    reset_contexts(s);
3312
/*    
3313
    width= s->width= avctx->width;
3314
    height= s->height= avctx->height;
3315
    
3316
    assert(width && height);
3317
*/
3318
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3319
    
3320
    return 0;
3321
}
3322

    
3323

    
3324
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3325
    int width = p->width;
3326
    int height= p->height;
3327
    int level, orientation, x, y;
3328

    
3329
    for(level=0; level<s->spatial_decomposition_count; level++){
3330
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3331
            SubBand *b= &p->band[level][orientation];
3332
            DWTELEM *buf= b->buf;
3333
            int64_t error=0;
3334
            
3335
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3336
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
3337
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3338
            for(y=0; y<height; y++){
3339
                for(x=0; x<width; x++){
3340
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
3341
                    error += d*d;
3342
                }
3343
            }
3344

    
3345
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3346
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3347
        }
3348
    }
3349
}
3350

    
3351
static int encode_init(AVCodecContext *avctx)
3352
{
3353
    SnowContext *s = avctx->priv_data;
3354
    int plane_index;
3355

    
3356
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3357
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3358
               "use vstrict=-2 / -strict -2 to use it anyway\n");
3359
        return -1;
3360
    }
3361
 
3362
    common_init(avctx);
3363
    alloc_blocks(s);
3364
 
3365
    s->version=0;
3366
    
3367
    s->m.avctx   = avctx;
3368
    s->m.flags   = avctx->flags;
3369
    s->m.bit_rate= avctx->bit_rate;
3370

    
3371
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3372
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3373
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3374
    h263_encode_init(&s->m); //mv_penalty
3375

    
3376
    if(avctx->flags&CODEC_FLAG_PASS1){
3377
        if(!avctx->stats_out)
3378
            avctx->stats_out = av_mallocz(256);
3379
    }
3380
    if(avctx->flags&CODEC_FLAG_PASS2){
3381
        if(ff_rate_control_init(&s->m) < 0)
3382
            return -1;
3383
    }
3384

    
3385
    for(plane_index=0; plane_index<3; plane_index++){
3386
        calculate_vissual_weight(s, &s->plane[plane_index]);
3387
    }
3388
    
3389
    
3390
    avctx->coded_frame= &s->current_picture;
3391
    switch(avctx->pix_fmt){
3392
//    case PIX_FMT_YUV444P:
3393
//    case PIX_FMT_YUV422P:
3394
    case PIX_FMT_YUV420P:
3395
    case PIX_FMT_GRAY8:
3396
//    case PIX_FMT_YUV411P:
3397
//    case PIX_FMT_YUV410P:
3398
        s->colorspace_type= 0;
3399
        break;
3400
/*    case PIX_FMT_RGBA32:
3401
        s->colorspace= 1;
3402
        break;*/
3403
    default:
3404
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3405
        return -1;
3406
    }
3407
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3408
    s->chroma_h_shift= 1;
3409
    s->chroma_v_shift= 1;
3410
    return 0;
3411
}
3412

    
3413
static int frame_start(SnowContext *s){
3414
   AVFrame tmp;
3415
   int w= s->avctx->width; //FIXME round up to x16 ?
3416
   int h= s->avctx->height;
3417

    
3418
    if(s->current_picture.data[0]){
3419
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
3420
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3421
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3422
    }
3423

    
3424
    tmp= s->last_picture;
3425
    s->last_picture= s->current_picture;
3426
    s->current_picture= tmp;
3427
    
3428
    s->current_picture.reference= 1;
3429
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3430
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3431
        return -1;
3432
    }
3433
    
3434
    return 0;
3435
}
3436

    
3437
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3438
    SnowContext *s = avctx->priv_data;
3439
    RangeCoder * const c= &s->c;
3440
    AVFrame *pict = data;
3441
    const int width= s->avctx->width;
3442
    const int height= s->avctx->height;
3443
    int level, orientation, plane_index;
3444

    
3445
    ff_init_range_encoder(c, buf, buf_size);
3446
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3447
    
3448
    s->input_picture = *pict;
3449

    
3450
    if(avctx->flags&CODEC_FLAG_PASS2){
3451
        s->m.pict_type =
3452
        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3453
        s->keyframe= pict->pict_type==FF_I_TYPE;
3454
        s->m.picture_number= avctx->frame_number;
3455
        pict->quality= ff_rate_estimate_qscale(&s->m);
3456
    }else{
3457
        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3458
        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3459
    }
3460
    
3461
    if(pict->quality){
3462
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3463
        //<64 >60
3464
        s->qlog += 61*QROOT/8;
3465
    }else{
3466
        s->qlog= LOSSLESS_QLOG;
3467
    }
3468

    
3469
    frame_start(s);
3470
    s->current_picture.key_frame= s->keyframe;
3471

    
3472
    s->m.current_picture_ptr= &s->m.current_picture;
3473
    if(pict->pict_type == P_TYPE){
3474
        int block_width = (width +15)>>4;
3475
        int block_height= (height+15)>>4;
3476
        int stride= s->current_picture.linesize[0];
3477
        
3478
        assert(s->current_picture.data[0]);
3479
        assert(s->last_picture.data[0]);
3480
     
3481
        s->m.avctx= s->avctx;
3482
        s->m.current_picture.data[0]= s->current_picture.data[0];
3483
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
3484
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3485
        s->m.   last_picture_ptr= &s->m.   last_picture;
3486
        s->m.linesize=
3487
        s->m.   last_picture.linesize[0]=
3488
        s->m.    new_picture.linesize[0]=
3489
        s->m.current_picture.linesize[0]= stride;
3490
        s->m.uvlinesize= s->current_picture.linesize[1];
3491
        s->m.width = width;
3492
        s->m.height= height;
3493
        s->m.mb_width = block_width;
3494
        s->m.mb_height= block_height;
3495
        s->m.mb_stride=   s->m.mb_width+1;
3496
        s->m.b8_stride= 2*s->m.mb_width+1;
3497
        s->m.f_code=1;
3498
        s->m.pict_type= pict->pict_type;
3499
        s->m.me_method= s->avctx->me_method;
3500
        s->m.me.scene_change_score=0;
3501
        s->m.flags= s->avctx->flags;
3502
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3503
        s->m.out_format= FMT_H263;
3504
        s->m.unrestricted_mv= 1;
3505

    
3506
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3507
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3508
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3509

    
3510
        s->m.dsp= s->dsp; //move
3511
        ff_init_me(&s->m);
3512
    }
3513
    
3514
redo_frame:
3515
        
3516
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3517

    
3518
    encode_header(s);
3519
    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3520
    encode_blocks(s);
3521
    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3522
      
3523
    for(plane_index=0; plane_index<3; plane_index++){
3524
        Plane *p= &s->plane[plane_index];
3525
        int w= p->width;
3526
        int h= p->height;
3527
        int x, y;
3528
//        int bits= put_bits_count(&s->c.pb);
3529

    
3530
        //FIXME optimize
3531
     if(pict->data[plane_index]) //FIXME gray hack
3532
        for(y=0; y<h; y++){
3533
            for(x=0; x<w; x++){
3534
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3535
            }
3536
        }
3537
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3538
        
3539
        if(   plane_index==0 
3540
           && pict->pict_type == P_TYPE 
3541
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3542
            ff_init_range_encoder(c, buf, buf_size);
3543
            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3544
            pict->pict_type= FF_I_TYPE;
3545
            s->keyframe=1;
3546
            reset_contexts(s);
3547
            goto redo_frame;
3548
        }
3549
        
3550
        if(s->qlog == LOSSLESS_QLOG){
3551
            for(y=0; y<h; y++){
3552
                for(x=0; x<w; x++){
3553
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3554
                }
3555
            }
3556
        }
3557
 
3558
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3559

    
3560
        for(level=0; level<s->spatial_decomposition_count; level++){
3561
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3562
                SubBand *b= &p->band[level][orientation];
3563
                
3564
                quantize(s, b, b->buf, b->stride, s->qbias);
3565
                if(orientation==0)
3566
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3567
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3568
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
3569
                if(orientation==0)
3570
                    correlate(s, b, b->buf, b->stride, 1, 0);
3571
            }
3572
        }
3573
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3574

    
3575
        for(level=0; level<s->spatial_decomposition_count; level++){
3576
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3577
                SubBand *b= &p->band[level][orientation];
3578

    
3579
                dequantize(s, b, b->buf, b->stride);
3580
            }
3581
        }
3582

    
3583
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3584
        if(s->qlog == LOSSLESS_QLOG){
3585
            for(y=0; y<h; y++){
3586
                for(x=0; x<w; x++){
3587
                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
3588
                }
3589
            }
3590
        }
3591
{START_TIMER
3592
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3593
STOP_TIMER("pred-conv")}
3594
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3595
            int64_t error= 0;
3596
            
3597
    if(pict->data[plane_index]) //FIXME gray hack
3598
            for(y=0; y<h; y++){
3599
                for(x=0; x<w; x++){
3600
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3601
                    error += d*d;
3602
                }
3603
            }
3604
            s->avctx->error[plane_index] += error;
3605
            s->current_picture.error[plane_index] = error;
3606
        }
3607
    }
3608

    
3609
    if(s->last_picture.data[0])
3610
        avctx->release_buffer(avctx, &s->last_picture);
3611

    
3612
    s->current_picture.coded_picture_number = avctx->frame_number;
3613
    s->current_picture.pict_type = pict->pict_type;
3614
    s->current_picture.quality = pict->quality;
3615
    if(avctx->flags&CODEC_FLAG_PASS1){
3616
        s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
3617
        s->m.current_picture.display_picture_number =
3618
        s->m.current_picture.coded_picture_number = avctx->frame_number;
3619
        s->m.pict_type = pict->pict_type;
3620
        s->m.current_picture.quality = pict->quality;
3621
        ff_write_pass1_stats(&s->m);
3622
    }
3623
    if(avctx->flags&CODEC_FLAG_PASS2){
3624
        s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3625
    }
3626

    
3627
    emms_c();
3628
    
3629
    return ff_rac_terminate(c);
3630
}
3631

    
3632
static void common_end(SnowContext *s){
3633
    int plane_index, level, orientation;
3634

    
3635
    av_freep(&s->spatial_dwt_buffer);
3636

    
3637
    av_freep(&s->m.me.scratchpad);    
3638
    av_freep(&s->m.me.map);
3639
    av_freep(&s->m.me.score_map);
3640
 
3641
    av_freep(&s->block);
3642

    
3643
    for(plane_index=0; plane_index<3; plane_index++){    
3644
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3645
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3646
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3647
                
3648
                av_freep(&b->x_coeff);
3649
            }
3650
        }
3651
    }
3652
}
3653

    
3654
static int encode_end(AVCodecContext *avctx)
3655
{
3656
    SnowContext *s = avctx->priv_data;
3657

    
3658
    common_end(s);
3659
    av_free(avctx->stats_out);
3660

    
3661
    return 0;
3662
}
3663

    
3664
static int decode_init(AVCodecContext *avctx)
3665
{
3666
    SnowContext *s = avctx->priv_data;
3667
    int block_size;
3668
    
3669
    avctx->pix_fmt= PIX_FMT_YUV420P;
3670

    
3671
    common_init(avctx);
3672
    
3673
    block_size = MB_SIZE >> s->block_max_depth;
3674
    slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 2)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
3675
    
3676
    return 0;
3677
}
3678

    
3679
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3680
    SnowContext *s = avctx->priv_data;
3681
    RangeCoder * const c= &s->c;
3682
    int bytes_read;
3683
    AVFrame *picture = data;
3684
    int level, orientation, plane_index;
3685

    
3686
    ff_init_range_decoder(c, buf, buf_size);
3687
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3688

    
3689
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3690
    decode_header(s);
3691
    if(!s->block) alloc_blocks(s);
3692

    
3693
    frame_start(s);
3694
    //keyframe flag dupliaction mess FIXME
3695
    if(avctx->debug&FF_DEBUG_PICT_INFO)
3696
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3697
    
3698
    decode_blocks(s);
3699

    
3700
    for(plane_index=0; plane_index<3; plane_index++){
3701
        Plane *p= &s->plane[plane_index];
3702
        int w= p->width;
3703
        int h= p->height;
3704
        int x, y;
3705
        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
3706
        
3707
if(s->avctx->debug&2048){
3708
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3709
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3710

    
3711
        for(y=0; y<h; y++){
3712
            for(x=0; x<w; x++){
3713
                int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
3714
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3715
            }
3716
        }
3717
}
3718

    
3719
{   START_TIMER
3720
    for(level=0; level<s->spatial_decomposition_count; level++){
3721
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3722
            SubBand *b= &p->band[level][orientation];
3723
            unpack_coeffs(s, b, b->parent, orientation);
3724
        }
3725
    }
3726
    STOP_TIMER("unpack coeffs");
3727
}
3728

    
3729
{START_TIMER
3730
    const int mb_h= s->b_height << s->block_max_depth;
3731
    const int block_size = MB_SIZE >> s->block_max_depth;
3732
    const int block_w    = plane_index ? block_size/2 : block_size;
3733
    int mb_y;
3734
    dwt_compose_t cs[MAX_DECOMPOSITIONS];
3735
    int yd=0, yq=0;
3736
    int y;
3737
    int end_y;
3738

    
3739
    ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
3740
    for(mb_y=0; mb_y<=mb_h; mb_y++){
3741
        
3742
        int slice_starty = block_w*mb_y;
3743
        int slice_h = block_w*(mb_y+1);
3744
        if (!(s->keyframe || s->avctx->debug&512)){
3745
            slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
3746
            slice_h -= (block_w >> 1);
3747
        }
3748

    
3749
        {        
3750
        START_TIMER
3751
        for(level=0; level<s->spatial_decomposition_count; level++){
3752
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3753
                SubBand *b= &p->band[level][orientation];
3754
                int start_y;
3755
                int end_y;
3756
                int our_mb_start = mb_y;
3757
                int our_mb_end = (mb_y + 1);
3758
                start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2: 0);
3759
                end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2);
3760
                if (!(s->keyframe || s->avctx->debug&512)){
3761
                    start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
3762
                    end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
3763
                }
3764
                start_y = FFMIN(b->height, start_y);
3765
                end_y = FFMIN(b->height, end_y);
3766
                
3767
                if (start_y != end_y){
3768
                    if (orientation == 0){
3769
                        SubBand * correlate_band = &p->band[0][0];
3770
                        int correlate_end_y = FFMIN(b->height, end_y + 1);
3771
                        int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
3772
                        decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
3773
                        correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
3774
                        dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
3775
                    }
3776
                    else
3777
                        decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
3778
                }
3779
            }
3780
        }
3781
        STOP_TIMER("decode_subband_slice");
3782
        }
3783
        
3784
{   START_TIMER
3785
        for(; yd<slice_h; yd+=4){
3786
            ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
3787
        }
3788
    STOP_TIMER("idwt slice");}
3789

    
3790
        
3791
        if(s->qlog == LOSSLESS_QLOG){
3792
            for(; yq<slice_h && yq<h; yq++){
3793
                DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
3794
                for(x=0; x<w; x++){
3795
                    line[x] <<= FRAC_BITS;
3796
                }
3797
            }
3798
        }
3799

    
3800
        predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
3801
        
3802
        y = FFMIN(p->height, slice_starty);
3803
        end_y = FFMIN(p->height, slice_h);
3804
        while(y < end_y)
3805
            slice_buffer_release(&s->sb, y++);
3806
    }
3807
    
3808
    slice_buffer_flush(&s->sb);
3809
    
3810
STOP_TIMER("idwt + predict_slices")}
3811
    }
3812
            
3813
    emms_c();
3814

    
3815
    if(s->last_picture.data[0])
3816
        avctx->release_buffer(avctx, &s->last_picture);
3817

    
3818
if(!(s->avctx->debug&2048))        
3819
    *picture= s->current_picture;
3820
else
3821
    *picture= s->mconly_picture;
3822
    
3823
    *data_size = sizeof(AVFrame);
3824
    
3825
    bytes_read= c->bytestream - c->bytestream_start;
3826
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
3827

    
3828
    return bytes_read;
3829
}
3830

    
3831
static int decode_end(AVCodecContext *avctx)
3832
{
3833
    SnowContext *s = avctx->priv_data;
3834

    
3835
    slice_buffer_destroy(&s->sb);
3836
    
3837
    common_end(s);
3838

    
3839
    return 0;
3840
}
3841

    
3842
AVCodec snow_decoder = {
3843
    "snow",
3844
    CODEC_TYPE_VIDEO,
3845
    CODEC_ID_SNOW,
3846
    sizeof(SnowContext),
3847
    decode_init,
3848
    NULL,
3849
    decode_end,
3850
    decode_frame,
3851
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3852
    NULL
3853
};
3854

    
3855
#ifdef CONFIG_ENCODERS
3856
AVCodec snow_encoder = {
3857
    "snow",
3858
    CODEC_TYPE_VIDEO,
3859
    CODEC_ID_SNOW,
3860
    sizeof(SnowContext),
3861
    encode_init,
3862
    encode_frame,
3863
    encode_end,
3864
};
3865
#endif
3866

    
3867

    
3868
#if 0
3869
#undef malloc
3870
#undef free
3871
#undef printf
3872

3873
int main(){
3874
    int width=256;
3875
    int height=256;
3876
    int buffer[2][width*height];
3877
    SnowContext s;
3878
    int i;
3879
    s.spatial_decomposition_count=6;
3880
    s.spatial_decomposition_type=1;
3881
    
3882
    printf("testing 5/3 DWT\n");
3883
    for(i=0; i<width*height; i++)
3884
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3885
    
3886
    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3887
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3888
    
3889
    for(i=0; i<width*height; i++)
3890
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3891

3892
    printf("testing 9/7 DWT\n");
3893
    s.spatial_decomposition_type=0;
3894
    for(i=0; i<width*height; i++)
3895
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3896
    
3897
    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3898
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3899
    
3900
    for(i=0; i<width*height; i++)
3901
        if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3902
        
3903
#if 0
3904
    printf("testing AC coder\n");
3905
    memset(s.header_state, 0, sizeof(s.header_state));
3906
    ff_init_range_encoder(&s.c, buffer[0], 256*256);
3907
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3908
        
3909
    for(i=-256; i<256; i++){
3910
START_TIMER
3911
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3912
STOP_TIMER("put_symbol")
3913
    }
3914
    ff_rac_terminate(&s.c);
3915

3916
    memset(s.header_state, 0, sizeof(s.header_state));
3917
    ff_init_range_decoder(&s.c, buffer[0], 256*256);
3918
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3919
    
3920
    for(i=-256; i<256; i++){
3921
        int j;
3922
START_TIMER
3923
        j= get_symbol(&s.c, s.header_state, 1);
3924
STOP_TIMER("get_symbol")
3925
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3926
    }
3927
#endif
3928
{
3929
int level, orientation, x, y;
3930
int64_t errors[8][4];
3931
int64_t g=0;
3932

    
3933
    memset(errors, 0, sizeof(errors));
3934
    s.spatial_decomposition_count=3;
3935
    s.spatial_decomposition_type=0;
3936
    for(level=0; level<s.spatial_decomposition_count; level++){
3937
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3938
            int w= width  >> (s.spatial_decomposition_count-level);
3939
            int h= height >> (s.spatial_decomposition_count-level);
3940
            int stride= width  << (s.spatial_decomposition_count-level);
3941
            DWTELEM *buf= buffer[0];
3942
            int64_t error=0;
3943

    
3944
            if(orientation&1) buf+=w;
3945
            if(orientation>1) buf+=stride>>1;
3946
            
3947
            memset(buffer[0], 0, sizeof(int)*width*height);
3948
            buf[w/2 + h/2*stride]= 256*256;
3949
            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3950
            for(y=0; y<height; y++){
3951
                for(x=0; x<width; x++){
3952
                    int64_t d= buffer[0][x + y*width];
3953
                    error += d*d;
3954
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3955
                }
3956
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3957
            }
3958
            error= (int)(sqrt(error)+0.5);
3959
            errors[level][orientation]= error;
3960
            if(g) g=ff_gcd(g, error);
3961
            else g= error;
3962
        }
3963
    }
3964
    printf("static int const visual_weight[][4]={\n");
3965
    for(level=0; level<s.spatial_decomposition_count; level++){
3966
        printf("  {");
3967
        for(orientation=0; orientation<4; orientation++){
3968
            printf("%8lld,", errors[level][orientation]/g);
3969
        }
3970
        printf("},\n");
3971
    }
3972
    printf("};\n");
3973
    {
3974
            int level=2;
3975
            int orientation=3;
3976
            int w= width  >> (s.spatial_decomposition_count-level);
3977
            int h= height >> (s.spatial_decomposition_count-level);
3978
            int stride= width  << (s.spatial_decomposition_count-level);
3979
            DWTELEM *buf= buffer[0];
3980
            int64_t error=0;
3981

    
3982
            buf+=w;
3983
            buf+=stride>>1;
3984
            
3985
            memset(buffer[0], 0, sizeof(int)*width*height);
3986
#if 1
3987
            for(y=0; y<height; y++){
3988
                for(x=0; x<width; x++){
3989
                    int tab[4]={0,2,3,1};
3990
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3991
                }
3992
            }
3993
            ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3994
#else
3995
            for(y=0; y<h; y++){
3996
                for(x=0; x<w; x++){
3997
                    buf[x + y*stride  ]=169;
3998
                    buf[x + y*stride-w]=64;
3999
                }
4000
            }
4001
            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4002
#endif
4003
            for(y=0; y<height; y++){
4004
                for(x=0; x<width; x++){
4005
                    int64_t d= buffer[0][x + y*width];
4006
                    error += d*d;
4007
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4008
                }
4009
                if(ABS(height/2-y)<9) printf("\n");
4010
            }
4011
    }
4012

    
4013
}
4014
    return 0;
4015
}
4016
#endif
4017