Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ cc884a35

History | View | Annotate | Download (135 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22

    
23
#include "rangecoder.h"
24
#define MID_STATE 128
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QSHIFT 5
35
#define QROOT (1<<QSHIFT)
36
#define LOSSLESS_QLOG -128
37
#define FRAC_BITS 8
38

    
39
static const int8_t quant3[256]={
40
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56
};
57
static const int8_t quant3b[256]={
58
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74
};
75
static const int8_t quant3bA[256]={
76
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92
};
93
static const int8_t quant5[256]={
94
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110
};
111
static const int8_t quant7[256]={
112
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128
};
129
static const int8_t quant9[256]={
130
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146
};
147
static const int8_t quant11[256]={
148
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164
};
165
static const int8_t quant13[256]={
166
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182
};
183

    
184
#define LOG2_OBMC_MAX 6
185
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
186
#if 0 //64*cubic
187
static const uint8_t obmc32[1024]={
188
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220
//error:0.000022
221
};
222
static const uint8_t obmc16[256]={
223
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239
//error:0.000033
240
};
241
#elif 1 // 64*linear
242
static const uint8_t obmc32[1024]={
243
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275
 //error:0.000020
276
};
277
static const uint8_t obmc16[256]={
278
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294
//error:0.000015
295
};
296
#else //64*cos
297
static const uint8_t obmc32[1024]={
298
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330
//error:0.000022
331
};
332
static const uint8_t obmc16[256]={
333
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349
//error:0.000022
350
};
351
#endif
352

    
353
//linear *64
354
static const uint8_t obmc8[64]={
355
 1, 3, 5, 7, 7, 5, 3, 1,
356
 3, 9,15,21,21,15, 9, 3,
357
 5,15,25,35,35,25,15, 5,
358
 7,21,35,49,49,35,21, 7,
359
 7,21,35,49,49,35,21, 7,
360
 5,15,25,35,35,25,15, 5,
361
 3, 9,15,21,21,15, 9, 3,
362
 1, 3, 5, 7, 7, 5, 3, 1,
363
//error:0.000000
364
};
365

    
366
//linear *64
367
static const uint8_t obmc4[16]={
368
 4,12,12, 4,
369
12,36,36,12,
370
12,36,36,12,
371
 4,12,12, 4,
372
//error:0.000000
373
};
374

    
375
static const uint8_t *obmc_tab[4]={
376
    obmc32, obmc16, obmc8, obmc4
377
};
378

    
379
typedef struct BlockNode{
380
    int16_t mx;
381
    int16_t my;
382
    uint8_t color[3];
383
    uint8_t type;
384
//#define TYPE_SPLIT    1
385
#define BLOCK_INTRA   1
386
//#define TYPE_NOCOLOR  4
387
    uint8_t level; //FIXME merge into type?
388
}BlockNode;
389

    
390
#define LOG2_MB_SIZE 4
391
#define MB_SIZE (1<<LOG2_MB_SIZE)
392

    
393
typedef struct x_and_coeff{
394
    int16_t x;
395
    uint16_t coeff;
396
} x_and_coeff;
397

    
398
typedef struct SubBand{
399
    int level;
400
    int stride;
401
    int width;
402
    int height;
403
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
404
    DWTELEM *buf;
405
    int buf_x_offset;
406
    int buf_y_offset;
407
    int stride_line; ///< Stride measured in lines, not pixels.
408
    x_and_coeff * x_coeff;
409
    struct SubBand *parent;
410
    uint8_t state[/*7*2*/ 7 + 512][32];
411
}SubBand;
412

    
413
typedef struct Plane{
414
    int width;
415
    int height;
416
    SubBand band[MAX_DECOMPOSITIONS][4];
417
}Plane;
418

    
419
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
420
typedef struct {
421
    DWTELEM * * line; ///< For use by idwt and predict_slices.
422
    DWTELEM * * data_stack; ///< Used for internal purposes.
423
    int data_stack_top;
424
    int line_count;
425
    int line_width;
426
    int data_count;
427
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
428
} slice_buffer;
429

    
430
typedef struct SnowContext{
431
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
432

    
433
    AVCodecContext *avctx;
434
    RangeCoder c;
435
    DSPContext dsp;
436
    AVFrame input_picture;
437
    AVFrame current_picture;
438
    AVFrame last_picture;
439
    AVFrame mconly_picture;
440
//     uint8_t q_context[16];
441
    uint8_t header_state[32];
442
    uint8_t block_state[128 + 32*128];
443
    int keyframe;
444
    int always_reset;
445
    int version;
446
    int spatial_decomposition_type;
447
    int temporal_decomposition_type;
448
    int spatial_decomposition_count;
449
    int temporal_decomposition_count;
450
    DWTELEM *spatial_dwt_buffer;
451
    int colorspace_type;
452
    int chroma_h_shift;
453
    int chroma_v_shift;
454
    int spatial_scalability;
455
    int qlog;
456
    int lambda;
457
    int lambda2;
458
    int mv_scale;
459
    int qbias;
460
#define QBIAS_SHIFT 3
461
    int b_width;
462
    int b_height;
463
    int block_max_depth;
464
    Plane plane[MAX_PLANES];
465
    BlockNode *block;
466
    slice_buffer sb;
467

    
468
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
469
}SnowContext;
470

    
471
typedef struct {
472
    DWTELEM *b0;
473
    DWTELEM *b1;
474
    DWTELEM *b2;
475
    DWTELEM *b3;
476
    int y;
477
} dwt_compose_t;
478

    
479
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
480
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
481

    
482
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
483
{
484
    int i;
485
  
486
    buf->base_buffer = base_buffer;
487
    buf->line_count = line_count;
488
    buf->line_width = line_width;
489
    buf->data_count = max_allocated_lines;
490
    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
491
    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
492
  
493
    for (i = 0; i < max_allocated_lines; i++)
494
    {
495
      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
496
    }
497
    
498
    buf->data_stack_top = max_allocated_lines - 1;
499
}
500

    
501
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
502
{
503
    int i;
504
    int offset;
505
    DWTELEM * buffer;
506
  
507
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);  
508
  
509
    assert(buf->data_stack_top >= 0);
510
//  assert(!buf->line[line]);
511
    if (buf->line[line])
512
        return buf->line[line];
513
    
514
    offset = buf->line_width * line;
515
    buffer = buf->data_stack[buf->data_stack_top];
516
    buf->data_stack_top--;
517
    buf->line[line] = buffer;
518
  
519
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
520
  
521
    return buffer;
522
}
523

    
524
static void slice_buffer_release(slice_buffer * buf, int line)
525
{
526
    int i;
527
    int offset;
528
    DWTELEM * buffer;
529

    
530
    assert(line >= 0 && line < buf->line_count);
531
    assert(buf->line[line]);
532

    
533
    offset = buf->line_width * line;
534
    buffer = buf->line[line];
535
    buf->data_stack_top++;
536
    buf->data_stack[buf->data_stack_top] = buffer;
537
    buf->line[line] = NULL;
538
  
539
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
540
}
541

    
542
static void slice_buffer_flush(slice_buffer * buf)
543
{
544
    int i;
545
    for (i = 0; i < buf->line_count; i++)
546
    {
547
        if (buf->line[i])
548
        {
549
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
550
            slice_buffer_release(buf, i);
551
        }
552
    }
553
}
554

    
555
static void slice_buffer_destroy(slice_buffer * buf)
556
{
557
    int i;
558
    slice_buffer_flush(buf);
559
  
560
    for (i = buf->data_count - 1; i >= 0; i--)
561
    {
562
        assert(buf->data_stack[i]);
563
        av_free(buf->data_stack[i]);
564
    }
565
    assert(buf->data_stack);
566
    av_free(buf->data_stack);
567
    assert(buf->line);
568
    av_free(buf->line);
569
}
570

    
571
#ifdef        __sgi
572
// Avoid a name clash on SGI IRIX
573
#undef        qexp
574
#endif
575
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
576
static uint8_t qexp[QROOT];
577

    
578
static inline int mirror(int v, int m){
579
    if     (v<0) return -v;
580
    else if(v>m) return 2*m-v;
581
    else         return v;
582
}
583

    
584
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
585
    int i;
586

    
587
    if(v){
588
        const int a= ABS(v);
589
        const int e= av_log2(a);
590
#if 1
591
        const int el= FFMIN(e, 10);   
592
        put_rac(c, state+0, 0);
593

    
594
        for(i=0; i<el; i++){
595
            put_rac(c, state+1+i, 1);  //1..10
596
        }
597
        for(; i<e; i++){
598
            put_rac(c, state+1+9, 1);  //1..10
599
        }
600
        put_rac(c, state+1+FFMIN(i,9), 0);
601

    
602
        for(i=e-1; i>=el; i--){
603
            put_rac(c, state+22+9, (a>>i)&1); //22..31
604
        }
605
        for(; i>=0; i--){
606
            put_rac(c, state+22+i, (a>>i)&1); //22..31
607
        }
608

    
609
        if(is_signed)
610
            put_rac(c, state+11 + el, v < 0); //11..21
611
#else
612
        
613
        put_rac(c, state+0, 0);
614
        if(e<=9){
615
            for(i=0; i<e; i++){
616
                put_rac(c, state+1+i, 1);  //1..10
617
            }
618
            put_rac(c, state+1+i, 0);
619

    
620
            for(i=e-1; i>=0; i--){
621
                put_rac(c, state+22+i, (a>>i)&1); //22..31
622
            }
623

    
624
            if(is_signed)
625
                put_rac(c, state+11 + e, v < 0); //11..21
626
        }else{
627
            for(i=0; i<e; i++){
628
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
629
            }
630
            put_rac(c, state+1+FFMIN(i,9), 0);
631

    
632
            for(i=e-1; i>=0; i--){
633
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
634
            }
635

    
636
            if(is_signed)
637
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
638
        }
639
#endif
640
    }else{
641
        put_rac(c, state+0, 1);
642
    }
643
}
644

    
645
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
646
    if(get_rac(c, state+0))
647
        return 0;
648
    else{
649
        int i, e, a;
650
        e= 0;
651
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
652
            e++;
653
        }
654

    
655
        a= 1;
656
        for(i=e-1; i>=0; i--){
657
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
658
        }
659

    
660
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
661
            return -a;
662
        else
663
            return a;
664
    }
665
}
666

    
667
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
668
    int i;
669
    int r= log2>=0 ? 1<<log2 : 1;
670

    
671
    assert(v>=0);
672
    assert(log2>=-4);
673

    
674
    while(v >= r){
675
        put_rac(c, state+4+log2, 1);
676
        v -= r;
677
        log2++;
678
        if(log2>0) r+=r;
679
    }
680
    put_rac(c, state+4+log2, 0);
681
    
682
    for(i=log2-1; i>=0; i--){
683
        put_rac(c, state+31-i, (v>>i)&1);
684
    }
685
}
686

    
687
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
688
    int i;
689
    int r= log2>=0 ? 1<<log2 : 1;
690
    int v=0;
691

    
692
    assert(log2>=-4);
693

    
694
    while(get_rac(c, state+4+log2)){
695
        v+= r;
696
        log2++;
697
        if(log2>0) r+=r;
698
    }
699
    
700
    for(i=log2-1; i>=0; i--){
701
        v+= get_rac(c, state+31-i)<<i;
702
    }
703

    
704
    return v;
705
}
706

    
707
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
708
    const int mirror_left= !highpass;
709
    const int mirror_right= (width&1) ^ highpass;
710
    const int w= (width>>1) - 1 + (highpass & width);
711
    int i;
712

    
713
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
714
    if(mirror_left){
715
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
716
        dst += dst_step;
717
        src += src_step;
718
    }
719
    
720
    for(i=0; i<w; i++){
721
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
722
    }
723
    
724
    if(mirror_right){
725
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
726
    }
727
}
728

    
729
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
730
    const int mirror_left= !highpass;
731
    const int mirror_right= (width&1) ^ highpass;
732
    const int w= (width>>1) - 1 + (highpass & width);
733
    int i;
734

    
735
    if(mirror_left){
736
        int r= 3*2*ref[0];
737
        r += r>>4;
738
        r += r>>8;
739
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
740
        dst += dst_step;
741
        src += src_step;
742
    }
743
    
744
    for(i=0; i<w; i++){
745
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
746
        r += r>>4;
747
        r += r>>8;
748
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
749
    }
750
    
751
    if(mirror_right){
752
        int r= 3*2*ref[w*ref_step];
753
        r += r>>4;
754
        r += r>>8;
755
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
756
    }
757
}
758

    
759
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
760
    const int mirror_left= !highpass;
761
    const int mirror_right= (width&1) ^ highpass;
762
    const int w= (width>>1) - 1 + (highpass & width);
763
    int i;
764

    
765
    assert(shift == 4);
766
#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
767
    if(mirror_left){
768
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
769
        dst += dst_step;
770
        src += src_step;
771
    }
772
    
773
    for(i=0; i<w; i++){
774
        dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
775
    }
776
    
777
    if(mirror_right){
778
        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
779
    }
780
}
781

    
782

    
783
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
784
    int x, i;
785
    
786
    for(x=start; x<width; x+=2){
787
        int64_t sum=0;
788

    
789
        for(i=0; i<n; i++){
790
            int x2= x + 2*i - n + 1;
791
            if     (x2<     0) x2= -x2;
792
            else if(x2>=width) x2= 2*width-x2-2;
793
            sum += coeffs[i]*(int64_t)dst[x2];
794
        }
795
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
796
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
797
    }
798
}
799

    
800
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
801
    int x, y, i;
802
    for(y=start; y<height; y+=2){
803
        for(x=0; x<width; x++){
804
            int64_t sum=0;
805
    
806
            for(i=0; i<n; i++){
807
                int y2= y + 2*i - n + 1;
808
                if     (y2<      0) y2= -y2;
809
                else if(y2>=height) y2= 2*height-y2-2;
810
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
811
            }
812
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
813
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
814
        }
815
    }
816
}
817

    
818
#define SCALEX 1
819
#define LX0 0
820
#define LX1 1
821

    
822
#if 0 // more accurate 9/7
823
#define N1 2
824
#define SHIFT1 14
825
#define COEFFS1 (int[]){-25987,-25987}
826
#define N2 2
827
#define SHIFT2 19
828
#define COEFFS2 (int[]){-27777,-27777}
829
#define N3 2
830
#define SHIFT3 15
831
#define COEFFS3 (int[]){28931,28931}
832
#define N4 2
833
#define SHIFT4 15
834
#define COEFFS4 (int[]){14533,14533}
835
#elif 1 // 13/7 CRF
836
#define N1 4
837
#define SHIFT1 4
838
#define COEFFS1 (int[]){1,-9,-9,1}
839
#define N2 4
840
#define SHIFT2 4
841
#define COEFFS2 (int[]){-1,5,5,-1}
842
#define N3 0
843
#define SHIFT3 1
844
#define COEFFS3 NULL
845
#define N4 0
846
#define SHIFT4 1
847
#define COEFFS4 NULL
848
#elif 1 // 3/5
849
#define LX0 1
850
#define LX1 0
851
#define SCALEX 0.5
852
#define N1 2
853
#define SHIFT1 1
854
#define COEFFS1 (int[]){1,1}
855
#define N2 2
856
#define SHIFT2 2
857
#define COEFFS2 (int[]){-1,-1}
858
#define N3 0
859
#define SHIFT3 0
860
#define COEFFS3 NULL
861
#define N4 0
862
#define SHIFT4 0
863
#define COEFFS4 NULL
864
#elif 1 // 11/5 
865
#define N1 0
866
#define SHIFT1 1
867
#define COEFFS1 NULL
868
#define N2 2
869
#define SHIFT2 2
870
#define COEFFS2 (int[]){-1,-1}
871
#define N3 2
872
#define SHIFT3 0
873
#define COEFFS3 (int[]){-1,-1}
874
#define N4 4
875
#define SHIFT4 7
876
#define COEFFS4 (int[]){-5,29,29,-5}
877
#define SCALEX 4
878
#elif 1 // 9/7 CDF
879
#define N1 2
880
#define SHIFT1 7
881
#define COEFFS1 (int[]){-203,-203}
882
#define N2 2
883
#define SHIFT2 12
884
#define COEFFS2 (int[]){-217,-217}
885
#define N3 2
886
#define SHIFT3 7
887
#define COEFFS3 (int[]){113,113}
888
#define N4 2
889
#define SHIFT4 9
890
#define COEFFS4 (int[]){227,227}
891
#define SCALEX 1
892
#elif 1 // 7/5 CDF
893
#define N1 0
894
#define SHIFT1 1
895
#define COEFFS1 NULL
896
#define N2 2
897
#define SHIFT2 2
898
#define COEFFS2 (int[]){-1,-1}
899
#define N3 2
900
#define SHIFT3 0
901
#define COEFFS3 (int[]){-1,-1}
902
#define N4 2
903
#define SHIFT4 4
904
#define COEFFS4 (int[]){3,3}
905
#elif 1 // 9/7 MN
906
#define N1 4
907
#define SHIFT1 4
908
#define COEFFS1 (int[]){1,-9,-9,1}
909
#define N2 2
910
#define SHIFT2 2
911
#define COEFFS2 (int[]){1,1}
912
#define N3 0
913
#define SHIFT3 1
914
#define COEFFS3 NULL
915
#define N4 0
916
#define SHIFT4 1
917
#define COEFFS4 NULL
918
#else // 13/7 CRF
919
#define N1 4
920
#define SHIFT1 4
921
#define COEFFS1 (int[]){1,-9,-9,1}
922
#define N2 4
923
#define SHIFT2 4
924
#define COEFFS2 (int[]){-1,5,5,-1}
925
#define N3 0
926
#define SHIFT3 1
927
#define COEFFS3 NULL
928
#define N4 0
929
#define SHIFT4 1
930
#define COEFFS4 NULL
931
#endif
932
static void horizontal_decomposeX(DWTELEM *b, int width){
933
    DWTELEM temp[width];
934
    const int width2= width>>1;
935
    const int w2= (width+1)>>1;
936
    int A1,A2,A3,A4, x;
937

    
938
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
939
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
940
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
941
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
942
    
943
    for(x=0; x<width2; x++){
944
        temp[x   ]= b[2*x    ];
945
        temp[x+w2]= b[2*x + 1];
946
    }
947
    if(width&1)
948
        temp[x   ]= b[2*x    ];
949
    memcpy(b, temp, width*sizeof(int));
950
}
951

    
952
static void horizontal_composeX(DWTELEM *b, int width){
953
    DWTELEM temp[width];
954
    const int width2= width>>1;
955
    int A1,A2,A3,A4, x;
956
    const int w2= (width+1)>>1;
957

    
958
    memcpy(temp, b, width*sizeof(int));
959
    for(x=0; x<width2; x++){
960
        b[2*x    ]= temp[x   ];
961
        b[2*x + 1]= temp[x+w2];
962
    }
963
    if(width&1)
964
        b[2*x    ]= temp[x   ];
965

    
966
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
967
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
968
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
969
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
970
}
971

    
972
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
973
    int x, y;
974
  
975
    for(y=0; y<height; y++){
976
        for(x=0; x<width; x++){
977
            buffer[y*stride + x] *= SCALEX;
978
        }
979
    }
980

    
981
    for(y=0; y<height; y++){
982
        horizontal_decomposeX(buffer + y*stride, width);
983
    }
984
    
985
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
986
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
987
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
988
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
989
}
990

    
991
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
992
    int x, y;
993
  
994
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
995
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
996
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
997
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
998

    
999
    for(y=0; y<height; y++){
1000
        horizontal_composeX(buffer + y*stride, width);
1001
    }
1002

    
1003
    for(y=0; y<height; y++){
1004
        for(x=0; x<width; x++){
1005
            buffer[y*stride + x] /= SCALEX;
1006
        }
1007
    }
1008
}
1009

    
1010
static void horizontal_decompose53i(DWTELEM *b, int width){
1011
    DWTELEM temp[width];
1012
    const int width2= width>>1;
1013
    int A1,A2,A3,A4, x;
1014
    const int w2= (width+1)>>1;
1015

    
1016
    for(x=0; x<width2; x++){
1017
        temp[x   ]= b[2*x    ];
1018
        temp[x+w2]= b[2*x + 1];
1019
    }
1020
    if(width&1)
1021
        temp[x   ]= b[2*x    ];
1022
#if 0
1023
    A2= temp[1       ];
1024
    A4= temp[0       ];
1025
    A1= temp[0+width2];
1026
    A1 -= (A2 + A4)>>1;
1027
    A4 += (A1 + 1)>>1;
1028
    b[0+width2] = A1;
1029
    b[0       ] = A4;
1030
    for(x=1; x+1<width2; x+=2){
1031
        A3= temp[x+width2];
1032
        A4= temp[x+1     ];
1033
        A3 -= (A2 + A4)>>1;
1034
        A2 += (A1 + A3 + 2)>>2;
1035
        b[x+width2] = A3;
1036
        b[x       ] = A2;
1037

1038
        A1= temp[x+1+width2];
1039
        A2= temp[x+2       ];
1040
        A1 -= (A2 + A4)>>1;
1041
        A4 += (A1 + A3 + 2)>>2;
1042
        b[x+1+width2] = A1;
1043
        b[x+1       ] = A4;
1044
    }
1045
    A3= temp[width-1];
1046
    A3 -= A2;
1047
    A2 += (A1 + A3 + 2)>>2;
1048
    b[width -1] = A3;
1049
    b[width2-1] = A2;
1050
#else        
1051
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1052
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
1053
#endif
1054
}
1055

    
1056
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1057
    int i;
1058
    
1059
    for(i=0; i<width; i++){
1060
        b1[i] -= (b0[i] + b2[i])>>1;
1061
    }
1062
}
1063

    
1064
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1065
    int i;
1066
    
1067
    for(i=0; i<width; i++){
1068
        b1[i] += (b0[i] + b2[i] + 2)>>2;
1069
    }
1070
}
1071

    
1072
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1073
    int y;
1074
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1075
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
1076
  
1077
    for(y=-2; y<height; y+=2){
1078
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1079
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1080

    
1081
{START_TIMER
1082
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
1083
        if(y+2 < height) horizontal_decompose53i(b3, width);
1084
STOP_TIMER("horizontal_decompose53i")}
1085
        
1086
{START_TIMER
1087
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1088
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1089
STOP_TIMER("vertical_decompose53i*")}
1090
        
1091
        b0=b2;
1092
        b1=b3;
1093
    }
1094
}
1095

    
1096
#define liftS lift
1097
#define lift5 lift
1098
#if 1
1099
#define W_AM 3
1100
#define W_AO 0
1101
#define W_AS 1
1102

    
1103
#undef liftS
1104
#define W_BM 1
1105
#define W_BO 8
1106
#define W_BS 4
1107

    
1108
#define W_CM 1
1109
#define W_CO 0
1110
#define W_CS 0
1111

    
1112
#define W_DM 3
1113
#define W_DO 4
1114
#define W_DS 3
1115
#elif 0
1116
#define W_AM 55
1117
#define W_AO 16
1118
#define W_AS 5
1119

    
1120
#define W_BM 3
1121
#define W_BO 32
1122
#define W_BS 6
1123

    
1124
#define W_CM 127
1125
#define W_CO 64
1126
#define W_CS 7
1127

    
1128
#define W_DM 7
1129
#define W_DO 8
1130
#define W_DS 4
1131
#elif 0
1132
#define W_AM 97
1133
#define W_AO 32
1134
#define W_AS 6
1135

    
1136
#define W_BM 63
1137
#define W_BO 512
1138
#define W_BS 10
1139

    
1140
#define W_CM 13
1141
#define W_CO 8
1142
#define W_CS 4
1143

    
1144
#define W_DM 15
1145
#define W_DO 16
1146
#define W_DS 5
1147

    
1148
#else
1149

    
1150
#define W_AM 203
1151
#define W_AO 64
1152
#define W_AS 7
1153

    
1154
#define W_BM 217
1155
#define W_BO 2048
1156
#define W_BS 12
1157

    
1158
#define W_CM 113
1159
#define W_CO 64
1160
#define W_CS 7
1161

    
1162
#define W_DM 227
1163
#define W_DO 128
1164
#define W_DS 9
1165
#endif
1166
static void horizontal_decompose97i(DWTELEM *b, int width){
1167
    DWTELEM temp[width];
1168
    const int w2= (width+1)>>1;
1169

    
1170
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1171
    liftS(temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1172
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1173
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1174
}
1175

    
1176

    
1177
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1178
    int i;
1179
    
1180
    for(i=0; i<width; i++){
1181
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1182
    }
1183
}
1184

    
1185
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1186
    int i;
1187
    
1188
    for(i=0; i<width; i++){
1189
#ifdef lift5
1190
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1191
#else
1192
        int r= 3*(b0[i] + b2[i]);
1193
        r+= r>>4;
1194
        r+= r>>8;
1195
        b1[i] += (r+W_CO)>>W_CS;
1196
#endif
1197
    }
1198
}
1199

    
1200
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1201
    int i;
1202
    
1203
    for(i=0; i<width; i++){
1204
#ifdef liftS
1205
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1206
#else
1207
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1208
#endif
1209
    }
1210
}
1211

    
1212
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1213
    int i;
1214
    
1215
    for(i=0; i<width; i++){
1216
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1217
    }
1218
}
1219

    
1220
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1221
    int y;
1222
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1223
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1224
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1225
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1226
  
1227
    for(y=-4; y<height; y+=2){
1228
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1229
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1230

    
1231
{START_TIMER
1232
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1233
        if(y+4 < height) horizontal_decompose97i(b5, width);
1234
if(width>400){
1235
STOP_TIMER("horizontal_decompose97i")
1236
}}
1237
        
1238
{START_TIMER
1239
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1240
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1241
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1242
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1243

    
1244
if(width>400){
1245
STOP_TIMER("vertical_decompose97i")
1246
}}
1247
        
1248
        b0=b2;
1249
        b1=b3;
1250
        b2=b4;
1251
        b3=b5;
1252
    }
1253
}
1254

    
1255
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1256
    int level;
1257
    
1258
    for(level=0; level<decomposition_count; level++){
1259
        switch(type){
1260
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1261
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1262
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1263
        }
1264
    }
1265
}
1266

    
1267
static void horizontal_compose53i(DWTELEM *b, int width){
1268
    DWTELEM temp[width];
1269
    const int width2= width>>1;
1270
    const int w2= (width+1)>>1;
1271
    int A1,A2,A3,A4, x;
1272

    
1273
#if 0
1274
    A2= temp[1       ];
1275
    A4= temp[0       ];
1276
    A1= temp[0+width2];
1277
    A1 -= (A2 + A4)>>1;
1278
    A4 += (A1 + 1)>>1;
1279
    b[0+width2] = A1;
1280
    b[0       ] = A4;
1281
    for(x=1; x+1<width2; x+=2){
1282
        A3= temp[x+width2];
1283
        A4= temp[x+1     ];
1284
        A3 -= (A2 + A4)>>1;
1285
        A2 += (A1 + A3 + 2)>>2;
1286
        b[x+width2] = A3;
1287
        b[x       ] = A2;
1288

1289
        A1= temp[x+1+width2];
1290
        A2= temp[x+2       ];
1291
        A1 -= (A2 + A4)>>1;
1292
        A4 += (A1 + A3 + 2)>>2;
1293
        b[x+1+width2] = A1;
1294
        b[x+1       ] = A4;
1295
    }
1296
    A3= temp[width-1];
1297
    A3 -= A2;
1298
    A2 += (A1 + A3 + 2)>>2;
1299
    b[width -1] = A3;
1300
    b[width2-1] = A2;
1301
#else   
1302
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1303
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1304
#endif
1305
    for(x=0; x<width2; x++){
1306
        b[2*x    ]= temp[x   ];
1307
        b[2*x + 1]= temp[x+w2];
1308
    }
1309
    if(width&1)
1310
        b[2*x    ]= temp[x   ];
1311
}
1312

    
1313
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1314
    int i;
1315
    
1316
    for(i=0; i<width; i++){
1317
        b1[i] += (b0[i] + b2[i])>>1;
1318
    }
1319
}
1320

    
1321
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1322
    int i;
1323
    
1324
    for(i=0; i<width; i++){
1325
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1326
    }
1327
}
1328

    
1329
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1330
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1331
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1332
    cs->y = -1;
1333
}
1334

    
1335
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1336
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1337
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1338
    cs->y = -1;
1339
}
1340

    
1341
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1342
    int y= cs->y;
1343
    int mirror0 = mirror(y-1, height-1);
1344
    int mirror1 = mirror(y  , height-1);
1345
    int mirror2 = mirror(y+1, height-1);
1346
    int mirror3 = mirror(y+2, height-1);
1347
    
1348
    DWTELEM *b0= cs->b0;
1349
    DWTELEM *b1= cs->b1;
1350
    DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1351
    DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1352

    
1353
{START_TIMER
1354
        if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1355
        if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1356
STOP_TIMER("vertical_compose53i*")}
1357

    
1358
{START_TIMER
1359
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1360
        if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1361
STOP_TIMER("horizontal_compose53i")}
1362

    
1363
    cs->b0 = b2;
1364
    cs->b1 = b3;
1365
    cs->y += 2;
1366
}
1367

    
1368
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1369
    int y= cs->y;
1370
    DWTELEM *b0= cs->b0;
1371
    DWTELEM *b1= cs->b1;
1372
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1373
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1374

    
1375
{START_TIMER
1376
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1377
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1378
STOP_TIMER("vertical_compose53i*")}
1379

    
1380
{START_TIMER
1381
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1382
        if(b0 <= b2) horizontal_compose53i(b1, width);
1383
STOP_TIMER("horizontal_compose53i")}
1384

    
1385
    cs->b0 = b2;
1386
    cs->b1 = b3;
1387
    cs->y += 2;
1388
}
1389

    
1390
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1391
    dwt_compose_t cs;
1392
    spatial_compose53i_init(&cs, buffer, height, stride);
1393
    while(cs.y <= height)
1394
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1395
}   
1396

    
1397
 
1398
static void horizontal_compose97i(DWTELEM *b, int width){
1399
    DWTELEM temp[width];
1400
    const int w2= (width+1)>>1;
1401

    
1402
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1403
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1404
    liftS(b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1405
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1406
}
1407

    
1408
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1409
    int i;
1410
    
1411
    for(i=0; i<width; i++){
1412
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1413
    }
1414
}
1415

    
1416
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1417
    int i;
1418
    
1419
    for(i=0; i<width; i++){
1420
#ifdef lift5
1421
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1422
#else
1423
        int r= 3*(b0[i] + b2[i]);
1424
        r+= r>>4;
1425
        r+= r>>8;
1426
        b1[i] -= (r+W_CO)>>W_CS;
1427
#endif
1428
    }
1429
}
1430

    
1431
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1432
    int i;
1433
    
1434
    for(i=0; i<width; i++){
1435
#ifdef liftS
1436
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1437
#else
1438
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1439
#endif
1440
    }
1441
}
1442

    
1443
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1444
    int i;
1445
    
1446
    for(i=0; i<width; i++){
1447
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1448
    }
1449
}
1450

    
1451
static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1452
    int i;
1453
    
1454
    for(i=0; i<width; i++){
1455
        int r;
1456
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1457
#ifdef lift5
1458
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1459
#else
1460
        r= 3*(b2[i] + b4[i]);
1461
        r+= r>>4;
1462
        r+= r>>8;
1463
        b3[i] -= (r+W_CO)>>W_CS;
1464
#endif
1465
#ifdef liftS
1466
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1467
#else
1468
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1469
#endif
1470
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1471
    }
1472
}
1473

    
1474
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1475
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1476
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1477
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1478
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1479
    cs->y = -3;
1480
}
1481

    
1482
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1483
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1484
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1485
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1486
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1487
    cs->y = -3;
1488
}
1489

    
1490
static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1491
    int y = cs->y;
1492
    
1493
    int mirror0 = mirror(y - 1, height - 1);
1494
    int mirror1 = mirror(y + 0, height - 1);
1495
    int mirror2 = mirror(y + 1, height - 1);
1496
    int mirror3 = mirror(y + 2, height - 1);
1497
    int mirror4 = mirror(y + 3, height - 1);
1498
    int mirror5 = mirror(y + 4, height - 1);
1499
    DWTELEM *b0= cs->b0;
1500
    DWTELEM *b1= cs->b1;
1501
    DWTELEM *b2= cs->b2;
1502
    DWTELEM *b3= cs->b3;
1503
    DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1504
    DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1505
        
1506
{START_TIMER
1507
    if(y>0 && y+4<height){
1508
        vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1509
    }else{
1510
        if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1511
        if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1512
        if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1513
        if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1514
    }
1515
if(width>400){
1516
STOP_TIMER("vertical_compose97i")}}
1517

    
1518
{START_TIMER
1519
        if(y-1>=  0) horizontal_compose97i(b0, width);
1520
        if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1521
if(width>400 && mirror0 <= mirror2){
1522
STOP_TIMER("horizontal_compose97i")}}
1523

    
1524
    cs->b0=b2;
1525
    cs->b1=b3;
1526
    cs->b2=b4;
1527
    cs->b3=b5;
1528
    cs->y += 2;
1529
}
1530

    
1531
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1532
    int y = cs->y;
1533
    DWTELEM *b0= cs->b0;
1534
    DWTELEM *b1= cs->b1;
1535
    DWTELEM *b2= cs->b2;
1536
    DWTELEM *b3= cs->b3;
1537
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1538
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1539

    
1540
        if(stride == width && y+4 < height && 0){ 
1541
            int x;
1542
            for(x=0; x<width/2; x++)
1543
                b5[x] += 64*2;
1544
            for(; x<width; x++)
1545
                b5[x] += 169*2;
1546
        }
1547
        
1548
{START_TIMER
1549
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1550
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1551
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1552
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1553
if(width>400){
1554
STOP_TIMER("vertical_compose97i")}}
1555

    
1556
{START_TIMER
1557
        if(y-1>=  0) horizontal_compose97i(b0, width);
1558
        if(b0 <= b2) horizontal_compose97i(b1, width);
1559
if(width>400 && b0 <= b2){
1560
STOP_TIMER("horizontal_compose97i")}}
1561

    
1562
    cs->b0=b2;
1563
    cs->b1=b3;
1564
    cs->b2=b4;
1565
    cs->b3=b5;
1566
    cs->y += 2;
1567
}
1568

    
1569
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1570
    dwt_compose_t cs;
1571
    spatial_compose97i_init(&cs, buffer, height, stride);
1572
    while(cs.y <= height)
1573
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1574
}
1575

    
1576
void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1577
    int level;
1578
    for(level=decomposition_count-1; level>=0; level--){
1579
        switch(type){
1580
        case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1581
        case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1582
        /* not slicified yet */
1583
        case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1584
          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1585
        }
1586
    }
1587
}
1588

    
1589
void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1590
    int level;
1591
    for(level=decomposition_count-1; level>=0; level--){
1592
        switch(type){
1593
        case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1594
        case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1595
        /* not slicified yet */
1596
        case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1597
        }
1598
    }
1599
}
1600

    
1601
void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1602
    const int support = type==1 ? 3 : 5;
1603
    int level;
1604
    if(type==2) return;
1605

    
1606
    for(level=decomposition_count-1; level>=0; level--){
1607
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1608
            switch(type){
1609
            case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1610
                    break;
1611
            case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1612
                    break;
1613
            case 2: break;
1614
            }
1615
        }
1616
    }
1617
}
1618

    
1619
void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1620
    const int support = type==1 ? 3 : 5;
1621
    int level;
1622
    if(type==2) return;
1623

    
1624
    for(level=decomposition_count-1; level>=0; level--){
1625
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1626
            switch(type){
1627
            case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1628
                    break;
1629
            case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1630
                    break;
1631
            case 2: break;
1632
            }
1633
        }
1634
    }
1635
}
1636

    
1637
void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1638
    if(type==2){
1639
        int level;
1640
        for(level=decomposition_count-1; level>=0; level--)
1641
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
1642
    }else{
1643
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1644
        int y;
1645
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1646
        for(y=0; y<height; y+=4)
1647
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1648
    }
1649
}
1650

    
1651
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1652
    const int w= b->width;
1653
    const int h= b->height;
1654
    int x, y;
1655

    
1656
    if(1){
1657
        int run=0;
1658
        int runs[w*h];
1659
        int run_index=0;
1660
        int max_index;
1661
                
1662
        for(y=0; y<h; y++){
1663
            for(x=0; x<w; x++){
1664
                int v, p=0;
1665
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1666
                v= src[x + y*stride];
1667

    
1668
                if(y){
1669
                    t= src[x + (y-1)*stride];
1670
                    if(x){
1671
                        lt= src[x - 1 + (y-1)*stride];
1672
                    }
1673
                    if(x + 1 < w){
1674
                        rt= src[x + 1 + (y-1)*stride];
1675
                    }
1676
                }
1677
                if(x){
1678
                    l= src[x - 1 + y*stride];
1679
                    /*if(x > 1){
1680
                        if(orientation==1) ll= src[y + (x-2)*stride];
1681
                        else               ll= src[x - 2 + y*stride];
1682
                    }*/
1683
                }
1684
                if(parent){
1685
                    int px= x>>1;
1686
                    int py= y>>1;
1687
                    if(px<b->parent->width && py<b->parent->height) 
1688
                        p= parent[px + py*2*stride];
1689
                }
1690
                if(!(/*ll|*/l|lt|t|rt|p)){
1691
                    if(v){
1692
                        runs[run_index++]= run;
1693
                        run=0;
1694
                    }else{
1695
                        run++;
1696
                    }
1697
                }
1698
            }
1699
        }
1700
        max_index= run_index;
1701
        runs[run_index++]= run;
1702
        run_index=0;
1703
        run= runs[run_index++];
1704

    
1705
        put_symbol2(&s->c, b->state[30], max_index, 0);
1706
        if(run_index <= max_index)
1707
            put_symbol2(&s->c, b->state[1], run, 3);
1708
        
1709
        for(y=0; y<h; y++){
1710
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1711
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1712
                return -1;
1713
            }
1714
            for(x=0; x<w; x++){
1715
                int v, p=0;
1716
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1717
                v= src[x + y*stride];
1718

    
1719
                if(y){
1720
                    t= src[x + (y-1)*stride];
1721
                    if(x){
1722
                        lt= src[x - 1 + (y-1)*stride];
1723
                    }
1724
                    if(x + 1 < w){
1725
                        rt= src[x + 1 + (y-1)*stride];
1726
                    }
1727
                }
1728
                if(x){
1729
                    l= src[x - 1 + y*stride];
1730
                    /*if(x > 1){
1731
                        if(orientation==1) ll= src[y + (x-2)*stride];
1732
                        else               ll= src[x - 2 + y*stride];
1733
                    }*/
1734
                }
1735
                if(parent){
1736
                    int px= x>>1;
1737
                    int py= y>>1;
1738
                    if(px<b->parent->width && py<b->parent->height) 
1739
                        p= parent[px + py*2*stride];
1740
                }
1741
                if(/*ll|*/l|lt|t|rt|p){
1742
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1743

    
1744
                    put_rac(&s->c, &b->state[0][context], !!v);
1745
                }else{
1746
                    if(!run){
1747
                        run= runs[run_index++];
1748

    
1749
                        if(run_index <= max_index)
1750
                            put_symbol2(&s->c, b->state[1], run, 3);
1751
                        assert(v);
1752
                    }else{
1753
                        run--;
1754
                        assert(!v);
1755
                    }
1756
                }
1757
                if(v){
1758
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1759
                    int l2= 2*ABS(l) + (l<0);
1760
                    int t2= 2*ABS(t) + (t<0);
1761

    
1762
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1763
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1764
                }
1765
            }
1766
        }
1767
    }
1768
    return 0;
1769
}
1770

    
1771
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1772
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1773
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1774
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1775
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1776
}
1777

    
1778
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1779
    const int w= b->width;
1780
    const int h= b->height;
1781
    int x,y;
1782
    
1783
    if(1){
1784
        int run, runs;
1785
        x_and_coeff *xc= b->x_coeff;
1786
        x_and_coeff *prev_xc= NULL;
1787
        x_and_coeff *prev2_xc= xc;
1788
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1789
        x_and_coeff *prev_parent_xc= parent_xc;
1790

    
1791
        runs= get_symbol2(&s->c, b->state[30], 0);
1792
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1793
        else           run= INT_MAX;
1794

    
1795
        for(y=0; y<h; y++){
1796
            int v=0;
1797
            int lt=0, t=0, rt=0;
1798

    
1799
            if(y && prev_xc->x == 0){
1800
                rt= prev_xc->coeff;
1801
            }
1802
            for(x=0; x<w; x++){
1803
                int p=0;
1804
                const int l= v;
1805
                
1806
                lt= t; t= rt;
1807

    
1808
                if(y){
1809
                    if(prev_xc->x <= x)
1810
                        prev_xc++;
1811
                    if(prev_xc->x == x + 1)
1812
                        rt= prev_xc->coeff;
1813
                    else
1814
                        rt=0;
1815
                }
1816
                if(parent_xc){
1817
                    if(x>>1 > parent_xc->x){
1818
                        parent_xc++;
1819
                    }
1820
                    if(x>>1 == parent_xc->x){
1821
                        p= parent_xc->coeff;
1822
                    }
1823
                }
1824
                if(/*ll|*/l|lt|t|rt|p){
1825
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1826

    
1827
                    v=get_rac(&s->c, &b->state[0][context]);
1828
                    if(v){
1829
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1830
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1831
                        
1832
                        xc->x=x;
1833
                        (xc++)->coeff= v;
1834
                    }
1835
                }else{
1836
                    if(!run){
1837
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1838
                        else           run= INT_MAX;
1839
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1840
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1841
                        
1842
                        xc->x=x;
1843
                        (xc++)->coeff= v;
1844
                    }else{
1845
                        int max_run;
1846
                        run--;
1847
                        v=0;
1848

    
1849
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1850
                        else  max_run= FFMIN(run, w-x-1);
1851
                        if(parent_xc)
1852
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1853
                        x+= max_run;
1854
                        run-= max_run;
1855
                    }
1856
                }
1857
            }
1858
            (xc++)->x= w+1; //end marker
1859
            prev_xc= prev2_xc;
1860
            prev2_xc= xc;
1861
            
1862
            if(parent_xc){
1863
                if(y&1){
1864
                    while(parent_xc->x != parent->width+1)
1865
                        parent_xc++;
1866
                    parent_xc++;
1867
                    prev_parent_xc= parent_xc;
1868
                }else{
1869
                    parent_xc= prev_parent_xc;
1870
                }
1871
            }
1872
        }
1873

    
1874
        (xc++)->x= w+1; //end marker
1875
    }
1876
}
1877

    
1878
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1879
    const int w= b->width;
1880
    int x,y;
1881
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1882
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1883
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1884
    int new_index = 0;
1885
    
1886
    START_TIMER
1887

    
1888
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1889
        qadd= 0;
1890
        qmul= 1<<QEXPSHIFT;
1891
    }
1892

    
1893
    /* If we are on the second or later slice, restore our index. */
1894
    if (start_y != 0)
1895
        new_index = save_state[0];
1896

    
1897
        
1898
    for(y=start_y; y<h; y++){
1899
        int x = 0;
1900
        int v;
1901
        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1902
        memset(line, 0, b->width*sizeof(DWTELEM));
1903
        v = b->x_coeff[new_index].coeff;
1904
        x = b->x_coeff[new_index++].x;
1905
        while(x < w)
1906
        {
1907
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1908
            register int u= -(v&1);
1909
            line[x] = (t^u) - u;
1910

    
1911
            v = b->x_coeff[new_index].coeff;
1912
            x = b->x_coeff[new_index++].x;
1913
        }
1914
    }
1915
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1916
        STOP_TIMER("decode_subband")
1917
    }
1918
        
1919
    /* Save our variables for the next slice. */
1920
    save_state[0] = new_index;
1921
        
1922
    return;
1923
}
1924

    
1925
static void reset_contexts(SnowContext *s){
1926
    int plane_index, level, orientation;
1927

    
1928
    for(plane_index=0; plane_index<3; plane_index++){
1929
        for(level=0; level<s->spatial_decomposition_count; level++){
1930
            for(orientation=level ? 1:0; orientation<4; orientation++){
1931
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1932
            }
1933
        }
1934
    }
1935
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1936
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1937
}
1938

    
1939
static int alloc_blocks(SnowContext *s){
1940
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1941
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1942
    
1943
    s->b_width = w;
1944
    s->b_height= h;
1945
    
1946
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1947
    return 0;
1948
}
1949

    
1950
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1951
    uint8_t *bytestream= d->bytestream;
1952
    uint8_t *bytestream_start= d->bytestream_start;
1953
    *d= *s;
1954
    d->bytestream= bytestream;
1955
    d->bytestream_start= bytestream_start;
1956
}
1957

    
1958
//near copy & paste from dsputil, FIXME
1959
static int pix_sum(uint8_t * pix, int line_size, int w)
1960
{
1961
    int s, i, j;
1962

    
1963
    s = 0;
1964
    for (i = 0; i < w; i++) {
1965
        for (j = 0; j < w; j++) {
1966
            s += pix[0];
1967
            pix ++;
1968
        }
1969
        pix += line_size - w;
1970
    }
1971
    return s;
1972
}
1973

    
1974
//near copy & paste from dsputil, FIXME
1975
static int pix_norm1(uint8_t * pix, int line_size, int w)
1976
{
1977
    int s, i, j;
1978
    uint32_t *sq = squareTbl + 256;
1979

    
1980
    s = 0;
1981
    for (i = 0; i < w; i++) {
1982
        for (j = 0; j < w; j ++) {
1983
            s += sq[pix[0]];
1984
            pix ++;
1985
        }
1986
        pix += line_size - w;
1987
    }
1988
    return s;
1989
}
1990

    
1991
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1992
    const int w= s->b_width << s->block_max_depth;
1993
    const int rem_depth= s->block_max_depth - level;
1994
    const int index= (x + y*w) << rem_depth;
1995
    const int block_w= 1<<rem_depth;
1996
    BlockNode block;
1997
    int i,j;
1998
    
1999
    block.color[0]= l;
2000
    block.color[1]= cb;
2001
    block.color[2]= cr;
2002
    block.mx= mx;
2003
    block.my= my;
2004
    block.type= type;
2005
    block.level= level;
2006

    
2007
    for(j=0; j<block_w; j++){
2008
        for(i=0; i<block_w; i++){
2009
            s->block[index + i + j*w]= block;
2010
        }
2011
    }
2012
}
2013

    
2014
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2015
    const int offset[3]= {
2016
          y*c->  stride + x,
2017
        ((y*c->uvstride + x)>>1),
2018
        ((y*c->uvstride + x)>>1),
2019
    };
2020
    int i;
2021
    for(i=0; i<3; i++){
2022
        c->src[0][i]= src [i];
2023
        c->ref[0][i]= ref [i] + offset[i];
2024
    }
2025
    assert(!ref_index);
2026
}
2027

    
2028
//FIXME copy&paste
2029
#define P_LEFT P[1]
2030
#define P_TOP P[2]
2031
#define P_TOPRIGHT P[3]
2032
#define P_MEDIAN P[4]
2033
#define P_MV1 P[9]
2034
#define FLAG_QPEL   1 //must be 1
2035

    
2036
static int encode_q_branch(SnowContext *s, int level, int x, int y){
2037
    uint8_t p_buffer[1024];
2038
    uint8_t i_buffer[1024];
2039
    uint8_t p_state[sizeof(s->block_state)];
2040
    uint8_t i_state[sizeof(s->block_state)];
2041
    RangeCoder pc, ic;
2042
    uint8_t *pbbak= s->c.bytestream;
2043
    uint8_t *pbbak_start= s->c.bytestream_start;
2044
    int score, score2, iscore, i_len, p_len, block_s, sum;
2045
    const int w= s->b_width  << s->block_max_depth;
2046
    const int h= s->b_height << s->block_max_depth;
2047
    const int rem_depth= s->block_max_depth - level;
2048
    const int index= (x + y*w) << rem_depth;
2049
    const int block_w= 1<<(LOG2_MB_SIZE - level);
2050
    static BlockNode null_block= { //FIXME add border maybe
2051
        .color= {128,128,128},
2052
        .mx= 0,
2053
        .my= 0,
2054
        .type= 0,
2055
        .level= 0,
2056
    };
2057
    int trx= (x+1)<<rem_depth;
2058
    int try= (y+1)<<rem_depth;
2059
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2060
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2061
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2062
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2063
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2064
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2065
    int pl = left->color[0];
2066
    int pcb= left->color[1];
2067
    int pcr= left->color[2];
2068
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2069
    int pmy= mid_pred(left->my, top->my, tr->my);
2070
    int mx=0, my=0;
2071
    int l,cr,cb, i;
2072
    const int stride= s->current_picture.linesize[0];
2073
    const int uvstride= s->current_picture.linesize[1];
2074
    const int instride= s->input_picture.linesize[0];
2075
    const int uvinstride= s->input_picture.linesize[1];
2076
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
2077
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2078
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2079
    uint8_t current_mb[3][stride*block_w];
2080
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2081
    int P[10][2];
2082
    int16_t last_mv[3][2];
2083
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2084
    const int shift= 1+qpel;
2085
    MotionEstContext *c= &s->m.me;
2086
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2087
    int my_context= av_log2(2*ABS(left->my - top->my));
2088
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2089

    
2090
    assert(sizeof(s->block_state) >= 256);
2091
    if(s->keyframe){
2092
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2093
        return 0;
2094
    }
2095

    
2096
    //FIXME optimize
2097
    for(i=0; i<block_w; i++)
2098
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
2099
    for(i=0; i<block_w>>1; i++)
2100
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2101
    for(i=0; i<block_w>>1; i++)
2102
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2103

    
2104
//    clip predictors / edge ?
2105

    
2106
    P_LEFT[0]= left->mx;
2107
    P_LEFT[1]= left->my;
2108
    P_TOP [0]= top->mx;
2109
    P_TOP [1]= top->my;
2110
    P_TOPRIGHT[0]= tr->mx;
2111
    P_TOPRIGHT[1]= tr->my;
2112
    
2113
    last_mv[0][0]= s->block[index].mx;
2114
    last_mv[0][1]= s->block[index].my;
2115
    last_mv[1][0]= right->mx;
2116
    last_mv[1][1]= right->my;
2117
    last_mv[2][0]= bottom->mx;
2118
    last_mv[2][1]= bottom->my;
2119
    
2120
    s->m.mb_stride=2;
2121
    s->m.mb_x= 
2122
    s->m.mb_y= 0;
2123
    s->m.me.skip= 0;
2124

    
2125
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2126
    
2127
    assert(s->m.me.  stride ==   stride);
2128
    assert(s->m.me.uvstride == uvstride);
2129
    
2130
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2131
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2132
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2133
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2134
    
2135
    c->xmin = - x*block_w - 16+2;
2136
    c->ymin = - y*block_w - 16+2;
2137
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2138
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2139

    
2140
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2141
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
2142
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2143
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2144
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2145
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2146
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2147

    
2148
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2149
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2150

    
2151
    if (!y) {
2152
        c->pred_x= P_LEFT[0];
2153
        c->pred_y= P_LEFT[1];
2154
    } else {
2155
        c->pred_x = P_MEDIAN[0];
2156
        c->pred_y = P_MEDIAN[1];
2157
    }
2158

    
2159
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
2160
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2161

    
2162
    assert(mx >= c->xmin);
2163
    assert(mx <= c->xmax);
2164
    assert(my >= c->ymin);
2165
    assert(my <= c->ymax);
2166
    
2167
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2168
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2169
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2170
                             
2171
  //  subpel search
2172
    pc= s->c;
2173
    pc.bytestream_start=
2174
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2175
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2176

    
2177
    if(level!=s->block_max_depth)
2178
        put_rac(&pc, &p_state[4 + s_context], 1);
2179
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2180
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2181
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2182
    p_len= pc.bytestream - pc.bytestream_start;
2183
    score += (s->lambda2*(p_len*8
2184
              + (pc.outstanding_count - s->c.outstanding_count)*8
2185
              + (-av_log2(pc.range)    + av_log2(s->c.range))
2186
             ))>>FF_LAMBDA_SHIFT;
2187

    
2188
    block_s= block_w*block_w;
2189
    sum = pix_sum(&current_mb[0][0], stride, block_w);
2190
    l= (sum + block_s/2)/block_s;
2191
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2192
    
2193
    block_s= block_w*block_w>>2;
2194
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2195
    cb= (sum + block_s/2)/block_s;
2196
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2197
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2198
    cr= (sum + block_s/2)/block_s;
2199
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2200

    
2201
    ic= s->c;
2202
    ic.bytestream_start=
2203
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2204
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2205
    if(level!=s->block_max_depth)
2206
        put_rac(&ic, &i_state[4 + s_context], 1);
2207
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2208
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2209
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2210
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2211
    i_len= ic.bytestream - ic.bytestream_start;
2212
    iscore += (s->lambda2*(i_len*8
2213
              + (ic.outstanding_count - s->c.outstanding_count)*8
2214
              + (-av_log2(ic.range)    + av_log2(s->c.range))
2215
             ))>>FF_LAMBDA_SHIFT;
2216

    
2217
//    assert(score==256*256*256*64-1);
2218
    assert(iscore < 255*255*256 + s->lambda2*10);
2219
    assert(iscore >= 0);
2220
    assert(l>=0 && l<=255);
2221
    assert(pl>=0 && pl<=255);
2222

    
2223
    if(level==0){
2224
        int varc= iscore >> 8;
2225
        int vard= score >> 8;
2226
        if (vard <= 64 || vard < varc)
2227
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2228
        else
2229
            c->scene_change_score+= s->m.qscale;
2230
    }
2231
        
2232
    if(level!=s->block_max_depth){
2233
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2234
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2235
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2236
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2237
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2238
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2239
    
2240
        if(score2 < score && score2 < iscore)
2241
            return score2;
2242
    }
2243
    
2244
    if(iscore < score){
2245
        memcpy(pbbak, i_buffer, i_len);
2246
        s->c= ic;
2247
        s->c.bytestream_start= pbbak_start;
2248
        s->c.bytestream= pbbak + i_len;
2249
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2250
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2251
        return iscore;
2252
    }else{
2253
        memcpy(pbbak, p_buffer, p_len);
2254
        s->c= pc;
2255
        s->c.bytestream_start= pbbak_start;
2256
        s->c.bytestream= pbbak + p_len;
2257
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2258
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2259
        return score;
2260
    }
2261
}
2262

    
2263
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2264
    const int w= s->b_width << s->block_max_depth;
2265
    const int rem_depth= s->block_max_depth - level;
2266
    const int index= (x + y*w) << rem_depth;
2267
    static BlockNode null_block= { //FIXME add border maybe
2268
        .color= {128,128,128},
2269
        .mx= 0,
2270
        .my= 0,
2271
        .type= 0,
2272
        .level= 0,
2273
    };
2274
    int trx= (x+1)<<rem_depth;
2275
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2276
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2277
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2278
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2279
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2280
    
2281
    if(s->keyframe){
2282
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2283
        return;
2284
    }
2285

    
2286
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2287
        int type;
2288
        int l = left->color[0];
2289
        int cb= left->color[1];
2290
        int cr= left->color[2];
2291
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2292
        int my= mid_pred(left->my, top->my, tr->my);
2293
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2294
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2295
        
2296
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2297

    
2298
        if(type){
2299
            l += get_symbol(&s->c, &s->block_state[32], 1);
2300
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2301
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2302
        }else{
2303
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2304
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2305
        }
2306
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2307
    }else{
2308
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2309
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2310
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2311
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2312
    }
2313
}
2314

    
2315
static void encode_blocks(SnowContext *s){
2316
    int x, y;
2317
    int w= s->b_width;
2318
    int h= s->b_height;
2319

    
2320
    for(y=0; y<h; y++){
2321
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2322
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2323
            return;
2324
        }
2325
        for(x=0; x<w; x++){
2326
            encode_q_branch(s, 0, x, y);
2327
        }
2328
    }
2329
}
2330

    
2331
static void decode_blocks(SnowContext *s){
2332
    int x, y;
2333
    int w= s->b_width;
2334
    int h= s->b_height;
2335

    
2336
    for(y=0; y<h; y++){
2337
        for(x=0; x<w; x++){
2338
            decode_q_branch(s, 0, x, y);
2339
        }
2340
    }
2341
}
2342

    
2343
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2344
    int x, y;
2345
START_TIMER
2346
    for(y=0; y < b_h+5; y++){
2347
        for(x=0; x < b_w; x++){
2348
            int a0= src[x    ];
2349
            int a1= src[x + 1];
2350
            int a2= src[x + 2];
2351
            int a3= src[x + 3];
2352
            int a4= src[x + 4];
2353
            int a5= src[x + 5];
2354
//            int am= 9*(a1+a2) - (a0+a3);
2355
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2356
//            int am= 18*(a2+a3) - 2*(a1+a4);
2357
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2358
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2359

    
2360
//            if(b_w==16) am= 8*(a1+a2);
2361

    
2362
            if(dx<8) am = (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2363
            else     am = (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2364
            
2365
            /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2366
            if(am&(~255)) am= ~(am>>31);
2367
            
2368
            tmp[x] = am;
2369

    
2370
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2371
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2372
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2373
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2374
        }
2375
        tmp += stride;
2376
        src += stride;
2377
    }
2378
    tmp -= (b_h+5)*stride;
2379
    
2380
    for(y=0; y < b_h; y++){
2381
        for(x=0; x < b_w; x++){
2382
            int a0= tmp[x + 0*stride];
2383
            int a1= tmp[x + 1*stride];
2384
            int a2= tmp[x + 2*stride];
2385
            int a3= tmp[x + 3*stride];
2386
            int a4= tmp[x + 4*stride];
2387
            int a5= tmp[x + 5*stride];
2388
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2389
//            int am= 18*(a2+a3) - 2*(a1+a4);
2390
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2391
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2392
            
2393
//            if(b_w==16) am= 8*(a1+a2);
2394

    
2395
            if(dy<8) am =  (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2396
            else     am = (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2397

    
2398
            if(am&(~255)) am= ~(am>>31);
2399
            
2400
            dst[x] = am;
2401
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2402
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2403
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2404
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2405
        }
2406
        dst += stride;
2407
        tmp += stride;
2408
    }
2409
STOP_TIMER("mc_block")
2410
}
2411

    
2412
#define mca(dx,dy,b_w)\
2413
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2414
    uint8_t tmp[stride*(b_w+5)];\
2415
    assert(h==b_w);\
2416
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2417
}
2418

    
2419
mca( 0, 0,16)
2420
mca( 8, 0,16)
2421
mca( 0, 8,16)
2422
mca( 8, 8,16)
2423
mca( 0, 0,8)
2424
mca( 8, 0,8)
2425
mca( 0, 8,8)
2426
mca( 8, 8,8)
2427

    
2428
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2429
    if(block->type){
2430
        int x, y;
2431
        const int color= block->color[plane_index];
2432
        for(y=0; y < b_h; y++){
2433
            for(x=0; x < b_w; x++){
2434
                dst[x + y*stride]= color;
2435
            }
2436
        }
2437
    }else{
2438
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2439
        int mx= block->mx*scale;
2440
        int my= block->my*scale;
2441
        const int dx= mx&15;
2442
        const int dy= my&15;
2443
        sx += (mx>>4) - 2;
2444
        sy += (my>>4) - 2;
2445
        src += sx + sy*stride;
2446
        if(   (unsigned)sx >= w - b_w - 4
2447
           || (unsigned)sy >= h - b_h - 4){
2448
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2449
            src= tmp + MB_SIZE;
2450
        }
2451
        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2452
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2453
        else
2454
            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2455
    }
2456
}
2457

    
2458
static always_inline int same_block(BlockNode *a, BlockNode *b){
2459
    return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2460
}
2461

    
2462
//FIXME name clenup (b_w, block_w, b_width stuff)
2463
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2464
    DWTELEM * dst = NULL;
2465
    const int b_width = s->b_width  << s->block_max_depth;
2466
    const int b_height= s->b_height << s->block_max_depth;
2467
    const int b_stride= b_width;
2468
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2469
    BlockNode *rt= lt+1;
2470
    BlockNode *lb= lt+b_stride;
2471
    BlockNode *rb= lb+1;
2472
    uint8_t *block[4]; 
2473
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2474
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2475
    uint8_t *ptmp;
2476
    int x,y;
2477

    
2478
    if(b_x<0){
2479
        lt= rt;
2480
        lb= rb;
2481
    }else if(b_x + 1 >= b_width){
2482
        rt= lt;
2483
        rb= lb;
2484
    }
2485
    if(b_y<0){
2486
        lt= lb;
2487
        rt= rb;
2488
    }else if(b_y + 1 >= b_height){
2489
        lb= lt;
2490
        rb= rt;
2491
    }
2492
        
2493
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2494
        obmc -= src_x;
2495
        b_w += src_x;
2496
        src_x=0;
2497
    }else if(src_x + b_w > w){
2498
        b_w = w - src_x;
2499
    }
2500
    if(src_y<0){
2501
        obmc -= src_y*obmc_stride;
2502
        b_h += src_y;
2503
        src_y=0;
2504
    }else if(src_y + b_h> h){
2505
        b_h = h - src_y;
2506
    }
2507
    
2508
    if(b_w<=0 || b_h<=0) return;
2509

    
2510
assert(src_stride > 2*MB_SIZE + 5);
2511
//    old_dst += src_x + src_y*dst_stride;
2512
    dst8+= src_x + src_y*src_stride;
2513
//    src += src_x + src_y*src_stride;
2514

    
2515
    ptmp= tmp + 3*tmp_step;
2516
    block[0]= ptmp;
2517
    ptmp+=tmp_step;
2518
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2519

    
2520
    if(same_block(lt, rt)){
2521
        block[1]= block[0];
2522
    }else{
2523
        block[1]= ptmp;
2524
        ptmp+=tmp_step;
2525
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2526
    }
2527
        
2528
    if(same_block(lt, lb)){
2529
        block[2]= block[0];
2530
    }else if(same_block(rt, lb)){
2531
        block[2]= block[1];
2532
    }else{
2533
        block[2]= ptmp;
2534
        ptmp+=tmp_step;
2535
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2536
    }
2537

    
2538
    if(same_block(lt, rb) ){
2539
        block[3]= block[0];
2540
    }else if(same_block(rt, rb)){
2541
        block[3]= block[1];
2542
    }else if(same_block(lb, rb)){
2543
        block[3]= block[2];
2544
    }else{
2545
        block[3]= ptmp;
2546
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2547
    }
2548
#if 0
2549
    for(y=0; y<b_h; y++){
2550
        for(x=0; x<b_w; x++){
2551
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2552
            if(add) dst[x + y*dst_stride] += v;
2553
            else    dst[x + y*dst_stride] -= v;
2554
        }
2555
    }
2556
    for(y=0; y<b_h; y++){
2557
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2558
        for(x=0; x<b_w; x++){
2559
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2560
            if(add) dst[x + y*dst_stride] += v;
2561
            else    dst[x + y*dst_stride] -= v;
2562
        }
2563
    }
2564
    for(y=0; y<b_h; y++){
2565
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2566
        for(x=0; x<b_w; x++){
2567
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2568
            if(add) dst[x + y*dst_stride] += v;
2569
            else    dst[x + y*dst_stride] -= v;
2570
        }
2571
    }
2572
    for(y=0; y<b_h; y++){
2573
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2574
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2575
        for(x=0; x<b_w; x++){
2576
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2577
            if(add) dst[x + y*dst_stride] += v;
2578
            else    dst[x + y*dst_stride] -= v;
2579
        }
2580
    }
2581
#else
2582
{
2583

    
2584
    START_TIMER
2585
    
2586
    int block_index = 0;
2587
    for(y=0; y<b_h; y++){
2588
        //FIXME ugly missue of obmc_stride
2589
        uint8_t *obmc1= obmc + y*obmc_stride;
2590
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2591
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2592
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2593
        dst = slice_buffer_get_line(sb, src_y + y);
2594
        for(x=0; x<b_w; x++){
2595
            int v=   obmc1[x] * block[3][x + y*src_stride]
2596
                    +obmc2[x] * block[2][x + y*src_stride]
2597
                    +obmc3[x] * block[1][x + y*src_stride]
2598
                    +obmc4[x] * block[0][x + y*src_stride];
2599

    
2600
            v <<= 8 - LOG2_OBMC_MAX;
2601
            if(FRAC_BITS != 8){
2602
                v += 1<<(7 - FRAC_BITS);
2603
                v >>= 8 - FRAC_BITS;
2604
            }
2605
            if(add){
2606
//                v += old_dst[x + y*dst_stride];
2607
                v += dst[x + src_x];
2608
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2609
                if(v&(~255)) v= ~(v>>31);
2610
                dst8[x + y*src_stride] = v;
2611
            }else{
2612
//                old_dst[x + y*dst_stride] -= v;
2613
                dst[x + src_x] -= v;
2614
            }
2615
        }
2616
    }
2617
        STOP_TIMER("Inner add y block")
2618
}
2619
#endif
2620
}
2621

    
2622
//FIXME name clenup (b_w, block_w, b_width stuff)
2623
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2624
    const int b_width = s->b_width  << s->block_max_depth;
2625
    const int b_height= s->b_height << s->block_max_depth;
2626
    const int b_stride= b_width;
2627
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2628
    BlockNode *rt= lt+1;
2629
    BlockNode *lb= lt+b_stride;
2630
    BlockNode *rb= lb+1;
2631
    uint8_t *block[4]; 
2632
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2633
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2634
    uint8_t *ptmp;
2635
    int x,y;
2636

    
2637
    if(b_x<0){
2638
        lt= rt;
2639
        lb= rb;
2640
    }else if(b_x + 1 >= b_width){
2641
        rt= lt;
2642
        rb= lb;
2643
    }
2644
    if(b_y<0){
2645
        lt= lb;
2646
        rt= rb;
2647
    }else if(b_y + 1 >= b_height){
2648
        lb= lt;
2649
        rb= rt;
2650
    }
2651
        
2652
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2653
        obmc -= src_x;
2654
        b_w += src_x;
2655
        src_x=0;
2656
    }else if(src_x + b_w > w){
2657
        b_w = w - src_x;
2658
    }
2659
    if(src_y<0){
2660
        obmc -= src_y*obmc_stride;
2661
        b_h += src_y;
2662
        src_y=0;
2663
    }else if(src_y + b_h> h){
2664
        b_h = h - src_y;
2665
    }
2666
    
2667
    if(b_w<=0 || b_h<=0) return;
2668

    
2669
assert(src_stride > 2*MB_SIZE + 5);
2670
    dst += src_x + src_y*dst_stride;
2671
    dst8+= src_x + src_y*src_stride;
2672
//    src += src_x + src_y*src_stride;
2673

    
2674
    ptmp= tmp + 3*tmp_step;
2675
    block[0]= ptmp;
2676
    ptmp+=tmp_step;
2677
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2678

    
2679
    if(same_block(lt, rt)){
2680
        block[1]= block[0];
2681
    }else{
2682
        block[1]= ptmp;
2683
        ptmp+=tmp_step;
2684
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2685
    }
2686
        
2687
    if(same_block(lt, lb)){
2688
        block[2]= block[0];
2689
    }else if(same_block(rt, lb)){
2690
        block[2]= block[1];
2691
    }else{
2692
        block[2]= ptmp;
2693
        ptmp+=tmp_step;
2694
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2695
    }
2696

    
2697
    if(same_block(lt, rb) ){
2698
        block[3]= block[0];
2699
    }else if(same_block(rt, rb)){
2700
        block[3]= block[1];
2701
    }else if(same_block(lb, rb)){
2702
        block[3]= block[2];
2703
    }else{
2704
        block[3]= ptmp;
2705
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2706
    }
2707
#if 0
2708
    for(y=0; y<b_h; y++){
2709
        for(x=0; x<b_w; x++){
2710
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2711
            if(add) dst[x + y*dst_stride] += v;
2712
            else    dst[x + y*dst_stride] -= v;
2713
        }
2714
    }
2715
    for(y=0; y<b_h; y++){
2716
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2717
        for(x=0; x<b_w; x++){
2718
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2719
            if(add) dst[x + y*dst_stride] += v;
2720
            else    dst[x + y*dst_stride] -= v;
2721
        }
2722
    }
2723
    for(y=0; y<b_h; y++){
2724
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2725
        for(x=0; x<b_w; x++){
2726
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2727
            if(add) dst[x + y*dst_stride] += v;
2728
            else    dst[x + y*dst_stride] -= v;
2729
        }
2730
    }
2731
    for(y=0; y<b_h; y++){
2732
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2733
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2734
        for(x=0; x<b_w; x++){
2735
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2736
            if(add) dst[x + y*dst_stride] += v;
2737
            else    dst[x + y*dst_stride] -= v;
2738
        }
2739
    }
2740
#else
2741
    for(y=0; y<b_h; y++){
2742
        //FIXME ugly missue of obmc_stride
2743
        uint8_t *obmc1= obmc + y*obmc_stride;
2744
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2745
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2746
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2747
        for(x=0; x<b_w; x++){
2748
            int v=   obmc1[x] * block[3][x + y*src_stride]
2749
                    +obmc2[x] * block[2][x + y*src_stride]
2750
                    +obmc3[x] * block[1][x + y*src_stride]
2751
                    +obmc4[x] * block[0][x + y*src_stride];
2752
            
2753
            v <<= 8 - LOG2_OBMC_MAX;
2754
            if(FRAC_BITS != 8){
2755
                v += 1<<(7 - FRAC_BITS);
2756
                v >>= 8 - FRAC_BITS;
2757
            }
2758
            if(add){
2759
                v += dst[x + y*dst_stride];
2760
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2761
                if(v&(~255)) v= ~(v>>31);
2762
                dst8[x + y*src_stride] = v;
2763
            }else{
2764
                dst[x + y*dst_stride] -= v;
2765
            }
2766
        }
2767
    }
2768
#endif
2769
}
2770

    
2771
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2772
    Plane *p= &s->plane[plane_index];
2773
    const int mb_w= s->b_width  << s->block_max_depth;
2774
    const int mb_h= s->b_height << s->block_max_depth;
2775
    int x, y, mb_x;
2776
    int block_size = MB_SIZE >> s->block_max_depth;
2777
    int block_w    = plane_index ? block_size/2 : block_size;
2778
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2779
    int obmc_stride= plane_index ? block_size : 2*block_size;
2780
    int ref_stride= s->current_picture.linesize[plane_index];
2781
    uint8_t *ref  = s->last_picture.data[plane_index];
2782
    uint8_t *dst8= s->current_picture.data[plane_index];
2783
    int w= p->width;
2784
    int h= p->height;
2785
    START_TIMER
2786
    
2787
    if(s->keyframe || (s->avctx->debug&512)){
2788
        if(mb_y==mb_h)
2789
            return;
2790

    
2791
        if(add){
2792
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2793
            {
2794
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2795
                DWTELEM * line = sb->line[y];
2796
                for(x=0; x<w; x++)
2797
                {
2798
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2799
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2800
                    v >>= FRAC_BITS;
2801
                    if(v&(~255)) v= ~(v>>31);
2802
                    dst8[x + y*ref_stride]= v;
2803
                }
2804
            }
2805
        }else{
2806
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2807
            {
2808
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2809
                DWTELEM * line = sb->line[y];
2810
                for(x=0; x<w; x++)
2811
                {
2812
                    line[x] -= 128 << FRAC_BITS;
2813
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2814
                }
2815
            }
2816
        }
2817

    
2818
        return;
2819
    }
2820
    
2821
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2822
            START_TIMER
2823

    
2824
            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, 
2825
                       block_w*mb_x - block_w/2,
2826
                       block_w*mb_y - block_w/2,
2827
                       block_w, block_w,
2828
                       w, h,
2829
                       w, ref_stride, obmc_stride,
2830
                       mb_x - 1, mb_y - 1,
2831
                       add, plane_index);
2832
            
2833
            STOP_TIMER("add_yblock")
2834
        }
2835
    
2836
    STOP_TIMER("predict_slice")
2837
}
2838

    
2839
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2840
    Plane *p= &s->plane[plane_index];
2841
    const int mb_w= s->b_width  << s->block_max_depth;
2842
    const int mb_h= s->b_height << s->block_max_depth;
2843
    int x, y, mb_x;
2844
    int block_size = MB_SIZE >> s->block_max_depth;
2845
    int block_w    = plane_index ? block_size/2 : block_size;
2846
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2847
    int obmc_stride= plane_index ? block_size : 2*block_size;
2848
    int ref_stride= s->current_picture.linesize[plane_index];
2849
    uint8_t *ref  = s->last_picture.data[plane_index];
2850
    uint8_t *dst8= s->current_picture.data[plane_index];
2851
    int w= p->width;
2852
    int h= p->height;
2853
    START_TIMER
2854
    
2855
    if(s->keyframe || (s->avctx->debug&512)){
2856
        if(mb_y==mb_h)
2857
            return;
2858

    
2859
        if(add){
2860
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2861
                for(x=0; x<w; x++){
2862
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2863
                    v >>= FRAC_BITS;
2864
                    if(v&(~255)) v= ~(v>>31);
2865
                    dst8[x + y*ref_stride]= v;
2866
                }
2867
            }
2868
        }else{
2869
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2870
                for(x=0; x<w; x++){
2871
                    buf[x + y*w]-= 128<<FRAC_BITS;
2872
                }
2873
            }
2874
        }
2875

    
2876
        return;
2877
    }
2878
    
2879
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2880
            START_TIMER
2881

    
2882
            add_yblock(s, buf, dst8, ref, obmc, 
2883
                       block_w*mb_x - block_w/2,
2884
                       block_w*mb_y - block_w/2,
2885
                       block_w, block_w,
2886
                       w, h,
2887
                       w, ref_stride, obmc_stride,
2888
                       mb_x - 1, mb_y - 1,
2889
                       add, plane_index);
2890
            
2891
            STOP_TIMER("add_yblock")
2892
        }
2893
    
2894
    STOP_TIMER("predict_slice")
2895
}
2896

    
2897
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2898
    const int mb_h= s->b_height << s->block_max_depth;
2899
    int mb_y;
2900
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2901
        predict_slice(s, buf, plane_index, add, mb_y);
2902
}
2903

    
2904
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2905
    const int level= b->level;
2906
    const int w= b->width;
2907
    const int h= b->height;
2908
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2909
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2910
    int x,y, thres1, thres2;
2911
    START_TIMER
2912

    
2913
    if(s->qlog == LOSSLESS_QLOG) return;
2914
 
2915
    bias= bias ? 0 : (3*qmul)>>3;
2916
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2917
    thres2= 2*thres1;
2918
    
2919
    if(!bias){
2920
        for(y=0; y<h; y++){
2921
            for(x=0; x<w; x++){
2922
                int i= src[x + y*stride];
2923
                
2924
                if((unsigned)(i+thres1) > thres2){
2925
                    if(i>=0){
2926
                        i<<= QEXPSHIFT;
2927
                        i/= qmul; //FIXME optimize
2928
                        src[x + y*stride]=  i;
2929
                    }else{
2930
                        i= -i;
2931
                        i<<= QEXPSHIFT;
2932
                        i/= qmul; //FIXME optimize
2933
                        src[x + y*stride]= -i;
2934
                    }
2935
                }else
2936
                    src[x + y*stride]= 0;
2937
            }
2938
        }
2939
    }else{
2940
        for(y=0; y<h; y++){
2941
            for(x=0; x<w; x++){
2942
                int i= src[x + y*stride]; 
2943
                
2944
                if((unsigned)(i+thres1) > thres2){
2945
                    if(i>=0){
2946
                        i<<= QEXPSHIFT;
2947
                        i= (i + bias) / qmul; //FIXME optimize
2948
                        src[x + y*stride]=  i;
2949
                    }else{
2950
                        i= -i;
2951
                        i<<= QEXPSHIFT;
2952
                        i= (i + bias) / qmul; //FIXME optimize
2953
                        src[x + y*stride]= -i;
2954
                    }
2955
                }else
2956
                    src[x + y*stride]= 0;
2957
            }
2958
        }
2959
    }
2960
    if(level+1 == s->spatial_decomposition_count){
2961
//        STOP_TIMER("quantize")
2962
    }
2963
}
2964

    
2965
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
2966
    const int w= b->width;
2967
    const int h= b->height;
2968
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2969
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2970
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2971
    int x,y;
2972
    START_TIMER
2973
    
2974
    if(s->qlog == LOSSLESS_QLOG) return;
2975
    
2976
    for(y=start_y; y<end_y; y++){
2977
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2978
        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2979
        for(x=0; x<w; x++){
2980
            int i= line[x];
2981
            if(i<0){
2982
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2983
            }else if(i>0){
2984
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2985
            }
2986
        }
2987
    }
2988
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2989
        STOP_TIMER("dquant")
2990
    }
2991
}
2992

    
2993
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2994
    const int w= b->width;
2995
    const int h= b->height;
2996
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2997
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2998
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2999
    int x,y;
3000
    START_TIMER
3001
    
3002
    if(s->qlog == LOSSLESS_QLOG) return;
3003
    
3004
    for(y=0; y<h; y++){
3005
        for(x=0; x<w; x++){
3006
            int i= src[x + y*stride];
3007
            if(i<0){
3008
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3009
            }else if(i>0){
3010
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3011
            }
3012
        }
3013
    }
3014
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3015
        STOP_TIMER("dquant")
3016
    }
3017
}
3018

    
3019
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3020
    const int w= b->width;
3021
    const int h= b->height;
3022
    int x,y;
3023
    
3024
    for(y=h-1; y>=0; y--){
3025
        for(x=w-1; x>=0; x--){
3026
            int i= x + y*stride;
3027
            
3028
            if(x){
3029
                if(use_median){
3030
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3031
                    else  src[i] -= src[i - 1];
3032
                }else{
3033
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3034
                    else  src[i] -= src[i - 1];
3035
                }
3036
            }else{
3037
                if(y) src[i] -= src[i - stride];
3038
            }
3039
        }
3040
    }
3041
}
3042

    
3043
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3044
    const int w= b->width;
3045
    const int h= b->height;
3046
    int x,y;
3047
    
3048
//    START_TIMER
3049
    
3050
    DWTELEM * line;
3051
    DWTELEM * prev;
3052
    
3053
    if (start_y != 0)
3054
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3055
    
3056
    for(y=start_y; y<end_y; y++){
3057
        prev = line;
3058
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3059
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3060
        for(x=0; x<w; x++){
3061
            if(x){
3062
                if(use_median){
3063
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3064
                    else  line[x] += line[x - 1];
3065
                }else{
3066
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3067
                    else  line[x] += line[x - 1];
3068
                }
3069
            }else{
3070
                if(y) line[x] += prev[x];
3071
            }
3072
        }
3073
    }
3074
    
3075
//    STOP_TIMER("correlate")
3076
}
3077

    
3078
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3079
    const int w= b->width;
3080
    const int h= b->height;
3081
    int x,y;
3082
    
3083
    for(y=0; y<h; y++){
3084
        for(x=0; x<w; x++){
3085
            int i= x + y*stride;
3086
            
3087
            if(x){
3088
                if(use_median){
3089
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3090
                    else  src[i] += src[i - 1];
3091
                }else{
3092
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3093
                    else  src[i] += src[i - 1];
3094
                }
3095
            }else{
3096
                if(y) src[i] += src[i - stride];
3097
            }
3098
        }
3099
    }
3100
}
3101

    
3102
static void encode_header(SnowContext *s){
3103
    int plane_index, level, orientation;
3104
    uint8_t kstate[32]; 
3105
    
3106
    memset(kstate, MID_STATE, sizeof(kstate));   
3107

    
3108
    put_rac(&s->c, kstate, s->keyframe);
3109
    if(s->keyframe || s->always_reset)
3110
        reset_contexts(s);
3111
    if(s->keyframe){
3112
        put_symbol(&s->c, s->header_state, s->version, 0);
3113
        put_rac(&s->c, s->header_state, s->always_reset);
3114
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3115
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3116
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3117
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3118
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3119
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3120
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3121
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3122

    
3123
        for(plane_index=0; plane_index<2; plane_index++){
3124
            for(level=0; level<s->spatial_decomposition_count; level++){
3125
                for(orientation=level ? 1:0; orientation<4; orientation++){
3126
                    if(orientation==2) continue;
3127
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3128
                }
3129
            }
3130
        }
3131
    }
3132
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3133
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
3134
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
3135
    put_symbol(&s->c, s->header_state, s->qbias, 1);
3136
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3137
}
3138

    
3139
static int decode_header(SnowContext *s){
3140
    int plane_index, level, orientation;
3141
    uint8_t kstate[32];
3142

    
3143
    memset(kstate, MID_STATE, sizeof(kstate));   
3144

    
3145
    s->keyframe= get_rac(&s->c, kstate);
3146
    if(s->keyframe || s->always_reset)
3147
        reset_contexts(s);
3148
    if(s->keyframe){
3149
        s->version= get_symbol(&s->c, s->header_state, 0);
3150
        if(s->version>0){
3151
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3152
            return -1;
3153
        }
3154
        s->always_reset= get_rac(&s->c, s->header_state);
3155
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3156
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3157
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3158
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3159
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3160
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3161
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3162
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3163

    
3164
        for(plane_index=0; plane_index<3; plane_index++){
3165
            for(level=0; level<s->spatial_decomposition_count; level++){
3166
                for(orientation=level ? 1:0; orientation<4; orientation++){
3167
                    int q;
3168
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3169
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3170
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3171
                    s->plane[plane_index].band[level][orientation].qlog= q;
3172
                }
3173
            }
3174
        }
3175
    }
3176
    
3177
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3178
    if(s->spatial_decomposition_type > 2){
3179
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3180
        return -1;
3181
    }
3182
    
3183
    s->qlog= get_symbol(&s->c, s->header_state, 1);
3184
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3185
    s->qbias= get_symbol(&s->c, s->header_state, 1);
3186
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3187

    
3188
    return 0;
3189
}
3190

    
3191
static void init_qexp(){
3192
    int i;
3193
    double v=128;
3194

    
3195
    for(i=0; i<QROOT; i++){
3196
        qexp[i]= lrintf(v);
3197
        v *= pow(2, 1.0 / QROOT); 
3198
    }
3199
}
3200

    
3201
static int common_init(AVCodecContext *avctx){
3202
    SnowContext *s = avctx->priv_data;
3203
    int width, height;
3204
    int level, orientation, plane_index, dec;
3205

    
3206
    s->avctx= avctx;
3207
        
3208
    dsputil_init(&s->dsp, avctx);
3209

    
3210
#define mcf(dx,dy)\
3211
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3212
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3213
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3214
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3215
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3216
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3217

    
3218
    mcf( 0, 0)
3219
    mcf( 4, 0)
3220
    mcf( 8, 0)
3221
    mcf(12, 0)
3222
    mcf( 0, 4)
3223
    mcf( 4, 4)
3224
    mcf( 8, 4)
3225
    mcf(12, 4)
3226
    mcf( 0, 8)
3227
    mcf( 4, 8)
3228
    mcf( 8, 8)
3229
    mcf(12, 8)
3230
    mcf( 0,12)
3231
    mcf( 4,12)
3232
    mcf( 8,12)
3233
    mcf(12,12)
3234

    
3235
#define mcfh(dx,dy)\
3236
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3237
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3238
        mc_block_hpel ## dx ## dy ## 16;\
3239
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3240
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3241
        mc_block_hpel ## dx ## dy ## 8;
3242

    
3243
    mcfh(0, 0)
3244
    mcfh(8, 0)
3245
    mcfh(0, 8)
3246
    mcfh(8, 8)
3247

    
3248
    if(!qexp[0])
3249
        init_qexp();
3250

    
3251
    dec= s->spatial_decomposition_count= 5;
3252
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3253
    
3254
    s->chroma_h_shift= 1; //FIXME XXX
3255
    s->chroma_v_shift= 1;
3256
    
3257
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3258
    
3259
    width= s->avctx->width;
3260
    height= s->avctx->height;
3261

    
3262
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3263
    
3264
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3265
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3266
    
3267
    for(plane_index=0; plane_index<3; plane_index++){    
3268
        int w= s->avctx->width;
3269
        int h= s->avctx->height;
3270

    
3271
        if(plane_index){
3272
            w>>= s->chroma_h_shift;
3273
            h>>= s->chroma_v_shift;
3274
        }
3275
        s->plane[plane_index].width = w;
3276
        s->plane[plane_index].height= h;
3277
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3278
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3279
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3280
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3281
                
3282
                b->buf= s->spatial_dwt_buffer;
3283
                b->level= level;
3284
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3285
                b->width = (w + !(orientation&1))>>1;
3286
                b->height= (h + !(orientation>1))>>1;
3287
                
3288
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3289
                b->buf_x_offset = 0;
3290
                b->buf_y_offset = 0;
3291
                
3292
                if(orientation&1){
3293
                    b->buf += (w+1)>>1;
3294
                    b->buf_x_offset = (w+1)>>1;
3295
                }
3296
                if(orientation>1){
3297
                    b->buf += b->stride>>1;
3298
                    b->buf_y_offset = b->stride_line >> 1;
3299
                }
3300
                
3301
                if(level)
3302
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3303
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3304
            }
3305
            w= (w+1)>>1;
3306
            h= (h+1)>>1;
3307
        }
3308
    }
3309
    
3310
    reset_contexts(s);
3311
/*    
3312
    width= s->width= avctx->width;
3313
    height= s->height= avctx->height;
3314
    
3315
    assert(width && height);
3316
*/
3317
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3318
    
3319
    return 0;
3320
}
3321

    
3322

    
3323
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3324
    int width = p->width;
3325
    int height= p->height;
3326
    int level, orientation, x, y;
3327

    
3328
    for(level=0; level<s->spatial_decomposition_count; level++){
3329
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3330
            SubBand *b= &p->band[level][orientation];
3331
            DWTELEM *buf= b->buf;
3332
            int64_t error=0;
3333
            
3334
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3335
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
3336
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3337
            for(y=0; y<height; y++){
3338
                for(x=0; x<width; x++){
3339
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
3340
                    error += d*d;
3341
                }
3342
            }
3343

    
3344
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3345
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3346
        }
3347
    }
3348
}
3349

    
3350
static int encode_init(AVCodecContext *avctx)
3351
{
3352
    SnowContext *s = avctx->priv_data;
3353
    int plane_index;
3354

    
3355
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3356
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3357
               "use vstrict=-2 / -strict -2 to use it anyway\n");
3358
        return -1;
3359
    }
3360
 
3361
    common_init(avctx);
3362
    alloc_blocks(s);
3363
 
3364
    s->version=0;
3365
    
3366
    s->m.avctx   = avctx;
3367
    s->m.flags   = avctx->flags;
3368
    s->m.bit_rate= avctx->bit_rate;
3369

    
3370
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3371
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3372
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3373
    h263_encode_init(&s->m); //mv_penalty
3374

    
3375
    if(avctx->flags&CODEC_FLAG_PASS1){
3376
        if(!avctx->stats_out)
3377
            avctx->stats_out = av_mallocz(256);
3378
    }
3379
    if(avctx->flags&CODEC_FLAG_PASS2){
3380
        if(ff_rate_control_init(&s->m) < 0)
3381
            return -1;
3382
    }
3383

    
3384
    for(plane_index=0; plane_index<3; plane_index++){
3385
        calculate_vissual_weight(s, &s->plane[plane_index]);
3386
    }
3387
    
3388
    
3389
    avctx->coded_frame= &s->current_picture;
3390
    switch(avctx->pix_fmt){
3391
//    case PIX_FMT_YUV444P:
3392
//    case PIX_FMT_YUV422P:
3393
    case PIX_FMT_YUV420P:
3394
    case PIX_FMT_GRAY8:
3395
//    case PIX_FMT_YUV411P:
3396
//    case PIX_FMT_YUV410P:
3397
        s->colorspace_type= 0;
3398
        break;
3399
/*    case PIX_FMT_RGBA32:
3400
        s->colorspace= 1;
3401
        break;*/
3402
    default:
3403
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3404
        return -1;
3405
    }
3406
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3407
    s->chroma_h_shift= 1;
3408
    s->chroma_v_shift= 1;
3409
    return 0;
3410
}
3411

    
3412
static int frame_start(SnowContext *s){
3413
   AVFrame tmp;
3414
   int w= s->avctx->width; //FIXME round up to x16 ?
3415
   int h= s->avctx->height;
3416

    
3417
    if(s->current_picture.data[0]){
3418
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
3419
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3420
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3421
    }
3422

    
3423
    tmp= s->last_picture;
3424
    s->last_picture= s->current_picture;
3425
    s->current_picture= tmp;
3426
    
3427
    s->current_picture.reference= 1;
3428
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3429
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3430
        return -1;
3431
    }
3432
    
3433
    return 0;
3434
}
3435

    
3436
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3437
    SnowContext *s = avctx->priv_data;
3438
    RangeCoder * const c= &s->c;
3439
    AVFrame *pict = data;
3440
    const int width= s->avctx->width;
3441
    const int height= s->avctx->height;
3442
    int level, orientation, plane_index;
3443

    
3444
    ff_init_range_encoder(c, buf, buf_size);
3445
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3446
    
3447
    s->input_picture = *pict;
3448

    
3449
    if(avctx->flags&CODEC_FLAG_PASS2){
3450
        s->m.pict_type =
3451
        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3452
        s->keyframe= pict->pict_type==FF_I_TYPE;
3453
        s->m.picture_number= avctx->frame_number;
3454
        pict->quality= ff_rate_estimate_qscale(&s->m);
3455
    }else{
3456
        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3457
        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3458
    }
3459
    
3460
    if(pict->quality){
3461
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3462
        //<64 >60
3463
        s->qlog += 61*QROOT/8;
3464
    }else{
3465
        s->qlog= LOSSLESS_QLOG;
3466
    }
3467

    
3468
    frame_start(s);
3469
    s->current_picture.key_frame= s->keyframe;
3470

    
3471
    s->m.current_picture_ptr= &s->m.current_picture;
3472
    if(pict->pict_type == P_TYPE){
3473
        int block_width = (width +15)>>4;
3474
        int block_height= (height+15)>>4;
3475
        int stride= s->current_picture.linesize[0];
3476
        
3477
        assert(s->current_picture.data[0]);
3478
        assert(s->last_picture.data[0]);
3479
     
3480
        s->m.avctx= s->avctx;
3481
        s->m.current_picture.data[0]= s->current_picture.data[0];
3482
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
3483
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3484
        s->m.   last_picture_ptr= &s->m.   last_picture;
3485
        s->m.linesize=
3486
        s->m.   last_picture.linesize[0]=
3487
        s->m.    new_picture.linesize[0]=
3488
        s->m.current_picture.linesize[0]= stride;
3489
        s->m.uvlinesize= s->current_picture.linesize[1];
3490
        s->m.width = width;
3491
        s->m.height= height;
3492
        s->m.mb_width = block_width;
3493
        s->m.mb_height= block_height;
3494
        s->m.mb_stride=   s->m.mb_width+1;
3495
        s->m.b8_stride= 2*s->m.mb_width+1;
3496
        s->m.f_code=1;
3497
        s->m.pict_type= pict->pict_type;
3498
        s->m.me_method= s->avctx->me_method;
3499
        s->m.me.scene_change_score=0;
3500
        s->m.flags= s->avctx->flags;
3501
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3502
        s->m.out_format= FMT_H263;
3503
        s->m.unrestricted_mv= 1;
3504

    
3505
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3506
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3507
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3508

    
3509
        s->m.dsp= s->dsp; //move
3510
        ff_init_me(&s->m);
3511
    }
3512
    
3513
redo_frame:
3514
        
3515
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3516

    
3517
    encode_header(s);
3518
    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3519
    encode_blocks(s);
3520
    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3521
      
3522
    for(plane_index=0; plane_index<3; plane_index++){
3523
        Plane *p= &s->plane[plane_index];
3524
        int w= p->width;
3525
        int h= p->height;
3526
        int x, y;
3527
//        int bits= put_bits_count(&s->c.pb);
3528

    
3529
        //FIXME optimize
3530
     if(pict->data[plane_index]) //FIXME gray hack
3531
        for(y=0; y<h; y++){
3532
            for(x=0; x<w; x++){
3533
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3534
            }
3535
        }
3536
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3537
        
3538
        if(   plane_index==0 
3539
           && pict->pict_type == P_TYPE 
3540
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3541
            ff_init_range_encoder(c, buf, buf_size);
3542
            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3543
            pict->pict_type= FF_I_TYPE;
3544
            s->keyframe=1;
3545
            reset_contexts(s);
3546
            goto redo_frame;
3547
        }
3548
        
3549
        if(s->qlog == LOSSLESS_QLOG){
3550
            for(y=0; y<h; y++){
3551
                for(x=0; x<w; x++){
3552
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3553
                }
3554
            }
3555
        }
3556
 
3557
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3558

    
3559
        for(level=0; level<s->spatial_decomposition_count; level++){
3560
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3561
                SubBand *b= &p->band[level][orientation];
3562
                
3563
                quantize(s, b, b->buf, b->stride, s->qbias);
3564
                if(orientation==0)
3565
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3566
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3567
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
3568
                if(orientation==0)
3569
                    correlate(s, b, b->buf, b->stride, 1, 0);
3570
            }
3571
        }
3572
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3573

    
3574
        for(level=0; level<s->spatial_decomposition_count; level++){
3575
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3576
                SubBand *b= &p->band[level][orientation];
3577

    
3578
                dequantize(s, b, b->buf, b->stride);
3579
            }
3580
        }
3581

    
3582
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3583
        if(s->qlog == LOSSLESS_QLOG){
3584
            for(y=0; y<h; y++){
3585
                for(x=0; x<w; x++){
3586
                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
3587
                }
3588
            }
3589
        }
3590
{START_TIMER
3591
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3592
STOP_TIMER("pred-conv")}
3593
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3594
            int64_t error= 0;
3595
            
3596
    if(pict->data[plane_index]) //FIXME gray hack
3597
            for(y=0; y<h; y++){
3598
                for(x=0; x<w; x++){
3599
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3600
                    error += d*d;
3601
                }
3602
            }
3603
            s->avctx->error[plane_index] += error;
3604
            s->current_picture.error[plane_index] = error;
3605
        }
3606
    }
3607

    
3608
    if(s->last_picture.data[0])
3609
        avctx->release_buffer(avctx, &s->last_picture);
3610

    
3611
    s->current_picture.coded_picture_number = avctx->frame_number;
3612
    s->current_picture.pict_type = pict->pict_type;
3613
    s->current_picture.quality = pict->quality;
3614
    if(avctx->flags&CODEC_FLAG_PASS1){
3615
        s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
3616
        s->m.current_picture.display_picture_number =
3617
        s->m.current_picture.coded_picture_number = avctx->frame_number;
3618
        s->m.pict_type = pict->pict_type;
3619
        s->m.current_picture.quality = pict->quality;
3620
        ff_write_pass1_stats(&s->m);
3621
    }
3622
    if(avctx->flags&CODEC_FLAG_PASS2){
3623
        s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3624
    }
3625

    
3626
    emms_c();
3627
    
3628
    return ff_rac_terminate(c);
3629
}
3630

    
3631
static void common_end(SnowContext *s){
3632
    int plane_index, level, orientation;
3633

    
3634
    av_freep(&s->spatial_dwt_buffer);
3635

    
3636
    av_freep(&s->m.me.scratchpad);    
3637
    av_freep(&s->m.me.map);
3638
    av_freep(&s->m.me.score_map);
3639
 
3640
    av_freep(&s->block);
3641

    
3642
    for(plane_index=0; plane_index<3; plane_index++){    
3643
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3644
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3645
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3646
                
3647
                av_freep(&b->x_coeff);
3648
            }
3649
        }
3650
    }
3651
}
3652

    
3653
static int encode_end(AVCodecContext *avctx)
3654
{
3655
    SnowContext *s = avctx->priv_data;
3656

    
3657
    common_end(s);
3658
    av_free(avctx->stats_out);
3659

    
3660
    return 0;
3661
}
3662

    
3663
static int decode_init(AVCodecContext *avctx)
3664
{
3665
    SnowContext *s = avctx->priv_data;
3666
    int block_size;
3667
    
3668
    avctx->pix_fmt= PIX_FMT_YUV420P;
3669

    
3670
    common_init(avctx);
3671
    
3672
    block_size = MB_SIZE >> s->block_max_depth;
3673
    slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 2)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
3674
    
3675
    return 0;
3676
}
3677

    
3678
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3679
    SnowContext *s = avctx->priv_data;
3680
    RangeCoder * const c= &s->c;
3681
    int bytes_read;
3682
    AVFrame *picture = data;
3683
    int level, orientation, plane_index;
3684

    
3685
    ff_init_range_decoder(c, buf, buf_size);
3686
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3687

    
3688
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3689
    decode_header(s);
3690
    if(!s->block) alloc_blocks(s);
3691

    
3692
    frame_start(s);
3693
    //keyframe flag dupliaction mess FIXME
3694
    if(avctx->debug&FF_DEBUG_PICT_INFO)
3695
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3696
    
3697
    decode_blocks(s);
3698

    
3699
    for(plane_index=0; plane_index<3; plane_index++){
3700
        Plane *p= &s->plane[plane_index];
3701
        int w= p->width;
3702
        int h= p->height;
3703
        int x, y;
3704
        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
3705
        
3706
if(s->avctx->debug&2048){
3707
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3708
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3709

    
3710
        for(y=0; y<h; y++){
3711
            for(x=0; x<w; x++){
3712
                int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
3713
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3714
            }
3715
        }
3716
}
3717

    
3718
{   START_TIMER
3719
    for(level=0; level<s->spatial_decomposition_count; level++){
3720
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3721
            SubBand *b= &p->band[level][orientation];
3722
            unpack_coeffs(s, b, b->parent, orientation);
3723
        }
3724
    }
3725
    STOP_TIMER("unpack coeffs");
3726
}
3727

    
3728
{START_TIMER
3729
    const int mb_h= s->b_height << s->block_max_depth;
3730
    const int block_size = MB_SIZE >> s->block_max_depth;
3731
    const int block_w    = plane_index ? block_size/2 : block_size;
3732
    int mb_y;
3733
    dwt_compose_t cs[MAX_DECOMPOSITIONS];
3734
    int yd=0, yq=0;
3735
    int y;
3736
    int end_y;
3737

    
3738
    ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
3739
    for(mb_y=0; mb_y<=mb_h; mb_y++){
3740
        
3741
        int slice_starty = block_w*mb_y;
3742
        int slice_h = block_w*(mb_y+1);
3743
        if (!(s->keyframe || s->avctx->debug&512)){
3744
            slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
3745
            slice_h -= (block_w >> 1);
3746
        }
3747

    
3748
        {        
3749
        START_TIMER
3750
        for(level=0; level<s->spatial_decomposition_count; level++){
3751
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3752
                SubBand *b= &p->band[level][orientation];
3753
                int start_y;
3754
                int end_y;
3755
                int our_mb_start = mb_y;
3756
                int our_mb_end = (mb_y + 1);
3757
                start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2: 0);
3758
                end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2);
3759
                if (!(s->keyframe || s->avctx->debug&512)){
3760
                    start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
3761
                    end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
3762
                }
3763
                start_y = FFMIN(b->height, start_y);
3764
                end_y = FFMIN(b->height, end_y);
3765
                
3766
                if (start_y != end_y){
3767
                    if (orientation == 0){
3768
                        SubBand * correlate_band = &p->band[0][0];
3769
                        int correlate_end_y = FFMIN(b->height, end_y + 1);
3770
                        int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
3771
                        decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
3772
                        correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
3773
                        dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
3774
                    }
3775
                    else
3776
                        decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
3777
                }
3778
            }
3779
        }
3780
        STOP_TIMER("decode_subband_slice");
3781
        }
3782
        
3783
{   START_TIMER
3784
        for(; yd<slice_h; yd+=4){
3785
            ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
3786
        }
3787
    STOP_TIMER("idwt slice");}
3788

    
3789
        
3790
        if(s->qlog == LOSSLESS_QLOG){
3791
            for(; yq<slice_h && yq<h; yq++){
3792
                DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
3793
                for(x=0; x<w; x++){
3794
                    line[x] <<= FRAC_BITS;
3795
                }
3796
            }
3797
        }
3798

    
3799
        predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
3800
        
3801
        y = FFMIN(p->height, slice_starty);
3802
        end_y = FFMIN(p->height, slice_h);
3803
        while(y < end_y)
3804
            slice_buffer_release(&s->sb, y++);
3805
    }
3806
    
3807
    slice_buffer_flush(&s->sb);
3808
    
3809
STOP_TIMER("idwt + predict_slices")}
3810
    }
3811
            
3812
    emms_c();
3813

    
3814
    if(s->last_picture.data[0])
3815
        avctx->release_buffer(avctx, &s->last_picture);
3816

    
3817
if(!(s->avctx->debug&2048))        
3818
    *picture= s->current_picture;
3819
else
3820
    *picture= s->mconly_picture;
3821
    
3822
    *data_size = sizeof(AVFrame);
3823
    
3824
    bytes_read= c->bytestream - c->bytestream_start;
3825
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
3826

    
3827
    return bytes_read;
3828
}
3829

    
3830
static int decode_end(AVCodecContext *avctx)
3831
{
3832
    SnowContext *s = avctx->priv_data;
3833

    
3834
    slice_buffer_destroy(&s->sb);
3835
    
3836
    common_end(s);
3837

    
3838
    return 0;
3839
}
3840

    
3841
AVCodec snow_decoder = {
3842
    "snow",
3843
    CODEC_TYPE_VIDEO,
3844
    CODEC_ID_SNOW,
3845
    sizeof(SnowContext),
3846
    decode_init,
3847
    NULL,
3848
    decode_end,
3849
    decode_frame,
3850
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3851
    NULL
3852
};
3853

    
3854
#ifdef CONFIG_ENCODERS
3855
AVCodec snow_encoder = {
3856
    "snow",
3857
    CODEC_TYPE_VIDEO,
3858
    CODEC_ID_SNOW,
3859
    sizeof(SnowContext),
3860
    encode_init,
3861
    encode_frame,
3862
    encode_end,
3863
};
3864
#endif
3865

    
3866

    
3867
#if 0
3868
#undef malloc
3869
#undef free
3870
#undef printf
3871

3872
int main(){
3873
    int width=256;
3874
    int height=256;
3875
    int buffer[2][width*height];
3876
    SnowContext s;
3877
    int i;
3878
    s.spatial_decomposition_count=6;
3879
    s.spatial_decomposition_type=1;
3880
    
3881
    printf("testing 5/3 DWT\n");
3882
    for(i=0; i<width*height; i++)
3883
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3884
    
3885
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3886
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3887
    
3888
    for(i=0; i<width*height; i++)
3889
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3890

3891
    printf("testing 9/7 DWT\n");
3892
    s.spatial_decomposition_type=0;
3893
    for(i=0; i<width*height; i++)
3894
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3895
    
3896
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3897
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3898
    
3899
    for(i=0; i<width*height; i++)
3900
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3901
        
3902
    printf("testing AC coder\n");
3903
    memset(s.header_state, 0, sizeof(s.header_state));
3904
    ff_init_range_encoder(&s.c, buffer[0], 256*256);
3905
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3906
        
3907
    for(i=-256; i<256; i++){
3908
START_TIMER
3909
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3910
STOP_TIMER("put_symbol")
3911
    }
3912
    ff_rac_terminate(&s.c);
3913

3914
    memset(s.header_state, 0, sizeof(s.header_state));
3915
    ff_init_range_decoder(&s.c, buffer[0], 256*256);
3916
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3917
    
3918
    for(i=-256; i<256; i++){
3919
        int j;
3920
START_TIMER
3921
        j= get_symbol(&s.c, s.header_state, 1);
3922
STOP_TIMER("get_symbol")
3923
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3924
    }
3925
{
3926
int level, orientation, x, y;
3927
int64_t errors[8][4];
3928
int64_t g=0;
3929

3930
    memset(errors, 0, sizeof(errors));
3931
    s.spatial_decomposition_count=3;
3932
    s.spatial_decomposition_type=0;
3933
    for(level=0; level<s.spatial_decomposition_count; level++){
3934
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3935
            int w= width  >> (s.spatial_decomposition_count-level);
3936
            int h= height >> (s.spatial_decomposition_count-level);
3937
            int stride= width  << (s.spatial_decomposition_count-level);
3938
            DWTELEM *buf= buffer[0];
3939
            int64_t error=0;
3940

3941
            if(orientation&1) buf+=w;
3942
            if(orientation>1) buf+=stride>>1;
3943
            
3944
            memset(buffer[0], 0, sizeof(int)*width*height);
3945
            buf[w/2 + h/2*stride]= 256*256;
3946
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3947
            for(y=0; y<height; y++){
3948
                for(x=0; x<width; x++){
3949
                    int64_t d= buffer[0][x + y*width];
3950
                    error += d*d;
3951
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3952
                }
3953
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3954
            }
3955
            error= (int)(sqrt(error)+0.5);
3956
            errors[level][orientation]= error;
3957
            if(g) g=ff_gcd(g, error);
3958
            else g= error;
3959
        }
3960
    }
3961
    printf("static int const visual_weight[][4]={\n");
3962
    for(level=0; level<s.spatial_decomposition_count; level++){
3963
        printf("  {");
3964
        for(orientation=0; orientation<4; orientation++){
3965
            printf("%8lld,", errors[level][orientation]/g);
3966
        }
3967
        printf("},\n");
3968
    }
3969
    printf("};\n");
3970
    {
3971
            int level=2;
3972
            int orientation=3;
3973
            int w= width  >> (s.spatial_decomposition_count-level);
3974
            int h= height >> (s.spatial_decomposition_count-level);
3975
            int stride= width  << (s.spatial_decomposition_count-level);
3976
            DWTELEM *buf= buffer[0];
3977
            int64_t error=0;
3978

3979
            buf+=w;
3980
            buf+=stride>>1;
3981
            
3982
            memset(buffer[0], 0, sizeof(int)*width*height);
3983
#if 1
3984
            for(y=0; y<height; y++){
3985
                for(x=0; x<width; x++){
3986
                    int tab[4]={0,2,3,1};
3987
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3988
                }
3989
            }
3990
            ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3991
#else
3992
            for(y=0; y<h; y++){
3993
                for(x=0; x<w; x++){
3994
                    buf[x + y*stride  ]=169;
3995
                    buf[x + y*stride-w]=64;
3996
                }
3997
            }
3998
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3999
#endif
4000
            for(y=0; y<height; y++){
4001
                for(x=0; x<width; x++){
4002
                    int64_t d= buffer[0][x + y*width];
4003
                    error += d*d;
4004
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4005
                }
4006
                if(ABS(height/2-y)<9) printf("\n");
4007
            }
4008
    }
4009

    
4010
}
4011
    return 0;
4012
}
4013
#endif
4014