Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ ff06e067

History | View | Annotate | Download (162 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

    
21
#include "avcodec.h"
22
#include "dsputil.h"
23
#include "snow.h"
24

    
25
#include "rangecoder.h"
26

    
27
#include "mpegvideo.h"
28

    
29
#undef NDEBUG
30
#include <assert.h>
31

    
32
static const int8_t quant3[256]={
33
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
49
};
50
static const int8_t quant3b[256]={
51
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
};
68
static const int8_t quant3bA[256]={
69
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
};
86
static const int8_t quant5[256]={
87
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
103
};
104
static const int8_t quant7[256]={
105
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
121
};
122
static const int8_t quant9[256]={
123
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
139
};
140
static const int8_t quant11[256]={
141
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
157
};
158
static const int8_t quant13[256]={
159
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
175
};
176

    
177
#if 0 //64*cubic
178
static const uint8_t obmc32[1024]={
179
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
181
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
182
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
183
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
184
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
185
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
186
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
187
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
188
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
189
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
190
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
191
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
192
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
193
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
194
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
196
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
197
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
198
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
199
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
201
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
202
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
203
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
204
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
205
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
206
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
207
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
208
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
209
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
210
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211
//error:0.000022
212
};
213
static const uint8_t obmc16[256]={
214
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
215
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
216
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
217
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
218
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
219
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
220
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
221
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
223
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
224
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
225
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
226
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
228
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
229
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
230
//error:0.000033
231
};
232
#elif 1 // 64*linear
233
static const uint8_t obmc32[1024]={
234
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
235
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
236
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
237
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
238
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
239
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
240
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
241
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
242
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
243
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
244
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
245
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
246
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
247
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
248
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
249
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
250
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
251
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
252
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
253
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
254
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
255
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
256
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
257
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
258
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
259
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
260
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
261
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
262
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
263
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
264
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
265
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
266
 //error:0.000020
267
};
268
static const uint8_t obmc16[256]={
269
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
270
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
271
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
272
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
273
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
274
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
281
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
282
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
283
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
284
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
285
//error:0.000015
286
};
287
#else //64*cos
288
static const uint8_t obmc32[1024]={
289
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
291
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
292
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
293
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
294
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
295
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
296
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
297
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
298
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
299
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
300
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
301
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
302
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
303
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
304
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
306
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
307
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
308
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
309
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
311
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
312
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
313
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
314
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
315
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
316
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
317
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
318
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
319
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
320
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
321
//error:0.000022
322
};
323
static const uint8_t obmc16[256]={
324
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
325
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
326
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
327
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
328
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
329
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
330
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
331
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
333
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
334
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
335
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
336
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
338
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
339
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
340
//error:0.000022
341
};
342
#endif
343

    
344
//linear *64
/**
 * 8x8 overlapped-block weighting window, stored row-major.
 * The table is separable: entry [y*8+x] equals w[y]*w[x] with
 * w = {2, 6, 10, 14, 14, 10, 6, 2}, so all 64 entries sum to 64*64 = 4096.
 */
static const uint8_t obmc8[64]={
  4, 12, 20, 28, 28, 20, 12,  4,
 12, 36, 60, 84, 84, 60, 36, 12,
 20, 60,100,140,140,100, 60, 20,
 28, 84,140,196,196,140, 84, 28,
 28, 84,140,196,196,140, 84, 28,
 20, 60,100,140,140,100, 60, 20,
 12, 36, 60, 84, 84, 60, 36, 12,
  4, 12, 20, 28, 28, 20, 12,  4,
//error:0.000000
};
356

    
357
//linear *64
/**
 * 4x4 overlapped-block weighting window, stored row-major.
 * The table is separable: entry [y*4+x] equals w[y]*w[x] with
 * w = {4, 12, 12, 4}, so all 16 entries sum to 32*32 = 1024.
 */
static const uint8_t obmc4[16]={
 16, 48, 48, 16,
 48,144,144, 48,
 48,144,144, 48,
 16, 48, 48, 16,
//error:0.000000
};
365

    
366
static const uint8_t *obmc_tab[4]={
367
    obmc32, obmc16, obmc8, obmc4
368
};
369

    
370
// Square table indexed by two reference-frame indices (each < MAX_REF_FRAMES).
// NOTE(review): presumably holds motion-vector scale factors between
// reference frames; the code that fills and reads it is not in this part of
// the file -- confirm before relying on this.
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
371

    
372
/**
 * Per-block side information.
 * NOTE(review): field meanings below are inferred from the names and from
 * the null_block initializer; confirm against the code that reads them.
 */
typedef struct BlockNode{
    int16_t mx;       // presumably the horizontal motion vector component
    int16_t my;       // presumably the vertical motion vector component
    uint8_t ref;      // presumably the reference frame index
    uint8_t color[3]; // one value per plane; null_block uses 128 for each
    uint8_t type;     // bit flags, see BLOCK_* below
//#define TYPE_SPLIT    1
#define BLOCK_INTRA   1
#define BLOCK_OPT     2
//#define TYPE_NOCOLOR  4
    uint8_t level; //FIXME merge into type?
}BlockNode;
384

    
385
static const BlockNode null_block= { //FIXME add border maybe
386
    .color= {128,128,128},
387
    .mx= 0,
388
    .my= 0,
389
    .ref= 0,
390
    .type= 0,
391
    .level= 0,
392
};
393

    
394
// MB_SIZE is derived from LOG2_MB_SIZE, so it is always a power of two (16 here).
#define LOG2_MB_SIZE 4
#define MB_SIZE (1<<LOG2_MB_SIZE)
#define ENCODER_EXTRA_BITS 4
397

    
398
/**
 * Pair of a signed 16-bit x value and an unsigned 16-bit coefficient.
 * NOTE(review): presumably a (column, value) entry for sparse coefficient
 * storage (see SubBand.x_coeff); confirm against the code that uses it.
 */
typedef struct x_and_coeff{
    int16_t x;
    uint16_t coeff;
} x_and_coeff;
402

    
403
typedef struct SubBand{
404
    int level;
405
    int stride;
406
    int width;
407
    int height;
408
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
409
    DWTELEM *buf;
410
    IDWTELEM *ibuf;
411
    int buf_x_offset;
412
    int buf_y_offset;
413
    int stride_line; ///< Stride measured in lines, not pixels.
414
    x_and_coeff * x_coeff;
415
    struct SubBand *parent;
416
    uint8_t state[/*7*2*/ 7 + 512][32];
417
}SubBand;
418

    
419
typedef struct Plane{
420
    int width;
421
    int height;
422
    SubBand band[MAX_DECOMPOSITIONS][4];
423
}Plane;
424

    
425
typedef struct SnowContext{
426
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
427

    
428
    AVCodecContext *avctx;
429
    RangeCoder c;
430
    DSPContext dsp;
431
    AVFrame new_picture;
432
    AVFrame input_picture;              ///< new_picture with the internal linesizes
433
    AVFrame current_picture;
434
    AVFrame last_picture[MAX_REF_FRAMES];
435
    AVFrame mconly_picture;
436
//     uint8_t q_context[16];
437
    uint8_t header_state[32];
438
    uint8_t block_state[128 + 32*128];
439
    int keyframe;
440
    int always_reset;
441
    int version;
442
    int spatial_decomposition_type;
443
    int last_spatial_decomposition_type;
444
    int temporal_decomposition_type;
445
    int spatial_decomposition_count;
446
    int temporal_decomposition_count;
447
    int max_ref_frames;
448
    int ref_frames;
449
    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
450
    uint32_t *ref_scores[MAX_REF_FRAMES];
451
    DWTELEM *spatial_dwt_buffer;
452
    IDWTELEM *spatial_idwt_buffer;
453
    int colorspace_type;
454
    int chroma_h_shift;
455
    int chroma_v_shift;
456
    int spatial_scalability;
457
    int qlog;
458
    int last_qlog;
459
    int lambda;
460
    int lambda2;
461
    int pass1_rc;
462
    int mv_scale;
463
    int last_mv_scale;
464
    int qbias;
465
    int last_qbias;
466
#define QBIAS_SHIFT 3
467
    int b_width;
468
    int b_height;
469
    int block_max_depth;
470
    int last_block_max_depth;
471
    Plane plane[MAX_PLANES];
472
    BlockNode *block;
473
#define ME_CACHE_SIZE 1024
474
    int me_cache[ME_CACHE_SIZE];
475
    int me_cache_generation;
476
    slice_buffer sb;
477

    
478
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
479
}SnowContext;
480

    
481
typedef struct {
482
    IDWTELEM *b0;
483
    IDWTELEM *b1;
484
    IDWTELEM *b2;
485
    IDWTELEM *b3;
486
    int y;
487
} dwt_compose_t;
488

    
489
// Return the buffer bound to line line_num, calling slice_buffer_load_line()
// only when no buffer is bound yet. Both macro arguments are expanded more
// than once, so they must not have side effects.
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
490
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
491

    
492
// Forward declaration; the definition is not in this part of the file.
static void iterative_me(SnowContext *s);
493

    
494
/**
 * Set up a slice buffer.
 *
 * Allocates a zeroed table of line_count line pointers and a stack of
 * max_allocated_lines line buffers, each holding line_width IDWTELEMs.
 * All buffers start out on the stack, so data_stack_top points at the last one.
 *
 * @param buf                  slice buffer to initialize
 * @param line_count           number of addressable lines
 * @param max_allocated_lines  number of line buffers to allocate
 * @param line_width           number of IDWTELEM elements per line buffer
 * @param base_buffer          stored in buf->base_buffer; not otherwise used here
 *
 * NOTE(review): none of the allocations is checked. If the data_stack
 * allocation fails, the loop below writes through a NULL pointer. The
 * function returns void, so reporting the failure needs an interface change.
 */
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
{
    int i;

    buf->base_buffer = base_buffer;
    buf->line_count = line_count;
    buf->line_width = line_width;
    buf->data_count = max_allocated_lines;
    buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
    buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);

    for (i = 0; i < max_allocated_lines; i++)
    {
        buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
    }

    buf->data_stack_top = max_allocated_lines - 1;
}
512

    
513
/**
 * Return the buffer bound to a line, binding a free one if necessary.
 *
 * If the line already has a buffer it is returned unchanged. Otherwise the
 * buffer on top of the free stack is popped and bound to the line.
 *
 * The assertion that the free stack is non-empty runs before the
 * "already bound" check, so it also fires for a bound line when the stack
 * is exhausted.
 *
 * @param buf   slice buffer
 * @param line  line index into buf->line
 * @return the buffer now bound to the line
 */
static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
{
    IDWTELEM * buffer;

//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);

    assert(buf->data_stack_top >= 0);
//  assert(!buf->line[line]);
    if (buf->line[line])
        return buf->line[line];

    buffer = buf->data_stack[buf->data_stack_top];
    buf->data_stack_top--;
    buf->line[line] = buffer;

//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);

    return buffer;
}
534

    
535
/**
 * Unbind the buffer of a line and push it back onto the free stack.
 *
 * The line index must be in range and the line must currently have a
 * buffer; both conditions are asserted.
 *
 * @param buf   slice buffer
 * @param line  index of the line to release
 */
static void slice_buffer_release(slice_buffer * buf, int line)
{
    IDWTELEM * buffer;

    assert(line >= 0 && line < buf->line_count);
    assert(buf->line[line]);

    buffer = buf->line[line];
    buf->data_stack_top++;
    buf->data_stack[buf->data_stack_top] = buffer;
    buf->line[line] = NULL;

//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
}
551

    
552
/**
 * Release every line that currently has a buffer bound to it, returning
 * all of those buffers to the free stack.
 *
 * @param buf  slice buffer
 */
static void slice_buffer_flush(slice_buffer * buf)
{
    int i;
    for (i = 0; i < buf->line_count; i++)
    {
        if (buf->line[i])
        {
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
            slice_buffer_release(buf, i);
        }
    }
}
564

    
565
/**
 * Free everything owned by a slice buffer.
 *
 * All bound lines are flushed back onto the free stack first, so the stack
 * holds all data_count buffers; each is freed, followed by the stack array
 * and the line table. av_freep() resets the freed pointers to NULL.
 *
 * @param buf  slice buffer previously set up with slice_buffer_init()
 */
static void slice_buffer_destroy(slice_buffer * buf)
{
    int i;

    /* Check the arrays before they are dereferenced below, not afterwards. */
    assert(buf->line);
    assert(buf->data_stack);

    slice_buffer_flush(buf);

    for (i = buf->data_count - 1; i >= 0; i--)
    {
        assert(buf->data_stack[i]);
        av_freep(&buf->data_stack[i]);
    }
    av_freep(&buf->data_stack);
    av_freep(&buf->line);
}
580

    
581
#ifdef __sgi
582
// Avoid a name clash on SGI IRIX
583
#undef qexp
584
#endif
585
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
586
static uint8_t qexp[QROOT];
587

    
588
/**
 * Fold an index into the range [0, m] by reflecting it at both ends
 * (whole-sample symmetric extension: the edge samples are not repeated).
 *
 * Examples for m = 3: -1 -> 1, 4 -> 2, 5 -> 1, 6 -> 0, 7 -> 1.
 *
 * @param v  index, may lie outside [0, m]
 * @param m  highest valid index
 * @return the reflected index; a negative m makes the range check pass
 *         immediately, so v is returned unchanged in that case
 */
static inline int mirror(int v, int m){
    /* A range holding the single index 0 has nothing to reflect into;
     * without this guard the loop below would never terminate for v != 0. */
    if(m == 0)
        return 0;

    /* The unsigned compare rejects v < 0 and v > m in a single test. */
    while((unsigned)v > (unsigned)m){
        v=-v;
        if(v<0) v+= 2*m;
    }
    return v;
}
595

    
596
/**
 * Write an integer with the adaptive binary range coder.
 *
 * Layout of the context array (indices into state):
 *   0       zero flag (1 means v == 0)
 *   1..10   unary code of e = av_log2(|v|); exponent bits 9 and above share
 *           context 10
 *   11..21  sign, selected by min(e, 10)
 *   22..31  the e bits below the leading one, most significant first;
 *           bits 9 and above share context 31
 *
 * @param c          range coder to write to
 * @param state      context array; indices up to 31 are accessed
 * @param v          value to code
 * @param is_signed  if nonzero a sign bit is coded for nonzero v; otherwise
 *                   only the magnitude of v is coded
 */
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
    int i;

    if(v){
        const int a= FFABS(v);
        const int e= av_log2(a);
        const int el= FFMIN(e, 10);
        put_rac(c, state+0, 0);

        /* exponent: e one-bits followed by a terminating zero-bit */
        for(i=0; i<el; i++){
            put_rac(c, state+1+i, 1);  //1..10
        }
        for(; i<e; i++){
            put_rac(c, state+1+9, 1);  //1..10
        }
        put_rac(c, state+1+FFMIN(i,9), 0);

        /* mantissa: bits e-1 .. 0 of the magnitude */
        for(i=e-1; i>=el; i--){
            put_rac(c, state+22+9, (a>>i)&1); //22..31
        }
        for(; i>=0; i--){
            put_rac(c, state+22+i, (a>>i)&1); //22..31
        }

        if(is_signed)
            put_rac(c, state+11 + el, v < 0); //11..21
    }else{
        put_rac(c, state+0, 1);
    }
}
656

    
657
/**
 * Reads an integer from the range coder: a zero flag (context 0), a unary
 * exponent (contexts 1..10), that many magnitude bits below an implicit
 * leading one (contexts 22..31) and, if is_signed is set, a sign flag
 * (contexts 11..21).
 *
 * NOTE(review): the unary exponent is not bounded here, so a damaged stream
 * could make the magnitude overflow an int -- confirm whether callers rely
 * on valid input.
 *
 * @param c         range coder to read from
 * @param state     context array; entries 0..31 are used
 * @param is_signed if nonzero a sign flag follows a nonzero magnitude
 * @return the decoded value
 */
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
    int exponent= 0;
    int magnitude= 1;
    int bit;

    if(get_rac(c, state+0))
        return 0;

    while(get_rac(c, state+1 + FFMIN(exponent,9))) //1..10
        exponent++;

    for(bit=exponent-1; bit>=0; bit--)
        magnitude += magnitude + get_rac(c, state+22 + FFMIN(bit,9)); //22..31

    if(is_signed && get_rac(c, state+11 + FFMIN(exponent,10))) //11..21
        return -magnitude;

    return magnitude;
}
678

    
679
/**
 * Writes a non-negative integer with an adaptive escape code.
 *
 * While v is at least the current step, a 1 is written with context
 * state[4+log2], the step is subtracted from v and log2 is incremented; the
 * step doubles each time log2 becomes positive. A 0 ends this prefix, then
 * the low log2 bits of the remainder follow, most significant first, with
 * contexts state[31-i].
 *
 * @param c     range coder to write to
 * @param state context array
 * @param v     value to write, must be >= 0
 * @param log2  initial step exponent, must be >= -4
 */
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
    int step= 1;
    int bit;

    if(log2 >= 0)
        step= 1<<log2;

    assert(v>=0);
    assert(log2>=-4);

    while(v >= step){
        put_rac(c, state+4+log2, 1);
        v -= step;
        if(++log2 > 0)
            step += step;
    }
    put_rac(c, state+4+log2, 0);

    bit= log2;
    while(bit-- > 0)
        put_rac(c, state+31-bit, (v>>bit)&1);
}
698

    
699
/**
 * Reads a non-negative integer written with put_symbol2()'s escape code.
 *
 * Every 1 read with context state[4+log2] adds the current step to the
 * result and increments log2; the step doubles each time log2 becomes
 * positive. After the terminating 0, log2 further bits are read, most
 * significant first, with contexts state[31-i].
 *
 * @param c     range coder to read from
 * @param state context array
 * @param log2  initial step exponent, must be >= -4
 * @return the decoded value
 */
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
    int i;
    int r= log2>=0 ? 1<<log2 : 1;
    int v=0;

    assert(log2>=-4);

    /* The prefix length comes from the bitstream. Without a limit a damaged
     * stream keeps increasing log2, so state+4+log2 walks past the context
     * array and r/v overflow. Stopping at 28 keeps the prefix context at
     * index 31 or below and the suffix contexts at index 4 or above.
     * NOTE(review): assumes each context array has 32 entries, as the
     * state+31-i indexing below suggests -- confirm against the declaration. */
    while(log2<28 && get_rac(c, state+4+log2)){
        v+= r;
        log2++;
        if(log2>0) r+=r;
    }

    for(i=log2-1; i>=0; i--){
        v+= get_rac(c, state+31-i)<<i;
    }

    return v;
}
718

    
719
/**
 * Applies one lifting step along a line.
 *
 * Every output is the matching source sample plus (inverse == 0) or minus
 * (inverse != 0) the term (mul*(r0 + r1) + add) >> shift, where r0 and r1
 * are two consecutive reference samples. At an end where only one reference
 * sample is available it is used twice (mul*2*r).
 *
 * @param dst      output samples, dst_step elements apart
 * @param src      input samples, src_step elements apart
 * @param ref      reference samples, ref_step elements apart
 * @param width    length of the whole line
 * @param highpass 0: the first output uses the doubled ref[0];
 *                 1: no special case at the start
 * @param inverse  selects subtraction instead of addition of the term
 */
static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
    const int single_ref_at_start= !highpass;
    const int single_ref_at_end  = (width&1) ^ highpass;
    const int inner= (width>>1) - 1 + (highpass & width);
    int k;

#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
    if(single_ref_at_start){
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
        /* dst/src move on by one sample; ref stays so that the next output
         * sits between ref[0] and ref[ref_step] */
        dst += dst_step;
        src += src_step;
    }

    for(k=0; k<inner; k++)
        dst[k*dst_step] = LIFT(src[k*src_step], ((mul*(ref[k*ref_step] + ref[(k+1)*ref_step])+add)>>shift), inverse);

    if(single_ref_at_end)
        dst[inner*dst_step] = LIFT(src[inner*src_step], ((mul*2*ref[inner*ref_step]+add)>>shift), inverse);
}
740

    
741
/**
 * Same lifting step as lift(), operating on IDWTELEM samples.
 *
 * Every output is the matching source sample plus (inverse == 0) or minus
 * (inverse != 0) the term (mul*(r0 + r1) + add) >> shift, where r0 and r1
 * are two consecutive reference samples. At an end where only one reference
 * sample is available it is used twice (mul*2*r).
 *
 * @param dst      output samples, dst_step elements apart
 * @param src      input samples, src_step elements apart
 * @param ref      reference samples, ref_step elements apart
 * @param width    length of the whole line
 * @param highpass 0: the first output uses the doubled ref[0];
 *                 1: no special case at the start
 * @param inverse  selects subtraction instead of addition of the term
 */
static av_always_inline void inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
    const int single_ref_at_start= !highpass;
    const int single_ref_at_end  = (width&1) ^ highpass;
    const int inner= (width>>1) - 1 + (highpass & width);
    int k;

#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
    if(single_ref_at_start){
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
        /* dst/src move on by one sample; ref stays so that the next output
         * sits between ref[0] and ref[ref_step] */
        dst += dst_step;
        src += src_step;
    }

    for(k=0; k<inner; k++)
        dst[k*dst_step] = LIFT(src[k*src_step], ((mul*(ref[k*ref_step] + ref[(k+1)*ref_step])+add)>>shift), inverse);

    if(single_ref_at_end)
        dst[inner*dst_step] = LIFT(src[inner*src_step], ((mul*2*ref[inner*ref_step]+add)>>shift), inverse);
}
762

    
763
#ifndef liftS
764
/**
 * Lifting step with the special LIFTS update rule; requires shift == 4.
 *
 * The reference term handed to LIFTS is mul*(r0 + r1) + add (or mul*2*r at
 * an end with a single reference sample). With inverse set the result is
 * src + ((term + 4*src) >> shift); otherwise it is the division based
 * expression spelled out in the LIFTS macro. Note that LIFTS reads shift and
 * add directly from this function's parameters.
 *
 * @param dst      output samples, dst_step elements apart
 * @param src      input samples, src_step elements apart
 * @param ref      reference samples, ref_step elements apart
 * @param width    length of the whole line
 * @param highpass 0: the first output uses the doubled ref[0];
 *                 1: no special case at the start
 * @param inverse  selects which of the two LIFTS formulas is applied
 */
static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
    const int single_ref_at_start= !highpass;
    const int single_ref_at_end  = (width&1) ^ highpass;
    const int inner= (width>>1) - 1 + (highpass & width);
    int k;

    assert(shift == 4);
#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
    if(single_ref_at_start){
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
        dst += dst_step;
        src += src_step;
    }

    for(k=0; k<inner; k++)
        dst[k*dst_step] = LIFTS(src[k*src_step], mul*(ref[k*ref_step] + ref[(k+1)*ref_step])+add, inverse);

    if(single_ref_at_end)
        dst[inner*dst_step] = LIFTS(src[inner*src_step], mul*2*ref[inner*ref_step]+add, inverse);
}
786
/**
 * Same as liftS(), operating on IDWTELEM samples; requires shift == 4.
 *
 * The reference term handed to LIFTS is mul*(r0 + r1) + add (or mul*2*r at
 * an end with a single reference sample). With inverse set the result is
 * src + ((term + 4*src) >> shift); otherwise it is the division based
 * expression spelled out in the LIFTS macro. Note that LIFTS reads shift and
 * add directly from this function's parameters.
 *
 * @param dst      output samples, dst_step elements apart
 * @param src      input samples, src_step elements apart
 * @param ref      reference samples, ref_step elements apart
 * @param width    length of the whole line
 * @param highpass 0: the first output uses the doubled ref[0];
 *                 1: no special case at the start
 * @param inverse  selects which of the two LIFTS formulas is applied
 */
static av_always_inline void inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
    const int single_ref_at_start= !highpass;
    const int single_ref_at_end  = (width&1) ^ highpass;
    const int inner= (width>>1) - 1 + (highpass & width);
    int k;

    assert(shift == 4);
#define LIFTS(src, ref, inv) ((inv) ? (src) + (((ref) + 4*(src))>>shift): -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
    if(single_ref_at_start){
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
        dst += dst_step;
        src += src_step;
    }

    for(k=0; k<inner; k++)
        dst[k*dst_step] = LIFTS(src[k*src_step], mul*(ref[k*ref_step] + ref[(k+1)*ref_step])+add, inverse);

    if(single_ref_at_end)
        dst[inner*dst_step] = LIFTS(src[inner*src_step], mul*2*ref[inner*ref_step]+add, inverse);
}
808
#endif
809

    
810
/**
 * Forward horizontal "53" transform of one line, in place.
 *
 * The line is first de-interleaved into a temporary: even samples go to
 * temp[0..], odd samples to temp[w2..] with w2 = (width+1)/2. Two lifting
 * steps then write the second half of b (from temp's odd part) and the first
 * half of b (from temp's even part and the freshly written second half).
 *
 * @param b     line of width samples, overwritten with the result
 * @param width number of samples in the line
 */
static void horizontal_decompose53i(DWTELEM *b, int width){
    DWTELEM temp[width];
    const int pairs= width>>1;
    const int w2= (width+1)>>1;
    int x;

    for(x=0; x<pairs; x++){
        temp[x   ]= b[2*x    ];
        temp[x+w2]= b[2*x + 1];
    }
    /* odd width: one even sample is left over */
    if(width&1)
        temp[x   ]= b[2*x    ];

    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
}
858

    
859
/**
 * Vertical forward "53" step on line b1: subtracts (b0 + b2) >> 1 from every
 * sample of b1.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int x= 0;

    while(x < width){
        b1[x] -= (b0[x] + b2[x])>>1;
        x++;
    }
}
866

    
867
/**
 * Vertical forward "53" step on line b1: adds (b0 + b2 + 2) >> 2 to every
 * sample of b1.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int x= 0;

    while(x < width){
        b1[x] += (b0[x] + b2[x] + 2)>>2;
        x++;
    }
}
874

    
875
/**
 * Forward 2-D "53" transform of one plane, in place.
 *
 * The plane is walked two rows at a time, starting two rows above the top.
 * Row numbers outside the plane are folded back with mirror() to obtain the
 * line pointers. Every step is guarded by an unsigned comparison, which is
 * false both for negative row numbers and for rows >= height.
 *
 * @param buffer top-left sample of the plane
 * @param width  samples per row
 * @param height number of rows
 * @param stride distance between rows in samples
 */
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
    const int last_row= height-1;
    DWTELEM *b0= buffer + mirror(-3, last_row)*stride;
    DWTELEM *b1= buffer + mirror(-2, last_row)*stride;
    int y;

    for(y=-2; y<height; y+=2){
        DWTELEM *b2= buffer + mirror(y+1, last_row)*stride;
        DWTELEM *b3= buffer + mirror(y+2, last_row)*stride;

{START_TIMER
        if((unsigned)(y+1) < (unsigned)height) horizontal_decompose53i(b2, width);
        if((unsigned)(y+2) < (unsigned)height) horizontal_decompose53i(b3, width);
STOP_TIMER("horizontal_decompose53i")}

{START_TIMER
        if((unsigned)(y+1) < (unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
        if((unsigned)(y+0) < (unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
STOP_TIMER("vertical_decompose53i*")}

        /* slide the two-line window down */
        b0= b2;
        b1= b3;
    }
}
898

    
899
/**
 * Forward horizontal "97" transform of one line, in place.
 *
 * Four lifting steps with the W_A*, W_B*, W_C* and W_D* constants are
 * chained. The first two read b interleaved (step 2) and write the two
 * halves of a temporary line; the last two write the halves back into b,
 * the second half starting at b + (width+1)/2.
 *
 * @param b     line of width samples, overwritten with the result
 * @param width number of samples in the line
 */
static void horizontal_decompose97i(DWTELEM *b, int width){
    DWTELEM temp[width];
    const int w2= (width+1)>>1;
    DWTELEM *const b_hi   = b    + w2;
    DWTELEM *const temp_hi= temp + w2;

    lift (temp_hi, b + 1  , b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
    liftS(temp   , b      , temp_hi, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
    lift (b_hi   , temp_hi, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
    lift (b      , temp   , b_hi   , 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
}
908

    
909

    
910
/**
 * Vertical forward "97" step with the W_A* constants: subtracts
 * (W_AM*(b0 + b2) + W_AO) >> W_AS from every sample of b1.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int x= 0;

    while(x < width){
        b1[x] -= (W_AM*(b0[x] + b2[x])+W_AO)>>W_AS;
        x++;
    }
}
917

    
918
/**
 * Vertical forward "97" step with the W_C* constants: adds
 * (W_CM*(b0 + b2) + W_CO) >> W_CS to every sample of b1.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int x= 0;

    while(x < width){
        b1[x] += (W_CM*(b0[x] + b2[x])+W_CO)>>W_CS;
        x++;
    }
}
925

    
926
/**
 * Vertical forward "97" step with the W_B* constants.
 *
 * When liftS is defined as a macro the plain form is used and
 * (W_BM*(b0 + b2) + W_BO) >> W_BS is subtracted from b1. Otherwise b1 is
 * replaced by the division based expression below, which also involves b1
 * itself.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int x= 0;

    while(x < width){
#ifdef liftS
        b1[x] -= (W_BM*(b0[x] + b2[x])+W_BO)>>W_BS;
#else
        b1[x] = (16*4*b1[x] - 4*(b0[x] + b2[x]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
#endif
        x++;
    }
}
937

    
938
/**
 * Vertical forward "97" step with the W_D* constants: adds
 * (W_DM*(b0 + b2) + W_DO) >> W_DS to every sample of b1.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int x= 0;

    while(x < width){
        b1[x] += (W_DM*(b0[x] + b2[x])+W_DO)>>W_DS;
        x++;
    }
}
945

    
946
/**
 * Forward 2-D "97" transform of one plane, in place.
 *
 * The plane is walked two rows at a time, starting four rows above the top,
 * with a sliding window of six line pointers. Row numbers outside the plane
 * are folded back with mirror(). Every step is guarded by an unsigned
 * comparison, which is false both for negative row numbers and for rows
 * >= height. The timers are only stopped for width > 400.
 *
 * @param buffer top-left sample of the plane
 * @param width  samples per row
 * @param height number of rows
 * @param stride distance between rows in samples
 */
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
    const int last_row= height-1;
    DWTELEM *b0= buffer + mirror(-5, last_row)*stride;
    DWTELEM *b1= buffer + mirror(-4, last_row)*stride;
    DWTELEM *b2= buffer + mirror(-3, last_row)*stride;
    DWTELEM *b3= buffer + mirror(-2, last_row)*stride;
    int y;

    for(y=-4; y<height; y+=2){
        DWTELEM *b4= buffer + mirror(y+3, last_row)*stride;
        DWTELEM *b5= buffer + mirror(y+4, last_row)*stride;

{START_TIMER
        if((unsigned)(y+3) < (unsigned)height) horizontal_decompose97i(b4, width);
        if((unsigned)(y+4) < (unsigned)height) horizontal_decompose97i(b5, width);
if(width>400){
STOP_TIMER("horizontal_decompose97i")
}}

{START_TIMER
        if((unsigned)(y+3) < (unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
        if((unsigned)(y+2) < (unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
        if((unsigned)(y+1) < (unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
        if((unsigned)(y+0) < (unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);

if(width>400){
STOP_TIMER("vertical_decompose97i")
}}

        /* slide the window down by two rows */
        b0= b2;
        b1= b3;
        b2= b4;
        b3= b5;
    }
}
980

    
981
/**
 * Runs the forward wavelet transform for decomposition_count levels.
 *
 * Level n works on the same buffer with width and height shifted right by n
 * and the stride shifted left by n. A type other than DWT_97 or DWT_53
 * leaves the buffer untouched.
 *
 * @param buffer              plane to transform in place
 * @param width               samples per row at level 0
 * @param height              rows at level 0
 * @param stride              distance between rows at level 0, in samples
 * @param type                DWT_97 or DWT_53
 * @param decomposition_count number of levels to apply
 */
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
    int level;

    for(level=0; level<decomposition_count; level++){
        if(type == DWT_97)
            spatial_decompose97i(buffer, width>>level, height>>level, stride<<level);
        else if(type == DWT_53)
            spatial_decompose53i(buffer, width>>level, height>>level, stride<<level);
    }
}
991

    
992
/**
 * Inverse horizontal "53" transform of one line, in place.
 *
 * Two inverse lifting steps rebuild the two halves of a temporary line from
 * the two halves of b (the second half starts at (width+1)/2). The halves
 * are then interleaved back into b: first-half samples land on even
 * positions, second-half samples on odd positions.
 *
 * @param b     line of width samples, overwritten with the result
 * @param width number of samples in the line
 */
static void horizontal_compose53i(IDWTELEM *b, int width){
    IDWTELEM temp[width];
    const int pairs= width>>1;
    const int w2= (width+1)>>1;
    int x;

    inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
    inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);

    for(x=0; x<pairs; x++){
        b[2*x    ]= temp[x   ];
        b[2*x + 1]= temp[x+w2];
    }
    /* odd width: one more even position to fill */
    if(width&1)
        b[2*x    ]= temp[x   ];
}
1038

    
1039
/**
 * Vertical inverse "53" step on line b1: adds (b0 + b2) >> 1 to every sample
 * of b1.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int x= 0;

    while(x < width){
        b1[x] += (b0[x] + b2[x])>>1;
        x++;
    }
}
1046

    
1047
/**
 * Vertical inverse "53" step on line b1: subtracts (b0 + b2 + 2) >> 2 from
 * every sample of b1.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int x= 0;

    while(x < width){
        b1[x] -= (b0[x] + b2[x] + 2)>>2;
        x++;
    }
}
1054

    
1055
/**
 * Prepares the state for the slice-buffered inverse "53" transform.
 *
 * The two line pointers are fetched from the slice buffer for rows -2 and -1
 * folded into the plane with mirror(), and the current row is set to -1.
 *
 * @param cs          state to initialise
 * @param sb          slice buffer the lines are taken from
 * @param height      number of rows at this level
 * @param stride_line factor applied to the row number before the lookup
 */
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
    const int last_row= height-1;

    cs->b0 = slice_buffer_get_line(sb, mirror(-2, last_row) * stride_line);
    cs->b1 = slice_buffer_get_line(sb, mirror(-1, last_row) * stride_line);
    cs->y = -1;
}
1060

    
1061
/**
 * Prepares the state for the inverse "53" transform on a flat buffer.
 *
 * The two line pointers are set to rows -2 and -1 folded into the plane with
 * mirror(), and the current row is set to -1.
 *
 * @param cs     state to initialise
 * @param buffer top-left sample of the plane
 * @param height number of rows at this level
 * @param stride distance between rows in samples
 */
static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
    const int last_row= height-1;

    cs->b0 = buffer + mirror(-2, last_row)*stride;
    cs->b1 = buffer + mirror(-1, last_row)*stride;
    cs->y = -1;
}
1066

    
1067
/**
 * Advances the slice-buffered inverse "53" transform by two rows.
 *
 * Two further lines are fetched from the slice buffer, the vertical steps
 * are applied, then the horizontal inverse runs on the two oldest lines.
 * Every step is guarded by an unsigned comparison, which is false both for
 * negative row numbers and for rows >= height. Finally the window in cs
 * moves down and cs->y grows by 2.
 *
 * @param cs          transform state for this level
 * @param sb          slice buffer the lines are taken from
 * @param width       samples per row at this level
 * @param height      rows at this level
 * @param stride_line factor applied to the row number before the lookup
 */
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
    const int y= cs->y;
    const int last_row= height-1;
    IDWTELEM *b0= cs->b0;
    IDWTELEM *b1= cs->b1;
    IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, last_row) * stride_line);
    IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, last_row) * stride_line);

{START_TIMER
        if((unsigned)(y+1) < (unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
        if((unsigned)(y+0) < (unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
STOP_TIMER("vertical_compose53i*")}

{START_TIMER
        if((unsigned)(y-1) < (unsigned)height) horizontal_compose53i(b0, width);
        if((unsigned)(y+0) < (unsigned)height) horizontal_compose53i(b1, width);
STOP_TIMER("horizontal_compose53i")}

    cs->b0 = b2;
    cs->b1 = b3;
    cs->y  = y + 2;
}
1089

    
1090
/**
 * Advances the inverse "53" transform on a flat buffer by two rows.
 *
 * Two further line pointers are derived with mirror(), the vertical steps
 * are applied, then the horizontal inverse runs on the two oldest lines.
 * Every step is guarded by an unsigned comparison, which is false both for
 * negative row numbers and for rows >= height. Finally the window in cs
 * moves down and cs->y grows by 2.
 *
 * @param cs     transform state for this level
 * @param buffer top-left sample of the plane
 * @param width  samples per row at this level
 * @param height rows at this level
 * @param stride distance between rows in samples
 */
static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
    const int y= cs->y;
    const int last_row= height-1;
    IDWTELEM *b0= cs->b0;
    IDWTELEM *b1= cs->b1;
    IDWTELEM *b2= buffer + mirror(y+1, last_row)*stride;
    IDWTELEM *b3= buffer + mirror(y+2, last_row)*stride;

{START_TIMER
        if((unsigned)(y+1) < (unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
        if((unsigned)(y+0) < (unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
STOP_TIMER("vertical_compose53i*")}

{START_TIMER
        if((unsigned)(y-1) < (unsigned)height) horizontal_compose53i(b0, width);
        if((unsigned)(y+0) < (unsigned)height) horizontal_compose53i(b1, width);
STOP_TIMER("horizontal_compose53i")}

    cs->b0 = b2;
    cs->b1 = b3;
    cs->y  = y + 2;
}
1111

    
1112
/**
 * Inverse 2-D "53" transform of a whole plane, in place: initialises a local
 * state and keeps stepping by two rows until the current row passes height.
 *
 * @param buffer top-left sample of the plane
 * @param width  samples per row
 * @param height number of rows
 * @param stride distance between rows in samples
 */
static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
    dwt_compose_t cs;

    for(spatial_compose53i_init(&cs, buffer, height, stride); cs.y <= height; )
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
}
1118

    
1119

    
1120
/**
 * Inverse horizontal "97" transform of one line, in place.
 *
 * Four inverse lifting steps with the W_D*, W_C*, W_B* and W_A* constants
 * are chained. The first two rebuild the two halves of a temporary line from
 * the two halves of b (the second half starts at (width+1)/2); the last two
 * write the result back into b interleaved (destination step 2).
 *
 * @param b     line of width samples, overwritten with the result
 * @param width number of samples in the line
 */
void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
    IDWTELEM temp[width];
    const int w2= (width+1)>>1;
    IDWTELEM *const b_hi   = b    + w2;
    IDWTELEM *const temp_hi= temp + w2;

    inv_lift (temp   , b      , b_hi   , 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
    inv_lift (temp_hi, b_hi   , temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
    inv_liftS(b      , temp   , temp_hi, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
    inv_lift (b+1    , temp_hi, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
}
1129

    
1130
/**
 * Vertical inverse "97" step with the W_A* constants: adds
 * (W_AM*(b0 + b2) + W_AO) >> W_AS to every sample of b1.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int x= 0;

    while(x < width){
        b1[x] += (W_AM*(b0[x] + b2[x])+W_AO)>>W_AS;
        x++;
    }
}
1137

    
1138
/**
 * Vertical inverse "97" step with the W_C* constants: subtracts
 * (W_CM*(b0 + b2) + W_CO) >> W_CS from every sample of b1.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int x= 0;

    while(x < width){
        b1[x] -= (W_CM*(b0[x] + b2[x])+W_CO)>>W_CS;
        x++;
    }
}
1145

    
1146
/**
 * Vertical inverse "97" step with the W_B* constants.
 *
 * When liftS is defined as a macro, (W_BM*(b0 + b2) + W_BO) >> W_BS is added
 * to b1; otherwise the added term additionally contains 4*b1 before the
 * shift.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int x= 0;

    while(x < width){
#ifdef liftS
        b1[x] += (W_BM*(b0[x] + b2[x])+W_BO)>>W_BS;
#else
        b1[x] += (W_BM*(b0[x] + b2[x])+4*b1[x]+W_BO)>>W_BS;
#endif
        x++;
    }
}
1157

    
1158
/**
 * Vertical inverse "97" step with the W_D* constants: subtracts
 * (W_DM*(b0 + b2) + W_DO) >> W_DS from every sample of b1.
 *
 * @param b0    line on one side of b1, read only
 * @param b1    line that is updated in place
 * @param b2    line on the other side of b1, read only
 * @param width number of samples per line
 */
static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int x= 0;

    while(x < width){
        b1[x] -= (W_DM*(b0[x] + b2[x])+W_DO)>>W_DS;
        x++;
    }
}
1165

    
1166
/**
 * Performs all four vertical inverse "97" steps for one column position
 * after the other, over six consecutive lines.
 *
 * Per sample the order is: b4 (W_D*), b3 (W_C*), b2 (W_B*), b1 (W_A*). Each
 * step reads the value the previous step has just written, so the order of
 * the statements must not change.
 *
 * @param b0..b5 the six lines; b1..b4 are modified, b0 and b5 only read
 * @param width  number of samples per line
 */
void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
    int x= 0;

    while(x < width){
        b4[x] -= (W_DM*(b3[x] + b5[x])+W_DO)>>W_DS;
        b3[x] -= (W_CM*(b2[x] + b4[x])+W_CO)>>W_CS;
#ifdef liftS
        b2[x] += (W_BM*(b1[x] + b3[x])+W_BO)>>W_BS;
#else
        b2[x] += (W_BM*(b1[x] + b3[x])+4*b2[x]+W_BO)>>W_BS;
#endif
        b1[x] += (W_AM*(b0[x] + b2[x])+W_AO)>>W_AS;
        x++;
    }
}
1180

    
1181
/**
 * Prepares the state for the slice-buffered inverse "97" transform.
 *
 * The four line pointers are fetched from the slice buffer for rows -4..-1
 * folded into the plane with mirror(), and the current row is set to -3.
 *
 * @param cs          state to initialise
 * @param sb          slice buffer the lines are taken from
 * @param height      number of rows at this level
 * @param stride_line factor applied to the row number before the lookup
 */
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
    const int last_row= height-1;

    cs->b0 = slice_buffer_get_line(sb, mirror(-4, last_row) * stride_line);
    cs->b1 = slice_buffer_get_line(sb, mirror(-3, last_row) * stride_line);
    cs->b2 = slice_buffer_get_line(sb, mirror(-2, last_row) * stride_line);
    cs->b3 = slice_buffer_get_line(sb, mirror(-1, last_row) * stride_line);
    cs->y = -3;
}
1188

    
1189
/**
 * Prepares the state for the inverse "97" transform on a flat buffer.
 *
 * The four line pointers are set to rows -4..-1 folded into the plane with
 * mirror(), and the current row is set to -3.
 *
 * @param cs     state to initialise
 * @param buffer top-left sample of the plane
 * @param height number of rows at this level
 * @param stride distance between rows in samples
 */
static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
    const int last_row= height-1;

    cs->b0 = buffer + mirror(-4, last_row)*stride;
    cs->b1 = buffer + mirror(-3, last_row)*stride;
    cs->b2 = buffer + mirror(-2, last_row)*stride;
    cs->b3 = buffer + mirror(-1, last_row)*stride;
    cs->y = -3;
}
1196

    
1197
/**
 * Advances the slice-buffered inverse "97" transform by two rows.
 *
 * Two further lines are fetched from the slice buffer. If all six rows of
 * the window lie strictly inside the plane (y > 0 and y+4 < height) the
 * combined dsp->vertical_compose97i() is used; otherwise the four vertical
 * steps run one by one, each guarded by an unsigned comparison that is false
 * for negative rows and for rows >= height. The horizontal inverse then runs
 * on the two oldest lines through dsp->horizontal_compose97i(). Finally the
 * window in cs moves down and cs->y grows by 2.
 *
 * @param dsp         function table providing the compose routines
 * @param cs          transform state for this level
 * @param sb          slice buffer the lines are taken from
 * @param width       samples per row at this level
 * @param height      rows at this level
 * @param stride_line factor applied to the row number before the lookup
 */
static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
    const int y = cs->y;
    const int last_row= height - 1;
    IDWTELEM *b0= cs->b0;
    IDWTELEM *b1= cs->b1;
    IDWTELEM *b2= cs->b2;
    IDWTELEM *b3= cs->b3;
    IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, last_row) * stride_line);
    IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, last_row) * stride_line);

{START_TIMER
    if(y>0 && y+4<height){
        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
    }else{
        if((unsigned)(y+3) < (unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
        if((unsigned)(y+2) < (unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
        if((unsigned)(y+1) < (unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
        if((unsigned)(y+0) < (unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
    }
if(width>400){
STOP_TIMER("vertical_compose97i")}}

{START_TIMER
        if((unsigned)(y-1) < (unsigned)height) dsp->horizontal_compose97i(b0, width);
        if((unsigned)(y+0) < (unsigned)height) dsp->horizontal_compose97i(b1, width);
if(width>400 && (unsigned)(y+0) < (unsigned)height){
STOP_TIMER("horizontal_compose97i")}}

    cs->b0= b2;
    cs->b1= b3;
    cs->b2= b4;
    cs->b3= b5;
    cs->y = y + 2;
}
1231

    
1232
/**
 * Advances the inverse "97" transform on a flat buffer by two rows.
 *
 * Two further line pointers are derived with mirror(), the four vertical
 * steps are applied, then the horizontal inverse runs on the two oldest
 * lines. Every step is guarded by an unsigned comparison, which is false
 * both for negative row numbers and for rows >= height. Finally the window
 * in cs moves down and cs->y grows by 2.
 *
 * @param cs     transform state for this level
 * @param buffer top-left sample of the plane
 * @param width  samples per row at this level
 * @param height rows at this level
 * @param stride distance between rows in samples
 */
static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
    const int y = cs->y;
    const int last_row= height-1;
    IDWTELEM *b0= cs->b0;
    IDWTELEM *b1= cs->b1;
    IDWTELEM *b2= cs->b2;
    IDWTELEM *b3= cs->b3;
    IDWTELEM *b4= buffer + mirror(y+3, last_row)*stride;
    IDWTELEM *b5= buffer + mirror(y+4, last_row)*stride;

{START_TIMER
        if((unsigned)(y+3) < (unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
        if((unsigned)(y+2) < (unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
        if((unsigned)(y+1) < (unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
        if((unsigned)(y+0) < (unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
if(width>400){
STOP_TIMER("vertical_compose97i")}}

{START_TIMER
        if((unsigned)(y-1) < (unsigned)height) ff_snow_horizontal_compose97i(b0, width);
        if((unsigned)(y+0) < (unsigned)height) ff_snow_horizontal_compose97i(b1, width);
if(width>400 && b0 <= b2){
STOP_TIMER("horizontal_compose97i")}}

    cs->b0= b2;
    cs->b1= b3;
    cs->b2= b4;
    cs->b3= b5;
    cs->y = y + 2;
}
1261

    
1262
/**
 * Inverse 2-D "97" transform of a whole plane, in place: initialises a local
 * state and keeps stepping by two rows until the current row passes height.
 *
 * @param buffer top-left sample of the plane
 * @param width  samples per row
 * @param height number of rows
 * @param stride distance between rows in samples
 */
static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
    dwt_compose_t cs;

    for(spatial_compose97i_init(&cs, buffer, height, stride); cs.y <= height; )
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
}
1268

    
1269
/**
 * Initialises one compose state per decomposition level for the
 * slice-buffered inverse transform, from the highest level down to level 0.
 *
 * Level n uses height >> n and stride_line << n. The width argument is not
 * used here. A type other than DWT_97 or DWT_53 leaves cs untouched.
 *
 * @param cs                  array of at least decomposition_count states
 * @param sb                  slice buffer the lines are taken from
 * @param type                DWT_97 or DWT_53
 * @param decomposition_count number of levels
 */
static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
    int level;

    for(level=decomposition_count-1; level>=0; level--){
        if(type == DWT_97)
            spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level);
        else if(type == DWT_53)
            spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level);
    }
}
1278

    
1279
/**
 * Initialises one compose state per decomposition level for the inverse
 * transform on a flat buffer, from the highest level down to level 0.
 *
 * Level n uses height >> n and stride << n. The width argument is not used
 * here. A type other than DWT_97 or DWT_53 leaves cs untouched.
 *
 * @param cs                  array of at least decomposition_count states
 * @param buffer              top-left sample of the plane
 * @param type                DWT_97 or DWT_53
 * @param decomposition_count number of levels
 */
static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
    int level;

    for(level=decomposition_count-1; level>=0; level--){
        if(type == DWT_97)
            spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level);
        else if(type == DWT_53)
            spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level);
    }
}
1288

    
1289
/**
 * Advances the inverse transform on a flat buffer far enough for row y.
 *
 * Levels are processed from the highest down to 0. Each level is stepped
 * until its current row exceeds min((y >> level) + support, height >> level),
 * where support is 3 for type 1 and 5 otherwise. Type 2 returns at once.
 * NOTE(review): the literals 1 and 2 presumably correspond to DWT_53 and a
 * "no transform" type -- confirm against the DWT_* definitions.
 *
 * @param cs                  per-level states set up by ff_spatial_idwt_init()
 * @param buffer              top-left sample of the plane
 * @param width               samples per row at level 0
 * @param height              rows at level 0
 * @param stride              distance between rows at level 0, in samples
 * @param type                transform type
 * @param decomposition_count number of levels
 * @param y                   row up to which output is required
 */
static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
    const int support = type==1 ? 3 : 5;
    int level;

    if(type==2)
        return;

    for(level=decomposition_count-1; level>=0; level--){
        dwt_compose_t *const lc= cs+level;
        const int last_needed= FFMIN((y>>level)+support, height>>level);

        while(lc->y <= last_needed){
            if(type == DWT_97)
                spatial_compose97i_dy(lc, buffer, width>>level, height>>level, stride<<level);
            else if(type == DWT_53)
                spatial_compose53i_dy(lc, buffer, width>>level, height>>level, stride<<level);
        }
    }
}
1305

    
1306
/**
 * Advances the slice-buffered inverse transform far enough for row y.
 *
 * Levels are processed from the highest down to 0. Each level is stepped
 * until its current row exceeds min((y >> level) + support, height >> level),
 * where support is 3 for type 1 and 5 otherwise. Type 2 returns at once.
 * NOTE(review): the literals 1 and 2 presumably correspond to DWT_53 and a
 * "no transform" type -- confirm against the DWT_* definitions.
 *
 * @param dsp                 function table passed on to the "97" step
 * @param cs                  per-level states set up by
 *                            ff_spatial_idwt_buffered_init()
 * @param slice_buf           slice buffer the lines are taken from
 * @param width               samples per row at level 0
 * @param height              rows at level 0
 * @param stride_line         line factor at level 0
 * @param type                transform type
 * @param decomposition_count number of levels
 * @param y                   row up to which output is required
 */
static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
    const int support = type==1 ? 3 : 5;
    int level;

    if(type==2)
        return;

    for(level=decomposition_count-1; level>=0; level--){
        dwt_compose_t *const lc= cs+level;
        const int last_needed= FFMIN((y>>level)+support, height>>level);

        while(lc->y <= last_needed){
            if(type == DWT_97)
                spatial_compose97i_dy_buffered(dsp, lc, slice_buf, width>>level, height>>level, stride_line<<level);
            else if(type == DWT_53)
                spatial_compose53i_dy_buffered(lc, slice_buf, width>>level, height>>level, stride_line<<level);
        }
    }
}
1322

    
1323
/**
 * Inverse transform of a whole plane on a flat buffer: sets up the per-level
 * states and then requests the rows in steps of four.
 *
 * @param buffer              top-left sample of the plane
 * @param width               samples per row
 * @param height              number of rows
 * @param stride              distance between rows in samples
 * @param type                transform type
 * @param decomposition_count number of levels, at most MAX_DECOMPOSITIONS
 */
static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
    dwt_compose_t cs[MAX_DECOMPOSITIONS];
    int y= 0;

    ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);

    while(y < height){
        ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
        y += 4;
    }
}
1330

    
1331
/**
 * Encodes the coefficients of one subband with context modelling and
 * zero-run coding.
 *
 * For every coefficient the left, top-left, top and top-right neighbours and
 * (if a parent band is given) the co-located parent coefficient are
 * gathered. Coefficients whose neighbourhood is entirely zero are covered by
 * run lengths; the others get an explicit significance flag.
 *
 * Pass 1 collects the zero-run lengths, pass 2 writes the number of runs,
 * the runs themselves, the significance flags, the magnitudes and the signs.
 *
 * @param s           encoder context; provides the range coder and log context
 * @param b           subband description (size, parent, coder contexts)
 * @param src         coefficients of this band, stride samples per row
 * @param parent      coefficients of the parent band or NULL; rows are read
 *                    2*stride samples apart
 * @param stride      distance between rows of src in samples
 * @param orientation not used by this function
 * @return 0 on success, -1 if fewer than w*40 bytes are left in the output
 *         buffer at the start of a row
 */
static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
    const int w= b->width;
    const int h= b->height;
    /* One slot per stored run plus one for the trailing run. Two coefficients
     * next to each other can never both start a run, so at most w*h entries
     * are needed in general, but a 1x1 band needs 2 and an empty band still
     * needs the trailing slot; hence the +1.
     * NOTE(review): this is a stack VLA and grows with the band size. */
    int runs[w*h + 1];
    int run= 0;
    int run_index= 0;
    int max_index;
    int x, y;

    /* pass 1: measure the zero runs between significant coefficients that
     * have an all-zero neighbourhood */
    for(y=0; y<h; y++){
        for(x=0; x<w; x++){
            const int v= src[x + y*stride];
            int l=0, lt=0, t=0, rt=0, p=0;

            if(y){
                const int above= x + (y-1)*stride;

                t= src[above];
                if(x)
                    lt= src[above - 1];
                if(x + 1 < w)
                    rt= src[above + 1];
            }
            if(x)
                l= src[x - 1 + y*stride];
            if(parent){
                const int px= x>>1;
                const int py= y>>1;

                if(px<b->parent->width && py<b->parent->height)
                    p= parent[px + py*2*stride];
            }

            if(l|lt|t|rt|p)
                continue;

            if(v){
                runs[run_index++]= run;
                run= 0;
            }else{
                run++;
            }
        }
    }
    max_index= run_index;
    runs[run_index++]= run;   /* trailing run, never transmitted */
    run_index= 0;
    run= runs[run_index++];

    put_symbol2(&s->c, b->state[30], max_index, 0);
    if(run_index <= max_index)
        put_symbol2(&s->c, b->state[1], run, 3);

    /* pass 2: emit the symbols */
    for(y=0; y<h; y++){
        if(s->c.bytestream_end - s->c.bytestream < w*40){
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
            return -1;
        }
        for(x=0; x<w; x++){
            const int v= src[x + y*stride];
            int l=0, lt=0, t=0, rt=0, p=0;

            if(y){
                const int above= x + (y-1)*stride;

                t= src[above];
                if(x)
                    lt= src[above - 1];
                if(x + 1 < w)
                    rt= src[above + 1];
            }
            if(x)
                l= src[x - 1 + y*stride];
            if(parent){
                const int px= x>>1;
                const int py= y>>1;

                if(px<b->parent->width && py<b->parent->height)
                    p= parent[px + py*2*stride];
            }

            if(l|lt|t|rt|p){
                /* some neighbour is nonzero: explicit significance flag */
                int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));

                put_rac(&s->c, &b->state[0][context], !!v);
            }else if(!run){
                /* the current run is used up, so this coefficient is nonzero */
                run= runs[run_index++];

                if(run_index <= max_index)
                    put_symbol2(&s->c, b->state[1], run, 3);
                assert(v);
            }else{
                run--;
                assert(!v);
            }

            if(v){
                int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
                int l2= 2*FFABS(l) + (l<0);
                int t2= 2*FFABS(t) + (t<0);

                put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
                put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
            }
        }
    }
    return 0;
}
1450

    
1451
/**
 * Encodes one subband. All arguments are passed unchanged to
 * encode_subband_c0run(), whose result is returned. The original source
 * listed three other coders here (qtree, z0run, dzr), all commented out.
 */
static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
    const int ret= encode_subband_c0run(s, b, src, parent, stride, orientation);

    return ret;
}
1457

    
1458
/**
 * Decodes the range-coded coefficients of subband b into the sparse list
 * b->x_coeff.
 *
 * The list holds one (x, coeff) entry per non-zero coefficient, row after
 * row. Every row is closed by an entry whose x is w+1, and one more such
 * entry is appended after the last row. A stored coeff is
 * 2*(symbol + 1) plus a flag in bit 0 that is read with get_rac() (the
 * slice decoder in this file negates the value when that bit is set).
 *
 * Context for each position is formed from the already decoded neighbours
 * l (left), lt, t, rt (previous row) and p (parent band, if any). Positions
 * whose neighbours are all zero are covered by run lengths instead of
 * per-coefficient flags.
 *
 * @param s           codec context; s->c is the range decoder being read
 * @param b           subband to decode, b->x_coeff receives the list
 * @param parent      parent subband whose x_coeff list is consulted, or NULL
 * @param orientation not used by this function body
 */
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1459
    const int w= b->width;
1460
    const int h= b->height;
1461
    int x,y;
1462

    
1463
    if(1){
1464
        int run, runs;
1465
        /* xc: write position in the output list */
        x_and_coeff *xc= b->x_coeff;
1466
        /* prev_xc: read position in the previous row's entries (unset for row 0) */
        x_and_coeff *prev_xc= NULL;
1467
        /* prev2_xc: start of the row currently being written */
        x_and_coeff *prev2_xc= xc;
1468
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1469
        /* prev_parent_xc: start of the parent row shared by the current pair of rows */
        x_and_coeff *prev_parent_xc= parent_xc;
1470

    
1471
        /* number of runs, then the first run length; INT_MAX when there is none */
        runs= get_symbol2(&s->c, b->state[30], 0);
1472
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1473
        else           run= INT_MAX;
1474

    
1475
        for(y=0; y<h; y++){
1476
            int v=0;
1477
            int lt=0, t=0, rt=0;
1478

    
1479
            /* seed rt with the previous row's coefficient at x == 0, if present */
            if(y && prev_xc->x == 0){
1480
                rt= prev_xc->coeff;
1481
            }
1482
            for(x=0; x<w; x++){
1483
                int p=0;
1484
                const int l= v;
1485

    
1486
                lt= t; t= rt;
1487

    
1488
                if(y){
1489
                    /* advance through the previous row and pick up the value at x+1 */
                    if(prev_xc->x <= x)
1490
                        prev_xc++;
1491
                    if(prev_xc->x == x + 1)
1492
                        rt= prev_xc->coeff;
1493
                    else
1494
                        rt=0;
1495
                }
1496
                if(parent_xc){
1497
                    /* the parent band is indexed with x>>1 */
                    if(x>>1 > parent_xc->x){
1498
                        parent_xc++;
1499
                    }
1500
                    if(x>>1 == parent_xc->x){
1501
                        p= parent_xc->coeff;
1502
                    }
1503
                }
1504
                if(/*ll|*/l|lt|t|rt|p){
1505
                    /* stored values carry the flag in bit 0, hence >>1 (and t&~1 for the doubled t term) */
                    int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1506

    
1507
                    v=get_rac(&s->c, &b->state[0][context]);
1508
                    if(v){
1509
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1510
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1511

    
1512
                        xc->x=x;
1513
                        (xc++)->coeff= v;
1514
                    }
1515
                }else{
1516
                    if(!run){
1517
                        /* run exhausted: a coefficient follows; fetch the next run length first */
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1518
                        else           run= INT_MAX;
1519
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1520
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1521

    
1522
                        xc->x=x;
1523
                        (xc++)->coeff= v;
1524
                    }else{
1525
                        int max_run;
1526
                        run--;
1527
                        v=0;
1528

    
1529
                        /* skip ahead in one step, bounded by the run, the next entry of the
                         * previous row (or the row end for y == 0) and the next parent entry */
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1530
                        else  max_run= FFMIN(run, w-x-1);
1531
                        if(parent_xc)
1532
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1533
                        x+= max_run;
1534
                        run-= max_run;
1535
                    }
1536
                }
1537
            }
1538
            (xc++)->x= w+1; //end marker
1539
            /* the row just written becomes the "previous row" of the next iteration */
            prev_xc= prev2_xc;
1540
            prev2_xc= xc;
1541

    
1542
            if(parent_xc){
1543
                if(y&1){
1544
                    /* after an odd row move on to the next parent row (skip past its end marker) */
                    while(parent_xc->x != parent->width+1)
1545
                        parent_xc++;
1546
                    parent_xc++;
1547
                    prev_parent_xc= parent_xc;
1548
                }else{
1549
                    /* after an even row rewind: the same parent row serves the next row too */
                    parent_xc= prev_parent_xc;
1550
                }
1551
            }
1552
        }
1553

    
1554
        (xc++)->x= w+1; //end marker
1555
    }
1556
}
1557

    
1558
/**
 * Dequantizes rows start_y .. h-1 of subband b from its sparse coefficient
 * list (b->x_coeff) into the lines of the slice buffer.
 *
 * Each list entry stores twice the magnitude plus a negate flag in bit 0.
 * A row ends at the first entry whose x is not below the band width.
 *
 * @param s          codec context (global qlog, qbias, lossless detection)
 * @param b          subband being reconstructed
 * @param sb         slice buffer providing the destination lines
 * @param start_y    first row to produce; when non-zero the list position is
 *                   resumed from save_state[0]
 * @param h          one past the last row to produce
 * @param save_state save_state[0] receives the list position reached, for
 *                   the next slice
 */
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
    const int w= b->width;
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
    int list_pos= 0;
    int y;

    START_TIMER

    /* no scaling for the band living in the spatial buffer or in lossless mode */
    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
        qadd= 0;
        qmul= 1<<QEXPSHIFT;
    }

    /* continue where the previous slice stopped */
    if(start_y != 0)
        list_pos= save_state[0];

    for(y=start_y; y<h; y++){
        IDWTELEM *line= slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
        const x_and_coeff *entry;

        memset(line, 0, w*sizeof(IDWTELEM));

        /* every entry that is read, the terminating one included, consumes a list slot */
        for(entry= &b->x_coeff[list_pos++]; entry->x < w; entry= &b->x_coeff[list_pos++]){
            const int v= entry->coeff;
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
            register int u= -(v&1);

            line[entry->x]= (t^u) - u; /* conditional negation driven by bit 0 */
        }
    }
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
        STOP_TIMER("decode_subband")
    }

    /* remember the list position for the next slice */
    save_state[0]= list_pos;
}
1604

    
1605
/**
 * Sets every adaptive range-coder state to MID_STATE: the per-subband
 * states of all three planes as well as the header and block states.
 * Level 0 has four orientations (0..3), higher levels only 1..3.
 */
static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
    int plane;

    for(plane=0; plane<3; plane++){
        int lvl;

        for(lvl=0; lvl<s->spatial_decomposition_count; lvl++){
            int orient= lvl ? 1 : 0;

            for(; orient<4; orient++){
                SubBand *band= &s->plane[plane].band[lvl][orient];

                memset(band->state, MID_STATE, sizeof(band->state));
            }
        }
    }

    memset(s->header_state, MID_STATE, sizeof(s->header_state));
    memset(s->block_state , MID_STATE, sizeof(s->block_state ));
}
1618

    
1619
/**
 * Computes the block grid size and allocates the zero-initialised block
 * array.
 *
 * s->b_width / s->b_height are the picture dimensions divided by the
 * macroblock size (1<<LOG2_MB_SIZE), rounded up. The array holds
 * b_width*b_height nodes for each of the 4^block_max_depth positions of the
 * finest quadtree level.
 *
 * @return 0 on success, -1 if the allocation failed (s->block is NULL then)
 */
static int alloc_blocks(SnowContext *s){
    /* -((-n)>>k) rounds n / 2^k up instead of down */
    const int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
    const int h= -((-s->avctx->height)>>LOG2_MB_SIZE);

    s->b_width = w;
    s->b_height= h;

    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
    if(!s->block)
        return -1; /* report the failure instead of leaving a NULL array behind silently */

    return 0;
}
1629

    
1630
/**
 * Copies the complete range-coder state from s to d, except that d keeps
 * its own output position (bytestream) and buffer start (bytestream_start).
 */
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
    uint8_t * const own_start= d->bytestream_start;
    uint8_t * const own_pos  = d->bytestream;

    *d= *s;

    /* put back the two pointers the struct copy has just overwritten */
    d->bytestream_start= own_start;
    d->bytestream      = own_pos;
}
1637

    
1638
//near copy & paste from dsputil, FIXME
1639
/**
 * Sums the pixels of a w x w square.
 *
 * @param pix       top-left pixel of the square
 * @param line_size distance in bytes between vertically adjacent pixels
 * @param w         side length of the square
 * @return the sum of all w*w pixel values
 */
static int pix_sum(uint8_t * pix, int line_size, int w)
{
    const uint8_t *row = pix;
    int total = 0;
    int x, y;

    for (y = 0; y < w; y++) {
        for (x = 0; x < w; x++)
            total += row[x];
        row += line_size;
    }
    return total;
}
1653

    
1654
//near copy & paste from dsputil, FIXME
1655
static int pix_norm1(uint8_t * pix, int line_size, int w)
1656
{
1657
    int s, i, j;
1658
    uint32_t *sq = ff_squareTbl + 256;
1659

    
1660
    s = 0;
1661
    for (i = 0; i < w; i++) {
1662
        for (j = 0; j < w; j ++) {
1663
            s += sq[pix[0]];
1664
            pix ++;
1665
        }
1666
        pix += line_size - w;
1667
    }
1668
    return s;
1669
}
1670

    
1671
/**
 * Writes one block description into every finest-level node covered by
 * block (x, y) of quadtree depth `level`.
 *
 * The covered area is a square of 1<<(block_max_depth - level) nodes per
 * side inside s->block, whose rows are b_width<<block_max_depth nodes long.
 *
 * @param l,cb,cr colour values stored in color[0..2]
 * @param mx,my   motion vector
 * @param ref     reference frame index
 * @param type    block type flags
 */
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
    const int row_len  = s->b_width << s->block_max_depth;
    const int rem_depth= s->block_max_depth - level;
    const int first    = (x + y*row_len) << rem_depth;
    const int side     = 1<<rem_depth;
    BlockNode node;
    int col, row;

    node.level   = level;
    node.type    = type;
    node.ref     = ref;
    node.mx      = mx;
    node.my      = my;
    node.color[0]= l;
    node.color[1]= cb;
    node.color[2]= cr;

    for(row=0; row<side; row++){
        BlockNode *dst= &s->block[first + row*row_len];

        for(col=0; col<side; col++)
            dst[col]= node;
    }
}
1694

    
1695
/**
 * Points slot 0 of the motion estimation context at the source planes and
 * at the reference planes shifted to position (x, y).
 *
 * The luma plane is offset by y*stride + x, both chroma planes by
 * (y*uvstride + x)>>1. ref2 is not used, and ref_index must be 0.
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
    const int luma_offset  =   y*c->  stride + x;
    const int chroma_offset= ((y*c->uvstride + x)>>1);
    int plane;

    for(plane=0; plane<3; plane++){
        c->src[0][plane]= src[plane];
        c->ref[0][plane]= ref[plane] + (plane ? chroma_offset : luma_offset);
    }
    assert(!ref_index);
}
1708

    
1709
/**
 * Predicts a motion vector as the component-wise median of the left, top
 * and top-right neighbours.
 *
 * With more than one reference frame each neighbour vector is first
 * multiplied by scale_mv_ref[ref][neighbour's ref] and rounded with
 * (+128)>>8; with a single reference frame the vectors are used as is.
 *
 * @param mx,my receive the predicted vector
 * @param ref   reference frame the prediction is made for
 */
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
    if(s->ref_frames != 1){
        const int *scale= scale_mv_ref[ref];
        const int scale_l = scale[left->ref];
        const int scale_t = scale[top ->ref];
        const int scale_tr= scale[tr  ->ref];

        *mx = mid_pred((left->mx * scale_l  + 128) >>8,
                       (top ->mx * scale_t  + 128) >>8,
                       (tr  ->mx * scale_tr + 128) >>8);
        *my = mid_pred((left->my * scale_l  + 128) >>8,
                       (top ->my * scale_t  + 128) >>8,
                       (tr  ->my * scale_tr + 128) >>8);
        return;
    }

    *mx = mid_pred(left->mx, top->mx, tr->mx);
    *my = mid_pred(left->my, top->my, tr->my);
}
1724

    
1725
//FIXME copy&paste
1726
/* Named rows of a local predictor array P[][2] (column 0 = x, column 1 = y).
 * encode_q_branch() fills P_LEFT, P_TOP and P_TOPRIGHT from the neighbouring
 * blocks' motion vectors and P_MEDIAN with their component-wise median. */
#define P_LEFT P[1]
1727
#define P_TOP P[2]
1728
#define P_TOPRIGHT P[3]
1729
#define P_MEDIAN P[4]
1730
/* NOTE(review): P_MV1 is not referenced in the code visible here - confirm its users */
#define P_MV1 P[9]
1731
#define FLAG_QPEL   1 //must be 1
1732

    
1733
/**
 * Searches and encodes the block (x, y) at quadtree depth `level`.
 *
 * For non-keyframes three alternatives are evaluated:
 *  - inter: motion search over all reference frames, coded into a trial
 *    range coder (pc / p_buffer / p_state);
 *  - intra: the block's mean colour per plane, coded into a second trial
 *    coder (ic / i_buffer / i_state);
 *  - split: unless level is already block_max_depth, a "0" is written to
 *    the real coder and the four child blocks are encoded recursively.
 * If the split is cheaper than both other alternatives its output is kept.
 * Otherwise the real coder is rewound to where it stood on entry and the
 * cheaper trial's bytes, coder state and context states are copied into it.
 *
 * On keyframes the block is just marked intra with the left neighbour's
 * colours and nothing is written.
 *
 * @return the score of the chosen alternative (0 on keyframes)
 */
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1734
    uint8_t p_buffer[1024];
1735
    uint8_t i_buffer[1024];
1736
    uint8_t p_state[sizeof(s->block_state)];
1737
    uint8_t i_state[sizeof(s->block_state)];
1738
    RangeCoder pc, ic;
1739
    /* position of the real coder on entry, used to rewind after a rejected split */
    uint8_t *pbbak= s->c.bytestream;
1740
    uint8_t *pbbak_start= s->c.bytestream_start;
1741
    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1742
    const int w= s->b_width  << s->block_max_depth;
1743
    const int h= s->b_height << s->block_max_depth;
1744
    const int rem_depth= s->block_max_depth - level;
1745
    const int index= (x + y*w) << rem_depth;
1746
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1747
    int trx= (x+1)<<rem_depth;
1748
    int try= (y+1)<<rem_depth;
1749
    /* neighbours outside the picture are replaced by null_block */
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1750
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1751
    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1752
    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1753
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1754
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1755
    /* colour predictors: the left neighbour's colours */
    int pl = left->color[0];
1756
    int pcb= left->color[1];
1757
    int pcr= left->color[2];
1758
    int pmx, pmy;
1759
    int mx=0, my=0;
1760
    int l,cr,cb;
1761
    const int stride= s->current_picture.linesize[0];
1762
    const int uvstride= s->current_picture.linesize[1];
1763
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
1764
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1765
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1766
    int P[10][2];
1767
    int16_t last_mv[3][2];
1768
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1769
    const int shift= 1+qpel;
1770
    MotionEstContext *c= &s->m.me;
1771
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1772
    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1773
    int my_context= av_log2(2*FFABS(left->my - top->my));
1774
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1775
    int ref, best_ref, ref_score, ref_mx, ref_my;
1776

    
1777
    assert(sizeof(s->block_state) >= 256);
1778
    if(s->keyframe){
1779
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1780
        return 0;
1781
    }
1782

    
1783
//    clip predictors / edge ?
1784

    
1785
    P_LEFT[0]= left->mx;
1786
    P_LEFT[1]= left->my;
1787
    P_TOP [0]= top->mx;
1788
    P_TOP [1]= top->my;
1789
    P_TOPRIGHT[0]= tr->mx;
1790
    P_TOPRIGHT[1]= tr->my;
1791

    
1792
    /* candidate vectors: this block's currently stored vector, and those of the right and bottom neighbours */
    last_mv[0][0]= s->block[index].mx;
1793
    last_mv[0][1]= s->block[index].my;
1794
    last_mv[1][0]= right->mx;
1795
    last_mv[1][1]= right->my;
1796
    last_mv[2][0]= bottom->mx;
1797
    last_mv[2][1]= bottom->my;
1798

    
1799
    s->m.mb_stride=2;
1800
    s->m.mb_x=
1801
    s->m.mb_y= 0;
1802
    c->skip= 0;
1803

    
1804
    assert(c->  stride ==   stride);
1805
    assert(c->uvstride == uvstride);
1806

    
1807
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1808
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1809
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1810
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1811

    
1812
    /* search window, relative to the block position */
    c->xmin = - x*block_w - 16+2;
1813
    c->ymin = - y*block_w - 16+2;
1814
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1815
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1816

    
1817
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1818
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
1819
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1820
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1821
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1822
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1823
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1824

    
1825
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1826
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1827

    
1828
    /* first block row: predict from the left neighbour, otherwise from the median */
    if (!y) {
1829
        c->pred_x= P_LEFT[0];
1830
        c->pred_y= P_LEFT[1];
1831
    } else {
1832
        c->pred_x = P_MEDIAN[0];
1833
        c->pred_y = P_MEDIAN[1];
1834
    }
1835

    
1836
    /* motion search in every reference frame, keeping the best-scoring one */
    score= INT_MAX;
1837
    best_ref= 0;
1838
    for(ref=0; ref<s->ref_frames; ref++){
1839
        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1840

    
1841
        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1842
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1843

    
1844
        assert(ref_mx >= c->xmin);
1845
        assert(ref_mx <= c->xmax);
1846
        assert(ref_my >= c->ymin);
1847
        assert(ref_my <= c->ymax);
1848

    
1849
        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1850
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1851
        /* penalty that grows with the reference index */
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1852
        /* record the per-reference result when the cache arrays exist */
        if(s->ref_mvs[ref]){
1853
            s->ref_mvs[ref][index][0]= ref_mx;
1854
            s->ref_mvs[ref][index][1]= ref_my;
1855
            s->ref_scores[ref][index]= ref_score;
1856
        }
1857
        if(score > ref_score){
1858
            score= ref_score;
1859
            best_ref= ref;
1860
            mx= ref_mx;
1861
            my= ref_my;
1862
        }
1863
    }
1864
    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1865

    
1866
  //  subpel search
1867
    /* bit position of the real coder before any trial coding */
    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1868
    /* trial coder for the inter alternative, writing into p_buffer */
    pc= s->c;
1869
    pc.bytestream_start=
1870
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1871
    memcpy(p_state, s->block_state, sizeof(s->block_state));
1872

    
1873
    if(level!=s->block_max_depth)
1874
        put_rac(&pc, &p_state[4 + s_context], 1);
1875
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1876
    if(s->ref_frames > 1)
1877
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1878
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1879
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1880
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1881
    p_len= pc.bytestream - pc.bytestream_start;
1882
    /* add the cost of the bits spent, weighted by lambda2 */
    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1883

    
1884
    /* intra alternative: l is the rounded mean luma value of the block */
    block_s= block_w*block_w;
1885
    sum = pix_sum(current_data[0], stride, block_w);
1886
    l= (sum + block_s/2)/block_s;
1887
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1888

    
1889
    /* mean values of the two chroma planes (half the luma block size per side) */
    block_s= block_w*block_w>>2;
1890
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
1891
    cb= (sum + block_s/2)/block_s;
1892
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1893
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
1894
    cr= (sum + block_s/2)/block_s;
1895
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1896

    
1897
    /* trial coder for the intra alternative, writing into i_buffer */
    ic= s->c;
1898
    ic.bytestream_start=
1899
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1900
    memcpy(i_state, s->block_state, sizeof(s->block_state));
1901
    if(level!=s->block_max_depth)
1902
        put_rac(&ic, &i_state[4 + s_context], 1);
1903
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1904
    put_symbol(&ic, &i_state[32],  l-pl , 1);
1905
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
1906
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
1907
    i_len= ic.bytestream - ic.bytestream_start;
1908
    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1909

    
1910
//    assert(score==256*256*256*64-1);
1911
    assert(iscore < 255*255*256 + s->lambda2*10);
1912
    assert(iscore >= 0);
1913
    assert(l>=0 && l<=255);
1914
    assert(pl>=0 && pl<=255);
1915

    
1916
    /* top-level blocks also feed the scene change statistic */
    if(level==0){
1917
        int varc= iscore >> 8;
1918
        int vard= score >> 8;
1919
        if (vard <= 64 || vard < varc)
1920
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1921
        else
1922
            c->scene_change_score+= s->m.qscale;
1923
    }
1924

    
1925
    /* split alternative: the children write straight into the real coder */
    if(level!=s->block_max_depth){
1926
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
1927
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1928
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1929
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1930
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1931
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1932

    
1933
        if(score2 < score && score2 < iscore)
1934
            return score2;
1935
    }
1936

    
1937
    /* not split: replace whatever was written since entry by the cheaper trial's
     * bytes and restore that trial's coder and context states */
    if(iscore < score){
1938
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1939
        memcpy(pbbak, i_buffer, i_len);
1940
        s->c= ic;
1941
        s->c.bytestream_start= pbbak_start;
1942
        s->c.bytestream= pbbak + i_len;
1943
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1944
        memcpy(s->block_state, i_state, sizeof(s->block_state));
1945
        return iscore;
1946
    }else{
1947
        memcpy(pbbak, p_buffer, p_len);
1948
        s->c= pc;
1949
        s->c.bytestream_start= pbbak_start;
1950
        s->c.bytestream= pbbak + p_len;
1951
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1952
        memcpy(s->block_state, p_state, sizeof(s->block_state));
1953
        return score;
1954
    }
1955
}
1956

    
1957
/**
 * Tells whether two block nodes describe the same prediction.
 *
 * Two intra blocks are equal when their three colours match. In every
 * other case the motion vector, the reference index and the intra flag
 * must all match.
 *
 * @return 1 if the blocks are equivalent, 0 otherwise
 */
static av_always_inline int same_block(BlockNode *a, BlockNode *b){
    const int a_intra= a->type & BLOCK_INTRA;
    const int b_intra= b->type & BLOCK_INTRA;

    if(a_intra && b_intra)
        return a->color[0] == b->color[0]
            && a->color[1] == b->color[1]
            && a->color[2] == b->color[2];

    return a->mx  == b->mx
        && a->my  == b->my
        && a->ref == b->ref
        && !((a->type ^ b->type) & BLOCK_INTRA);
}
1964

    
1965
/**
 * Writes the block (x, y) of quadtree depth `level` using the decisions
 * already stored in s->block (no search is done here).
 *
 * Above the maximum depth a flag tells whether the four finest-level
 * neighbours b, b+1, b+w, b+w+1 are all the same block (1, coded as one
 * leaf) or not (0, followed by the four children). A leaf is coded as an
 * intra flag plus either colour differences against the left neighbour or
 * the reference index and the motion vector difference against pred_mv().
 *
 * On keyframes nothing is written; the block is marked intra with the left
 * neighbour's colours.
 */
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
    const int w= s->b_width  << s->block_max_depth;
    const int rem_depth= s->block_max_depth - level;
    const int index= (x + y*w) << rem_depth;
    const int trx= (x+1)<<rem_depth;
    BlockNode * const b= &s->block[index];
    const BlockNode *left  = x ? b-1 : &null_block;
    const BlockNode *top   = y ? b-w : &null_block;
    const BlockNode *tl    = y && x ? b-w-1 : left;
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? b-w+(1<<rem_depth) : tl; //FIXME use lt
    const int pl = left->color[0];
    const int pcb= left->color[1];
    const int pcr= left->color[2];
    const int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
    int pmx, pmy;

    if(s->keyframe){
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
        return;
    }

    if(level!=s->block_max_depth){
        const int uniform= same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1);

        put_rac(&s->c, &s->block_state[4 + s_context], uniform);
        if(!uniform){
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
            return;
        }
    }

    if(!(b->type & BLOCK_INTRA)){
        /* inter leaf: reference index (if there is a choice) and vector difference */
        const int ref_offset= 16*!!b->ref;
        const int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + ref_offset;
        const int my_context= av_log2(2*FFABS(left->my - top->my)) + ref_offset;

        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
        if(s->ref_frames > 1){
            const int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);

            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
        }
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
    }else{
        /* intra leaf: colour differences against the left neighbour */
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
    }
}
2018

    
2019
/**
 * Reads the block (x, y) of quadtree depth `level` from the range coder
 * and stores the result with set_blocks().
 *
 * Above the maximum depth a flag is read first: 0 means the block is split
 * and its four children follow. A leaf consists of an intra flag and
 * either three colour differences (added to the left neighbour's colours)
 * or an optional reference index plus a motion vector difference (added to
 * the pred_mv() prediction).
 *
 * On keyframes nothing is read; the block takes null_block's values and is
 * marked intra.
 */
static void decode_q_branch(SnowContext *s, int level, int x, int y){
    const int w= s->b_width << s->block_max_depth;
    const int rem_depth= s->block_max_depth - level;
    const int index= (x + y*w) << rem_depth;
    const int trx= (x+1)<<rem_depth;
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
    const int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
    int type, mx, my, l, cb, cr, ref;

    if(s->keyframe){
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
        return;
    }

    /* the split flag is only present above the maximum depth */
    if(level!=s->block_max_depth && !get_rac(&s->c, &s->block_state[4 + s_context])){
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
        return;
    }

    /* leaf: start from the left neighbour's colours and reference 0 */
    l  = left->color[0];
    cb = left->color[1];
    cr = left->color[2];
    ref= 0;

    type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;

    if(!type){
        const int mx_context= av_log2(2*FFABS(left->mx - top->mx));
        const int my_context= av_log2(2*FFABS(left->my - top->my));

        if(s->ref_frames > 1){
            const int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);

            ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
        }
        pred_mv(s, &mx, &my, ref, left, top, tr);
        mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
        my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
    }else{
        pred_mv(s, &mx, &my, 0, left, top, tr);
        l += get_symbol(&s->c, &s->block_state[32], 1);
        cb+= get_symbol(&s->c, &s->block_state[64], 1);
        cr+= get_symbol(&s->c, &s->block_state[96], 1);
    }
    set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
}
2067

    
2068
/**
 * Encodes the block quadtrees of the whole frame, one top-level block at a
 * time in raster order.
 *
 * With ME_ITER (or when `search` is 0) the stored block decisions are
 * written by encode_q_branch2(); otherwise encode_q_branch() searches while
 * it writes. With ME_ITER, on a non-keyframe and with `search` set,
 * iterative_me() runs first.
 *
 * Before each block row the space left in the output buffer is checked;
 * if it is too small an error is logged and encoding stops.
 */
static void encode_blocks(SnowContext *s, int search){
    const int w= s->b_width;
    const int h= s->b_height;
    const int iterative= s->avctx->me_method == ME_ITER;
    const int write_stored= iterative || !search;
    int x, y;

    if(iterative && !s->keyframe && search)
        iterative_me(s);

    for(y=0; y<h; y++){
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
            return;
        }
        if(write_stored){
            for(x=0; x<w; x++)
                encode_q_branch2(s, 0, x, y);
        }else{
            for(x=0; x<w; x++)
                encode_q_branch (s, 0, x, y);
        }
    }
}
2089

    
2090
/**
 * Decodes the block quadtrees of the whole frame: decode_q_branch() is
 * called at depth 0 for every top-level block in raster order.
 */
static void decode_blocks(SnowContext *s){
    const int cols= s->b_width;
    const int rows= s->b_height;
    int bx, by;

    for(by=0; by<rows; by++)
        for(bx=0; bx<cols; bx++)
            decode_q_branch(s, 0, bx, by);
}
2101

    
2102
static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2103
    int x, y;
2104
START_TIMER
2105
    for(y=0; y < b_h+5; y++){
2106
        for(x=0; x < b_w; x++){
2107
            int a0= src[x    ];
2108
            int a1= src[x + 1];
2109
            int a2= src[x + 2];
2110
            int a3= src[x + 3];
2111
            int a4= src[x + 4];
2112
            int a5= src[x + 5];
2113
//            int am= 9*(a1+a2) - (a0+a3);
2114
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2115
//            int am= 18*(a2+a3) - 2*(a1+a4);
2116
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2117
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2118

    
2119
//            if(b_w==16) am= 8*(a1+a2);
2120

    
2121
            if(dx<8) am = (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2122
            else     am = (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2123

    
2124
            /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2125
            if(am&(~255)) am= ~(am>>31);
2126

    
2127
            tmp[x] = am;
2128

    
2129
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2130
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2131
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2132
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2133
        }
2134
        tmp += stride;
2135
        src += stride;
2136
    }
2137
    tmp -= (b_h+5)*stride;
2138

    
2139
    for(y=0; y < b_h; y++){
2140
        for(x=0; x < b_w; x++){
2141
            int a0= tmp[x + 0*stride];
2142
            int a1= tmp[x + 1*stride];
2143
            int a2= tmp[x + 2*stride];
2144
            int a3= tmp[x + 3*stride];
2145
            int a4= tmp[x + 4*stride];
2146
            int a5= tmp[x + 5*stride];
2147
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2148
//            int am= 18*(a2+a3) - 2*(a1+a4);
2149
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2150
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2151

    
2152
//            if(b_w==16) am= 8*(a1+a2);
2153

    
2154
            if(dy<8) am =  (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2155
            else     am = (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2156

    
2157
            if(am&(~255)) am= ~(am>>31);
2158

    
2159
            dst[x] = am;
2160
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2161
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2162
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2163
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2164
        }
2165
        dst += stride;
2166
        tmp += stride;
2167
    }
2168
STOP_TIMER("mc_block")
2169
}
2170

    
2171
/*
 * mca(dx, dy, b_w) defines mc_block_hpel<dx><dy><b_w>(dst, src, stride, h):
 * a wrapper that runs mc_block() on a b_w x b_w block with the fixed
 * sub-sample offsets dx and dy. The source pointer is moved 2 samples left
 * and 2 rows up before the call, and the scratch buffer holds b_w+5 rows of
 * `stride` bytes. h must equal b_w.
 */
#define mca(dx,dy,b_w)\
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
    uint8_t scratch[stride*(b_w+5)];\
    const uint8_t *origin= src-2-2*stride;\
    assert(h==b_w);\
    mc_block(dst, origin, scratch, stride, b_w, b_w, dx, dy);\
}

/* 16x16 variants */
mca( 0, 0,16)
mca( 8, 0,16)
mca( 0, 8,16)
mca( 8, 8,16)
/* 8x8 variants */
mca( 0, 0,8)
mca( 8, 0,8)
mca( 0, 8,8)
mca( 8, 8,8)
2186

    
2187
/**
 * Writes the prediction of one block into dst.
 *
 * Intra blocks are filled with the block's flat colour for the given plane.
 * Inter blocks are motion compensated from s->last_picture[block->ref].
 *
 * @param s           codec context (reference pictures, mv_scale, dsp table)
 * @param dst         output, b_w x b_h pixels, rows stride bytes apart
 * @param tmp         scratch memory, used for edge emulation and by mc_block()
 * @param stride      row stride used for dst, tmp and the reference picture
 * @param sx, sy      position of the block inside the plane
 * @param b_w, b_h    block size in pixels
 * @param block       block descriptor (type, colour, motion vector, ref)
 * @param plane_index plane to predict; plane 0 scales the motion vector by
 *                    2*s->mv_scale, the other planes by s->mv_scale
 * @param w, h        plane dimensions, used to decide on edge emulation
 */
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
    if(block->type & BLOCK_INTRA){
        int y;
        const int color = block->color[plane_index];

        /* Flat fill, b_w bytes per row. memset() replaces the former
         * replicated 32-bit stores, which multiplied the colour in signed
         * int (overflow for colours >= 128) and wrote through uint32_t
         * pointers regardless of the alignment of dst. */
        if(b_w > 0){
            for(y=0; y < b_h; y++)
                memset(dst + y*stride, color, b_w);
        }
    }else{
        uint8_t *src= s->last_picture[block->ref].data[plane_index];
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
        int mx= block->mx*scale;
        int my= block->my*scale;
        /* low 4 bits: sub-pel phase, remaining bits: full-pel displacement */
        const int dx= mx&15;
        const int dy= my&15;
        /* 16 -> 0, 8 -> 1, 4 -> 2, 2 -> 3; negative for 32 (handled apart) */
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);

        /* src starts 2 pixels left of / 2 rows above the displaced block */
        sx += (mx>>4) - 2;
        sy += (my>>4) - 2;
        src += sx + sy*stride;

        /* Use an edge-extended copy when the (b_w+5)x(b_h+5) source window
         * is not fully inside the plane. The threshold is clamped at 0:
         * otherwise a negative value converts to a huge unsigned number and
         * the test never fires for planes smaller than the window. */
        if(   (unsigned)sx >= FFMAX(w - b_w - 4, 0)
           || (unsigned)sy >= FFMAX(h - b_h - 4, 0)){
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
            src= tmp + MB_SIZE;
        }

        assert(b_w>1 && b_h>1);
        assert(tab_index>=0 && tab_index<4 || b_w==32);

        /* The dsp qpel functions are only used for quarter-pel phases,
         * power-of-two widths and 1:1, 1:2 or 2:1 shapes; everything else
         * goes through the generic mc_block(). */
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
        else if(b_w==32){
            /* two 16-wide calls for every 16 rows */
            int y;
            for(y=0; y<b_h; y+=16){
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
            }
        }else if(b_w==b_h)
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
        else if(b_w==2*b_h){
            /* wide block: left and right square halves */
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 2       + 2*stride,stride);
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
        }else{
            /* tall block: upper and lower square halves */
            assert(2*b_w==b_h);
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 2 + 2*stride           ,stride);
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
        }
    }
}
2266

    
2267
/**
 * Blends four block predictions with the OBMC weight window and either adds
 * the result to, or subtracts it from, the lines of a slice buffer.
 *
 * @param obmc        weight window; its four quadrants weight block[3],
 *                    block[2], block[1] and block[0] respectively
 * @param obmc_stride row stride of the window (also used to locate the
 *                    lower quadrants, see FIXME below)
 * @param block       the four predictions, rows src_stride bytes apart
 * @param b_w, b_h    size of the area to process
 * @param src_x       column offset inside each slice buffer line
 * @param src_y       first slice buffer line to process
 * @param sb          slice buffer holding the IDWTELEM lines
 * @param add         nonzero: add the line to the prediction, round, clip to
 *                    0..255 and store in dst8; zero: subtract the prediction
 *                    from the line
 * @param dst8        8-bit output, only written when add is nonzero
 */
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
    const int half = obmc_stride>>1;
    int row, col;

    for(row=0; row<b_h; row++){
        //FIXME ugly misuse of obmc_stride
        const uint8_t *wq0     = obmc + row*obmc_stride;
        const uint8_t *wq_right= wq0 + half;
        const uint8_t *wq_below= wq0 + obmc_stride*half;
        const uint8_t *wq_diag = wq_below + half;
        const int pix_row      = row*src_stride;
        IDWTELEM *line         = slice_buffer_get_line(sb, src_y + row);

        for(col=0; col<b_w; col++){
            const int pix = col + pix_row;
            int v=   wq0     [col] * block[3][pix]
                    +wq_right[col] * block[2][pix]
                    +wq_below[col] * block[1][pix]
                    +wq_diag [col] * block[0][pix];

            /* bring the weighted sum to FRAC_BITS fractional bits */
            v <<= 8 - LOG2_OBMC_MAX;
            if(FRAC_BITS != 8){
                v >>= 8 - FRAC_BITS;
            }

            if(!add){
                line[col + src_x] -= v;
                continue;
            }

            v += line[col + src_x];
            v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
            if(v&(~255)) v= ~(v>>31); /* clip to 0..255 */
            dst8[pix] = v;
        }
    }
}
2299

    
2300
//FIXME name cleanup (b_w, block_w, b_width stuff)
2301
/**
 * Predicts the area covered by up to four neighbouring blocks, blends the
 * predictions with the OBMC window and adds the result to / subtracts it
 * from the destination.
 *
 * @param s            codec context
 * @param sliced       nonzero: destination is the slice buffer sb and the
 *                     blending is done by s->dsp.inner_add_yblock();
 *                     zero: destination is the plain buffer dst
 * @param sb           slice buffer (sliced mode only)
 * @param dst          IDWTELEM buffer (non-sliced mode only)
 * @param dst8         8-bit output, written only when add is nonzero; may be
 *                     NULL when add is zero
 * @param obmc         OBMC weight window
 * @param src_x, src_y top-left corner of the area inside the plane; may be
 *                     negative, the area is then cropped
 * @param b_w, b_h     size of the area before cropping
 * @param w, h         plane dimensions
 * @param dst_stride   row stride of dst
 * @param src_stride   row stride of dst8 and of the prediction buffers
 * @param obmc_stride  row stride of obmc
 * @param b_x, b_y     block coordinates of the top-left of the four blocks;
 *                     neighbours outside the block grid are replaced by
 *                     their nearest in-grid neighbour
 * @param add          nonzero: add and store clipped pixels in dst8;
 *                     zero: subtract the prediction from dst
 * @param offset_dst   nonzero: dst points at the plane origin and is offset
 *                     by the (cropped) position; zero: dst points at the
 *                     uncropped area and is only advanced by the crop amount
 * @param plane_index  plane being processed
 */
static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
    const int b_width = s->b_width  << s->block_max_depth;
    const int b_height= s->b_height << s->block_max_depth;
    const int b_stride= b_width;
    int col_l= b_x, col_r= b_x+1;
    int row_t= b_y, row_b= b_y+1;
    BlockNode *lt, *rt, *lb, *rb;
    uint8_t *block[4];
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
    uint8_t *ptmp;
    int x,y;

    /* Resolve border neighbours on the indices, so that no pointer outside
     * s->block is ever formed; the resulting four pointers are the same as
     * with the former pointer based fix-up. */
    if(b_x<0)
        col_l= col_r;
    else if(b_x + 1 >= b_width)
        col_r= col_l;
    if(b_y<0)
        row_t= row_b;
    else if(b_y + 1 >= b_height)
        row_b= row_t;

    lt= &s->block[col_l + row_t*b_stride];
    rt= &s->block[col_r + row_t*b_stride];
    lb= &s->block[col_l + row_b*b_stride];
    rb= &s->block[col_r + row_b*b_stride];

    /* crop the area to the plane */
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
        obmc -= src_x;
        b_w += src_x;
        if(!sliced && !offset_dst)
            dst -= src_x;
        src_x=0;
    }else if(src_x + b_w > w){
        b_w = w - src_x;
    }
    if(src_y<0){
        obmc -= src_y*obmc_stride;
        b_h += src_y;
        if(!sliced && !offset_dst)
            dst -= src_y*dst_stride;
        src_y=0;
    }else if(src_y + b_h> h){
        b_h = h - src_y;
    }

    if(b_w<=0 || b_h<=0) return;

    assert(src_stride > 2*MB_SIZE + 5);
    if(!sliced && offset_dst)
        dst += src_x + src_y*dst_stride;
    if(dst8) /* callers that only subtract pass NULL */
        dst8+= src_x + src_y*src_stride;

    /* One prediction per distinct neighbour; identical neighbours share a
     * buffer. The buffers are carved out of tmp, tmp_step apart. */
    ptmp= tmp + 3*tmp_step;
    block[0]= ptmp;
    ptmp+=tmp_step;
    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);

    if(same_block(lt, rt)){
        block[1]= block[0];
    }else{
        block[1]= ptmp;
        ptmp+=tmp_step;
        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
    }

    if(same_block(lt, lb)){
        block[2]= block[0];
    }else if(same_block(rt, lb)){
        block[2]= block[1];
    }else{
        block[2]= ptmp;
        ptmp+=tmp_step;
        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
    }

    if(same_block(lt, rb) ){
        block[3]= block[0];
    }else if(same_block(rt, rb)){
        block[3]= block[1];
    }else if(same_block(lb, rb)){
        block[3]= block[2];
    }else{
        block[3]= ptmp;
        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
    }

    if(sliced){
        START_TIMER

        s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
        STOP_TIMER("inner_add_yblock")
    }else{
        for(y=0; y<b_h; y++){
            //FIXME ugly misuse of obmc_stride
            const uint8_t *obmc1= obmc + y*obmc_stride;
            const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
            const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
            const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
            for(x=0; x<b_w; x++){
                int v=   obmc1[x] * block[3][x + y*src_stride]
                        +obmc2[x] * block[2][x + y*src_stride]
                        +obmc3[x] * block[1][x + y*src_stride]
                        +obmc4[x] * block[0][x + y*src_stride];

                /* bring the weighted sum to FRAC_BITS fractional bits */
                v <<= 8 - LOG2_OBMC_MAX;
                if(FRAC_BITS != 8){
                    v >>= 8 - FRAC_BITS;
                }
                if(add){
                    v += dst[x + y*dst_stride];
                    v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
                    if(v&(~255)) v= ~(v>>31); /* clip to 0..255 */
                    dst8[x + y*src_stride] = v;
                }else{
                    dst[x + y*dst_stride] -= v;
                }
            }
        }
    }
}
2458

    
2459
/**
 * Applies the prediction of one row of blocks to a slice buffer.
 *
 * For keyframes (or when debug bit 512 is set) no motion prediction is done:
 * with add set, the lines of the row are offset by 128, rounded, clipped and
 * written to current_picture; otherwise 128 (in FRAC_BITS fixed point) is
 * subtracted from the lines. Nothing is done for mb_y == mb_h in that case.
 *
 * Otherwise add_yblock() is run in sliced mode for mb_x = 0 .. mb_w, each
 * call covering a block_w square shifted up and left by half a block.
 *
 * @param old_buffer  passed to add_yblock() as its dst argument
 * @param add         nonzero to add the prediction, zero to subtract it
 * @param mb_y        block row to process, 0 .. mb_h inclusive
 */
static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
    Plane *plane= &s->plane[plane_index];
    const int mb_w= s->b_width  << s->block_max_depth;
    const int mb_h= s->b_height << s->block_max_depth;
    const int block_size = MB_SIZE >> s->block_max_depth;
    const int block_w    = plane_index ? block_size/2 : block_size;
    const uint8_t *obmc  = obmc_tab[s->block_max_depth + (plane_index ? 1 : 0)];
    const int obmc_stride= plane_index ? block_size : 2*block_size;
    const int ref_stride = s->current_picture.linesize[plane_index];
    uint8_t *dst8= s->current_picture.data[plane_index];
    const int w= plane->width;
    const int h= plane->height;
    int x, y, mb_x;
    START_TIMER

    if(s->keyframe || (s->avctx->debug&512)){
        int y_end;

        if(mb_y==mb_h)
            return;

        y_end= FFMIN(h, block_w*(mb_y+1));
        for(y=block_w*mb_y; y<y_end; y++){
            IDWTELEM *line = sb->line[y];

            if(add){
                for(x=0; x<w; x++){
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
                    v >>= FRAC_BITS;
                    if(v&(~255)) v= ~(v>>31); /* clip to 0..255 */
                    dst8[x + y*ref_stride]= v;
                }
            }else{
                for(x=0; x<w; x++)
                    line[x] -= 128 << FRAC_BITS;
            }
        }

        return;
    }

    for(mb_x=0; mb_x<=mb_w; mb_x++){
        START_TIMER

        add_yblock(s, 1, sb, old_buffer, dst8, obmc,
                   block_w*mb_x - block_w/2,
                   block_w*mb_y - block_w/2,
                   block_w, block_w,
                   w, h,
                   w, ref_stride, obmc_stride,
                   mb_x - 1, mb_y - 1,
                   add, 0, plane_index);

        STOP_TIMER("add_yblock")
    }

    STOP_TIMER("predict_slice")
}
2525

    
2526
/**
 * Applies the prediction of one row of blocks to a plain coefficient buffer.
 *
 * For keyframes (or when debug bit 512 is set) no motion prediction is done:
 * with add set, the rows of buf are offset by 128, rounded, clipped and
 * written to current_picture; otherwise 128 (in FRAC_BITS fixed point) is
 * subtracted from buf. Nothing is done for mb_y == mb_h in that case.
 *
 * Otherwise add_yblock() is run in non-sliced mode for mb_x = 0 .. mb_w,
 * each call covering a block_w square shifted up and left by half a block.
 *
 * @param buf   coefficient buffer with a row stride of the plane width
 * @param add   nonzero to add the prediction, zero to subtract it
 * @param mb_y  block row to process, 0 .. mb_h inclusive
 */
static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
    Plane *plane= &s->plane[plane_index];
    const int mb_w= s->b_width  << s->block_max_depth;
    const int mb_h= s->b_height << s->block_max_depth;
    const int block_size = MB_SIZE >> s->block_max_depth;
    const int block_w    = plane_index ? block_size/2 : block_size;
    const uint8_t *obmc  = obmc_tab[s->block_max_depth + (plane_index ? 1 : 0)];
    const int obmc_stride= plane_index ? block_size : 2*block_size;
    const int ref_stride = s->current_picture.linesize[plane_index];
    uint8_t *dst8= s->current_picture.data[plane_index];
    const int w= plane->width;
    const int h= plane->height;
    int x, y, mb_x;
    START_TIMER

    if(s->keyframe || (s->avctx->debug&512)){
        int y_end;

        if(mb_y==mb_h)
            return;

        y_end= FFMIN(h, block_w*(mb_y+1));
        for(y=block_w*mb_y; y<y_end; y++){
            if(add){
                for(x=0; x<w; x++){
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
                    v >>= FRAC_BITS;
                    if(v&(~255)) v= ~(v>>31); /* clip to 0..255 */
                    dst8[x + y*ref_stride]= v;
                }
            }else{
                for(x=0; x<w; x++)
                    buf[x + y*w]-= 128<<FRAC_BITS;
            }
        }

        return;
    }

    for(mb_x=0; mb_x<=mb_w; mb_x++){
        START_TIMER

        add_yblock(s, 0, NULL, buf, dst8, obmc,
                   block_w*mb_x - block_w/2,
                   block_w*mb_y - block_w/2,
                   block_w, block_w,
                   w, h,
                   w, ref_stride, obmc_stride,
                   mb_x - 1, mb_y - 1,
                   add, 1, plane_index);

        STOP_TIMER("add_yblock")
    }

    STOP_TIMER("predict_slice")
}
2582

    
2583
/**
 * Runs predict_slice() for every block row of a plane, including the extra
 * row index equal to the number of block rows.
 */
static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
    const int last_row= s->b_height << s->block_max_depth;
    int row= 0;

    while(row <= last_row){
        predict_slice(s, buf, plane_index, add, row);
        row++;
    }
}
2589

    
2590
/**
 * Estimates the intra colour (0..255) of block (mb_x, mb_y) for one plane.
 *
 * The block is temporarily turned into an intra block of colour 0 and the
 * OBMC prediction of the four quadrants around it is subtracted into the
 * scratchpad. For every covered pixel, the difference between the input
 * picture and that prediction is accumulated weighted by the block's own
 * OBMC weight (ab), together with the squared weights (aa). The returned
 * colour is ab/aa scaled by 2^LOG2_OBMC_MAX, rounded and clipped.
 * The block is restored before returning.
 */
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
    int i, x2, y2;
    Plane *p= &s->plane[plane_index];
    const int block_size = MB_SIZE >> s->block_max_depth;
    const int block_w    = plane_index ? block_size/2 : block_size;
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
    const int obmc_stride= plane_index ? block_size : 2*block_size;
    const int ref_stride= s->current_picture.linesize[plane_index];
    uint8_t *src= s-> input_picture.data[plane_index];
    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
    const int b_stride = s->b_width << s->block_max_depth;
    const int w= p->width;
    const int h= p->height;
    int index= mb_x + mb_y*b_stride;
    BlockNode *b= &s->block[index];
    BlockNode backup= *b;
    int ab=0;
    int aa=0;

    /* make the block contribute nothing to the prediction */
    b->type|= BLOCK_INTRA;
    b->color[plane_index]= 0;
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));

    /* the four block_w quadrants of the block's OBMC window */
    for(i=0; i<4; i++){
        int mb_x2= mb_x + (i &1) - 1;
        int mb_y2= mb_y + (i>>1) - 1;
        int x= block_w*mb_x2 + block_w/2;
        int y= block_w*mb_y2 + block_w/2;

        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);

        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
                /* position inside the block's OBMC window / scratchpad */
                int win= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
                int obmc_v= obmc[win];
                int d;
                /* at picture borders the weight of the missing neighbour
                 * is added to this block's weight */
                if(y<0) obmc_v += obmc[win + block_w*obmc_stride];
                if(x<0) obmc_v += obmc[win + block_w];
                if(y+block_w>h) obmc_v += obmc[win - block_w*obmc_stride];
                if(x+block_w>w) obmc_v += obmc[win - block_w];
                //FIXME precalc this or simplify it somehow else

                d = -dst[win] + (1<<(FRAC_BITS-1));
                dst[win] = d;
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
                aa += obmc_v * obmc_v; //FIXME precalculate this
            }
        }
    }
    *b= backup;

    /* The scaled numerator is computed in 64 bits: ab can be negative (a
     * left shift would be undefined) and ab * 2^LOG2_OBMC_MAX can exceed
     * the int range when LOG2_OBMC_MAX is as large as 8. */
    return av_clip((int)(((int64_t)ab * (1<<LOG2_OBMC_MAX) + aa/2)/aa), 0, 255); //FIXME we should not need clipping
}
2644

    
2645
/**
 * Estimates the number of bits needed to code block (x, y).
 *
 * @param s  codec context
 * @param x  block column; values outside 0 .. b_stride-1 yield 0
 * @param y  block row; values >= b_height yield 0
 * @param w  width of the block in block units, used to locate the
 *           top-right neighbour
 * @return   estimated cost, 0 for blocks outside the tested range
 */
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
    const int b_stride = s->b_width << s->block_max_depth;
    const int b_height = s->b_height<< s->block_max_depth;
    const BlockNode *b, *left, *top, *tl, *tr;
    int index;
    int dmx, dmy;

    /* Checked before any neighbour pointer is derived from the index:
     * callers pass x == -1 and y == b_height. */
    if(x<0 || x>=b_stride || y>=b_height)
        return 0;

    index= x + y*b_stride;
    b    = &s->block[index];
    left = x ? &s->block[index-1] : &null_block;
    top  = y ? &s->block[index-b_stride] : &null_block;
    tl   = y && x ? &s->block[index-b_stride-1] : left;
    tr   = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
/*
1            0      0
01X          1-2    1
001XX        3-6    2-3
0001XXX      7-14   4-7
00001XXXX   15-30   8-15
*/
//FIXME try accurate rate
//FIXME intra and inter predictors if surrounding blocks aren't the same type
    if(b->type & BLOCK_INTRA){
        /* colours are costed against the left neighbour's colours */
        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
    }else{
        /* motion vectors are costed against the predicted vector */
        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
        dmx-= b->mx;
        dmy-= b->my;
        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
                    + av_log2(2*FFABS(dmy))
                    + av_log2(2*b->ref));
    }
}
2682

    
2683
/**
 * Computes the rate-distortion cost of block (mb_x, mb_y) in its current
 * state.
 *
 * The 2*block_w square around the block is predicted from this block alone,
 * weighted with obmc_edged and combined with the prediction stored in the
 * OBMC scratchpad; the clipped result is written into current_picture. The
 * distortion between input_picture and current_picture over that square is
 * then measured with the configured comparison function. For plane 0 the
 * estimated bit cost of the blocks whose coding depends on this block is
 * added, scaled by the penalty factor.
 *
 * @param obmc_edged  weight window for this block, rows obmc_stride apart
 * @return distortion + rate * penalty_factor
 */
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
    Plane *plane= &s->plane[plane_index];
    const int block_size = MB_SIZE >> s->block_max_depth;
    const int block_w    = plane_index ? block_size/2 : block_size;
    const int obmc_stride= plane_index ? block_size : 2*block_size;
    const int ref_stride = s->current_picture.linesize[plane_index];
    uint8_t *recon= s->current_picture.data[plane_index];
    uint8_t *input= s->  input_picture.data[plane_index];
    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
    uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
    uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
    const int b_stride = s->b_width << s->block_max_depth;
    const int b_height = s->b_height<< s->block_max_depth;
    const int w= plane->width;
    const int h= plane->height;
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
    /* top-left corner of the 2*block_w window, may lie outside the plane */
    const int sx= block_w*mb_x - block_w/2;
    const int sy= block_w*mb_y - block_w/2;
    const int origin= sx + sy*ref_stride;
    /* part of the window that lies inside the plane */
    int x0= FFMAX(0,-sx);
    int y0= FFMAX(0,-sy);
    int x1= FFMIN(block_w*2, w-sx);
    int y1= FFMIN(block_w*2, h-sy);
    int distortion;
    int rate= 0;
    int i,x,y;

    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);

    for(y=y0; y<y1; y++){
        const uint8_t  *weight  = obmc_edged + y*obmc_stride;
        const IDWTELEM *pred_row= pred + y*obmc_stride;
        const uint8_t  *cur_row = cur + y*ref_stride;
        uint8_t        *out_row = recon + origin + y*ref_stride;

        for(x=x0; x<x1; x++){
#if FRAC_BITS >= LOG2_OBMC_MAX
            int v = (cur_row[x] * weight[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
#else
            int v = (cur_row[x] * weight[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
#endif
            v = (v + pred_row[x]) >> FRAC_BITS;
            if(v&(~255)) v= ~(v>>31); /* clip to 0..255 */
            out_row[x] = v;
        }
    }

    /* copy the regions where obmc[] = (uint8_t)256 */
    if(LOG2_OBMC_MAX == 8
        && (mb_x == 0 || mb_x == b_stride-1)
        && (mb_y == 0 || mb_y == b_height-1)){
        /* restrict the window to the quadrant touching the picture corner */
        if(mb_x == 0) x1 = block_w;
        else          x0 = block_w;
        if(mb_y == 0) y1 = block_w;
        else          y0 = block_w;

        for(y=y0; y<y1; y++)
            memcpy(recon + origin + x0 + y*ref_stride, cur + x0 + y*ref_stride, x1-x0);
    }

    if(block_w==16){
        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
        /* FIXME cmps overlap but don't cover the wavelet's whole support,
         * so improving the score of one block is not strictly guaranteed to
         * improve the score of the whole frame, so iterative motion est
         * doesn't always converge. */
        if(s->avctx->me_cmp == FF_CMP_W97)
            distortion = w97_32_c(&s->m, input + origin, recon + origin, ref_stride, 32);
        else if(s->avctx->me_cmp == FF_CMP_W53)
            distortion = w53_32_c(&s->m, input + origin, recon + origin, ref_stride, 32);
        else{
            /* sum over the four 16x16 quarters of the 32x32 window */
            distortion = 0;
            for(i=0; i<4; i++){
                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
                distortion += s->dsp.me_cmp[0](&s->m, input + off, recon + off, ref_stride, 16);
            }
        }
    }else{
        assert(block_w==8);
        distortion = s->dsp.me_cmp[0](&s->m, input + origin, recon + origin, ref_stride, block_w*2);
    }

    if(plane_index==0){
        for(i=0; i<4; i++){
/* ..RRr
 * .RXx.
 * rxx..
 */
            const int col_off= (i&1) - (i>>1);
            const int row_off= i>>1;
            rate += get_block_bits(s, mb_x + col_off, mb_y + row_off, 1);
        }
        if(mb_x == b_stride-2)
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
    }
    return distortion + rate*penalty_factor;
}
2780

    
2781
/**
 * Computes the rate-distortion cost of the 2x2 block group whose top-left
 * block is (mb_x, mb_y).
 *
 * The 3x3 grid of block_w squares around the group is re-predicted into
 * current_picture with add_yblock(); parts of those squares lying outside
 * the plane are filled from input_picture. The distortion of each square
 * against input_picture is summed. For plane 0 the estimated bit cost of
 * the group (coded as one block if all four are identical) and of the
 * neighbouring blocks is added, scaled by the penalty factor.
 *
 * @return distortion + rate * penalty_factor
 */
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
    Plane *plane= &s->plane[plane_index];
    const int block_size = MB_SIZE >> s->block_max_depth;
    const int block_w    = plane_index ? block_size/2 : block_size;
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
    const int obmc_stride= plane_index ? block_size : 2*block_size;
    const int ref_stride = s->current_picture.linesize[plane_index];
    uint8_t *recon= s->current_picture.data[plane_index];
    uint8_t *input= s-> input_picture.data[plane_index];
    static const IDWTELEM zero_dst[4096]; //FIXME
    const int b_stride = s->b_width << s->block_max_depth;
    const int w= plane->width;
    const int h= plane->height;
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
    int distortion= 0;
    int rate= 0;
    int grid_row, grid_col, i, y2;

    for(grid_row=0; grid_row<3; grid_row++){
        for(grid_col=0; grid_col<3; grid_col++){
            const int mb_x2= mb_x + grid_col - 1;
            const int mb_y2= mb_y + grid_row - 1;
            const int x= block_w*mb_x2 + block_w/2;
            const int y= block_w*mb_y2 + block_w/2;
            const int y_end= y + block_w;

            add_yblock(s, 0, NULL, zero_dst, recon, obmc,
                       x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);

            //FIXME find a cleaner/simpler way to skip the outside stuff
            /* rows above the plane */
            for(y2= y; y2<0; y2++)
                memcpy(recon + x + y2*ref_stride, input + x + y2*ref_stride, block_w);
            /* rows below the plane */
            for(y2= h; y2<y_end; y2++)
                memcpy(recon + x + y2*ref_stride, input + x + y2*ref_stride, block_w);
            /* columns left of the plane */
            if(x<0){
                for(y2= y; y2<y_end; y2++)
                    memcpy(recon + x + y2*ref_stride, input + x + y2*ref_stride, -x);
            }
            /* columns right of the plane */
            if(x+block_w > w){
                for(y2= y; y2<y_end; y2++)
                    memcpy(recon + w + y2*ref_stride, input + w + y2*ref_stride, x+block_w - w);
            }

            assert(block_w== 8 || block_w==16);
            distortion += s->dsp.me_cmp[block_w==8](&s->m, input + x + y*ref_stride, recon + x + y*ref_stride, ref_stride, block_w);
        }
    }

    if(plane_index==0){
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);

/* ..RRRr
 * .RXXx.
 * .RXXx.
 * rxxx.
 */
        if(merged)
            rate = get_block_bits(s, mb_x, mb_y, 2);
        /* entries 0..3 are the group itself, skipped when it is merged */
        for(i=merged?4:0; i<9; i++){
            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
        }
    }
    return distortion + rate*penalty_factor;
}
2844

    
2845
/**
 * Tries a candidate for block (mb_x, mb_y) and keeps it if it lowers the
 * rate-distortion cost.
 *
 * @param p          intra: the three plane colours; inter: p[0], p[1] are
 *                   the motion vector (p[2] is not read)
 * @param intra      nonzero to try an intra block, zero for an inter block
 * @param obmc_edged weight window passed on to get_block_rd()
 * @param best_rd    in: cost to beat; out: updated when the candidate wins
 * @return 1 if the candidate was kept, 0 if the block was restored or the
 *         inter candidate was found in the me_cache and skipped
 */
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
    const int b_stride= s->b_width << s->block_max_depth;
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
    BlockNode backup= *block;
    int rd, index, value;

    assert(mb_x>=0 && mb_y>=0);
    assert(mb_x<b_stride);

    if(intra){
        block->color[0] = p[0];
        block->color[1] = p[1];
        block->color[2] = p[2];
        block->type |= BLOCK_INTRA;
    }else{
        /* Skip vectors already tried in this cache generation. p[1] may be
         * negative, so it is scaled by multiplication: left-shifting a
         * negative int is undefined. */
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
        value= s->me_cache_generation + (p[0]>>10) + p[1]*(1<<6) + (block->ref<<12);
        if(s->me_cache[index] == value)
            return 0;
        s->me_cache[index]= value;

        block->mx= p[0];
        block->my= p[1];
        block->type &= ~BLOCK_INTRA;
    }

    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);

//FIXME chroma
    if(rd < *best_rd){
        *best_rd= rd;
        return 1;
    }else{
        *block= backup;
        return 0;
    }
}
2882

    
2883
/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
2884
/**
 * Inter-only front end of check_block(): tries the motion vector (p0, p1)
 * for block (mb_x, mb_y). Returns what check_block() returns.
 */
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
    int mv[2];

    mv[0]= p0;
    mv[1]= p1;
    return check_block(s, mb_x, mb_y, mv, 0, obmc_edged, best_rd);
}
2888

    
2889
/**
 * Tries one inter candidate for the whole 2x2 block group whose top-left
 * block is (mb_x, mb_y) and keeps it if it lowers the rate-distortion cost.
 *
 * @param p0, p1   motion vector to try
 * @param ref      reference index to try
 * @param best_rd  in: cost to beat; out: updated when the candidate wins
 * @return 1 if the candidate was kept, 0 if the four blocks were restored
 *         or the candidate was found in the me_cache and skipped
 */
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
    const int b_stride= s->b_width << s->block_max_depth;
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
    int rd, index, value;

    assert(mb_x>=0 && mb_y>=0);
    assert(mb_x<b_stride);
    assert(((mb_x|mb_y)&1) == 0);

    /* Skip vectors already tried in this cache generation. p1 may be
     * negative, so it is scaled by multiplication: left-shifting a negative
     * int is undefined.
     * NOTE(review): the key uses the block's current ref, not the ref
     * argument that is about to be stored - confirm this is intended. */
    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
    value= s->me_cache_generation + (p0>>10) + p1*(1<<6) + (block->ref<<12);
    if(s->me_cache[index] == value)
        return 0;
    s->me_cache[index]= value;

    block->mx= p0;
    block->my= p1;
    block->ref= ref;
    block->type &= ~BLOCK_INTRA;
    /* replicate the candidate into the other three blocks of the group */
    block[1]= block[b_stride]= block[b_stride+1]= *block;

    rd= get_4block_rd(s, mb_x, mb_y, 0);

//FIXME chroma
    if(rd < *best_rd){
        *best_rd= rd;
        return 1;
    }else{
        block[0]= backup[0];
        block[1]= backup[1];
        block[b_stride]= backup[2];
        block[b_stride+1]= backup[3];
        return 0;
    }
}
2925

    
2926
/**
 * Iterative motion estimation over all blocks of the current frame.
 *
 * First runs encode_q_branch() over every block with the range coder and
 * block_state saved beforehand and restored afterwards. Then, for up to 25
 * passes, every block that is not flagged BLOCK_OPT is re-evaluated: the
 * current choice is scored, per reference frame a set of predictor vectors,
 * a full-pel diamond search and a sub-pel square search are tried through
 * check_block_inter(), and finally an intra candidate using the DC colors is
 * tried through check_block(). When a block changes, its 8 neighbours lose
 * their BLOCK_OPT flag so they get revisited in the next pass. The loop stops
 * early when a pass changes nothing.
 *
 * If block_max_depth is 1, a final pass tries to give each 2x2 group of blocks
 * a common vector through check_4block_inter().
 *
 * Per-pass statistics are reported at debug level on the codec context.
 */
static void iterative_me(SnowContext *s){
    int pass, mb_x, mb_y;
    const int b_width = s->b_width  << s->block_max_depth;
    const int b_height= s->b_height << s->block_max_depth;
    const int b_stride= b_width;
    int color[3];

    {
        /* run the block encoder once, then put the coder state back */
        RangeCoder r = s->c;
        uint8_t state[sizeof(s->block_state)];
        memcpy(state, s->block_state, sizeof(s->block_state));
        for(mb_y= 0; mb_y<s->b_height; mb_y++)
            for(mb_x= 0; mb_x<s->b_width; mb_x++)
                encode_q_branch(s, 0, mb_x, mb_y);
        s->c = r;
        memcpy(s->block_state, state, sizeof(s->block_state));
    }

    for(pass=0; pass<25; pass++){
        int change= 0;

        for(mb_y= 0; mb_y<b_height; mb_y++){
            for(mb_x= 0; mb_x<b_width; mb_x++){
                int dia_change, i, j, ref;
                int best_rd= INT_MAX, ref_rd;
                BlockNode backup, ref_b;
                const int index= mb_x + mb_y * b_stride;
                BlockNode *block= &s->block[index];
                /* neighbours; NULL where they would lie outside the block grid */
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
                const int b_w= (MB_SIZE >> s->block_max_depth);
                uint8_t obmc_edged[b_w*2][b_w*2];

                /* after the first pass only blocks whose BLOCK_OPT flag was cleared are redone */
                if(pass && (block->type & BLOCK_OPT))
                    continue;
                block->type |= BLOCK_OPT;

                backup= *block;

                /* start a new cache generation for this block */
                if(!s->me_cache_generation)
                    memset(s->me_cache, 0, sizeof(s->me_cache));
                s->me_cache_generation += 1<<22;

                //FIXME precalc
                {
                    /* copy the OBMC window and, for blocks on the border of the
                     * block grid, fold the weights of the missing side into the
                     * remaining one */
                    int x, y;
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
                    if(mb_x==0)
                        for(y=0; y<b_w*2; y++)
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
                    if(mb_x==b_stride-1)
                        for(y=0; y<b_w*2; y++)
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
                    if(mb_y==0){
                        for(x=0; x<b_w*2; x++)
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
                        for(y=1; y<b_w; y++)
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
                    }
                    if(mb_y==b_height-1){
                        for(x=0; x<b_w*2; x++)
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
                        for(y=b_w; y<b_w*2-1; y++)
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
                    }
                }

                //skip stuff outside the picture
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
                {
                    /* copy the input samples into the current picture for the
                     * part of the 2*block_w window that lies outside the plane */
                    uint8_t *src= s->  input_picture.data[0];
                    uint8_t *dst= s->current_picture.data[0];
                    const int stride= s->current_picture.linesize[0];
                    const int block_w= MB_SIZE >> s->block_max_depth;
                    const int sx= block_w*mb_x - block_w/2;
                    const int sy= block_w*mb_y - block_w/2;
                    const int w= s->plane[0].width;
                    const int h= s->plane[0].height;
                    int y;

                    for(y=sy; y<0; y++)
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
                    for(y=h; y<sy+block_w*2; y++)
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
                    if(sx<0){
                        for(y=sy; y<sy+block_w*2; y++)
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
                    }
                    if(sx+block_w*2 > w){
                        for(y=sy; y<sy+block_w*2; y++)
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
                    }
                }

                // intra(black) = neighbors' contribution to the current block
                for(i=0; i<3; i++)
                    color[i]= get_dc(s, mb_x, mb_y, i);

                // get previous score (cannot be cached due to OBMC)
                if(pass > 0 && (block->type&BLOCK_INTRA)){
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
                }else
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);

                /* ref_b / ref_rd track the best block state over all reference frames */
                ref_b= *block;
                ref_rd= best_rd;
                for(ref=0; ref < s->ref_frames; ref++){
                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
                        continue;
                    block->ref= ref;
                    best_rd= INT_MAX;

                    /* predictors: stored vector of this block, zero, and the
                     * stored vectors of the 4 direct neighbours */
                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
                    if(tb)
                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
                    if(lb)
                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
                    if(rb)
                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
                    if(bb)
                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);

                    /* fullpel ME */
                    //FIXME avoid subpel interpol / round to nearest integer
                    do{
                        dia_change=0;
                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
                            for(j=0; j<i; j++){
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
                            }
                        }
                    }while(dia_change);
                    /* subpel ME */
                    do{
                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
                        dia_change=0;
                        for(i=0; i<8; i++)
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
                    }while(dia_change);
                    //FIXME or try the standard 2 pass qpel or similar

                    /* remember the vector found for this reference */
                    mvr[0][0]= block->mx;
                    mvr[0][1]= block->my;
                    if(ref_rd > best_rd){
                        ref_rd= best_rd;
                        ref_b= *block;
                    }
                }
                best_rd= ref_rd;
                *block= ref_b;
#if 1
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
                //FIXME RD style color selection
#endif
                if(!same_block(block, &backup)){
                    /* the block changed: all 8 neighbours must be re-optimized */
                    if(tb ) tb ->type &= ~BLOCK_OPT;
                    if(lb ) lb ->type &= ~BLOCK_OPT;
                    if(rb ) rb ->type &= ~BLOCK_OPT;
                    if(bb ) bb ->type &= ~BLOCK_OPT;
                    if(tlb) tlb->type &= ~BLOCK_OPT;
                    if(trb) trb->type &= ~BLOCK_OPT;
                    if(blb) blb->type &= ~BLOCK_OPT;
                    if(brb) brb->type &= ~BLOCK_OPT;
                    change ++;
                }
            }
        }
        /* progress statistics, not an error condition */
        av_log(s->avctx, AV_LOG_DEBUG, "pass:%d changed:%d\n", pass, change);
        if(!change)
            break;
    }

    if(s->block_max_depth == 1){
        int change= 0;
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
                int i;
                int best_rd, init_rd;
                const int index= mb_x + mb_y * b_stride;
                BlockNode *b[4];

                b[0]= &s->block[index];
                b[1]= b[0]+1;
                b[2]= b[0]+b_stride;
                b[3]= b[2]+1;
                /* nothing to do if the 4 blocks already agree */
                if(same_block(b[0], b[1]) &&
                   same_block(b[0], b[2]) &&
                   same_block(b[0], b[3]))
                    continue;

                if(!s->me_cache_generation)
                    memset(s->me_cache, 0, sizeof(s->me_cache));
                s->me_cache_generation += 1<<22;

                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);

                //FIXME more multiref search?
                /* rounded average of the 4 vectors, on reference 0 */
                check_4block_inter(s, mb_x, mb_y,
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);

                /* each non-intra block's own vector and reference */
                for(i=0; i<4; i++)
                    if(!(b[i]->type&BLOCK_INTRA))
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);

                if(init_rd != best_rd)
                    change++;
            }
        }
        av_log(s->avctx, AV_LOG_DEBUG, "pass:4mv changed:%d\n", change*4);
    }
}
3150

    
3151
/**
 * Quantize the coefficients of one subband from src into dst.
 *
 * In lossless mode (s->qlog == LOSSLESS_QLOG) the coefficients are copied
 * unchanged. Otherwise coefficients whose value lies inside the dead zone are
 * set to 0 and all others are divided by the quantizer step with the sign
 * handled separately, so the magnitude is rounded the same way for positive
 * and negative values.
 *
 * @param bias if non-zero no rounding offset is used, otherwise the offset is
 *             3/8 of the quantizer step
 */
static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
    const int width = b->width;
    const int height= b->height;
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
    int row, col, dead_lo, dead_span;

    if(s->qlog == LOSSLESS_QLOG){
        for(row=0; row<height; row++){
            for(col=0; col<width; col++){
                const int pos= col + row*stride;
                dst[pos]= src[pos];
            }
        }
        return;
    }

    bias= bias ? 0 : (3*qmul)>>3;
    dead_lo  = ((qmul - bias)>>QEXPSHIFT) - 1;
    dead_span= 2*dead_lo;

    for(row=0; row<height; row++){
        for(col=0; col<width; col++){
            const int pos= col + row*stride;
            const int coef= src[pos];
            int mag;

            /* single unsigned compare tests -dead_lo <= coef <= dead_lo */
            if((unsigned)(coef+dead_lo) <= dead_span){
                dst[pos]= 0;
                continue;
            }

            mag= coef>=0 ? coef : -coef;
            mag<<= QEXPSHIFT;
            mag= (mag + bias) / qmul; //FIXME optimize
            dst[pos]= coef>=0 ? mag : -mag;
        }
    }
}
3216

    
3217
/**
 * Dequantize, in place, the rows start_y..end_y-1 of a subband that is stored
 * in a slice buffer. Does nothing in lossless mode. Zero coefficients are left
 * untouched; for the others the magnitude is scaled by the quantizer step and
 * the rounding offset derived from s->qbias is added before the final shift.
 * The src and stride arguments are not used.
 */
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
    const int w= b->width;
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
    int col, row;
    START_TIMER

    if(s->qlog == LOSSLESS_QLOG) return;

    for(row=start_y; row<end_y; row++){
        IDWTELEM *coefs= slice_buffer_get_line(sb, (row * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;

        for(col=0; col<w; col++){
            const int v= coefs[col];

            if(v == 0)
                continue;
            if(v > 0)
                coefs[col]=  (( v*qmul + qadd)>>(QEXPSHIFT));
            else
                coefs[col]= -((-v*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
        }
    }
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
        STOP_TIMER("dquant")
    }
}
3243

    
3244
/**
 * Dequantize, in place, all coefficients of one subband stored in a plain
 * buffer with the given stride. Does nothing in lossless mode. Zero
 * coefficients are left untouched; for the others the magnitude is scaled by
 * the quantizer step and the rounding offset derived from s->qbias is added
 * before the final shift.
 */
static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
    const int w= b->width;
    const int h= b->height;
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
    int col, row;
    START_TIMER

    if(s->qlog == LOSSLESS_QLOG) return;

    for(row=0; row<h; row++){
        for(col=0; col<w; col++){
            const int pos= col + row*stride;
            const int v= src[pos];

            if(v == 0)
                continue;
            if(v > 0)
                src[pos]=  (( v*qmul + qadd)>>(QEXPSHIFT));
            else
                src[pos]= -((-v*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
        }
    }
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
        STOP_TIMER("dquant")
    }
}
3269

    
3270
/**
 * Replace every coefficient of a subband by its difference to a prediction
 * built from its left / top neighbours, in place.
 *
 * The band is walked from the bottom-right to the top-left so that each
 * prediction only reads neighbours that have not been modified yet.
 * Predictors:
 *  - first column: the sample above (the top-left sample is left as is)
 *  - first row: the sample to the left
 *  - use_median set: median of left, top and top-right; plain left on the
 *    last column
 *  - otherwise: median of left, top and left+top-topleft
 * The s and inverse arguments are not used.
 */
static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
    const int width = b->width;
    const int height= b->height;
    int col, row;

    for(row=height-1; row>=0; row--){
        for(col=width-1; col>=0; col--){
            const int pos= col + row*stride;
            int pred;

            if(col == 0){
                if(row == 0)
                    continue; // nothing to predict from
                pred= src[pos - stride];
            }else if(row == 0){
                pred= src[pos - 1];
            }else if(use_median){
                if(col+1 < width)
                    pred= mid_pred(src[pos - 1], src[pos - stride], src[pos - stride + 1]);
                else
                    pred= src[pos - 1];
            }else{
                pred= mid_pred(src[pos - 1], src[pos - stride], src[pos - 1] + src[pos - stride] - src[pos - 1 - stride]);
            }

            src[pos] -= pred;
        }
    }
}
3293

    
3294
/**
 * Add the neighbour-based prediction back to the rows start_y..end_y-1 of a
 * subband stored in a slice buffer, in place.
 *
 * Rows are processed top to bottom and left to right, so every prediction is
 * built from samples that were already updated. The predictors mirror those
 * of decorrelate(): top for the first column, left for the first row, and a
 * median of left/top/top-right (use_median) or left/top/left+top-topleft
 * elsewhere. The row above start_y is fetched from the slice buffer when
 * start_y is not 0. The s, src, stride and inverse arguments are not used.
 */
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
    const int width= b->width;
    IDWTELEM *cur= NULL; // stays NULL only for row 0, where the row above is never read
    IDWTELEM *above;
    int col, row;

    if(start_y != 0)
        cur= slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;

    for(row=start_y; row<end_y; row++){
        above= cur;
        cur= slice_buffer_get_line(sb, (row * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;

        for(col=0; col<width; col++){
            int pred;

            if(col == 0){
                if(row == 0)
                    continue; // nothing to predict from
                pred= above[col];
            }else if(row == 0){
                pred= cur[col - 1];
            }else if(use_median){
                if(col+1 < width)
                    pred= mid_pred(cur[col - 1], above[col], above[col + 1]);
                else
                    pred= cur[col - 1];
            }else{
                pred= mid_pred(cur[col - 1], above[col], cur[col - 1] + above[col] - above[col - 1]);
            }

            cur[col] += pred;
        }
    }
}
3327

    
3328
/**
 * Add the neighbour-based prediction back to every coefficient of a subband
 * stored in a plain buffer, in place.
 *
 * The band is walked top to bottom and left to right, so every prediction is
 * built from samples that were already updated. Predictors:
 *  - first column: the sample above (the top-left sample is left as is)
 *  - first row: the sample to the left
 *  - use_median set: median of left, top and top-right; plain left on the
 *    last column
 *  - otherwise: median of left, top and left+top-topleft
 * The s and inverse arguments are not used.
 */
static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
    const int width = b->width;
    const int height= b->height;
    int col, row;

    for(row=0; row<height; row++){
        for(col=0; col<width; col++){
            const int pos= col + row*stride;
            int pred;

            if(col == 0){
                if(row == 0)
                    continue; // nothing to predict from
                pred= src[pos - stride];
            }else if(row == 0){
                pred= src[pos - 1];
            }else if(use_median){
                if(col+1 < width)
                    pred= mid_pred(src[pos - 1], src[pos - stride], src[pos - stride + 1]);
                else
                    pred= src[pos - 1];
            }else{
                pred= mid_pred(src[pos - 1], src[pos - stride], src[pos - 1] + src[pos - stride] - src[pos - 1 - stride]);
            }

            src[pos] += pred;
        }
    }
}
3351

    
3352
/**
 * Write the frame header with the range coder.
 *
 * Layout, in order: the keyframe flag (coded with a fresh local state); on
 * keyframes the global parameters and the per-band qlog values of planes 0
 * and 1 (orientation 2 is not stored); then, for every frame, the
 * spatial_decomposition_type, qlog, mv_scale, qbias and block_max_depth as
 * differences to the values of the previous header. The reference values are
 * reset to 0 (together with the coder contexts) on keyframes or when
 * always_reset is set, and are updated at the end.
 */
static void encode_header(SnowContext *s){
    RangeCoder * const rc= &s->c;
    uint8_t * const hstate= s->header_state;
    uint8_t kstate[32];
    int plane_index, level, orientation;

    memset(kstate, MID_STATE, sizeof(kstate));

    put_rac(rc, kstate, s->keyframe);
    if(s->keyframe || s->always_reset){
        reset_contexts(s);
        /* deltas below are then coded relative to 0 */
        s->last_block_max_depth           = 0;
        s->last_mv_scale                  = 0;
        s->last_qbias                     = 0;
        s->last_qlog                      = 0;
        s->last_spatial_decomposition_type= 0;
    }
    if(s->keyframe){
        put_symbol(rc, hstate, s->version, 0);
        put_rac   (rc, hstate, s->always_reset);
        put_symbol(rc, hstate, s->temporal_decomposition_type, 0);
        put_symbol(rc, hstate, s->temporal_decomposition_count, 0);
        put_symbol(rc, hstate, s->spatial_decomposition_count, 0);
        put_symbol(rc, hstate, s->colorspace_type, 0);
        put_symbol(rc, hstate, s->chroma_h_shift, 0);
        put_symbol(rc, hstate, s->chroma_v_shift, 0);
        put_rac   (rc, hstate, s->spatial_scalability);
//        put_rac(&s->c, s->header_state, s->rate_scalability);
        put_symbol(rc, hstate, s->max_ref_frames-1, 0);

        for(plane_index=0; plane_index<2; plane_index++){
            for(level=0; level<s->spatial_decomposition_count; level++){
                /* orientation 0 exists only at level 0 */
                for(orientation=level ? 1:0; orientation<4; orientation++){
                    if(orientation != 2)
                        put_symbol(rc, hstate, s->plane[plane_index].band[level][orientation].qlog, 1);
                }
            }
        }
    }
    put_symbol(rc, hstate, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
    put_symbol(rc, hstate, s->qlog            - s->last_qlog    , 1);
    put_symbol(rc, hstate, s->mv_scale        - s->last_mv_scale, 1);
    put_symbol(rc, hstate, s->qbias           - s->last_qbias   , 1);
    put_symbol(rc, hstate, s->block_max_depth - s->last_block_max_depth, 1);

    /* remember what was sent as the base for the next header */
    s->last_spatial_decomposition_type= s->spatial_decomposition_type;
    s->last_qlog                      = s->qlog;
    s->last_qbias                     = s->qbias;
    s->last_mv_scale                  = s->mv_scale;
    s->last_block_max_depth           = s->block_max_depth;
}
3401

    
3402
/**
 * Read the frame header with the range coder.
 *
 * Reads the keyframe flag; on keyframes the global parameters and the per-band
 * qlog values (plane 2 reuses the values of plane 1, orientation 2 reuses the
 * value of orientation 1); then, for every frame, the differentially coded
 * spatial_decomposition_type, qlog, mv_scale, qbias and block_max_depth. On
 * keyframes or when always_reset is set the coder contexts are reset and
 * these five values restart from 0.
 *
 * @return 0 on success, -1 if the header carries an unsupported version or a
 *         value outside the supported range (an error is logged)
 */
static int decode_header(SnowContext *s){
    int plane_index, level, orientation;
    uint8_t kstate[32];

    memset(kstate, MID_STATE, sizeof(kstate));

    s->keyframe= get_rac(&s->c, kstate);
    if(s->keyframe || s->always_reset){
        reset_contexts(s);
        s->spatial_decomposition_type=
        s->qlog=
        s->qbias=
        s->mv_scale=
        s->block_max_depth= 0;
    }
    if(s->keyframe){
        int max_ref_frames;

        s->version= get_symbol(&s->c, s->header_state, 0);
        if(s->version>0){
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported\n", s->version);
            return -1;
        }
        s->always_reset= get_rac(&s->c, s->header_state);
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
        s->spatial_scalability= get_rac(&s->c, s->header_state);
//        s->rate_scalability= get_rac(&s->c, s->header_state);

        /* validate before storing: the value comes straight from the
         * bitstream and MAX_REF_FRAMES is the size of the per-reference tables */
        max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
        if(max_ref_frames < 1 || max_ref_frames > MAX_REF_FRAMES){
            av_log(s->avctx, AV_LOG_ERROR, "max_ref_frames %d is out of range\n", max_ref_frames);
            return -1;
        }
        s->max_ref_frames= max_ref_frames;

        for(plane_index=0; plane_index<3; plane_index++){
            for(level=0; level<s->spatial_decomposition_count; level++){
                for(orientation=level ? 1:0; orientation<4; orientation++){
                    int q;
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
                    else                    q= get_symbol(&s->c, s->header_state, 1);
                    s->plane[plane_index].band[level][orientation].qlog= q;
                }
            }
        }
    }

    /* the delta is signed, so the sum can also end up negative */
    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
    if(s->spatial_decomposition_type > 1 || s->spatial_decomposition_type < 0){
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported\n", s->spatial_decomposition_type);
        return -1;
    }

    s->qlog           += get_symbol(&s->c, s->header_state, 1);
    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
    s->qbias          += get_symbol(&s->c, s->header_state, 1);
    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
    if(s->block_max_depth > 1 || s->block_max_depth < 0){
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is out of range\n", s->block_max_depth);
        s->block_max_depth= 0;
        return -1;
    }

    return 0;
}
3465

    
3466
static void init_qexp(void){
3467
    int i;
3468
    double v=128;
3469

    
3470
    for(i=0; i<QROOT; i++){
3471
        qexp[i]= lrintf(v);
3472
        v *= pow(2, 1.0 / QROOT);
3473
    }
3474
}
3475

    
3476
/**
 * Initialization shared by the encoder and the decoder.
 *
 * Sets up the dsputil function tables used for motion compensation, fills the
 * qexp table on first use, applies the default decomposition / chroma / block
 * settings, allocates the DWT buffers and the per-band coefficient lists,
 * fills in the geometry of every subband, builds scale_mv_ref, resets the
 * coder contexts and requests the mconly_picture buffer.
 *
 * @return 0 on success, -1 if an allocation or get_buffer() fails (an error
 *         is logged). NOTE(review): on failure the buffers allocated so far
 *         stay attached to the context; confirm the caller's cleanup path
 *         releases them.
 */
static int common_init(AVCodecContext *avctx){
    SnowContext *s = avctx->priv_data;
    int width, height;
    int level, orientation, plane_index;
    int i, j;

    s->avctx= avctx;

    dsputil_init(&s->dsp, avctx);

/* use the h264 qpel functions for both the rounding and no-rounding qpel tables */
#define mcf(dx,dy)\
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];

    mcf( 0, 0)
    mcf( 4, 0)
    mcf( 8, 0)
    mcf(12, 0)
    mcf( 0, 4)
    mcf( 4, 4)
    mcf( 8, 4)
    mcf(12, 4)
    mcf( 0, 8)
    mcf( 4, 8)
    mcf( 8, 8)
    mcf(12, 8)
    mcf( 0,12)
    mcf( 4,12)
    mcf( 8,12)
    mcf(12,12)

/* install the mc_block_hpel* functions in the half-pel tables */
#define mcfh(dx,dy)\
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
        mc_block_hpel ## dx ## dy ## 16;\
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
        mc_block_hpel ## dx ## dy ## 8;

    mcfh(0, 0)
    mcfh(8, 0)
    mcfh(0, 8)
    mcfh(8, 8)

    if(!qexp[0])
        init_qexp();

    s->spatial_decomposition_count= 5;
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type

    s->chroma_h_shift= 1; //FIXME XXX
    s->chroma_v_shift= 1;

    width= s->avctx->width;
    height= s->avctx->height;

    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here
    if(!s->spatial_idwt_buffer || !s->spatial_dwt_buffer){
        av_log(avctx, AV_LOG_ERROR, "cannot allocate the DWT buffers\n");
        return -1;
    }

    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;

    for(plane_index=0; plane_index<3; plane_index++){
        int w= s->avctx->width;
        int h= s->avctx->height;

        if(plane_index){
            w>>= s->chroma_h_shift;
            h>>= s->chroma_v_shift;
        }
        s->plane[plane_index].width = w;
        s->plane[plane_index].height= h;

        /* walk from the last level down to level 0, halving w/h (rounding up) after each */
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
                SubBand *b= &s->plane[plane_index].band[level][orientation];

                b->buf= s->spatial_dwt_buffer;
                b->level= level;
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
                b->width = (w + !(orientation&1))>>1;
                b->height= (h + !(orientation>1))>>1;

                b->stride_line = 1 << (s->spatial_decomposition_count - level);
                b->buf_x_offset = 0;
                b->buf_y_offset = 0;

                /* odd orientations start in the right half of a row */
                if(orientation&1){
                    b->buf += (w+1)>>1;
                    b->buf_x_offset = (w+1)>>1;
                }
                /* orientations 2 and 3 start half a band stride further down */
                if(orientation>1){
                    b->buf += b->stride>>1;
                    b->buf_y_offset = b->stride_line >> 1;
                }
                /* same offset inside the IDWT buffer */
                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);

                if(level)
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
                if(!b->x_coeff){
                    av_log(avctx, AV_LOG_ERROR, "cannot allocate the coefficient list\n");
                    return -1;
                }
            }
            w= (w+1)>>1;
            h= (h+1)>>1;
        }
    }

    for(i=0; i<MAX_REF_FRAMES; i++)
        for(j=0; j<MAX_REF_FRAMES; j++)
            scale_mv_ref[i][j] = 256*(i+1)/(j+1);

    reset_contexts(s);

    if(s->avctx->get_buffer(s->avctx, &s->mconly_picture) < 0){
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
        return -1;
    }

    return 0;
}
3603

    
3604
static int qscale2qlog(int qscale){
3605
    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3606
           + 61*QROOT/8; //<64 >60
3607
}
3608

    
3609
/**
 * Single pass rate control for one frame.
 *
 * Estimates the frame's complexity as a sum of weighted dwt coefs of plane 0,
 * hands it to ff_rate_estimate_qscale() and converts the resulting quality
 * into lambda and qlog.
 * FIXME we know exact mv bits at this point,
 * but ratecontrol isn't set up to include them.
 *
 * Side effects: the band coefficients are copied from b->buf into b->ibuf
 * (and decorrelated for orientation 0), pict->quality, s->lambda and s->qlog
 * are updated.
 *
 * @return the change applied to s->qlog, or INT_MIN if the rate control
 *         returned a negative quality
 */
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
{
    uint32_t coef_sum= 0;
    int level, orientation, delta_qlog;
    int intra;

    for(level=0; level<s->spatial_decomposition_count; level++){
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
            SubBand *band= &s->plane[0].band[level][orientation];
            IDWTELEM *coefs= band->ibuf;
            const int band_w= band->width;
            const int band_h= band->height;
            const int stride= band->stride;
            const int qlog= av_clip(2*QROOT + band->qlog, 0, QROOT*16);
            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
            const int qdiv= (1<<16)/qmul;
            int col, row;

            //FIXME this is ugly
            for(row=0; row<band_h; row++){
                for(col=0; col<band_w; col++){
                    const int pos= col + row*stride;
                    coefs[pos]= band->buf[pos];
                }
            }
            if(orientation==0)
                decorrelate(s, band, coefs, stride, 1, 0);
            for(row=0; row<band_h; row++){
                for(col=0; col<band_w; col++)
                    coef_sum+= abs(coefs[col + row*stride]) * qdiv >> 16;
            }
        }
    }

    /* ugly, ratecontrol just takes a sqrt again */
    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
    assert(coef_sum < INT_MAX);

    /* I frames report the sum as mb_var_sum, all others as mc_mb_var_sum */
    intra= pict->pict_type == I_TYPE;
    if(intra){
        s->m.current_picture.mb_var_sum= coef_sum;
        s->m.current_picture.mc_mb_var_sum= 0;
    }else{
        s->m.current_picture.mc_mb_var_sum= coef_sum;
        s->m.current_picture.mb_var_sum= 0;
    }

    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
    if (pict->quality < 0)
        return INT_MIN;

    s->lambda= pict->quality * 3/2;
    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
    s->qlog+= delta_qlog;

    return delta_qlog;
}
3660

    
3661
/**
 * Assign a quantizer offset (qlog) to every subband of one plane.
 *
 * For each subband the inverse-DWT work buffer is zeroed, one coefficient of
 * value 256*16 is stored at the centre of the subband (through b->ibuf), the
 * inverse transform is run, and the sum of squared output samples (each
 * scaled by 16) is turned into a logarithmic weight that is written to
 * b->qlog.
 *
 * NOTE(review): this presumably relies on b->ibuf pointing into
 * s->spatial_idwt_buffer, so that the stored coefficient is what the inverse
 * transform sees -- confirm against the subband setup code.
 *
 * @param s codec context supplying the IDWT buffer and decomposition settings
 * @param p plane whose subbands receive their qlog
 */
static void calculate_vissual_weight(SnowContext *s, Plane *p){
    const int plane_w= p->width;
    const int plane_h= p->height;
    int lvl;

    for(lvl=0; lvl<s->spatial_decomposition_count; lvl++){
        /* orientation 0 is only visited on level 0; other levels start at 1 */
        int orient= lvl ? 1 : 0;

        while(orient<4){
            SubBand *band= &p->band[lvl][orient];
            IDWTELEM *coeffs= band->ibuf;
            int64_t energy= 0;
            int row, col;

            /* start from an all-zero picture containing a single coefficient */
            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*plane_w*plane_h);
            coeffs[band->width/2 + band->height/2*band->stride]= 256*16;
            ff_spatial_idwt(s->spatial_idwt_buffer, plane_w, plane_h, plane_w, s->spatial_decomposition_type, s->spatial_decomposition_count);

            /* accumulate the squared response over the whole plane */
            for(row=0; row<plane_h; row++){
                const int row_base= row*plane_w;

                for(col=0; col<plane_w; col++){
                    int64_t scaled= s->spatial_idwt_buffer[col + row_base]*16;
                    energy += scaled*scaled;
                }
            }

            /* logarithm to base 2^(1/QROOT), rounded by adding 0.5 before truncation */
            band->qlog= (int)(log(352256.0/sqrt(energy)) / log(pow(2.0, 1.0/QROOT))+0.5);

            orient++;
        }
    }
}
3687

    
3688
/**
 * Initialize the Snow encoder.
 *
 * Rejects unsupported configurations, runs the shared initialization,
 * allocates the motion-estimation work buffers, optionally sets up rate
 * control, computes the per-subband qlog values for all three planes and
 * requests the buffer used to hold the input picture.
 *
 * @param avctx codec context; avctx->priv_data is used as the SnowContext
 * @return 0 on success; -1 if the configuration is rejected, a buffer cannot
 *         be allocated, rate control initialization fails or get_buffer()
 *         fails
 */
static int encode_init(AVCodecContext *avctx)
{
    SnowContext *s = avctx->priv_data;
    int plane_index;

    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
               "use vstrict=-2 / -strict -2 to use it anyway\n");
        return -1;
    }

    if(avctx->prediction_method == DWT_97
       && (avctx->flags & CODEC_FLAG_QSCALE)
       && avctx->global_quality == 0){
        av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
        return -1;
    }

    common_init(avctx);
    alloc_blocks(s);

    s->version=0;

    s->m.avctx   = avctx;
    s->m.flags   = avctx->flags;
    s->m.bit_rate= avctx->bit_rate;

    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
    /* fail here instead of leaving NULL work buffers for later use */
    if(!s->m.me.scratchpad || !s->m.me.map || !s->m.me.score_map || !s->m.obmc_scratchpad){
        av_log(avctx, AV_LOG_ERROR, "cannot allocate motion estimation buffers\n");
        return -1;
    }
    h263_encode_init(&s->m); //mv_penalty

    /* clamp the requested number of reference frames to [1, MAX_REF_FRAMES] */
    s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);

    if(avctx->flags&CODEC_FLAG_PASS1){
        if(!avctx->stats_out)
            avctx->stats_out = av_mallocz(256);
        if(!avctx->stats_out){
            av_log(avctx, AV_LOG_ERROR, "cannot allocate stats buffer\n");
            return -1;
        }
    }
    if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
        if(ff_rate_control_init(&s->m) < 0)
            return -1;
    }
    /* set when neither a fixed qscale nor second-pass mode is requested */
    s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));

    for(plane_index=0; plane_index<3; plane_index++){
        calculate_vissual_weight(s, &s->plane[plane_index]);
    }


    avctx->coded_frame= &s->current_picture;
    switch(avctx->pix_fmt){
//    case PIX_FMT_YUV444P:
//    case PIX_FMT_YUV422P:
    case PIX_FMT_YUV420P:
    case PIX_FMT_GRAY8:
//    case PIX_FMT_YUV411P:
//    case PIX_FMT_YUV410P:
        s->colorspace_type= 0;
        break;
/*    case PIX_FMT_RGB32:
        s->colorspace= 1;
        break;*/
    default:
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
        return -1;
    }
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
    s->chroma_h_shift= 1;
    s->chroma_v_shift= 1;

    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);

    /* the input picture buffer is written by encode_frame, so it must exist */
    if(s->avctx->get_buffer(s->avctx, &s->input_picture) < 0){
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
        return -1;
    }

    if(s->avctx->me_method == ME_ITER){
        int i;
        int size= s->b_width * s->b_height << 2*s->block_max_depth;
        for(i=0; i<s->max_ref_frames; i++){
            s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
            s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
            if(!s->ref_mvs[i] || !s->ref_scores[i]){
                av_log(avctx, AV_LOG_ERROR, "cannot allocate reference motion vector buffers\n");
                return -1;
            }
        }
    }

    return 0;
}
3775

    
3776
/**
 * Prepare the picture buffers for coding the next frame.
 *
 * If a current picture exists its edges are drawn, then the reference list
 * is rotated: the current picture becomes last_picture[0], the older
 * references move one slot up and the former oldest entry becomes the new
 * current picture. s->ref_frames is recomputed and a buffer is requested
 * for the new current picture through avctx->get_buffer().
 *
 * @param s codec context
 * @return 0 on success, -1 if get_buffer() fails
 */
static int frame_start(SnowContext *s){
    AVFrame recycled;
    const int pic_w= s->avctx->width; //FIXME round up to x16 ?
    const int pic_h= s->avctx->height;
    int usable_refs= 0;

    if(s->current_picture.data[0]){
        int plane;

        /* plane 0 uses full size, planes 1 and 2 use halved size and edge */
        for(plane=0; plane<3; plane++){
            const int shift= plane ? 1 : 0;

            draw_edges(s->current_picture.data[plane], s->current_picture.linesize[plane],
                       pic_w>>shift, pic_h>>shift, plane ? EDGE_WIDTH/2 : EDGE_WIDTH);
        }
    }

    /* rotate: oldest reference is reused as the new current picture */
    recycled= s->last_picture[s->max_ref_frames-1];
    memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
    s->last_picture[0]= s->current_picture;
    s->current_picture= recycled;

    if(!s->keyframe){
        /* count populated references, stopping once the previous one was a key frame */
        while(usable_refs<s->max_ref_frames && s->last_picture[usable_refs].data[0]){
            if(usable_refs && s->last_picture[usable_refs-1].key_frame)
                break;
            usable_refs++;
        }
    }
    s->ref_frames= usable_refs;

    s->current_picture.reference= 1;
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
        return -1;
    }

    s->current_picture.key_frame= s->keyframe;

    return 0;
}
3812

    
3813
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3814
    SnowContext *s = avctx->priv_data;
3815
    RangeCoder * const c= &s->c;
3816
    AVFrame *pict = data;
3817
    const int width= s->avctx->width;
3818
    const int height= s->avctx->height;
3819
    int level, orientation, plane_index, i, y;
3820
    uint8_t rc_header_bak[sizeof(s->header_state)];
3821
    uint8_t rc_block_bak[sizeof(s->block_state)];
3822

    
3823
    ff_init_range_encoder(c, buf, buf_size);
3824
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3825

    
3826
    for(i=0; i<3; i++){
3827
        int shift= !!i;
3828
        for(y=0; y<(height>>shift); y++)
3829
            memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3830
                   &pict->data[i][y * pict->linesize[i]],
3831
                   width>>shift);
3832
    }
3833
    s->new_picture = *pict;
3834

    
3835
    s->m.picture_number= avctx->frame_number;
3836
    if(avctx->flags&CODEC_FLAG_PASS2){
3837
        s->m.pict_type =
3838
        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3839
        s->keyframe= pict->pict_type==FF_I_TYPE;
3840
        if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3841
            pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3842
            if (pict->quality < 0)
3843
                return -1;
3844
        }
3845
    }else{
3846
        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3847
        s->m.pict_type=
3848
        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3849
    }
3850

    
3851
    if(s->pass1_rc && avctx->frame_number == 0)
3852
        pict->quality= 2*FF_QP2LAMBDA;
3853
    if(pict->quality){
3854
        s->qlog= qscale2qlog(pict->quality);
3855
        s->lambda = pict->quality * 3/2;
3856
    }
3857
    if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3858
        s->qlog= LOSSLESS_QLOG;
3859
        s->lambda = 0;
3860
    }//else keep previous frame's qlog until after motion est
3861

    
3862
    frame_start(s);
3863

    
3864
    s->m.current_picture_ptr= &s->m.current_picture;
3865
    if(pict->pict_type == P_TYPE){
3866
        int block_width = (width +15)>>4;
3867
        int block_height= (height+15)>>4;
3868
        int stride= s->current_picture.linesize[0];
3869

    
3870
        assert(s->current_picture.data[0]);
3871
        assert(s->last_picture[0].data[0]);
3872

    
3873
        s->m.avctx= s->avctx;
3874
        s->m.current_picture.data[0]= s->current_picture.data[0];
3875
        s->m.   last_picture.data[0]= s->last_picture[0].data[0];
3876
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3877
        s->m.   last_picture_ptr= &s->m.   last_picture;
3878
        s->m.linesize=
3879
        s->m.   last_picture.linesize[0]=
3880
        s->m.    new_picture.linesize[0]=
3881
        s->m.current_picture.linesize[0]= stride;
3882
        s->m.uvlinesize= s->current_picture.linesize[1];
3883
        s->m.width = width;
3884
        s->m.height= height;
3885
        s->m.mb_width = block_width;
3886
        s->m.mb_height= block_height;
3887
        s->m.mb_stride=   s->m.mb_width+1;
3888
        s->m.b8_stride= 2*s->m.mb_width+1;
3889
        s->m.f_code=1;
3890
        s->m.pict_type= pict->pict_type;
3891
        s->m.me_method= s->avctx->me_method;
3892
        s->m.me.scene_change_score=0;
3893
        s->m.flags= s->avctx->flags;
3894
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3895
        s->m.out_format= FMT_H263;
3896
        s->m.unrestricted_mv= 1;
3897

    
3898
        s->m.lambda = s->lambda;
3899
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3900
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3901

    
3902
        s->m.dsp= s->dsp; //move
3903
        ff_init_me(&s->m);
3904
        s->dsp= s->m.dsp;
3905
    }
3906

    
3907
    if(s->pass1_rc){
3908
        memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
3909
        memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
3910
    }
3911

    
3912
redo_frame:
3913

    
3914
    s->m.pict_type = pict->pict_type;
3915
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3916

    
3917
    encode_header(s);
3918
    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3919
    encode_blocks(s, 1);
3920
    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3921

    
3922
    for(plane_index=0; plane_index<3; plane_index++){
3923
        Plane *p= &s->plane[plane_index];
3924
        int w= p->width;
3925
        int h= p->height;
3926
        int x, y;
3927
//        int bits= put_bits_count(&s->c.pb);
3928

    
3929
    if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
3930
        //FIXME optimize
3931
     if(pict->data[plane_index]) //FIXME gray hack
3932
        for(y=0; y<h; y++){
3933
            for(x=0; x<w; x++){
3934
                s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3935
            }
3936
        }
3937
        predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
3938

    
3939
        if(   plane_index==0
3940
           && pict->pict_type == P_TYPE
3941
           && !(avctx->flags&CODEC_FLAG_PASS2)
3942
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3943
            ff_init_range_encoder(c, buf, buf_size);
3944
            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3945
            pict->pict_type= FF_I_TYPE;
3946
            s->keyframe=1;
3947
            s->current_picture.key_frame=1;
3948
            goto redo_frame;
3949
        }
3950

    
3951
        if(s->qlog == LOSSLESS_QLOG){
3952
            for(y=0; y<h; y++){
3953
                for(x=0; x<w; x++){
3954
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3955
                }
3956
            }
3957
        }else{
3958
            for(y=0; y<h; y++){
3959
                for(x=0; x<w; x++){
3960
                    s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
3961
                }
3962
            }
3963
        }
3964

    
3965
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3966

    
3967
        if(s->pass1_rc && plane_index==0){
3968
            int delta_qlog = ratecontrol_1pass(s, pict);
3969
            if (delta_qlog <= INT_MIN)
3970
                return -1;
3971
            if(delta_qlog){
3972
                //reordering qlog in the bitstream would eliminate this reset
3973
                ff_init_range_encoder(c, buf, buf_size);
3974
                memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
3975
                memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
3976
                encode_header(s);
3977
                encode_blocks(s, 0);
3978
            }
3979
        }
3980

    
3981
        for(level=0; level<s->spatial_decomposition_count; level++){
3982
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3983
                SubBand *b= &p->band[level][orientation];
3984

    
3985
                quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
3986
                if(orientation==0)
3987
                    decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
3988
                encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
3989
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
3990
                if(orientation==0)
3991
                    correlate(s, b, b->ibuf, b->stride, 1, 0);
3992
            }
3993
        }
3994
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3995

    
3996
        for(level=0; level<s->spatial_decomposition_count; level++){
3997
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3998
                SubBand *b= &p->band[level][orientation];
3999

    
4000
                dequantize(s, b, b->ibuf, b->stride);
4001
            }
4002
        }
4003

    
4004
        ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4005
        if(s->qlog == LOSSLESS_QLOG){
4006
            for(y=0; y<h; y++){
4007
                for(x=0; x<w; x++){
4008
                    s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4009
                }
4010
            }
4011
        }
4012
{START_TIMER
4013
        predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4014
STOP_TIMER("pred-conv")}
4015
      }else{
4016
            //ME/MC only
4017
            if(pict->pict_type == I_TYPE){
4018
                for(y=0; y<h; y++){
4019
                    for(x=0; x<w; x++){
4020
                        s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4021
                            pict->data[plane_index][y*pict->linesize[plane_index] + x];
4022
                    }
4023
                }
4024
            }else{
4025
                memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4026
                predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4027
            }
4028
      }
4029
        if(s->avctx->flags&CODEC_FLAG_PSNR){
4030
            int64_t error= 0;
4031

    
4032
    if(pict->data[plane_index]) //FIXME gray hack
4033
            for(y=0; y<h; y++){
4034
                for(x=0; x<w; x++){
4035
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4036
                    error += d*d;
4037
                }
4038
            }
4039
            s->avctx->error[plane_index] += error;
4040
            s->current_picture.error[plane_index] = error;
4041
        }
4042
    }
4043

    
4044
    if(s->last_picture[s->max_ref_frames-1].data[0])
4045
        avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4046

    
4047
    s->current_picture.coded_picture_number = avctx->frame_number;
4048
    s->current_picture.pict_type = pict->pict_type;
4049
    s->current_picture.quality = pict->quality;
4050
    s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4051
    s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4052
    s->m.current_picture.display_picture_number =
4053
    s->m.current_picture.coded_picture_number = avctx->frame_number;
4054
    s->m.current_picture.quality = pict->quality;