Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 4156a436

History | View | Annotate | Download (166 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22
#include "snow.h"
23

    
24
#include "rangecoder.h"
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
static const int8_t quant3[256]={
32
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
33
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
41
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
48
};
49
static const int8_t quant3b[256]={
50
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
59
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
};
67
static const int8_t quant3bA[256]={
68
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
69
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
};
85
static const int8_t quant5[256]={
86
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
87
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
95
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
102
};
103
static const int8_t quant7[256]={
104
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
107
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
113
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
118
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
119
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
120
};
121
static const int8_t quant9[256]={
122
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
123
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
124
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
131
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
137
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
138
};
139
static const int8_t quant11[256]={
140
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
141
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
142
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
143
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
149
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
154
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
155
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
156
};
157
static const int8_t quant13[256]={
158
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
159
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
160
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
162
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
167
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
171
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
172
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
174
};
175

    
176
#if 0 //64*cubic
177
static const uint8_t obmc32[1024]={
178
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
180
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
181
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
182
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
183
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
184
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
185
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
186
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
187
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
189
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
191
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
192
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
193
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
194
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
196
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
197
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
198
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
199
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
200
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
201
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
202
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
203
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
204
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
205
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
206
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
207
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
208
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
209
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
210
//error:0.000022
211
};
212
static const uint8_t obmc16[256]={
213
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
214
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
216
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
218
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
219
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
220
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
221
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
223
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
224
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
225
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
226
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
227
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
228
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
229
//error:0.000033
230
};
231
#elif 1 // 64*linear
232
static const uint8_t obmc32[1024]={
233
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
234
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
235
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
236
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
237
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
238
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
239
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
240
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
241
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
242
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
243
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
244
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
245
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
246
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
247
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
248
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
249
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
250
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
251
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
252
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
253
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
254
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
255
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
256
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
257
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
258
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
259
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
260
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
261
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
262
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
263
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
264
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
265
 //error:0.000020
266
};
267
static const uint8_t obmc16[256]={
268
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
269
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
270
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
271
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
272
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
273
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
274
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
275
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
276
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
278
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
280
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
281
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
282
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
283
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
284
//error:0.000015
285
};
286
#else //64*cos
287
static const uint8_t obmc32[1024]={
288
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
289
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
290
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
291
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
292
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
293
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
294
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
295
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
296
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
297
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
299
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
301
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
302
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
303
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
304
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
306
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
307
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
308
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
309
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
310
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
311
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
312
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
313
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
314
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
315
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
316
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
317
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
318
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
319
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
320
//error:0.000022
321
};
322
static const uint8_t obmc16[256]={
323
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
324
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
326
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
328
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
329
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
330
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
331
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
333
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
334
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
335
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
336
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
337
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
338
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
339
//error:0.000022
340
};
341
#endif
342

    
343
//linear *64
344
static const uint8_t obmc8[64]={
345
  4, 12, 20, 28, 28, 20, 12,  4,
346
 12, 36, 60, 84, 84, 60, 36, 12,
347
 20, 60,100,140,140,100, 60, 20,
348
 28, 84,140,196,196,140, 84, 28,
349
 28, 84,140,196,196,140, 84, 28,
350
 20, 60,100,140,140,100, 60, 20,
351
 12, 36, 60, 84, 84, 60, 36, 12,
352
  4, 12, 20, 28, 28, 20, 12,  4,
353
//error:0.000000
354
};
355

    
356
//linear *64
357
static const uint8_t obmc4[16]={
358
 16, 48, 48, 16,
359
 48,144,144, 48,
360
 48,144,144, 48,
361
 16, 48, 48, 16,
362
//error:0.000000
363
};
364

    
365
static const uint8_t *obmc_tab[4]={
366
    obmc32, obmc16, obmc8, obmc4
367
};
368

    
369
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
370

    
371
typedef struct BlockNode{
372
    int16_t mx;
373
    int16_t my;
374
    uint8_t ref;
375
    uint8_t color[3];
376
    uint8_t type;
377
//#define TYPE_SPLIT    1
378
#define BLOCK_INTRA   1
379
#define BLOCK_OPT     2
380
//#define TYPE_NOCOLOR  4
381
    uint8_t level; //FIXME merge into type?
382
}BlockNode;
383

    
384
static const BlockNode null_block= { //FIXME add border maybe
385
    .color= {128,128,128},
386
    .mx= 0,
387
    .my= 0,
388
    .ref= 0,
389
    .type= 0,
390
    .level= 0,
391
};
392

    
393
#define LOG2_MB_SIZE 4
394
#define MB_SIZE (1<<LOG2_MB_SIZE)
395

    
396
typedef struct x_and_coeff{
397
    int16_t x;
398
    uint16_t coeff;
399
} x_and_coeff;
400

    
401
typedef struct SubBand{
402
    int level;
403
    int stride;
404
    int width;
405
    int height;
406
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
407
    DWTELEM *buf;
408
    int buf_x_offset;
409
    int buf_y_offset;
410
    int stride_line; ///< Stride measured in lines, not pixels.
411
    x_and_coeff * x_coeff;
412
    struct SubBand *parent;
413
    uint8_t state[/*7*2*/ 7 + 512][32];
414
}SubBand;
415

    
416
typedef struct Plane{
417
    int width;
418
    int height;
419
    SubBand band[MAX_DECOMPOSITIONS][4];
420
}Plane;
421

    
422
typedef struct SnowContext{
423
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
424

    
425
    AVCodecContext *avctx;
426
    RangeCoder c;
427
    DSPContext dsp;
428
    AVFrame new_picture;
429
    AVFrame input_picture;              ///< new_picture with the internal linesizes
430
    AVFrame current_picture;
431
    AVFrame last_picture[MAX_REF_FRAMES];
432
    AVFrame mconly_picture;
433
//     uint8_t q_context[16];
434
    uint8_t header_state[32];
435
    uint8_t block_state[128 + 32*128];
436
    int keyframe;
437
    int always_reset;
438
    int version;
439
    int spatial_decomposition_type;
440
    int temporal_decomposition_type;
441
    int spatial_decomposition_count;
442
    int temporal_decomposition_count;
443
    int max_ref_frames;
444
    int ref_frames;
445
    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
446
    uint32_t *ref_scores[MAX_REF_FRAMES];
447
    DWTELEM *spatial_dwt_buffer;
448
    int colorspace_type;
449
    int chroma_h_shift;
450
    int chroma_v_shift;
451
    int spatial_scalability;
452
    int qlog;
453
    int lambda;
454
    int lambda2;
455
    int pass1_rc;
456
    int mv_scale;
457
    int qbias;
458
#define QBIAS_SHIFT 3
459
    int b_width;
460
    int b_height;
461
    int block_max_depth;
462
    Plane plane[MAX_PLANES];
463
    BlockNode *block;
464
#define ME_CACHE_SIZE 1024
465
    int me_cache[ME_CACHE_SIZE];
466
    int me_cache_generation;
467
    slice_buffer sb;
468

    
469
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
470
}SnowContext;
471

    
472
typedef struct {
473
    DWTELEM *b0;
474
    DWTELEM *b1;
475
    DWTELEM *b2;
476
    DWTELEM *b3;
477
    int y;
478
} dwt_compose_t;
479

    
480
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
481
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
482

    
483
static void iterative_me(SnowContext *s);
484

    
485
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
486
{
487
    int i;
488

    
489
    buf->base_buffer = base_buffer;
490
    buf->line_count = line_count;
491
    buf->line_width = line_width;
492
    buf->data_count = max_allocated_lines;
493
    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
494
    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
495

    
496
    for (i = 0; i < max_allocated_lines; i++)
497
    {
498
      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
499
    }
500

    
501
    buf->data_stack_top = max_allocated_lines - 1;
502
}
503

    
504
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
505
{
506
    int offset;
507
    DWTELEM * buffer;
508

    
509
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
510

    
511
    assert(buf->data_stack_top >= 0);
512
//  assert(!buf->line[line]);
513
    if (buf->line[line])
514
        return buf->line[line];
515

    
516
    offset = buf->line_width * line;
517
    buffer = buf->data_stack[buf->data_stack_top];
518
    buf->data_stack_top--;
519
    buf->line[line] = buffer;
520

    
521
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
522

    
523
    return buffer;
524
}
525

    
526
static void slice_buffer_release(slice_buffer * buf, int line)
527
{
528
    int offset;
529
    DWTELEM * buffer;
530

    
531
    assert(line >= 0 && line < buf->line_count);
532
    assert(buf->line[line]);
533

    
534
    offset = buf->line_width * line;
535
    buffer = buf->line[line];
536
    buf->data_stack_top++;
537
    buf->data_stack[buf->data_stack_top] = buffer;
538
    buf->line[line] = NULL;
539

    
540
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
541
}
542

    
543
static void slice_buffer_flush(slice_buffer * buf)
544
{
545
    int i;
546
    for (i = 0; i < buf->line_count; i++)
547
    {
548
        if (buf->line[i])
549
        {
550
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
551
            slice_buffer_release(buf, i);
552
        }
553
    }
554
}
555

    
556
static void slice_buffer_destroy(slice_buffer * buf)
557
{
558
    int i;
559
    slice_buffer_flush(buf);
560

    
561
    for (i = buf->data_count - 1; i >= 0; i--)
562
    {
563
        assert(buf->data_stack[i]);
564
        av_freep(&buf->data_stack[i]);
565
    }
566
    assert(buf->data_stack);
567
    av_freep(&buf->data_stack);
568
    assert(buf->line);
569
    av_freep(&buf->line);
570
}
571

    
572
#ifdef __sgi
573
// Avoid a name clash on SGI IRIX
574
#undef qexp
575
#endif
576
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
577
static uint8_t qexp[QROOT];
578

    
579
static inline int mirror(int v, int m){
580
    while((unsigned)v > (unsigned)m){
581
        v=-v;
582
        if(v<0) v+= 2*m;
583
    }
584
    return v;
585
}
586

    
587
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
588
    int i;
589

    
590
    if(v){
591
        const int a= ABS(v);
592
        const int e= av_log2(a);
593
#if 1
594
        const int el= FFMIN(e, 10);
595
        put_rac(c, state+0, 0);
596

    
597
        for(i=0; i<el; i++){
598
            put_rac(c, state+1+i, 1);  //1..10
599
        }
600
        for(; i<e; i++){
601
            put_rac(c, state+1+9, 1);  //1..10
602
        }
603
        put_rac(c, state+1+FFMIN(i,9), 0);
604

    
605
        for(i=e-1; i>=el; i--){
606
            put_rac(c, state+22+9, (a>>i)&1); //22..31
607
        }
608
        for(; i>=0; i--){
609
            put_rac(c, state+22+i, (a>>i)&1); //22..31
610
        }
611

    
612
        if(is_signed)
613
            put_rac(c, state+11 + el, v < 0); //11..21
614
#else
615

    
616
        put_rac(c, state+0, 0);
617
        if(e<=9){
618
            for(i=0; i<e; i++){
619
                put_rac(c, state+1+i, 1);  //1..10
620
            }
621
            put_rac(c, state+1+i, 0);
622

    
623
            for(i=e-1; i>=0; i--){
624
                put_rac(c, state+22+i, (a>>i)&1); //22..31
625
            }
626

    
627
            if(is_signed)
628
                put_rac(c, state+11 + e, v < 0); //11..21
629
        }else{
630
            for(i=0; i<e; i++){
631
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
632
            }
633
            put_rac(c, state+1+FFMIN(i,9), 0);
634

    
635
            for(i=e-1; i>=0; i--){
636
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
637
            }
638

    
639
            if(is_signed)
640
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
641
        }
642
#endif
643
    }else{
644
        put_rac(c, state+0, 1);
645
    }
646
}
647

    
648
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
649
    if(get_rac(c, state+0))
650
        return 0;
651
    else{
652
        int i, e, a;
653
        e= 0;
654
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
655
            e++;
656
        }
657

    
658
        a= 1;
659
        for(i=e-1; i>=0; i--){
660
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
661
        }
662

    
663
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
664
            return -a;
665
        else
666
            return a;
667
    }
668
}
669

    
670
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
671
    int i;
672
    int r= log2>=0 ? 1<<log2 : 1;
673

    
674
    assert(v>=0);
675
    assert(log2>=-4);
676

    
677
    while(v >= r){
678
        put_rac(c, state+4+log2, 1);
679
        v -= r;
680
        log2++;
681
        if(log2>0) r+=r;
682
    }
683
    put_rac(c, state+4+log2, 0);
684

    
685
    for(i=log2-1; i>=0; i--){
686
        put_rac(c, state+31-i, (v>>i)&1);
687
    }
688
}
689

    
690
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
691
    int i;
692
    int r= log2>=0 ? 1<<log2 : 1;
693
    int v=0;
694

    
695
    assert(log2>=-4);
696

    
697
    while(get_rac(c, state+4+log2)){
698
        v+= r;
699
        log2++;
700
        if(log2>0) r+=r;
701
    }
702

    
703
    for(i=log2-1; i>=0; i--){
704
        v+= get_rac(c, state+31-i)<<i;
705
    }
706

    
707
    return v;
708
}
709

    
710
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
711
    const int mirror_left= !highpass;
712
    const int mirror_right= (width&1) ^ highpass;
713
    const int w= (width>>1) - 1 + (highpass & width);
714
    int i;
715

    
716
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
717
    if(mirror_left){
718
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
719
        dst += dst_step;
720
        src += src_step;
721
    }
722

    
723
    for(i=0; i<w; i++){
724
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
725
    }
726

    
727
    if(mirror_right){
728
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
729
    }
730
}
731

    
732
#ifndef lift5
733
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
734
    const int mirror_left= !highpass;
735
    const int mirror_right= (width&1) ^ highpass;
736
    const int w= (width>>1) - 1 + (highpass & width);
737
    int i;
738

    
739
    if(mirror_left){
740
        int r= 3*2*ref[0];
741
        r += r>>4;
742
        r += r>>8;
743
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
744
        dst += dst_step;
745
        src += src_step;
746
    }
747

    
748
    for(i=0; i<w; i++){
749
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
750
        r += r>>4;
751
        r += r>>8;
752
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
753
    }
754

    
755
    if(mirror_right){
756
        int r= 3*2*ref[w*ref_step];
757
        r += r>>4;
758
        r += r>>8;
759
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
760
    }
761
}
762
#endif
763

    
764
#ifndef liftS
765
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
766
    const int mirror_left= !highpass;
767
    const int mirror_right= (width&1) ^ highpass;
768
    const int w= (width>>1) - 1 + (highpass & width);
769
    int i;
770

    
771
    assert(shift == 4);
772
#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
773
    if(mirror_left){
774
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
775
        dst += dst_step;
776
        src += src_step;
777
    }
778

    
779
    for(i=0; i<w; i++){
780
        dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
781
    }
782

    
783
    if(mirror_right){
784
        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
785
    }
786
}
787
#endif
788

    
789

    
790
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791
    int x, i;
792

    
793
    for(x=start; x<width; x+=2){
794
        int64_t sum=0;
795

    
796
        for(i=0; i<n; i++){
797
            int x2= x + 2*i - n + 1;
798
            if     (x2<     0) x2= -x2;
799
            else if(x2>=width) x2= 2*width-x2-2;
800
            sum += coeffs[i]*(int64_t)dst[x2];
801
        }
802
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
803
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
804
    }
805
}
806

    
807
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
808
    int x, y, i;
809
    for(y=start; y<height; y+=2){
810
        for(x=0; x<width; x++){
811
            int64_t sum=0;
812

    
813
            for(i=0; i<n; i++){
814
                int y2= y + 2*i - n + 1;
815
                if     (y2<      0) y2= -y2;
816
                else if(y2>=height) y2= 2*height-y2-2;
817
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
818
            }
819
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
820
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
821
        }
822
    }
823
}
824

    
825
#define SCALEX 1
826
#define LX0 0
827
#define LX1 1
828

    
829
#if 0 // more accurate 9/7
830
#define N1 2
831
#define SHIFT1 14
832
#define COEFFS1 (int[]){-25987,-25987}
833
#define N2 2
834
#define SHIFT2 19
835
#define COEFFS2 (int[]){-27777,-27777}
836
#define N3 2
837
#define SHIFT3 15
838
#define COEFFS3 (int[]){28931,28931}
839
#define N4 2
840
#define SHIFT4 15
841
#define COEFFS4 (int[]){14533,14533}
842
#elif 1 // 13/7 CRF
843
#define N1 4
844
#define SHIFT1 4
845
#define COEFFS1 (int[]){1,-9,-9,1}
846
#define N2 4
847
#define SHIFT2 4
848
#define COEFFS2 (int[]){-1,5,5,-1}
849
#define N3 0
850
#define SHIFT3 1
851
#define COEFFS3 NULL
852
#define N4 0
853
#define SHIFT4 1
854
#define COEFFS4 NULL
855
#elif 1 // 3/5
856
#define LX0 1
857
#define LX1 0
858
#define SCALEX 0.5
859
#define N1 2
860
#define SHIFT1 1
861
#define COEFFS1 (int[]){1,1}
862
#define N2 2
863
#define SHIFT2 2
864
#define COEFFS2 (int[]){-1,-1}
865
#define N3 0
866
#define SHIFT3 0
867
#define COEFFS3 NULL
868
#define N4 0
869
#define SHIFT4 0
870
#define COEFFS4 NULL
871
#elif 1 // 11/5
872
#define N1 0
873
#define SHIFT1 1
874
#define COEFFS1 NULL
875
#define N2 2
876
#define SHIFT2 2
877
#define COEFFS2 (int[]){-1,-1}
878
#define N3 2
879
#define SHIFT3 0
880
#define COEFFS3 (int[]){-1,-1}
881
#define N4 4
882
#define SHIFT4 7
883
#define COEFFS4 (int[]){-5,29,29,-5}
884
#define SCALEX 4
885
#elif 1 // 9/7 CDF
886
#define N1 2
887
#define SHIFT1 7
888
#define COEFFS1 (int[]){-203,-203}
889
#define N2 2
890
#define SHIFT2 12
891
#define COEFFS2 (int[]){-217,-217}
892
#define N3 2
893
#define SHIFT3 7
894
#define COEFFS3 (int[]){113,113}
895
#define N4 2
896
#define SHIFT4 9
897
#define COEFFS4 (int[]){227,227}
898
#define SCALEX 1
899
#elif 1 // 7/5 CDF
900
#define N1 0
901
#define SHIFT1 1
902
#define COEFFS1 NULL
903
#define N2 2
904
#define SHIFT2 2
905
#define COEFFS2 (int[]){-1,-1}
906
#define N3 2
907
#define SHIFT3 0
908
#define COEFFS3 (int[]){-1,-1}
909
#define N4 2
910
#define SHIFT4 4
911
#define COEFFS4 (int[]){3,3}
912
#elif 1 // 9/7 MN
913
#define N1 4
914
#define SHIFT1 4
915
#define COEFFS1 (int[]){1,-9,-9,1}
916
#define N2 2
917
#define SHIFT2 2
918
#define COEFFS2 (int[]){1,1}
919
#define N3 0
920
#define SHIFT3 1
921
#define COEFFS3 NULL
922
#define N4 0
923
#define SHIFT4 1
924
#define COEFFS4 NULL
925
#else // 13/7 CRF
926
#define N1 4
927
#define SHIFT1 4
928
#define COEFFS1 (int[]){1,-9,-9,1}
929
#define N2 4
930
#define SHIFT2 4
931
#define COEFFS2 (int[]){-1,5,5,-1}
932
#define N3 0
933
#define SHIFT3 1
934
#define COEFFS3 NULL
935
#define N4 0
936
#define SHIFT4 1
937
#define COEFFS4 NULL
938
#endif
939
static void horizontal_decomposeX(DWTELEM *b, int width){
940
    DWTELEM temp[width];
941
    const int width2= width>>1;
942
    const int w2= (width+1)>>1;
943
    int x;
944

    
945
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
946
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
947
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
948
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
949

    
950
    for(x=0; x<width2; x++){
951
        temp[x   ]= b[2*x    ];
952
        temp[x+w2]= b[2*x + 1];
953
    }
954
    if(width&1)
955
        temp[x   ]= b[2*x    ];
956
    memcpy(b, temp, width*sizeof(int));
957
}
958

    
959
static void horizontal_composeX(DWTELEM *b, int width){
960
    DWTELEM temp[width];
961
    const int width2= width>>1;
962
    int x;
963
    const int w2= (width+1)>>1;
964

    
965
    memcpy(temp, b, width*sizeof(int));
966
    for(x=0; x<width2; x++){
967
        b[2*x    ]= temp[x   ];
968
        b[2*x + 1]= temp[x+w2];
969
    }
970
    if(width&1)
971
        b[2*x    ]= temp[x   ];
972

    
973
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
974
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
975
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
976
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
977
}
978

    
979
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
980
    int x, y;
981

    
982
    for(y=0; y<height; y++){
983
        for(x=0; x<width; x++){
984
            buffer[y*stride + x] *= SCALEX;
985
        }
986
    }
987

    
988
    for(y=0; y<height; y++){
989
        horizontal_decomposeX(buffer + y*stride, width);
990
    }
991

    
992
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
993
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
994
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
995
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
996
}
997

    
998
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
999
    int x, y;
1000

    
1001
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1002
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1003
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1004
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1005

    
1006
    for(y=0; y<height; y++){
1007
        horizontal_composeX(buffer + y*stride, width);
1008
    }
1009

    
1010
    for(y=0; y<height; y++){
1011
        for(x=0; x<width; x++){
1012
            buffer[y*stride + x] /= SCALEX;
1013
        }
1014
    }
1015
}
1016

    
1017
static void horizontal_decompose53i(DWTELEM *b, int width){
1018
    DWTELEM temp[width];
1019
    const int width2= width>>1;
1020
    int x;
1021
    const int w2= (width+1)>>1;
1022

    
1023
    for(x=0; x<width2; x++){
1024
        temp[x   ]= b[2*x    ];
1025
        temp[x+w2]= b[2*x + 1];
1026
    }
1027
    if(width&1)
1028
        temp[x   ]= b[2*x    ];
1029
#if 0
1030
    {
1031
    int A1,A2,A3,A4;
1032
    A2= temp[1       ];
1033
    A4= temp[0       ];
1034
    A1= temp[0+width2];
1035
    A1 -= (A2 + A4)>>1;
1036
    A4 += (A1 + 1)>>1;
1037
    b[0+width2] = A1;
1038
    b[0       ] = A4;
1039
    for(x=1; x+1<width2; x+=2){
1040
        A3= temp[x+width2];
1041
        A4= temp[x+1     ];
1042
        A3 -= (A2 + A4)>>1;
1043
        A2 += (A1 + A3 + 2)>>2;
1044
        b[x+width2] = A3;
1045
        b[x       ] = A2;
1046

1047
        A1= temp[x+1+width2];
1048
        A2= temp[x+2       ];
1049
        A1 -= (A2 + A4)>>1;
1050
        A4 += (A1 + A3 + 2)>>2;
1051
        b[x+1+width2] = A1;
1052
        b[x+1       ] = A4;
1053
    }
1054
    A3= temp[width-1];
1055
    A3 -= A2;
1056
    A2 += (A1 + A3 + 2)>>2;
1057
    b[width -1] = A3;
1058
    b[width2-1] = A2;
1059
    }
1060
#else
1061
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1062
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
1063
#endif
1064
}
1065

    
1066
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1067
    int i;
1068

    
1069
    for(i=0; i<width; i++){
1070
        b1[i] -= (b0[i] + b2[i])>>1;
1071
    }
1072
}
1073

    
1074
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1075
    int i;
1076

    
1077
    for(i=0; i<width; i++){
1078
        b1[i] += (b0[i] + b2[i] + 2)>>2;
1079
    }
1080
}
1081

    
1082
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1083
    int y;
1084
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1085
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
1086

    
1087
    for(y=-2; y<height; y+=2){
1088
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1089
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1090

    
1091
{START_TIMER
1092
        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1093
        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1094
STOP_TIMER("horizontal_decompose53i")}
1095

    
1096
{START_TIMER
1097
        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1098
        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1099
STOP_TIMER("vertical_decompose53i*")}
1100

    
1101
        b0=b2;
1102
        b1=b3;
1103
    }
1104
}
1105

    
1106
static void horizontal_decompose97i(DWTELEM *b, int width){
1107
    DWTELEM temp[width];
1108
    const int w2= (width+1)>>1;
1109

    
1110
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1111
    liftS(temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1112
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1113
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1114
}
1115

    
1116

    
1117
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1118
    int i;
1119

    
1120
    for(i=0; i<width; i++){
1121
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1122
    }
1123
}
1124

    
1125
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1126
    int i;
1127

    
1128
    for(i=0; i<width; i++){
1129
#ifdef lift5
1130
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1131
#else
1132
        int r= 3*(b0[i] + b2[i]);
1133
        r+= r>>4;
1134
        r+= r>>8;
1135
        b1[i] += (r+W_CO)>>W_CS;
1136
#endif
1137
    }
1138
}
1139

    
1140
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1141
    int i;
1142

    
1143
    for(i=0; i<width; i++){
1144
#ifdef liftS
1145
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1146
#else
1147
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1148
#endif
1149
    }
1150
}
1151

    
1152
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1153
    int i;
1154

    
1155
    for(i=0; i<width; i++){
1156
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1157
    }
1158
}
1159

    
1160
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1161
    int y;
1162
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1163
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1164
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1165
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1166

    
1167
    for(y=-4; y<height; y+=2){
1168
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1169
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1170

    
1171
{START_TIMER
1172
        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1173
        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1174
if(width>400){
1175
STOP_TIMER("horizontal_decompose97i")
1176
}}
1177

    
1178
{START_TIMER
1179
        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1180
        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1181
        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1182
        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1183

    
1184
if(width>400){
1185
STOP_TIMER("vertical_decompose97i")
1186
}}
1187

    
1188
        b0=b2;
1189
        b1=b3;
1190
        b2=b4;
1191
        b3=b5;
1192
    }
1193
}
1194

    
1195
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1196
    int level;
1197

    
1198
    for(level=0; level<decomposition_count; level++){
1199
        switch(type){
1200
        case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1201
        case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1202
        case DWT_X: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1203
        }
1204
    }
1205
}
1206

    
1207
static void horizontal_compose53i(DWTELEM *b, int width){
1208
    DWTELEM temp[width];
1209
    const int width2= width>>1;
1210
    const int w2= (width+1)>>1;
1211
    int x;
1212

    
1213
#if 0
1214
    int A1,A2,A3,A4;
1215
    A2= temp[1       ];
1216
    A4= temp[0       ];
1217
    A1= temp[0+width2];
1218
    A1 -= (A2 + A4)>>1;
1219
    A4 += (A1 + 1)>>1;
1220
    b[0+width2] = A1;
1221
    b[0       ] = A4;
1222
    for(x=1; x+1<width2; x+=2){
1223
        A3= temp[x+width2];
1224
        A4= temp[x+1     ];
1225
        A3 -= (A2 + A4)>>1;
1226
        A2 += (A1 + A3 + 2)>>2;
1227
        b[x+width2] = A3;
1228
        b[x       ] = A2;
1229

1230
        A1= temp[x+1+width2];
1231
        A2= temp[x+2       ];
1232
        A1 -= (A2 + A4)>>1;
1233
        A4 += (A1 + A3 + 2)>>2;
1234
        b[x+1+width2] = A1;
1235
        b[x+1       ] = A4;
1236
    }
1237
    A3= temp[width-1];
1238
    A3 -= A2;
1239
    A2 += (A1 + A3 + 2)>>2;
1240
    b[width -1] = A3;
1241
    b[width2-1] = A2;
1242
#else
1243
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1244
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1245
#endif
1246
    for(x=0; x<width2; x++){
1247
        b[2*x    ]= temp[x   ];
1248
        b[2*x + 1]= temp[x+w2];
1249
    }
1250
    if(width&1)
1251
        b[2*x    ]= temp[x   ];
1252
}
1253

    
1254
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1255
    int i;
1256

    
1257
    for(i=0; i<width; i++){
1258
        b1[i] += (b0[i] + b2[i])>>1;
1259
    }
1260
}
1261

    
1262
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1263
    int i;
1264

    
1265
    for(i=0; i<width; i++){
1266
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1267
    }
1268
}
1269

    
1270
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1271
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1272
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1273
    cs->y = -1;
1274
}
1275

    
1276
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1277
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1278
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1279
    cs->y = -1;
1280
}
1281

    
1282
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1283
    int y= cs->y;
1284

    
1285
    DWTELEM *b0= cs->b0;
1286
    DWTELEM *b1= cs->b1;
1287
    DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1288
    DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1289

    
1290
{START_TIMER
1291
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1292
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1293
STOP_TIMER("vertical_compose53i*")}
1294

    
1295
{START_TIMER
1296
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1297
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1298
STOP_TIMER("horizontal_compose53i")}
1299

    
1300
    cs->b0 = b2;
1301
    cs->b1 = b3;
1302
    cs->y += 2;
1303
}
1304

    
1305
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1306
    int y= cs->y;
1307
    DWTELEM *b0= cs->b0;
1308
    DWTELEM *b1= cs->b1;
1309
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1310
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1311

    
1312
{START_TIMER
1313
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1314
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1315
STOP_TIMER("vertical_compose53i*")}
1316

    
1317
{START_TIMER
1318
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1319
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1320
STOP_TIMER("horizontal_compose53i")}
1321

    
1322
    cs->b0 = b2;
1323
    cs->b1 = b3;
1324
    cs->y += 2;
1325
}
1326

    
1327
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1328
    dwt_compose_t cs;
1329
    spatial_compose53i_init(&cs, buffer, height, stride);
1330
    while(cs.y <= height)
1331
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1332
}
1333

    
1334

    
1335
void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1336
    DWTELEM temp[width];
1337
    const int w2= (width+1)>>1;
1338

    
1339
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1340
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1341
    liftS(b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1342
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1343
}
1344

    
1345
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1346
    int i;
1347

    
1348
    for(i=0; i<width; i++){
1349
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1350
    }
1351
}
1352

    
1353
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1354
    int i;
1355

    
1356
    for(i=0; i<width; i++){
1357
#ifdef lift5
1358
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1359
#else
1360
        int r= 3*(b0[i] + b2[i]);
1361
        r+= r>>4;
1362
        r+= r>>8;
1363
        b1[i] -= (r+W_CO)>>W_CS;
1364
#endif
1365
    }
1366
}
1367

    
1368
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1369
    int i;
1370

    
1371
    for(i=0; i<width; i++){
1372
#ifdef liftS
1373
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1374
#else
1375
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1376
#endif
1377
    }
1378
}
1379

    
1380
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1381
    int i;
1382

    
1383
    for(i=0; i<width; i++){
1384
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1385
    }
1386
}
1387

    
1388
void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1389
    int i;
1390

    
1391
    for(i=0; i<width; i++){
1392
#ifndef lift5
1393
        int r;
1394
#endif
1395
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1396
#ifdef lift5
1397
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1398
#else
1399
        r= 3*(b2[i] + b4[i]);
1400
        r+= r>>4;
1401
        r+= r>>8;
1402
        b3[i] -= (r+W_CO)>>W_CS;
1403
#endif
1404
#ifdef liftS
1405
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1406
#else
1407
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1408
#endif
1409
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1410
    }
1411
}
1412

    
1413
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1414
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1415
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1416
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1417
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1418
    cs->y = -3;
1419
}
1420

    
1421
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1422
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1423
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1424
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1425
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1426
    cs->y = -3;
1427
}
1428

    
1429
static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1430
    int y = cs->y;
1431

    
1432
    DWTELEM *b0= cs->b0;
1433
    DWTELEM *b1= cs->b1;
1434
    DWTELEM *b2= cs->b2;
1435
    DWTELEM *b3= cs->b3;
1436
    DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1437
    DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1438

    
1439
{START_TIMER
1440
    if(y>0 && y+4<height){
1441
        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1442
    }else{
1443
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1444
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1445
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1446
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1447
    }
1448
if(width>400){
1449
STOP_TIMER("vertical_compose97i")}}
1450

    
1451
{START_TIMER
1452
        if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1453
        if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1454
if(width>400 && y+0<(unsigned)height){
1455
STOP_TIMER("horizontal_compose97i")}}
1456

    
1457
    cs->b0=b2;
1458
    cs->b1=b3;
1459
    cs->b2=b4;
1460
    cs->b3=b5;
1461
    cs->y += 2;
1462
}
1463

    
1464
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1465
    int y = cs->y;
1466
    DWTELEM *b0= cs->b0;
1467
    DWTELEM *b1= cs->b1;
1468
    DWTELEM *b2= cs->b2;
1469
    DWTELEM *b3= cs->b3;
1470
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1471
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1472

    
1473
{START_TIMER
1474
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1475
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1476
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1477
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1478
if(width>400){
1479
STOP_TIMER("vertical_compose97i")}}
1480

    
1481
{START_TIMER
1482
        if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1483
        if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1484
if(width>400 && b0 <= b2){
1485
STOP_TIMER("horizontal_compose97i")}}
1486

    
1487
    cs->b0=b2;
1488
    cs->b1=b3;
1489
    cs->b2=b4;
1490
    cs->b3=b5;
1491
    cs->y += 2;
1492
}
1493

    
1494
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1495
    dwt_compose_t cs;
1496
    spatial_compose97i_init(&cs, buffer, height, stride);
1497
    while(cs.y <= height)
1498
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1499
}
1500

    
1501
static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1502
    int level;
1503
    for(level=decomposition_count-1; level>=0; level--){
1504
        switch(type){
1505
        case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1506
        case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1507
        /* not slicified yet */
1508
        case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1509
          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1510
        }
1511
    }
1512
}
1513

    
1514
static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1515
    int level;
1516
    for(level=decomposition_count-1; level>=0; level--){
1517
        switch(type){
1518
        case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1519
        case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1520
        /* not slicified yet */
1521
        case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1522
        }
1523
    }
1524
}
1525

    
1526
static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1527
    const int support = type==1 ? 3 : 5;
1528
    int level;
1529
    if(type==2) return;
1530

    
1531
    for(level=decomposition_count-1; level>=0; level--){
1532
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1533
            switch(type){
1534
            case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1535
                    break;
1536
            case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1537
                    break;
1538
            case DWT_X: break;
1539
            }
1540
        }
1541
    }
1542
}
1543

    
1544
static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1545
    const int support = type==1 ? 3 : 5;
1546
    int level;
1547
    if(type==2) return;
1548

    
1549
    for(level=decomposition_count-1; level>=0; level--){
1550
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1551
            switch(type){
1552
            case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1553
                    break;
1554
            case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1555
                    break;
1556
            case DWT_X: break;
1557
            }
1558
        }
1559
    }
1560
}
1561

    
1562
static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1563
    if(type==2){
1564
        int level;
1565
        for(level=decomposition_count-1; level>=0; level--)
1566
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
1567
    }else{
1568
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1569
        int y;
1570
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1571
        for(y=0; y<height; y+=4)
1572
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1573
    }
1574
}
1575

    
1576
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1577
    const int w= b->width;
1578
    const int h= b->height;
1579
    int x, y;
1580

    
1581
    if(1){
1582
        int run=0;
1583
        int runs[w*h];
1584
        int run_index=0;
1585
        int max_index;
1586

    
1587
        for(y=0; y<h; y++){
1588
            for(x=0; x<w; x++){
1589
                int v, p=0;
1590
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1591
                v= src[x + y*stride];
1592

    
1593
                if(y){
1594
                    t= src[x + (y-1)*stride];
1595
                    if(x){
1596
                        lt= src[x - 1 + (y-1)*stride];
1597
                    }
1598
                    if(x + 1 < w){
1599
                        rt= src[x + 1 + (y-1)*stride];
1600
                    }
1601
                }
1602
                if(x){
1603
                    l= src[x - 1 + y*stride];
1604
                    /*if(x > 1){
1605
                        if(orientation==1) ll= src[y + (x-2)*stride];
1606
                        else               ll= src[x - 2 + y*stride];
1607
                    }*/
1608
                }
1609
                if(parent){
1610
                    int px= x>>1;
1611
                    int py= y>>1;
1612
                    if(px<b->parent->width && py<b->parent->height)
1613
                        p= parent[px + py*2*stride];
1614
                }
1615
                if(!(/*ll|*/l|lt|t|rt|p)){
1616
                    if(v){
1617
                        runs[run_index++]= run;
1618
                        run=0;
1619
                    }else{
1620
                        run++;
1621
                    }
1622
                }
1623
            }
1624
        }
1625
        max_index= run_index;
1626
        runs[run_index++]= run;
1627
        run_index=0;
1628
        run= runs[run_index++];
1629

    
1630
        put_symbol2(&s->c, b->state[30], max_index, 0);
1631
        if(run_index <= max_index)
1632
            put_symbol2(&s->c, b->state[1], run, 3);
1633

    
1634
        for(y=0; y<h; y++){
1635
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1636
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1637
                return -1;
1638
            }
1639
            for(x=0; x<w; x++){
1640
                int v, p=0;
1641
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1642
                v= src[x + y*stride];
1643

    
1644
                if(y){
1645
                    t= src[x + (y-1)*stride];
1646
                    if(x){
1647
                        lt= src[x - 1 + (y-1)*stride];
1648
                    }
1649
                    if(x + 1 < w){
1650
                        rt= src[x + 1 + (y-1)*stride];
1651
                    }
1652
                }
1653
                if(x){
1654
                    l= src[x - 1 + y*stride];
1655
                    /*if(x > 1){
1656
                        if(orientation==1) ll= src[y + (x-2)*stride];
1657
                        else               ll= src[x - 2 + y*stride];
1658
                    }*/
1659
                }
1660
                if(parent){
1661
                    int px= x>>1;
1662
                    int py= y>>1;
1663
                    if(px<b->parent->width && py<b->parent->height)
1664
                        p= parent[px + py*2*stride];
1665
                }
1666
                if(/*ll|*/l|lt|t|rt|p){
1667
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1668

    
1669
                    put_rac(&s->c, &b->state[0][context], !!v);
1670
                }else{
1671
                    if(!run){
1672
                        run= runs[run_index++];
1673

    
1674
                        if(run_index <= max_index)
1675
                            put_symbol2(&s->c, b->state[1], run, 3);
1676
                        assert(v);
1677
                    }else{
1678
                        run--;
1679
                        assert(!v);
1680
                    }
1681
                }
1682
                if(v){
1683
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1684
                    int l2= 2*ABS(l) + (l<0);
1685
                    int t2= 2*ABS(t) + (t<0);
1686

    
1687
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1688
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1689
                }
1690
            }
1691
        }
1692
    }
1693
    return 0;
1694
}
1695

    
1696
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1697
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1698
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1699
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1700
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1701
}
1702

    
1703
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1704
    const int w= b->width;
1705
    const int h= b->height;
1706
    int x,y;
1707

    
1708
    if(1){
1709
        int run, runs;
1710
        x_and_coeff *xc= b->x_coeff;
1711
        x_and_coeff *prev_xc= NULL;
1712
        x_and_coeff *prev2_xc= xc;
1713
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1714
        x_and_coeff *prev_parent_xc= parent_xc;
1715

    
1716
        runs= get_symbol2(&s->c, b->state[30], 0);
1717
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1718
        else           run= INT_MAX;
1719

    
1720
        for(y=0; y<h; y++){
1721
            int v=0;
1722
            int lt=0, t=0, rt=0;
1723

    
1724
            if(y && prev_xc->x == 0){
1725
                rt= prev_xc->coeff;
1726
            }
1727
            for(x=0; x<w; x++){
1728
                int p=0;
1729
                const int l= v;
1730

    
1731
                lt= t; t= rt;
1732

    
1733
                if(y){
1734
                    if(prev_xc->x <= x)
1735
                        prev_xc++;
1736
                    if(prev_xc->x == x + 1)
1737
                        rt= prev_xc->coeff;
1738
                    else
1739
                        rt=0;
1740
                }
1741
                if(parent_xc){
1742
                    if(x>>1 > parent_xc->x){
1743
                        parent_xc++;
1744
                    }
1745
                    if(x>>1 == parent_xc->x){
1746
                        p= parent_xc->coeff;
1747
                    }
1748
                }
1749
                if(/*ll|*/l|lt|t|rt|p){
1750
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1751

    
1752
                    v=get_rac(&s->c, &b->state[0][context]);
1753
                    if(v){
1754
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1755
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1756

    
1757
                        xc->x=x;
1758
                        (xc++)->coeff= v;
1759
                    }
1760
                }else{
1761
                    if(!run){
1762
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1763
                        else           run= INT_MAX;
1764
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1765
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1766

    
1767
                        xc->x=x;
1768
                        (xc++)->coeff= v;
1769
                    }else{
1770
                        int max_run;
1771
                        run--;
1772
                        v=0;
1773

    
1774
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1775
                        else  max_run= FFMIN(run, w-x-1);
1776
                        if(parent_xc)
1777
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1778
                        x+= max_run;
1779
                        run-= max_run;
1780
                    }
1781
                }
1782
            }
1783
            (xc++)->x= w+1; //end marker
1784
            prev_xc= prev2_xc;
1785
            prev2_xc= xc;
1786

    
1787
            if(parent_xc){
1788
                if(y&1){
1789
                    while(parent_xc->x != parent->width+1)
1790
                        parent_xc++;
1791
                    parent_xc++;
1792
                    prev_parent_xc= parent_xc;
1793
                }else{
1794
                    parent_xc= prev_parent_xc;
1795
                }
1796
            }
1797
        }
1798

    
1799
        (xc++)->x= w+1; //end marker
1800
    }
1801
}
1802

    
1803
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1804
    const int w= b->width;
1805
    int y;
1806
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1807
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1808
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1809
    int new_index = 0;
1810

    
1811
    START_TIMER
1812

    
1813
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1814
        qadd= 0;
1815
        qmul= 1<<QEXPSHIFT;
1816
    }
1817

    
1818
    /* If we are on the second or later slice, restore our index. */
1819
    if (start_y != 0)
1820
        new_index = save_state[0];
1821

    
1822

    
1823
    for(y=start_y; y<h; y++){
1824
        int x = 0;
1825
        int v;
1826
        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1827
        memset(line, 0, b->width*sizeof(DWTELEM));
1828
        v = b->x_coeff[new_index].coeff;
1829
        x = b->x_coeff[new_index++].x;
1830
        while(x < w)
1831
        {
1832
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1833
            register int u= -(v&1);
1834
            line[x] = (t^u) - u;
1835

    
1836
            v = b->x_coeff[new_index].coeff;
1837
            x = b->x_coeff[new_index++].x;
1838
        }
1839
    }
1840
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1841
        STOP_TIMER("decode_subband")
1842
    }
1843

    
1844
    /* Save our variables for the next slice. */
1845
    save_state[0] = new_index;
1846

    
1847
    return;
1848
}
1849

    
1850
static void reset_contexts(SnowContext *s){
1851
    int plane_index, level, orientation;
1852

    
1853
    for(plane_index=0; plane_index<3; plane_index++){
1854
        for(level=0; level<s->spatial_decomposition_count; level++){
1855
            for(orientation=level ? 1:0; orientation<4; orientation++){
1856
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1857
            }
1858
        }
1859
    }
1860
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1861
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1862
}
1863

    
1864
static int alloc_blocks(SnowContext *s){
1865
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1866
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1867

    
1868
    s->b_width = w;
1869
    s->b_height= h;
1870

    
1871
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1872
    return 0;
1873
}
1874

    
1875
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1876
    uint8_t *bytestream= d->bytestream;
1877
    uint8_t *bytestream_start= d->bytestream_start;
1878
    *d= *s;
1879
    d->bytestream= bytestream;
1880
    d->bytestream_start= bytestream_start;
1881
}
1882

    
1883
//near copy & paste from dsputil, FIXME
1884
static int pix_sum(uint8_t * pix, int line_size, int w)
1885
{
1886
    int s, i, j;
1887

    
1888
    s = 0;
1889
    for (i = 0; i < w; i++) {
1890
        for (j = 0; j < w; j++) {
1891
            s += pix[0];
1892
            pix ++;
1893
        }
1894
        pix += line_size - w;
1895
    }
1896
    return s;
1897
}
1898

    
1899
//near copy & paste from dsputil, FIXME
1900
static int pix_norm1(uint8_t * pix, int line_size, int w)
1901
{
1902
    int s, i, j;
1903
    uint32_t *sq = squareTbl + 256;
1904

    
1905
    s = 0;
1906
    for (i = 0; i < w; i++) {
1907
        for (j = 0; j < w; j ++) {
1908
            s += sq[pix[0]];
1909
            pix ++;
1910
        }
1911
        pix += line_size - w;
1912
    }
1913
    return s;
1914
}
1915

    
1916
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1917
    const int w= s->b_width << s->block_max_depth;
1918
    const int rem_depth= s->block_max_depth - level;
1919
    const int index= (x + y*w) << rem_depth;
1920
    const int block_w= 1<<rem_depth;
1921
    BlockNode block;
1922
    int i,j;
1923

    
1924
    block.color[0]= l;
1925
    block.color[1]= cb;
1926
    block.color[2]= cr;
1927
    block.mx= mx;
1928
    block.my= my;
1929
    block.ref= ref;
1930
    block.type= type;
1931
    block.level= level;
1932

    
1933
    for(j=0; j<block_w; j++){
1934
        for(i=0; i<block_w; i++){
1935
            s->block[index + i + j*w]= block;
1936
        }
1937
    }
1938
}
1939

    
1940
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1941
    const int offset[3]= {
1942
          y*c->  stride + x,
1943
        ((y*c->uvstride + x)>>1),
1944
        ((y*c->uvstride + x)>>1),
1945
    };
1946
    int i;
1947
    for(i=0; i<3; i++){
1948
        c->src[0][i]= src [i];
1949
        c->ref[0][i]= ref [i] + offset[i];
1950
    }
1951
    assert(!ref_index);
1952
}
1953

    
1954
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1955
                           BlockNode *left, BlockNode *top, BlockNode *tr){
1956
    if(s->ref_frames == 1){
1957
        *mx = mid_pred(left->mx, top->mx, tr->mx);
1958
        *my = mid_pred(left->my, top->my, tr->my);
1959
    }else{
1960
        const int *scale = scale_mv_ref[ref];
1961
        *mx = mid_pred(left->mx * scale[left->ref] + 128 >>8,
1962
                       top ->mx * scale[top ->ref] + 128 >>8,
1963
                       tr  ->mx * scale[tr  ->ref] + 128 >>8);
1964
        *my = mid_pred(left->my * scale[left->ref] + 128 >>8,
1965
                       top ->my * scale[top ->ref] + 128 >>8,
1966
                       tr  ->my * scale[tr  ->ref] + 128 >>8);
1967
    }
1968
}
1969

    
1970
//FIXME copy&paste
1971
#define P_LEFT P[1]
1972
#define P_TOP P[2]
1973
#define P_TOPRIGHT P[3]
1974
#define P_MEDIAN P[4]
1975
#define P_MV1 P[9]
1976
#define FLAG_QPEL   1 //must be 1
1977

    
1978
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1979
    uint8_t p_buffer[1024];
1980
    uint8_t i_buffer[1024];
1981
    uint8_t p_state[sizeof(s->block_state)];
1982
    uint8_t i_state[sizeof(s->block_state)];
1983
    RangeCoder pc, ic;
1984
    uint8_t *pbbak= s->c.bytestream;
1985
    uint8_t *pbbak_start= s->c.bytestream_start;
1986
    int score, score2, iscore, i_len, p_len, block_s, sum;
1987
    const int w= s->b_width  << s->block_max_depth;
1988
    const int h= s->b_height << s->block_max_depth;
1989
    const int rem_depth= s->block_max_depth - level;
1990
    const int index= (x + y*w) << rem_depth;
1991
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1992
    int trx= (x+1)<<rem_depth;
1993
    int try= (y+1)<<rem_depth;
1994
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
1995
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
1996
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1997
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1998
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1999
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2000
    int pl = left->color[0];
2001
    int pcb= left->color[1];
2002
    int pcr= left->color[2];
2003
    int pmx, pmy;
2004
    int mx=0, my=0;
2005
    int l,cr,cb;
2006
    const int stride= s->current_picture.linesize[0];
2007
    const int uvstride= s->current_picture.linesize[1];
2008
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
2009
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2010
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2011
    int P[10][2];
2012
    int16_t last_mv[3][2];
2013
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2014
    const int shift= 1+qpel;
2015
    MotionEstContext *c= &s->m.me;
2016
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2017
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2018
    int my_context= av_log2(2*ABS(left->my - top->my));
2019
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2020
    int ref, best_ref, ref_score, ref_mx, ref_my;
2021

    
2022
    assert(sizeof(s->block_state) >= 256);
2023
    if(s->keyframe){
2024
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2025
        return 0;
2026
    }
2027

    
2028
//    clip predictors / edge ?
2029

    
2030
    P_LEFT[0]= left->mx;
2031
    P_LEFT[1]= left->my;
2032
    P_TOP [0]= top->mx;
2033
    P_TOP [1]= top->my;
2034
    P_TOPRIGHT[0]= tr->mx;
2035
    P_TOPRIGHT[1]= tr->my;
2036

    
2037
    last_mv[0][0]= s->block[index].mx;
2038
    last_mv[0][1]= s->block[index].my;
2039
    last_mv[1][0]= right->mx;
2040
    last_mv[1][1]= right->my;
2041
    last_mv[2][0]= bottom->mx;
2042
    last_mv[2][1]= bottom->my;
2043

    
2044
    s->m.mb_stride=2;
2045
    s->m.mb_x=
2046
    s->m.mb_y= 0;
2047
    s->m.me.skip= 0;
2048

    
2049
    assert(s->m.me.  stride ==   stride);
2050
    assert(s->m.me.uvstride == uvstride);
2051

    
2052
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2053
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2054
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2055
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2056

    
2057
    c->xmin = - x*block_w - 16+2;
2058
    c->ymin = - y*block_w - 16+2;
2059
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2060
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2061

    
2062
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2063
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
2064
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2065
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2066
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2067
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2068
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2069

    
2070
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2071
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2072

    
2073
    if (!y) {
2074
        c->pred_x= P_LEFT[0];
2075
        c->pred_y= P_LEFT[1];
2076
    } else {
2077
        c->pred_x = P_MEDIAN[0];
2078
        c->pred_y = P_MEDIAN[1];
2079
    }
2080

    
2081
    score= INT_MAX;
2082
    best_ref= 0;
2083
    for(ref=0; ref<s->ref_frames; ref++){
2084
        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2085

    
2086
        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2087
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2088

    
2089
        assert(ref_mx >= c->xmin);
2090
        assert(ref_mx <= c->xmax);
2091
        assert(ref_my >= c->ymin);
2092
        assert(ref_my <= c->ymax);
2093

    
2094
        ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2095
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2096
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2097
        if(s->ref_mvs[ref]){
2098
            s->ref_mvs[ref][index][0]= ref_mx;
2099
            s->ref_mvs[ref][index][1]= ref_my;
2100
            s->ref_scores[ref][index]= ref_score;
2101
        }
2102
        if(score > ref_score){
2103
            score= ref_score;
2104
            best_ref= ref;
2105
            mx= ref_mx;
2106
            my= ref_my;
2107
        }
2108
    }
2109
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2110

    
2111
  //  subpel search
2112
    pc= s->c;
2113
    pc.bytestream_start=
2114
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2115
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2116

    
2117
    if(level!=s->block_max_depth)
2118
        put_rac(&pc, &p_state[4 + s_context], 1);
2119
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2120
    if(s->ref_frames > 1)
2121
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2122
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2123
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2124
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2125
    p_len= pc.bytestream - pc.bytestream_start;
2126
    score += (s->lambda2*(p_len*8
2127
              + (pc.outstanding_count - s->c.outstanding_count)*8
2128
              + (-av_log2(pc.range)    + av_log2(s->c.range))
2129
             ))>>FF_LAMBDA_SHIFT;
2130

    
2131
    block_s= block_w*block_w;
2132
    sum = pix_sum(current_data[0], stride, block_w);
2133
    l= (sum + block_s/2)/block_s;
2134
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2135

    
2136
    block_s= block_w*block_w>>2;
2137
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
2138
    cb= (sum + block_s/2)/block_s;
2139
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2140
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
2141
    cr= (sum + block_s/2)/block_s;
2142
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2143

    
2144
    ic= s->c;
2145
    ic.bytestream_start=
2146
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2147
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2148
    if(level!=s->block_max_depth)
2149
        put_rac(&ic, &i_state[4 + s_context], 1);
2150
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2151
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2152
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2153
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2154
    i_len= ic.bytestream - ic.bytestream_start;
2155
    iscore += (s->lambda2*(i_len*8
2156
              + (ic.outstanding_count - s->c.outstanding_count)*8
2157
              + (-av_log2(ic.range)    + av_log2(s->c.range))
2158
             ))>>FF_LAMBDA_SHIFT;
2159

    
2160
//    assert(score==256*256*256*64-1);
2161
    assert(iscore < 255*255*256 + s->lambda2*10);
2162
    assert(iscore >= 0);
2163
    assert(l>=0 && l<=255);
2164
    assert(pl>=0 && pl<=255);
2165

    
2166
    if(level==0){
2167
        int varc= iscore >> 8;
2168
        int vard= score >> 8;
2169
        if (vard <= 64 || vard < varc)
2170
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2171
        else
2172
            c->scene_change_score+= s->m.qscale;
2173
    }
2174

    
2175
    if(level!=s->block_max_depth){
2176
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2177
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2178
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2179
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2180
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2181
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2182

    
2183
        if(score2 < score && score2 < iscore)
2184
            return score2;
2185
    }
2186

    
2187
    if(iscore < score){
2188
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2189
        memcpy(pbbak, i_buffer, i_len);
2190
        s->c= ic;
2191
        s->c.bytestream_start= pbbak_start;
2192
        s->c.bytestream= pbbak + i_len;
2193
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2194
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2195
        return iscore;
2196
    }else{
2197
        memcpy(pbbak, p_buffer, p_len);
2198
        s->c= pc;
2199
        s->c.bytestream_start= pbbak_start;
2200
        s->c.bytestream= pbbak + p_len;
2201
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2202
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2203
        return score;
2204
    }
2205
}
2206

    
2207
static always_inline int same_block(BlockNode *a, BlockNode *b){
2208
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2209
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2210
    }else{
2211
        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2212
    }
2213
}
2214

    
2215
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2216
    const int w= s->b_width  << s->block_max_depth;
2217
    const int rem_depth= s->block_max_depth - level;
2218
    const int index= (x + y*w) << rem_depth;
2219
    int trx= (x+1)<<rem_depth;
2220
    BlockNode *b= &s->block[index];
2221
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2222
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2223
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2224
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2225
    int pl = left->color[0];
2226
    int pcb= left->color[1];
2227
    int pcr= left->color[2];
2228
    int pmx, pmy;
2229
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2230
    int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 16*!!b->ref;
2231
    int my_context= av_log2(2*ABS(left->my - top->my)) + 16*!!b->ref;
2232
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2233

    
2234
    if(s->keyframe){
2235
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2236
        return;
2237
    }
2238

    
2239
    if(level!=s->block_max_depth){
2240
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2241
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
2242
        }else{
2243
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
2244
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2245
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2246
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2247
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2248
            return;
2249
        }
2250
    }
2251
    if(b->type & BLOCK_INTRA){
2252
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2253
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2254
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2255
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2256
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2257
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2258
    }else{
2259
        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2260
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2261
        if(s->ref_frames > 1)
2262
            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2263
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2264
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2265
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2266
    }
2267
}
2268

    
2269
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2270
    const int w= s->b_width << s->block_max_depth;
2271
    const int rem_depth= s->block_max_depth - level;
2272
    const int index= (x + y*w) << rem_depth;
2273
    int trx= (x+1)<<rem_depth;
2274
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2275
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2276
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2277
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2278
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2279

    
2280
    if(s->keyframe){
2281
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2282
        return;
2283
    }
2284

    
2285
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2286
        int type;
2287
        int l = left->color[0];
2288
        int cb= left->color[1];
2289
        int cr= left->color[2];
2290
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2291
        int my= mid_pred(left->my, top->my, tr->my);
2292
        int ref = 0;
2293
        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2294
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2295
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2296

    
2297
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2298

    
2299
        if(type){
2300
            pred_mv(s, &mx, &my, 0, left, top, tr);
2301
            l += get_symbol(&s->c, &s->block_state[32], 1);
2302
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2303
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2304
        }else{
2305
            if(s->ref_frames > 1)
2306
                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2307
            pred_mv(s, &mx, &my, ref, left, top, tr);
2308
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2309
            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2310
        }
2311
        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2312
    }else{
2313
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2314
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2315
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2316
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2317
    }
2318
}
2319

    
2320
static void encode_blocks(SnowContext *s, int search){
2321
    int x, y;
2322
    int w= s->b_width;
2323
    int h= s->b_height;
2324

    
2325
    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2326
        iterative_me(s);
2327

    
2328
    for(y=0; y<h; y++){
2329
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2330
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2331
            return;
2332
        }
2333
        for(x=0; x<w; x++){
2334
            if(s->avctx->me_method == ME_ITER || !search)
2335
                encode_q_branch2(s, 0, x, y);
2336
            else
2337
                encode_q_branch (s, 0, x, y);
2338
        }
2339
    }
2340
}
2341

    
2342
static void decode_blocks(SnowContext *s){
2343
    int x, y;
2344
    int w= s->b_width;
2345
    int h= s->b_height;
2346

    
2347
    for(y=0; y<h; y++){
2348
        for(x=0; x<w; x++){
2349
            decode_q_branch(s, 0, x, y);
2350
        }
2351
    }
2352
}
2353

    
2354
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2355
    int x, y;
2356
START_TIMER
2357
    for(y=0; y < b_h+5; y++){
2358
        for(x=0; x < b_w; x++){
2359
            int a0= src[x    ];
2360
            int a1= src[x + 1];
2361
            int a2= src[x + 2];
2362
            int a3= src[x + 3];
2363
            int a4= src[x + 4];
2364
            int a5= src[x + 5];
2365
//            int am= 9*(a1+a2) - (a0+a3);
2366
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2367
//            int am= 18*(a2+a3) - 2*(a1+a4);
2368
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2369
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2370

    
2371
//            if(b_w==16) am= 8*(a1+a2);
2372

    
2373
            if(dx<8) am = (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2374
            else     am = (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2375

    
2376
            /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2377
            if(am&(~255)) am= ~(am>>31);
2378

    
2379
            tmp[x] = am;
2380

    
2381
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2382
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2383
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2384
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2385
        }
2386
        tmp += stride;
2387
        src += stride;
2388
    }
2389
    tmp -= (b_h+5)*stride;
2390

    
2391
    for(y=0; y < b_h; y++){
2392
        for(x=0; x < b_w; x++){
2393
            int a0= tmp[x + 0*stride];
2394
            int a1= tmp[x + 1*stride];
2395
            int a2= tmp[x + 2*stride];
2396
            int a3= tmp[x + 3*stride];
2397
            int a4= tmp[x + 4*stride];
2398
            int a5= tmp[x + 5*stride];
2399
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2400
//            int am= 18*(a2+a3) - 2*(a1+a4);
2401
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2402
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2403

    
2404
//            if(b_w==16) am= 8*(a1+a2);
2405

    
2406
            if(dy<8) am =  (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2407
            else     am = (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2408

    
2409
            if(am&(~255)) am= ~(am>>31);
2410

    
2411
            dst[x] = am;
2412
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2413
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2414
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2415
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2416
        }
2417
        dst += stride;
2418
        tmp += stride;
2419
    }
2420
STOP_TIMER("mc_block")
2421
}
2422

    
2423
#define mca(dx,dy,b_w)\
2424
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2425
    uint8_t tmp[stride*(b_w+5)];\
2426
    assert(h==b_w);\
2427
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2428
}
2429

    
2430
mca( 0, 0,16)
2431
mca( 8, 0,16)
2432
mca( 0, 8,16)
2433
mca( 8, 8,16)
2434
mca( 0, 0,8)
2435
mca( 8, 0,8)
2436
mca( 0, 8,8)
2437
mca( 8, 8,8)
2438

    
2439
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2440
    if(block->type & BLOCK_INTRA){
2441
        int x, y;
2442
        const int color = block->color[plane_index];
2443
        const int color4= color*0x01010101;
2444
        if(b_w==32){
2445
            for(y=0; y < b_h; y++){
2446
                *(uint32_t*)&dst[0 + y*stride]= color4;
2447
                *(uint32_t*)&dst[4 + y*stride]= color4;
2448
                *(uint32_t*)&dst[8 + y*stride]= color4;
2449
                *(uint32_t*)&dst[12+ y*stride]= color4;
2450
                *(uint32_t*)&dst[16+ y*stride]= color4;
2451
                *(uint32_t*)&dst[20+ y*stride]= color4;
2452
                *(uint32_t*)&dst[24+ y*stride]= color4;
2453
                *(uint32_t*)&dst[28+ y*stride]= color4;
2454
            }
2455
        }else if(b_w==16){
2456
            for(y=0; y < b_h; y++){
2457
                *(uint32_t*)&dst[0 + y*stride]= color4;
2458
                *(uint32_t*)&dst[4 + y*stride]= color4;
2459
                *(uint32_t*)&dst[8 + y*stride]= color4;
2460
                *(uint32_t*)&dst[12+ y*stride]= color4;
2461
            }
2462
        }else if(b_w==8){
2463
            for(y=0; y < b_h; y++){
2464
                *(uint32_t*)&dst[0 + y*stride]= color4;
2465
                *(uint32_t*)&dst[4 + y*stride]= color4;
2466
            }
2467
        }else if(b_w==4){
2468
            for(y=0; y < b_h; y++){
2469
                *(uint32_t*)&dst[0 + y*stride]= color4;
2470
            }
2471
        }else{
2472
            for(y=0; y < b_h; y++){
2473
                for(x=0; x < b_w; x++){
2474
                    dst[x + y*stride]= color;
2475
                }
2476
            }
2477
        }
2478
    }else{
2479
        uint8_t *src= s->last_picture[block->ref].data[plane_index];
2480
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2481
        int mx= block->mx*scale;
2482
        int my= block->my*scale;
2483
        const int dx= mx&15;
2484
        const int dy= my&15;
2485
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2486
        sx += (mx>>4) - 2;
2487
        sy += (my>>4) - 2;
2488
        src += sx + sy*stride;
2489
        if(   (unsigned)sx >= w - b_w - 4
2490
           || (unsigned)sy >= h - b_h - 4){
2491
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2492
            src= tmp + MB_SIZE;
2493
        }
2494
//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2495
//        assert(!(b_w&(b_w-1)));
2496
        assert(b_w>1 && b_h>1);
2497
        assert(tab_index>=0 && tab_index<4 || b_w==32);
2498
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2499
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2500
        else if(b_w==32){
2501
            int y;
2502
            for(y=0; y<b_h; y+=16){
2503
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2504
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2505
            }
2506
        }else if(b_w==b_h)
2507
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2508
        else if(b_w==2*b_h){
2509
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 2       + 2*stride,stride);
2510
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2511
        }else{
2512
            assert(2*b_w==b_h);
2513
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 2 + 2*stride           ,stride);
2514
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2515
        }
2516
    }
2517
}
2518

    
2519
void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2520
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2521
    int y, x;
2522
    DWTELEM * dst;
2523
    for(y=0; y<b_h; y++){
2524
        //FIXME ugly missue of obmc_stride
2525
        uint8_t *obmc1= obmc + y*obmc_stride;
2526
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2527
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2528
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2529
        dst = slice_buffer_get_line(sb, src_y + y);
2530
        for(x=0; x<b_w; x++){
2531
            int v=   obmc1[x] * block[3][x + y*src_stride]
2532
                    +obmc2[x] * block[2][x + y*src_stride]
2533
                    +obmc3[x] * block[1][x + y*src_stride]
2534
                    +obmc4[x] * block[0][x + y*src_stride];
2535

    
2536
            v <<= 8 - LOG2_OBMC_MAX;
2537
            if(FRAC_BITS != 8){
2538
                v += 1<<(7 - FRAC_BITS);
2539
                v >>= 8 - FRAC_BITS;
2540
            }
2541
            if(add){
2542
                v += dst[x + src_x];
2543
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2544
                if(v&(~255)) v= ~(v>>31);
2545
                dst8[x + y*src_stride] = v;
2546
            }else{
2547
                dst[x + src_x] -= v;
2548
            }
2549
        }
2550
    }
2551
}
2552

    
2553
//FIXME name clenup (b_w, block_w, b_width stuff)
2554
static always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2555
    const int b_width = s->b_width  << s->block_max_depth;
2556
    const int b_height= s->b_height << s->block_max_depth;
2557
    const int b_stride= b_width;
2558
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2559
    BlockNode *rt= lt+1;
2560
    BlockNode *lb= lt+b_stride;
2561
    BlockNode *rb= lb+1;
2562
    uint8_t *block[4];
2563
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2564
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2565
    uint8_t *ptmp;
2566
    int x,y;
2567

    
2568
    if(b_x<0){
2569
        lt= rt;
2570
        lb= rb;
2571
    }else if(b_x + 1 >= b_width){
2572
        rt= lt;
2573
        rb= lb;
2574
    }
2575
    if(b_y<0){
2576
        lt= lb;
2577
        rt= rb;
2578
    }else if(b_y + 1 >= b_height){
2579
        lb= lt;
2580
        rb= rt;
2581
    }
2582

    
2583
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2584
        obmc -= src_x;
2585
        b_w += src_x;
2586
        if(!sliced && !offset_dst)
2587
            dst -= src_x;
2588
        src_x=0;
2589
    }else if(src_x + b_w > w){
2590
        b_w = w - src_x;
2591
    }
2592
    if(src_y<0){
2593
        obmc -= src_y*obmc_stride;
2594
        b_h += src_y;
2595
        if(!sliced && !offset_dst)
2596
            dst -= src_y*dst_stride;
2597
        src_y=0;
2598
    }else if(src_y + b_h> h){
2599
        b_h = h - src_y;
2600
    }
2601

    
2602
    if(b_w<=0 || b_h<=0) return;
2603

    
2604
assert(src_stride > 2*MB_SIZE + 5);
2605
    if(!sliced && offset_dst)
2606
        dst += src_x + src_y*dst_stride;
2607
    dst8+= src_x + src_y*src_stride;
2608
//    src += src_x + src_y*src_stride;
2609

    
2610
    ptmp= tmp + 3*tmp_step;
2611
    block[0]= ptmp;
2612
    ptmp+=tmp_step;
2613
    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2614

    
2615
    if(same_block(lt, rt)){
2616
        block[1]= block[0];
2617
    }else{
2618
        block[1]= ptmp;
2619
        ptmp+=tmp_step;
2620
        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2621
    }
2622

    
2623
    if(same_block(lt, lb)){
2624
        block[2]= block[0];
2625
    }else if(same_block(rt, lb)){
2626
        block[2]= block[1];
2627
    }else{
2628
        block[2]= ptmp;
2629
        ptmp+=tmp_step;
2630
        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2631
    }
2632

    
2633
    if(same_block(lt, rb) ){
2634
        block[3]= block[0];
2635
    }else if(same_block(rt, rb)){
2636
        block[3]= block[1];
2637
    }else if(same_block(lb, rb)){
2638
        block[3]= block[2];
2639
    }else{
2640
        block[3]= ptmp;
2641
        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2642
    }
2643
#if 0
2644
    for(y=0; y<b_h; y++){
2645
        for(x=0; x<b_w; x++){
2646
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2647
            if(add) dst[x + y*dst_stride] += v;
2648
            else    dst[x + y*dst_stride] -= v;
2649
        }
2650
    }
2651
    for(y=0; y<b_h; y++){
2652
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2653
        for(x=0; x<b_w; x++){
2654
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2655
            if(add) dst[x + y*dst_stride] += v;
2656
            else    dst[x + y*dst_stride] -= v;
2657
        }
2658
    }
2659
    for(y=0; y<b_h; y++){
2660
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2661
        for(x=0; x<b_w; x++){
2662
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2663
            if(add) dst[x + y*dst_stride] += v;
2664
            else    dst[x + y*dst_stride] -= v;
2665
        }
2666
    }
2667
    for(y=0; y<b_h; y++){
2668
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2669
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2670
        for(x=0; x<b_w; x++){
2671
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2672
            if(add) dst[x + y*dst_stride] += v;
2673
            else    dst[x + y*dst_stride] -= v;
2674
        }
2675
    }
2676
#else
2677
    if(sliced){
2678
        START_TIMER
2679

    
2680
        s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2681
        STOP_TIMER("inner_add_yblock")
2682
    }else
2683
    for(y=0; y<b_h; y++){
2684
        //FIXME ugly missue of obmc_stride
2685
        uint8_t *obmc1= obmc + y*obmc_stride;
2686
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2687
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2688
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2689
        for(x=0; x<b_w; x++){
2690
            int v=   obmc1[x] * block[3][x + y*src_stride]
2691
                    +obmc2[x] * block[2][x + y*src_stride]
2692
                    +obmc3[x] * block[1][x + y*src_stride]
2693
                    +obmc4[x] * block[0][x + y*src_stride];
2694

    
2695
            v <<= 8 - LOG2_OBMC_MAX;
2696
            if(FRAC_BITS != 8){
2697
                v += 1<<(7 - FRAC_BITS);
2698
                v >>= 8 - FRAC_BITS;
2699
            }
2700
            if(add){
2701
                v += dst[x + y*dst_stride];
2702
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2703
                if(v&(~255)) v= ~(v>>31);
2704
                dst8[x + y*src_stride] = v;
2705
            }else{
2706
                dst[x + y*dst_stride] -= v;
2707
            }
2708
        }
2709
    }
2710
#endif
2711
}
2712

    
2713
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2714
    Plane *p= &s->plane[plane_index];
2715
    const int mb_w= s->b_width  << s->block_max_depth;
2716
    const int mb_h= s->b_height << s->block_max_depth;
2717
    int x, y, mb_x;
2718
    int block_size = MB_SIZE >> s->block_max_depth;
2719
    int block_w    = plane_index ? block_size/2 : block_size;
2720
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2721
    int obmc_stride= plane_index ? block_size : 2*block_size;
2722
    int ref_stride= s->current_picture.linesize[plane_index];
2723
    uint8_t *dst8= s->current_picture.data[plane_index];
2724
    int w= p->width;
2725
    int h= p->height;
2726
    START_TIMER
2727

    
2728
    if(s->keyframe || (s->avctx->debug&512)){
2729
        if(mb_y==mb_h)
2730
            return;
2731

    
2732
        if(add){
2733
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2734
            {
2735
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2736
                DWTELEM * line = sb->line[y];
2737
                for(x=0; x<w; x++)
2738
                {
2739
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2740
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2741
                    v >>= FRAC_BITS;
2742
                    if(v&(~255)) v= ~(v>>31);
2743
                    dst8[x + y*ref_stride]= v;
2744
                }
2745
            }
2746
        }else{
2747
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2748
            {
2749
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2750
                DWTELEM * line = sb->line[y];
2751
                for(x=0; x<w; x++)
2752
                {
2753
                    line[x] -= 128 << FRAC_BITS;
2754
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2755
                }
2756
            }
2757
        }
2758

    
2759
        return;
2760
    }
2761

    
2762
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2763
            START_TIMER
2764

    
2765
            add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2766
                       block_w*mb_x - block_w/2,
2767
                       block_w*mb_y - block_w/2,
2768
                       block_w, block_w,
2769
                       w, h,
2770
                       w, ref_stride, obmc_stride,
2771
                       mb_x - 1, mb_y - 1,
2772
                       add, 0, plane_index);
2773

    
2774
            STOP_TIMER("add_yblock")
2775
        }
2776

    
2777
    STOP_TIMER("predict_slice")
2778
}
2779

    
2780
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2781
    Plane *p= &s->plane[plane_index];
2782
    const int mb_w= s->b_width  << s->block_max_depth;
2783
    const int mb_h= s->b_height << s->block_max_depth;
2784
    int x, y, mb_x;
2785
    int block_size = MB_SIZE >> s->block_max_depth;
2786
    int block_w    = plane_index ? block_size/2 : block_size;
2787
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2788
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2789
    int ref_stride= s->current_picture.linesize[plane_index];
2790
    uint8_t *dst8= s->current_picture.data[plane_index];
2791
    int w= p->width;
2792
    int h= p->height;
2793
    START_TIMER
2794

    
2795
    if(s->keyframe || (s->avctx->debug&512)){
2796
        if(mb_y==mb_h)
2797
            return;
2798

    
2799
        if(add){
2800
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2801
                for(x=0; x<w; x++){
2802
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2803
                    v >>= FRAC_BITS;
2804
                    if(v&(~255)) v= ~(v>>31);
2805
                    dst8[x + y*ref_stride]= v;
2806
                }
2807
            }
2808
        }else{
2809
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2810
                for(x=0; x<w; x++){
2811
                    buf[x + y*w]-= 128<<FRAC_BITS;
2812
                }
2813
            }
2814
        }
2815

    
2816
        return;
2817
    }
2818

    
2819
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2820
            START_TIMER
2821

    
2822
            add_yblock(s, 0, NULL, buf, dst8, obmc,
2823
                       block_w*mb_x - block_w/2,
2824
                       block_w*mb_y - block_w/2,
2825
                       block_w, block_w,
2826
                       w, h,
2827
                       w, ref_stride, obmc_stride,
2828
                       mb_x - 1, mb_y - 1,
2829
                       add, 1, plane_index);
2830

    
2831
            STOP_TIMER("add_yblock")
2832
        }
2833

    
2834
    STOP_TIMER("predict_slice")
2835
}
2836

    
2837
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2838
    const int mb_h= s->b_height << s->block_max_depth;
2839
    int mb_y;
2840
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2841
        predict_slice(s, buf, plane_index, add, mb_y);
2842
}
2843

    
2844
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2845
    int i, x2, y2;
2846
    Plane *p= &s->plane[plane_index];
2847
    const int block_size = MB_SIZE >> s->block_max_depth;
2848
    const int block_w    = plane_index ? block_size/2 : block_size;
2849
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2850
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2851
    const int ref_stride= s->current_picture.linesize[plane_index];
2852
    uint8_t *src= s-> input_picture.data[plane_index];
2853
    DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2854
    const int b_stride = s->b_width << s->block_max_depth;
2855
    const int w= p->width;
2856
    const int h= p->height;
2857
    int index= mb_x + mb_y*b_stride;
2858
    BlockNode *b= &s->block[index];
2859
    BlockNode backup= *b;
2860
    int ab=0;
2861
    int aa=0;
2862

    
2863
    b->type|= BLOCK_INTRA;
2864
    b->color[plane_index]= 0;
2865
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
2866

    
2867
    for(i=0; i<4; i++){
2868
        int mb_x2= mb_x + (i &1) - 1;
2869
        int mb_y2= mb_y + (i>>1) - 1;
2870
        int x= block_w*mb_x2 + block_w/2;
2871
        int y= block_w*mb_y2 + block_w/2;
2872

    
2873
        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2874
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2875

    
2876
        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2877
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2878
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2879
                int obmc_v= obmc[index];
2880
                int d;
2881
                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2882
                if(x<0) obmc_v += obmc[index + block_w];
2883
                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2884
                if(x+block_w>w) obmc_v += obmc[index - block_w];
2885
                //FIXME precalc this or simplify it somehow else
2886

    
2887
                d = -dst[index] + (1<<(FRAC_BITS-1));
2888
                dst[index] = d;
2889
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2890
                aa += obmc_v * obmc_v; //FIXME precalclate this
2891
            }
2892
        }
2893
    }
2894
    *b= backup;
2895

    
2896
    return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
2897
}
2898

    
2899
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2900
    const int b_stride = s->b_width << s->block_max_depth;
2901
    const int b_height = s->b_height<< s->block_max_depth;
2902
    int index= x + y*b_stride;
2903
    BlockNode *b     = &s->block[index];
2904
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2905
    BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2906
    BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2907
    BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2908
    int dmx, dmy;
2909
//  int mx_context= av_log2(2*ABS(left->mx - top->mx));
2910
//  int my_context= av_log2(2*ABS(left->my - top->my));
2911

    
2912
    if(x<0 || x>=b_stride || y>=b_height)
2913
        return 0;
2914
/*
2915
1            0      0
2916
01X          1-2    1
2917
001XX        3-6    2-3
2918
0001XXX      7-14   4-7
2919
00001XXXX   15-30   8-15
2920
*/
2921
//FIXME try accurate rate
2922
//FIXME intra and inter predictors if surrounding blocks arent the same type
2923
    if(b->type & BLOCK_INTRA){
2924
        return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
2925
                   + av_log2(2*ABS(left->color[1] - b->color[1]))
2926
                   + av_log2(2*ABS(left->color[2] - b->color[2])));
2927
    }else{
2928
        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2929
        dmx-= b->mx;
2930
        dmy-= b->my;
2931
        return 2*(1 + av_log2(2*ABS(dmx)) //FIXME kill the 2* can be merged in lambda
2932
                    + av_log2(2*ABS(dmy))
2933
                    + av_log2(2*b->ref));
2934
    }
2935
}
2936

    
2937
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2938
    Plane *p= &s->plane[plane_index];
2939
    const int block_size = MB_SIZE >> s->block_max_depth;
2940
    const int block_w    = plane_index ? block_size/2 : block_size;
2941
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2942
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2943
    const int ref_stride= s->current_picture.linesize[plane_index];
2944
    uint8_t *dst= s->current_picture.data[plane_index];
2945
    uint8_t *src= s->  input_picture.data[plane_index];
2946
    DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2947
    uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2948
    uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
2949
    const int b_stride = s->b_width << s->block_max_depth;
2950
    const int b_height = s->b_height<< s->block_max_depth;
2951
    const int w= p->width;
2952
    const int h= p->height;
2953
    int distortion;
2954
    int rate= 0;
2955
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2956
    int sx= block_w*mb_x - block_w/2;
2957
    int sy= block_w*mb_y - block_w/2;
2958
    int x0= FFMAX(0,-sx);
2959
    int y0= FFMAX(0,-sy);
2960
    int x1= FFMIN(block_w*2, w-sx);
2961
    int y1= FFMIN(block_w*2, h-sy);
2962
    int i,x,y;
2963

    
2964
    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2965

    
2966
    for(y=y0; y<y1; y++){
2967
        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2968
        const DWTELEM *pred1 = pred + y*obmc_stride;
2969
        uint8_t *cur1 = cur + y*ref_stride;
2970
        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2971
        for(x=x0; x<x1; x++){
2972
            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2973
            v = (v + pred1[x]) >> FRAC_BITS;
2974
            if(v&(~255)) v= ~(v>>31);
2975
            dst1[x] = v;
2976
        }
2977
    }
2978

    
2979
    /* copy the regions where obmc[] = (uint8_t)256 */
2980
    if(LOG2_OBMC_MAX == 8
2981
        && (mb_x == 0 || mb_x == b_stride-1)
2982
        && (mb_y == 0 || mb_y == b_height-1)){
2983
        if(mb_x == 0)
2984
            x1 = block_w;
2985
        else
2986
            x0 = block_w;
2987
        if(mb_y == 0)
2988
            y1 = block_w;
2989
        else
2990
            y0 = block_w;
2991
        for(y=y0; y<y1; y++)
2992
            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2993
    }
2994

    
2995
    if(block_w==16){
2996
        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2997
        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2998
        /* FIXME cmps overlap but don't cover the wavelet's whole support,
2999
         * so improving the score of one block is not strictly guaranteed to
3000
         * improve the score of the whole frame, so iterative motion est
3001
         * doesn't always converge. */
3002
        if(s->avctx->me_cmp == FF_CMP_W97)
3003
            distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3004
        else if(s->avctx->me_cmp == FF_CMP_W53)
3005
            distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3006
        else{
3007
            distortion = 0;
3008
            for(i=0; i<4; i++){
3009
                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3010
                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3011
            }
3012
        }
3013
    }else{
3014
        assert(block_w==8);
3015
        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
3016
    }
3017

    
3018
    if(plane_index==0){
3019
        for(i=0; i<4; i++){
3020
/* ..RRr
3021
 * .RXx.
3022
 * rxx..
3023
 */
3024
            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3025
        }
3026
        if(mb_x == b_stride-2)
3027
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3028
    }
3029
    return distortion + rate*penalty_factor;
3030
}
3031

    
3032
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3033
    int i, y2;
3034
    Plane *p= &s->plane[plane_index];
3035
    const int block_size = MB_SIZE >> s->block_max_depth;
3036
    const int block_w    = plane_index ? block_size/2 : block_size;
3037
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3038
    const int obmc_stride= plane_index ? block_size : 2*block_size;
3039
    const int ref_stride= s->current_picture.linesize[plane_index];
3040
    uint8_t *dst= s->current_picture.data[plane_index];
3041
    uint8_t *src= s-> input_picture.data[plane_index];
3042
    static const DWTELEM zero_dst[4096]; //FIXME
3043
    const int b_stride = s->b_width << s->block_max_depth;
3044
    const int b_height = s->b_height<< s->block_max_depth;
3045
    const int w= p->width;
3046
    const int h= p->height;
3047
    int distortion= 0;
3048
    int rate= 0;
3049
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3050

    
3051
    for(i=0; i<9; i++){
3052
        int mb_x2= mb_x + (i%3) - 1;
3053
        int mb_y2= mb_y + (i/3) - 1;
3054
        int x= block_w*mb_x2 + block_w/2;
3055
        int y= block_w*mb_y2 + block_w/2;
3056

    
3057
        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3058
                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3059

    
3060
        //FIXME find a cleaner/simpler way to skip the outside stuff
3061
        for(y2= y; y2<0; y2++)
3062
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3063
        for(y2= h; y2<y+block_w; y2++)
3064
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3065
        if(x<0){
3066
            for(y2= y; y2<y+block_w; y2++)
3067
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3068
        }
3069
        if(x+block_w > w){
3070
            for(y2= y; y2<y+block_w; y2++)
3071
                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3072
        }
3073

    
3074
        assert(block_w== 8 || block_w==16);
3075
        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3076
    }
3077

    
3078
    if(plane_index==0){
3079
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3080
        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3081

    
3082
/* ..RRRr
3083
 * .RXXx.
3084
 * .RXXx.
3085
 * rxxx.
3086
 */
3087
        if(merged)
3088
            rate = get_block_bits(s, mb_x, mb_y, 2);
3089
        for(i=merged?4:0; i<9; i++){
3090
            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3091
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3092
        }
3093
    }
3094
    return distortion + rate*penalty_factor;
3095
}
3096

    
3097
static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3098
    const int b_stride= s->b_width << s->block_max_depth;
3099
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3100
    BlockNode backup= *block;
3101
    int rd, index, value;
3102

    
3103
    assert(mb_x>=0 && mb_y>=0);
3104
    assert(mb_x<b_stride);
3105

    
3106
    if(intra){
3107
        block->color[0] = p[0];
3108
        block->color[1] = p[1];
3109
        block->color[2] = p[2];
3110
        block->type |= BLOCK_INTRA;
3111
    }else{
3112
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3113
        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3114
        if(s->me_cache[index] == value)
3115
            return 0;
3116
        s->me_cache[index]= value;
3117

    
3118
        block->mx= p[0];
3119
        block->my= p[1];
3120
        block->type &= ~BLOCK_INTRA;
3121
    }
3122

    
3123
    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3124

    
3125
//FIXME chroma
3126
    if(rd < *best_rd){
3127
        *best_rd= rd;
3128
        return 1;
3129
    }else{
3130
        *block= backup;
3131
        return 0;
3132
    }
3133
}
3134

    
3135
/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3136
static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3137
    int p[2] = {p0, p1};
3138
    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3139
}
3140

    
3141
static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3142
    const int b_stride= s->b_width << s->block_max_depth;
3143
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3144
    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3145
    int rd, index, value;
3146

    
3147
    assert(mb_x>=0 && mb_y>=0);
3148
    assert(mb_x<b_stride);
3149
    assert(((mb_x|mb_y)&1) == 0);
3150

    
3151
    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3152
    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3153
    if(s->me_cache[index] == value)
3154
        return 0;
3155
    s->me_cache[index]= value;
3156

    
3157
    block->mx= p0;
3158
    block->my= p1;
3159
    block->ref= ref;
3160
    block->type &= ~BLOCK_INTRA;
3161
    block[1]= block[b_stride]= block[b_stride+1]= *block;
3162

    
3163
    rd= get_4block_rd(s, mb_x, mb_y, 0);
3164

    
3165
//FIXME chroma
3166
    if(rd < *best_rd){
3167
        *best_rd= rd;
3168
        return 1;
3169
    }else{
3170
        block[0]= backup[0];
3171
        block[1]= backup[1];
3172
        block[b_stride]= backup[2];
3173
        block[b_stride+1]= backup[3];
3174
        return 0;
3175
    }
3176
}
3177

    
3178
static void iterative_me(SnowContext *s){
3179
    int pass, mb_x, mb_y;
3180
    const int b_width = s->b_width  << s->block_max_depth;
3181
    const int b_height= s->b_height << s->block_max_depth;
3182
    const int b_stride= b_width;
3183
    int color[3];
3184

    
3185
    {
3186
        RangeCoder r = s->c;
3187
        uint8_t state[sizeof(s->block_state)];
3188
        memcpy(state, s->block_state, sizeof(s->block_state));
3189
        for(mb_y= 0; mb_y<s->b_height; mb_y++)
3190
            for(mb_x= 0; mb_x<s->b_width; mb_x++)
3191
                encode_q_branch(s, 0, mb_x, mb_y);
3192
        s->c = r;
3193
        memcpy(s->block_state, state, sizeof(s->block_state));
3194
    }
3195

    
3196
    for(pass=0; pass<25; pass++){
3197
        int change= 0;
3198

    
3199
        for(mb_y= 0; mb_y<b_height; mb_y++){
3200
            for(mb_x= 0; mb_x<b_width; mb_x++){
3201
                int dia_change, i, j, ref;
3202
                int best_rd= INT_MAX, ref_rd;
3203
                BlockNode backup, ref_b;
3204
                const int index= mb_x + mb_y * b_stride;
3205
                BlockNode *block= &s->block[index];
3206
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3207
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3208
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3209
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3210
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3211
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3212
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3213
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3214
                const int b_w= (MB_SIZE >> s->block_max_depth);
3215
                uint8_t obmc_edged[b_w*2][b_w*2];
3216

    
3217
                if(pass && (block->type & BLOCK_OPT))
3218
                    continue;
3219
                block->type |= BLOCK_OPT;
3220

    
3221
                backup= *block;
3222

    
3223
                if(!s->me_cache_generation)
3224
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3225
                s->me_cache_generation += 1<<22;
3226

    
3227
                //FIXME precalc
3228
                {
3229
                    int x, y;
3230
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3231
                    if(mb_x==0)
3232
                        for(y=0; y<b_w*2; y++)
3233
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3234
                    if(mb_x==b_stride-1)
3235
                        for(y=0; y<b_w*2; y++)
3236
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3237
                    if(mb_y==0){
3238
                        for(x=0; x<b_w*2; x++)
3239
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3240
                        for(y=1; y<b_w; y++)
3241
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3242
                    }
3243
                    if(mb_y==b_height-1){
3244
                        for(x=0; x<b_w*2; x++)
3245
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3246
                        for(y=b_w; y<b_w*2-1; y++)
3247
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3248
                    }
3249
                }
3250

    
3251
                //skip stuff outside the picture
3252
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3253
                {
3254
                    uint8_t *src= s->  input_picture.data[0];
3255
                    uint8_t *dst= s->current_picture.data[0];
3256
                    const int stride= s->current_picture.linesize[0];
3257
                    const int block_w= MB_SIZE >> s->block_max_depth;
3258
                    const int sx= block_w*mb_x - block_w/2;
3259
                    const int sy= block_w*mb_y - block_w/2;
3260
                    const int w= s->plane[0].width;
3261
                    const int h= s->plane[0].height;
3262
                    int y;
3263

    
3264
                    for(y=sy; y<0; y++)
3265
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3266
                    for(y=h; y<sy+block_w*2; y++)
3267
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3268
                    if(sx<0){
3269
                        for(y=sy; y<sy+block_w*2; y++)
3270
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3271
                    }
3272
                    if(sx+block_w*2 > w){
3273
                        for(y=sy; y<sy+block_w*2; y++)
3274
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3275
                    }
3276
                }
3277

    
3278
                // intra(black) = neighbors' contribution to the current block
3279
                for(i=0; i<3; i++)
3280
                    color[i]= get_dc(s, mb_x, mb_y, i);
3281

    
3282
                // get previous score (cant be cached due to OBMC)
3283
                if(pass > 0 && (block->type&BLOCK_INTRA)){
3284
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
3285
                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3286
                }else
3287
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3288

    
3289
                ref_b= *block;
3290
                ref_rd= best_rd;
3291
                for(ref=0; ref < s->ref_frames; ref++){
3292
                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3293
                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3294
                        continue;
3295
                    block->ref= ref;
3296
                    best_rd= INT_MAX;
3297

    
3298
                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3299
                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3300
                    if(tb)
3301
                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3302
                    if(lb)
3303
                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3304
                    if(rb)
3305
                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3306
                    if(bb)
3307
                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3308

    
3309
                    /* fullpel ME */
3310
                    //FIXME avoid subpel interpol / round to nearest integer
3311
                    do{
3312
                        dia_change=0;
3313
                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3314
                            for(j=0; j<i; j++){
3315
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3316
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3317
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3318
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3319
                            }
3320
                        }
3321
                    }while(dia_change);
3322
                    /* subpel ME */
3323
                    do{
3324
                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3325
                        dia_change=0;
3326
                        for(i=0; i<8; i++)
3327
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3328
                    }while(dia_change);
3329
                    //FIXME or try the standard 2 pass qpel or similar
3330

    
3331
                    mvr[0][0]= block->mx;
3332
                    mvr[0][1]= block->my;
3333
                    if(ref_rd > best_rd){
3334
                        ref_rd= best_rd;
3335
                        ref_b= *block;
3336
                    }
3337
                }
3338
                best_rd= ref_rd;
3339
                *block= ref_b;
3340
#if 1
3341
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3342
                //FIXME RD style color selection
3343
#endif
3344
                if(!same_block(block, &backup)){
3345
                    if(tb ) tb ->type &= ~BLOCK_OPT;
3346
                    if(lb ) lb ->type &= ~BLOCK_OPT;
3347
                    if(rb ) rb ->type &= ~BLOCK_OPT;
3348
                    if(bb ) bb ->type &= ~BLOCK_OPT;
3349
                    if(tlb) tlb->type &= ~BLOCK_OPT;
3350
                    if(trb) trb->type &= ~BLOCK_OPT;
3351
                    if(blb) blb->type &= ~BLOCK_OPT;
3352
                    if(brb) brb->type &= ~BLOCK_OPT;
3353
                    change ++;
3354
                }
3355
            }
3356
        }
3357
        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3358
        if(!change)
3359
            break;
3360
    }
3361

    
3362
    if(s->block_max_depth == 1){
3363
        int change= 0;
3364
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
3365
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
3366
                int i;
3367
                int best_rd, init_rd;
3368
                const int index= mb_x + mb_y * b_stride;
3369
                BlockNode *b[4];
3370

    
3371
                b[0]= &s->block[index];
3372
                b[1]= b[0]+1;
3373
                b[2]= b[0]+b_stride;
3374
                b[3]= b[2]+1;
3375
                if(same_block(b[0], b[1]) &&
3376
                   same_block(b[0], b[2]) &&
3377
                   same_block(b[0], b[3]))
3378
                    continue;
3379

    
3380
                if(!s->me_cache_generation)
3381
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3382
                s->me_cache_generation += 1<<22;
3383

    
3384
                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3385

    
3386
                //FIXME more multiref search?
3387
                check_4block_inter(s, mb_x, mb_y,
3388
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3389
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3390

    
3391
                for(i=0; i<4; i++)
3392
                    if(!(b[i]->type&BLOCK_INTRA))
3393
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3394

    
3395
                if(init_rd != best_rd)
3396
                    change++;
3397
            }
3398
        }
3399
        av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3400
    }
3401
}
3402

    
3403
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3404
    const int level= b->level;
3405
    const int w= b->width;
3406
    const int h= b->height;
3407
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3408
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3409
    int x,y, thres1, thres2;
3410
//    START_TIMER
3411

    
3412
    if(s->qlog == LOSSLESS_QLOG) return;
3413

    
3414
    bias= bias ? 0 : (3*qmul)>>3;
3415
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3416
    thres2= 2*thres1;
3417

    
3418
    if(!bias){
3419
        for(y=0; y<h; y++){
3420
            for(x=0; x<w; x++){
3421
                int i= src[x + y*stride];
3422

    
3423
                if((unsigned)(i+thres1) > thres2){
3424
                    if(i>=0){
3425
                        i<<= QEXPSHIFT;
3426
                        i/= qmul; //FIXME optimize
3427
                        src[x + y*stride]=  i;
3428
                    }else{
3429
                        i= -i;
3430
                        i<<= QEXPSHIFT;
3431
                        i/= qmul; //FIXME optimize
3432
                        src[x + y*stride]= -i;
3433
                    }
3434
                }else
3435
                    src[x + y*stride]= 0;
3436
            }
3437
        }
3438
    }else{
3439
        for(y=0; y<h; y++){
3440
            for(x=0; x<w; x++){
3441
                int i= src[x + y*stride];
3442

    
3443
                if((unsigned)(i+thres1) > thres2){
3444
                    if(i>=0){
3445
                        i<<= QEXPSHIFT;
3446
                        i= (i + bias) / qmul; //FIXME optimize
3447
                        src[x + y*stride]=  i;
3448
                    }else{
3449
                        i= -i;
3450
                        i<<= QEXPSHIFT;
3451
                        i= (i + bias) / qmul; //FIXME optimize
3452
                        src[x + y*stride]= -i;
3453
                    }
3454
                }else
3455
                    src[x + y*stride]= 0;
3456
            }
3457
        }
3458
    }
3459
    if(level+1 == s->spatial_decomposition_count){
3460
//        STOP_TIMER("quantize")
3461
    }
3462
}
3463

    
3464
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3465
    const int w= b->width;
3466
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3467
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3468
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3469
    int x,y;
3470
    START_TIMER
3471

    
3472
    if(s->qlog == LOSSLESS_QLOG) return;
3473

    
3474
    for(y=start_y; y<end_y; y++){
3475
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3476
        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3477
        for(x=0; x<w; x++){
3478
            int i= line[x];
3479
            if(i<0){
3480
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3481
            }else if(i>0){
3482
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3483
            }
3484
        }
3485
    }
3486
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3487
        STOP_TIMER("dquant")
3488
    }
3489
}
3490

    
3491
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3492
    const int w= b->width;
3493
    const int h= b->height;
3494
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3495
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3496
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3497
    int x,y;
3498
    START_TIMER
3499

    
3500
    if(s->qlog == LOSSLESS_QLOG) return;
3501

    
3502
    for(y=0; y<h; y++){
3503
        for(x=0; x<w; x++){
3504
            int i= src[x + y*stride];
3505
            if(i<0){
3506
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3507
            }else if(i>0){
3508
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3509
            }
3510
        }
3511
    }
3512
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3513
        STOP_TIMER("dquant")
3514
    }
3515
}
3516

    
3517
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3518
    const int w= b->width;
3519
    const int h= b->height;
3520
    int x,y;
3521

    
3522
    for(y=h-1; y>=0; y--){
3523
        for(x=w-1; x>=0; x--){
3524
            int i= x + y*stride;
3525

    
3526
            if(x){
3527
                if(use_median){
3528
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3529
                    else  src[i] -= src[i - 1];
3530
                }else{
3531
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3532
                    else  src[i] -= src[i - 1];
3533
                }
3534
            }else{
3535
                if(y) src[i] -= src[i - stride];
3536
            }
3537
        }
3538
    }
3539
}
3540

    
3541
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3542
    const int w= b->width;
3543
    int x,y;
3544

    
3545
//    START_TIMER
3546

    
3547
    DWTELEM * line;
3548
    DWTELEM * prev;
3549

    
3550
    if (start_y != 0)
3551
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3552

    
3553
    for(y=start_y; y<end_y; y++){
3554
        prev = line;
3555
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3556
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3557
        for(x=0; x<w; x++){
3558
            if(x){
3559
                if(use_median){
3560
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3561
                    else  line[x] += line[x - 1];
3562
                }else{
3563
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3564
                    else  line[x] += line[x - 1];
3565
                }
3566
            }else{
3567
                if(y) line[x] += prev[x];
3568
            }
3569
        }
3570
    }
3571

    
3572
//    STOP_TIMER("correlate")
3573
}
3574

    
3575
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3576
    const int w= b->width;
3577
    const int h= b->height;
3578
    int x,y;
3579

    
3580
    for(y=0; y<h; y++){
3581
        for(x=0; x<w; x++){
3582
            int i= x + y*stride;
3583

    
3584
            if(x){
3585
                if(use_median){
3586
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3587
                    else  src[i] += src[i - 1];
3588
                }else{
3589
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3590
                    else  src[i] += src[i - 1];
3591
                }
3592
            }else{
3593
                if(y) src[i] += src[i - stride];
3594
            }
3595
        }
3596
    }
3597
}
3598

    
3599
static void encode_header(SnowContext *s){
3600
    int plane_index, level, orientation;
3601
    uint8_t kstate[32];
3602

    
3603
    memset(kstate, MID_STATE, sizeof(kstate));
3604

    
3605
    put_rac(&s->c, kstate, s->keyframe);
3606
    if(s->keyframe || s->always_reset)
3607
        reset_contexts(s);
3608
    if(s->keyframe){
3609
        put_symbol(&s->c, s->header_state, s->version, 0);
3610
        put_rac(&s->c, s->header_state, s->always_reset);
3611
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3612
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3613
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3614
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3615
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3616
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3617
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3618
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3619
        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3620

    
3621
        for(plane_index=0; plane_index<2; plane_index++){
3622
            for(level=0; level<s->spatial_decomposition_count; level++){
3623
                for(orientation=level ? 1:0; orientation<4; orientation++){
3624
                    if(orientation==2) continue;
3625
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3626
                }
3627
            }
3628
        }
3629
    }
3630
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3631
    put_symbol(&s->c, s->header_state, s->qlog, 1);
3632
    put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3633
    put_symbol(&s->c, s->header_state, s->qbias, 1);
3634
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3635
}
3636

    
3637
static int decode_header(SnowContext *s){
3638
    int plane_index, level, orientation;
3639
    uint8_t kstate[32];
3640

    
3641
    memset(kstate, MID_STATE, sizeof(kstate));
3642

    
3643
    s->keyframe= get_rac(&s->c, kstate);
3644
    if(s->keyframe || s->always_reset)
3645
        reset_contexts(s);
3646
    if(s->keyframe){
3647
        s->version= get_symbol(&s->c, s->header_state, 0);
3648
        if(s->version>0){
3649
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3650
            return -1;
3651
        }
3652
        s->always_reset= get_rac(&s->c, s->header_state);
3653
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3654
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3655
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3656
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3657
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3658
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3659
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3660
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3661
        s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3662

    
3663
        for(plane_index=0; plane_index<3; plane_index++){
3664
            for(level=0; level<s->spatial_decomposition_count; level++){
3665
                for(orientation=level ? 1:0; orientation<4; orientation++){
3666
                    int q;
3667
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3668
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3669
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3670
                    s->plane[plane_index].band[level][orientation].qlog= q;
3671
                }
3672
            }
3673
        }
3674
    }
3675

    
3676
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3677
    if(s->spatial_decomposition_type > 2){
3678
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3679
        return -1;
3680
    }
3681

    
3682
    s->qlog= get_symbol(&s->c, s->header_state, 1);
3683
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3684
    s->qbias= get_symbol(&s->c, s->header_state, 1);
3685
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3686
    if(s->block_max_depth > 1 || s->block_max_depth < 0){
3687
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3688
        s->block_max_depth= 0;
3689
        return -1;
3690
    }
3691

    
3692
    return 0;
3693
}
3694

    
3695
static void init_qexp(void){
3696
    int i;
3697
    double v=128;
3698

    
3699
    for(i=0; i<QROOT; i++){
3700
        qexp[i]= lrintf(v);
3701
        v *= pow(2, 1.0 / QROOT);
3702
    }
3703
}
3704

    
3705
static int common_init(AVCodecContext *avctx){
3706
    SnowContext *s = avctx->priv_data;
3707
    int width, height;
3708
    int level, orientation, plane_index, dec;
3709
    int i, j;
3710

    
3711
    s->avctx= avctx;
3712

    
3713
    dsputil_init(&s->dsp, avctx);
3714

    
3715
#define mcf(dx,dy)\
3716
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3717
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3718
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3719
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3720
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3721
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3722

    
3723
    mcf( 0, 0)
3724
    mcf( 4, 0)
3725
    mcf( 8, 0)
3726
    mcf(12, 0)
3727
    mcf( 0, 4)
3728
    mcf( 4, 4)
3729
    mcf( 8, 4)
3730
    mcf(12, 4)
3731
    mcf( 0, 8)
3732
    mcf( 4, 8)
3733
    mcf( 8, 8)
3734
    mcf(12, 8)
3735
    mcf( 0,12)
3736
    mcf( 4,12)
3737
    mcf( 8,12)
3738
    mcf(12,12)
3739

    
3740
#define mcfh(dx,dy)\
3741
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3742
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3743
        mc_block_hpel ## dx ## dy ## 16;\
3744
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3745
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3746
        mc_block_hpel ## dx ## dy ## 8;
3747

    
3748
    mcfh(0, 0)
3749
    mcfh(8, 0)
3750
    mcfh(0, 8)
3751
    mcfh(8, 8)
3752

    
3753
    if(!qexp[0])
3754
        init_qexp();
3755

    
3756
    dec= s->spatial_decomposition_count= 5;
3757
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3758

    
3759
    s->chroma_h_shift= 1; //FIXME XXX
3760
    s->chroma_v_shift= 1;
3761

    
3762
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3763

    
3764
    width= s->avctx->width;
3765
    height= s->avctx->height;
3766

    
3767
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3768

    
3769
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3770
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3771

    
3772
    for(plane_index=0; plane_index<3; plane_index++){
3773
        int w= s->avctx->width;
3774
        int h= s->avctx->height;
3775

    
3776
        if(plane_index){
3777
            w>>= s->chroma_h_shift;
3778
            h>>= s->chroma_v_shift;
3779
        }
3780
        s->plane[plane_index].width = w;
3781
        s->plane[plane_index].height= h;
3782
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3783
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3784
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3785
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3786

    
3787
                b->buf= s->spatial_dwt_buffer;
3788
                b->level= level;
3789
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3790
                b->width = (w + !(orientation&1))>>1;
3791
                b->height= (h + !(orientation>1))>>1;
3792

    
3793
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3794
                b->buf_x_offset = 0;
3795
                b->buf_y_offset = 0;
3796

    
3797
                if(orientation&1){
3798
                    b->buf += (w+1)>>1;
3799
                    b->buf_x_offset = (w+1)>>1;
3800
                }
3801
                if(orientation>1){
3802
                    b->buf += b->stride>>1;
3803
                    b->buf_y_offset = b->stride_line >> 1;
3804
                }
3805

    
3806
                if(level)
3807
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3808
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3809
            }
3810
            w= (w+1)>>1;
3811
            h= (h+1)>>1;
3812
        }
3813
    }
3814

    
3815
    for(i=0; i<MAX_REF_FRAMES; i++)
3816
        for(j=0; j<MAX_REF_FRAMES; j++)
3817
            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3818

    
3819
    reset_contexts(s);
3820
/*
3821
    width= s->width= avctx->width;
3822
    height= s->height= avctx->height;
3823

3824
    assert(width && height);
3825
*/
3826
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3827

    
3828
    return 0;
3829
}
3830

    
3831
static int qscale2qlog(int qscale){
3832
    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3833
           + 61*QROOT/8; //<64 >60
3834
}
3835

    
3836
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3837
{
3838
    /* estimate the frame's complexity as a sum of weighted dwt coefs.
3839
     * FIXME we know exact mv bits at this point,
3840
     * but ratecontrol isn't set up to include them. */
3841
    uint32_t coef_sum= 0;
3842
    int level, orientation, delta_qlog;
3843

    
3844
    for(level=0; level<s->spatial_decomposition_count; level++){
3845
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3846
            SubBand *b= &s->plane[0].band[level][orientation];
3847
            DWTELEM *buf= b->buf;
3848
            const int w= b->width;
3849
            const int h= b->height;
3850
            const int stride= b->stride;
3851
            const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16);
3852
            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3853
            const int qdiv= (1<<16)/qmul;
3854
            int x, y;
3855
            if(orientation==0)
3856
                decorrelate(s, b, buf, stride, 1, 0);
3857
            for(y=0; y<h; y++)
3858
                for(x=0; x<w; x++)
3859
                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3860
            if(orientation==0)
3861
                correlate(s, b, buf, stride, 1, 0);
3862
        }
3863
    }
3864

    
3865
    /* ugly, ratecontrol just takes a sqrt again */
3866
    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3867
    assert(coef_sum < INT_MAX);
3868

    
3869
    if(pict->pict_type == I_TYPE){
3870
        s->m.current_picture.mb_var_sum= coef_sum;
3871
        s->m.current_picture.mc_mb_var_sum= 0;
3872
    }else{
3873
        s->m.current_picture.mc_mb_var_sum= coef_sum;
3874
        s->m.current_picture.mb_var_sum= 0;
3875
    }
3876

    
3877
    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3878
    if (pict->quality < 0)
3879
        return -1;
3880
    s->lambda= pict->quality * 3/2;
3881
    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3882
    s->qlog+= delta_qlog;
3883
    return delta_qlog;
3884
}
3885

    
3886
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3887
    int width = p->width;
3888
    int height= p->height;
3889
    int level, orientation, x, y;
3890

    
3891
    for(level=0; level<s->spatial_decomposition_count; level++){
3892
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3893
            SubBand *b= &p->band[level][orientation];
3894
            DWTELEM *buf= b->buf;
3895
            int64_t error=0;
3896

    
3897
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3898
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
3899
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3900
            for(y=0; y<height; y++){
3901
                for(x=0; x<width; x++){
3902
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
3903
                    error += d*d;
3904
                }
3905
            }
3906

    
3907
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3908
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3909
        }
3910
    }
3911
}
3912

    
3913
static int encode_init(AVCodecContext *avctx)
3914
{
3915
    SnowContext *s = avctx->priv_data;
3916
    int plane_index;
3917

    
3918
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3919
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3920
               "use vstrict=-2 / -strict -2 to use it anyway\n");
3921
        return -1;
3922
    }
3923

    
3924
    if(avctx->prediction_method == DWT_97
3925
       && (avctx->flags & CODEC_FLAG_QSCALE)
3926
       && avctx->global_quality == 0){
3927
        av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3928
        return -1;
3929
    }
3930

    
3931
    common_init(avctx);
3932
    alloc_blocks(s);
3933

    
3934
    s->version=0;
3935

    
3936
    s->m.avctx   = avctx;
3937
    s->m.flags   = avctx->flags;
3938
    s->m.bit_rate= avctx->bit_rate;
3939

    
3940
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3941
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3942
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3943
    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3944
    h263_encode_init(&s->m); //mv_penalty
3945

    
3946
    s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3947

    
3948
    if(avctx->flags&CODEC_FLAG_PASS1){
3949
        if(!avctx->stats_out)
3950
            avctx->stats_out = av_mallocz(256);
3951
    }
3952
    if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3953
        if(ff_rate_control_init(&s->m) < 0)
3954
            return -1;
3955
    }
3956
    s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3957

    
3958
    for(plane_index=0; plane_index<3; plane_index++){
3959
        calculate_vissual_weight(s, &s->plane[plane_index]);
3960
    }
3961

    
3962

    
3963
    avctx->coded_frame= &s->current_picture;
3964
    switch(avctx->pix_fmt){
3965
//    case PIX_FMT_YUV444P:
3966
//    case PIX_FMT_YUV422P:
3967
    case PIX_FMT_YUV420P:
3968
    case PIX_FMT_GRAY8:
3969
//    case PIX_FMT_YUV411P:
3970
//    case PIX_FMT_YUV410P:
3971
        s->colorspace_type= 0;
3972
        break;
3973
/*    case PIX_FMT_RGBA32:
3974
        s->colorspace= 1;
3975
        break;*/
3976
    default:
3977
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3978
        return -1;
3979
    }
3980
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3981
    s->chroma_h_shift= 1;
3982
    s->chroma_v_shift= 1;
3983

    
3984
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3985
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3986

    
3987
    s->avctx->get_buffer(s->avctx, &s->input_picture);
3988

    
3989
    if(s->avctx->me_method == ME_ITER){
3990
        int i;
3991
        int size= s->b_width * s->b_height << 2*s->block_max_depth;
3992
        for(i=0; i<s->max_ref_frames; i++){
3993
            s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
3994
            s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
3995
        }
3996
    }
3997

    
3998
    return 0;
3999
}
4000

    
4001
static int frame_start(SnowContext *s){
4002
   AVFrame tmp;
4003
   int w= s->avctx->width; //FIXME round up to x16 ?
4004
   int h= s->avctx->height;
4005

    
4006
    if(s->current_picture.data[0]){
4007
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
4008
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4009
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4010
    }
4011

    
4012
    tmp= s->last_picture[s->max_ref_frames-1];
4013
    memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4014
    s->last_picture[0]= s->current_picture;
4015
    s->current_picture= tmp;
4016

    
4017
    if(s->keyframe){
4018
        s->ref_frames= 0;
4019
    }else{
4020
        int i;
4021
        for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4022
            if(i && s->last_picture[i-1].key_frame)
4023
                break;
4024
        s->ref_frames= i;
4025
    }
4026

    
4027
    s->current_picture.reference= 1;
4028
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4029
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4030
        return -1;
4031
    }
4032

    
4033
    s->current_picture.key_frame= s->keyframe;
4034

    
4035
    return 0;
4036
}
4037

    
4038
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4039
    SnowContext *s = avctx->priv_data;
4040
    RangeCoder * const c= &s->c;
4041
    AVFrame *pict = data;
4042
    const int width= s->avctx->width;
4043
    const int height= s->avctx->height;
4044
    int level, orientation, plane_index, i, y;
4045
    uint8_t rc_header_bak[sizeof(s->header_state)];