Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 2912e87a

History | View | Annotate | Download (159 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of Libav.
5
 *
6
 * Libav is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * Libav is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with Libav; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

    
21
#include "libavutil/intmath.h"
22
#include "avcodec.h"
23
#include "dsputil.h"
24
#include "dwt.h"
25
#include "snow.h"
26

    
27
#include "rangecoder.h"
28
#include "mathops.h"
29

    
30
#include "mpegvideo.h"
31
#include "h263.h"
32

    
33
#undef NDEBUG
34
#include <assert.h>
35

    
36
static const int8_t quant3[256]={
37
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
53
};
54
static const int8_t quant3b[256]={
55
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
};
72
static const int8_t quant3bA[256]={
73
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89
};
90
static const int8_t quant5[256]={
91
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
107
};
108
static const int8_t quant7[256]={
109
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
111
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
115
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
120
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
123
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
124
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
125
};
126
static const int8_t quant9[256]={
127
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
128
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
142
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
143
};
144
static const int8_t quant11[256]={
145
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
146
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
147
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
159
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
160
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
161
};
162
static const int8_t quant13[256]={
163
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
164
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
165
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
166
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
169
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
173
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
174
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
176
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
177
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
178
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
179
};
180

    
181
#if 0 //64*cubic
182
static const uint8_t obmc32[1024]={
183
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
184
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
185
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
186
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
187
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
188
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
189
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
190
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
191
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
192
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
193
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
194
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
195
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
196
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
197
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
198
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
199
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
200
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
201
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
202
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
203
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
204
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
205
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
206
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
207
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
208
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
209
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
210
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
211
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
212
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
213
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
214
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
215
//error:0.000022
216
};
217
static const uint8_t obmc16[256]={
218
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
219
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
220
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
221
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
222
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
223
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
224
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
225
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
226
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
227
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
228
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
229
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
230
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
231
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
232
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
233
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
234
//error:0.000033
235
};
236
#elif 1 // 64*linear
237
static const uint8_t obmc32[1024]={
238
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
239
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
240
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
241
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
242
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
243
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
244
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
245
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
246
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
247
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
248
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
249
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
250
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
251
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
252
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
253
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
254
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
255
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
256
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
257
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
258
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
259
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
260
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
261
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
262
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
263
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
264
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
265
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
266
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
267
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
268
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
269
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
270
 //error:0.000020
271
};
272
static const uint8_t obmc16[256]={
273
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
274
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
275
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
276
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
277
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
278
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
281
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
282
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
283
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
284
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
285
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
286
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
287
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
288
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
289
//error:0.000015
290
};
291
#else //64*cos
292
static const uint8_t obmc32[1024]={
293
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
294
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
295
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
296
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
297
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
298
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
299
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
300
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
301
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
302
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
303
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
304
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
305
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
306
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
307
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
308
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
309
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
310
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
311
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
312
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
313
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
314
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
315
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
316
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
317
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
318
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
319
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
320
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
321
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
322
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
323
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
324
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
325
//error:0.000022
326
};
327
static const uint8_t obmc16[256]={
328
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
329
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
330
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
331
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
332
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
333
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
334
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
335
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
336
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
337
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
338
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
339
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
340
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
341
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
342
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
343
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
344
//error:0.000022
345
};
346
#endif /* 0 */
347

    
348
//linear *64
349
static const uint8_t obmc8[64]={
350
  4, 12, 20, 28, 28, 20, 12,  4,
351
 12, 36, 60, 84, 84, 60, 36, 12,
352
 20, 60,100,140,140,100, 60, 20,
353
 28, 84,140,196,196,140, 84, 28,
354
 28, 84,140,196,196,140, 84, 28,
355
 20, 60,100,140,140,100, 60, 20,
356
 12, 36, 60, 84, 84, 60, 36, 12,
357
  4, 12, 20, 28, 28, 20, 12,  4,
358
//error:0.000000
359
};
360

    
361
//linear *64
362
static const uint8_t obmc4[16]={
363
 16, 48, 48, 16,
364
 48,144,144, 48,
365
 48,144,144, 48,
366
 16, 48, 48, 16,
367
//error:0.000000
368
};
369

    
370
static const uint8_t * const obmc_tab[4]={
371
    obmc32, obmc16, obmc8, obmc4
372
};
373

    
374
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
375

    
376
typedef struct BlockNode{
377
    int16_t mx;
378
    int16_t my;
379
    uint8_t ref;
380
    uint8_t color[3];
381
    uint8_t type;
382
//#define TYPE_SPLIT    1
383
#define BLOCK_INTRA   1
384
#define BLOCK_OPT     2
385
//#define TYPE_NOCOLOR  4
386
    uint8_t level; //FIXME merge into type?
387
}BlockNode;
388

    
389
static const BlockNode null_block= { //FIXME add border maybe
390
    .color= {128,128,128},
391
    .mx= 0,
392
    .my= 0,
393
    .ref= 0,
394
    .type= 0,
395
    .level= 0,
396
};
397

    
398
#define LOG2_MB_SIZE 4
399
#define MB_SIZE (1<<LOG2_MB_SIZE)
400
#define ENCODER_EXTRA_BITS 4
401
#define HTAPS_MAX 8
402

    
403
typedef struct x_and_coeff{
404
    int16_t x;
405
    uint16_t coeff;
406
} x_and_coeff;
407

    
408
typedef struct SubBand{
409
    int level;
410
    int stride;
411
    int width;
412
    int height;
413
    int qlog;        ///< log(qscale)/log[2^(1/6)]
414
    DWTELEM *buf;
415
    IDWTELEM *ibuf;
416
    int buf_x_offset;
417
    int buf_y_offset;
418
    int stride_line; ///< Stride measured in lines, not pixels.
419
    x_and_coeff * x_coeff;
420
    struct SubBand *parent;
421
    uint8_t state[/*7*2*/ 7 + 512][32];
422
}SubBand;
423

    
424
typedef struct Plane{
425
    int width;
426
    int height;
427
    SubBand band[MAX_DECOMPOSITIONS][4];
428

    
429
    int htaps;
430
    int8_t hcoeff[HTAPS_MAX/2];
431
    int diag_mc;
432
    int fast_mc;
433

    
434
    int last_htaps;
435
    int8_t last_hcoeff[HTAPS_MAX/2];
436
    int last_diag_mc;
437
}Plane;
438

    
439
typedef struct SnowContext{
440

    
441
    AVCodecContext *avctx;
442
    RangeCoder c;
443
    DSPContext dsp;
444
    DWTContext dwt;
445
    AVFrame new_picture;
446
    AVFrame input_picture;              ///< new_picture with the internal linesizes
447
    AVFrame current_picture;
448
    AVFrame last_picture[MAX_REF_FRAMES];
449
    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
450
    AVFrame mconly_picture;
451
//     uint8_t q_context[16];
452
    uint8_t header_state[32];
453
    uint8_t block_state[128 + 32*128];
454
    int keyframe;
455
    int always_reset;
456
    int version;
457
    int spatial_decomposition_type;
458
    int last_spatial_decomposition_type;
459
    int temporal_decomposition_type;
460
    int spatial_decomposition_count;
461
    int last_spatial_decomposition_count;
462
    int temporal_decomposition_count;
463
    int max_ref_frames;
464
    int ref_frames;
465
    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
466
    uint32_t *ref_scores[MAX_REF_FRAMES];
467
    DWTELEM *spatial_dwt_buffer;
468
    IDWTELEM *spatial_idwt_buffer;
469
    int colorspace_type;
470
    int chroma_h_shift;
471
    int chroma_v_shift;
472
    int spatial_scalability;
473
    int qlog;
474
    int last_qlog;
475
    int lambda;
476
    int lambda2;
477
    int pass1_rc;
478
    int mv_scale;
479
    int last_mv_scale;
480
    int qbias;
481
    int last_qbias;
482
#define QBIAS_SHIFT 3
483
    int b_width;
484
    int b_height;
485
    int block_max_depth;
486
    int last_block_max_depth;
487
    Plane plane[MAX_PLANES];
488
    BlockNode *block;
489
#define ME_CACHE_SIZE 1024
490
    int me_cache[ME_CACHE_SIZE];
491
    int me_cache_generation;
492
    slice_buffer sb;
493

    
494
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
495

    
496
    uint8_t *scratchbuf;
497
}SnowContext;
498

    
499
#ifdef __sgi
500
// Avoid a name clash on SGI IRIX
501
#undef qexp
502
#endif
503
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
504
static uint8_t qexp[QROOT];
505

    
506
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
507
    int i;
508

    
509
    if(v){
510
        const int a= FFABS(v);
511
        const int e= av_log2(a);
512
#if 1
513
        const int el= FFMIN(e, 10);
514
        put_rac(c, state+0, 0);
515

    
516
        for(i=0; i<el; i++){
517
            put_rac(c, state+1+i, 1);  //1..10
518
        }
519
        for(; i<e; i++){
520
            put_rac(c, state+1+9, 1);  //1..10
521
        }
522
        put_rac(c, state+1+FFMIN(i,9), 0);
523

    
524
        for(i=e-1; i>=el; i--){
525
            put_rac(c, state+22+9, (a>>i)&1); //22..31
526
        }
527
        for(; i>=0; i--){
528
            put_rac(c, state+22+i, (a>>i)&1); //22..31
529
        }
530

    
531
        if(is_signed)
532
            put_rac(c, state+11 + el, v < 0); //11..21
533
#else
534

    
535
        put_rac(c, state+0, 0);
536
        if(e<=9){
537
            for(i=0; i<e; i++){
538
                put_rac(c, state+1+i, 1);  //1..10
539
            }
540
            put_rac(c, state+1+i, 0);
541

    
542
            for(i=e-1; i>=0; i--){
543
                put_rac(c, state+22+i, (a>>i)&1); //22..31
544
            }
545

    
546
            if(is_signed)
547
                put_rac(c, state+11 + e, v < 0); //11..21
548
        }else{
549
            for(i=0; i<e; i++){
550
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
551
            }
552
            put_rac(c, state+1+9, 0);
553

    
554
            for(i=e-1; i>=0; i--){
555
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
556
            }
557

    
558
            if(is_signed)
559
                put_rac(c, state+11 + 10, v < 0); //11..21
560
        }
561
#endif /* 1 */
562
    }else{
563
        put_rac(c, state+0, 1);
564
    }
565
}
566

    
567
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
568
    if(get_rac(c, state+0))
569
        return 0;
570
    else{
571
        int i, e, a;
572
        e= 0;
573
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
574
            e++;
575
        }
576

    
577
        a= 1;
578
        for(i=e-1; i>=0; i--){
579
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
580
        }
581

    
582
        e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
583
        return (a^e)-e;
584
    }
585
}
586

    
587
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
588
    int i;
589
    int r= log2>=0 ? 1<<log2 : 1;
590

    
591
    assert(v>=0);
592
    assert(log2>=-4);
593

    
594
    while(v >= r){
595
        put_rac(c, state+4+log2, 1);
596
        v -= r;
597
        log2++;
598
        if(log2>0) r+=r;
599
    }
600
    put_rac(c, state+4+log2, 0);
601

    
602
    for(i=log2-1; i>=0; i--){
603
        put_rac(c, state+31-i, (v>>i)&1);
604
    }
605
}
606

    
607
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
608
    int i;
609
    int r= log2>=0 ? 1<<log2 : 1;
610
    int v=0;
611

    
612
    assert(log2>=-4);
613

    
614
    while(get_rac(c, state+4+log2)){
615
        v+= r;
616
        log2++;
617
        if(log2>0) r+=r;
618
    }
619

    
620
    for(i=log2-1; i>=0; i--){
621
        v+= get_rac(c, state+31-i)<<i;
622
    }
623

    
624
    return v;
625
}
626

    
627
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
628
    const int w= b->width;
629
    const int h= b->height;
630
    int x,y;
631

    
632
    int run, runs;
633
    x_and_coeff *xc= b->x_coeff;
634
    x_and_coeff *prev_xc= NULL;
635
    x_and_coeff *prev2_xc= xc;
636
    x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
637
    x_and_coeff *prev_parent_xc= parent_xc;
638

    
639
    runs= get_symbol2(&s->c, b->state[30], 0);
640
    if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
641
    else           run= INT_MAX;
642

    
643
    for(y=0; y<h; y++){
644
        int v=0;
645
        int lt=0, t=0, rt=0;
646

    
647
        if(y && prev_xc->x == 0){
648
            rt= prev_xc->coeff;
649
        }
650
        for(x=0; x<w; x++){
651
            int p=0;
652
            const int l= v;
653

    
654
            lt= t; t= rt;
655

    
656
            if(y){
657
                if(prev_xc->x <= x)
658
                    prev_xc++;
659
                if(prev_xc->x == x + 1)
660
                    rt= prev_xc->coeff;
661
                else
662
                    rt=0;
663
            }
664
            if(parent_xc){
665
                if(x>>1 > parent_xc->x){
666
                    parent_xc++;
667
                }
668
                if(x>>1 == parent_xc->x){
669
                    p= parent_xc->coeff;
670
                }
671
            }
672
            if(/*ll|*/l|lt|t|rt|p){
673
                int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
674

    
675
                v=get_rac(&s->c, &b->state[0][context]);
676
                if(v){
677
                    v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
678
                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
679

    
680
                    xc->x=x;
681
                    (xc++)->coeff= v;
682
                }
683
            }else{
684
                if(!run){
685
                    if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
686
                    else           run= INT_MAX;
687
                    v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
688
                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
689

    
690
                    xc->x=x;
691
                    (xc++)->coeff= v;
692
                }else{
693
                    int max_run;
694
                    run--;
695
                    v=0;
696

    
697
                    if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
698
                    else  max_run= FFMIN(run, w-x-1);
699
                    if(parent_xc)
700
                        max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
701
                    x+= max_run;
702
                    run-= max_run;
703
                }
704
            }
705
        }
706
        (xc++)->x= w+1; //end marker
707
        prev_xc= prev2_xc;
708
        prev2_xc= xc;
709

    
710
        if(parent_xc){
711
            if(y&1){
712
                while(parent_xc->x != parent->width+1)
713
                    parent_xc++;
714
                parent_xc++;
715
                prev_parent_xc= parent_xc;
716
            }else{
717
                parent_xc= prev_parent_xc;
718
            }
719
        }
720
    }
721

    
722
    (xc++)->x= w+1; //end marker
723
}
724

    
725
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
726
    const int w= b->width;
727
    int y;
728
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
729
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
730
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
731
    int new_index = 0;
732

    
733
    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
734
        qadd= 0;
735
        qmul= 1<<QEXPSHIFT;
736
    }
737

    
738
    /* If we are on the second or later slice, restore our index. */
739
    if (start_y != 0)
740
        new_index = save_state[0];
741

    
742

    
743
    for(y=start_y; y<h; y++){
744
        int x = 0;
745
        int v;
746
        IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
747
        memset(line, 0, b->width*sizeof(IDWTELEM));
748
        v = b->x_coeff[new_index].coeff;
749
        x = b->x_coeff[new_index++].x;
750
        while(x < w){
751
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
752
            register int u= -(v&1);
753
            line[x] = (t^u) - u;
754

    
755
            v = b->x_coeff[new_index].coeff;
756
            x = b->x_coeff[new_index++].x;
757
        }
758
    }
759

    
760
    /* Save our variables for the next slice. */
761
    save_state[0] = new_index;
762

    
763
    return;
764
}
765

    
766
static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
767
    int plane_index, level, orientation;
768

    
769
    for(plane_index=0; plane_index<3; plane_index++){
770
        for(level=0; level<MAX_DECOMPOSITIONS; level++){
771
            for(orientation=level ? 1:0; orientation<4; orientation++){
772
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
773
            }
774
        }
775
    }
776
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
777
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
778
}
779

    
780
static int alloc_blocks(SnowContext *s){
781
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
782
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
783

    
784
    s->b_width = w;
785
    s->b_height= h;
786

    
787
    av_free(s->block);
788
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
789
    return 0;
790
}
791

    
792
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
793
    uint8_t *bytestream= d->bytestream;
794
    uint8_t *bytestream_start= d->bytestream_start;
795
    *d= *s;
796
    d->bytestream= bytestream;
797
    d->bytestream_start= bytestream_start;
798
}
799

    
800
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
801
    const int w= s->b_width << s->block_max_depth;
802
    const int rem_depth= s->block_max_depth - level;
803
    const int index= (x + y*w) << rem_depth;
804
    const int block_w= 1<<rem_depth;
805
    BlockNode block;
806
    int i,j;
807

    
808
    block.color[0]= l;
809
    block.color[1]= cb;
810
    block.color[2]= cr;
811
    block.mx= mx;
812
    block.my= my;
813
    block.ref= ref;
814
    block.type= type;
815
    block.level= level;
816

    
817
    for(j=0; j<block_w; j++){
818
        for(i=0; i<block_w; i++){
819
            s->block[index + i + j*w]= block;
820
        }
821
    }
822
}
823

    
824
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
825
    const int offset[3]= {
826
          y*c->  stride + x,
827
        ((y*c->uvstride + x)>>1),
828
        ((y*c->uvstride + x)>>1),
829
    };
830
    int i;
831
    for(i=0; i<3; i++){
832
        c->src[0][i]= src [i];
833
        c->ref[0][i]= ref [i] + offset[i];
834
    }
835
    assert(!ref_index);
836
}
837

    
838
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
839
                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
840
    if(s->ref_frames == 1){
841
        *mx = mid_pred(left->mx, top->mx, tr->mx);
842
        *my = mid_pred(left->my, top->my, tr->my);
843
    }else{
844
        const int *scale = scale_mv_ref[ref];
845
        *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
846
                       (top ->mx * scale[top ->ref] + 128) >>8,
847
                       (tr  ->mx * scale[tr  ->ref] + 128) >>8);
848
        *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
849
                       (top ->my * scale[top ->ref] + 128) >>8,
850
                       (tr  ->my * scale[tr  ->ref] + 128) >>8);
851
    }
852
}
853

    
854
static av_always_inline int same_block(BlockNode *a, BlockNode *b){
855
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
856
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
857
    }else{
858
        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
859
    }
860
}
861

    
862
static void decode_q_branch(SnowContext *s, int level, int x, int y){
863
    const int w= s->b_width << s->block_max_depth;
864
    const int rem_depth= s->block_max_depth - level;
865
    const int index= (x + y*w) << rem_depth;
866
    int trx= (x+1)<<rem_depth;
867
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
868
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
869
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
870
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
871
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
872

    
873
    if(s->keyframe){
874
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
875
        return;
876
    }
877

    
878
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
879
        int type, mx, my;
880
        int l = left->color[0];
881
        int cb= left->color[1];
882
        int cr= left->color[2];
883
        int ref = 0;
884
        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
885
        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
886
        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
887

    
888
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
889

    
890
        if(type){
891
            pred_mv(s, &mx, &my, 0, left, top, tr);
892
            l += get_symbol(&s->c, &s->block_state[32], 1);
893
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
894
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
895
        }else{
896
            if(s->ref_frames > 1)
897
                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
898
            pred_mv(s, &mx, &my, ref, left, top, tr);
899
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
900
            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
901
        }
902
        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
903
    }else{
904
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
905
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
906
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
907
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
908
    }
909
}
910

    
911
static void decode_blocks(SnowContext *s){
912
    int x, y;
913
    int w= s->b_width;
914
    int h= s->b_height;
915

    
916
    for(y=0; y<h; y++){
917
        for(x=0; x<w; x++){
918
            decode_q_branch(s, 0, x, y);
919
        }
920
    }
921
}
922

    
923
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
924
    static const uint8_t weight[64]={
925
    8,7,6,5,4,3,2,1,
926
    7,7,0,0,0,0,0,1,
927
    6,0,6,0,0,0,2,0,
928
    5,0,0,5,0,3,0,0,
929
    4,0,0,0,4,0,0,0,
930
    3,0,0,5,0,3,0,0,
931
    2,0,6,0,0,0,2,0,
932
    1,7,0,0,0,0,0,1,
933
    };
934

    
935
    static const uint8_t brane[256]={
936
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
937
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
938
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
939
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
940
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
941
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
942
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
943
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
944
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
945
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
946
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
947
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
948
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
949
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
950
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
951
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
952
    };
953

    
954
    static const uint8_t needs[16]={
955
    0,1,0,0,
956
    2,4,2,0,
957
    0,1,0,0,
958
    15
959
    };
960

    
961
    int x, y, b, r, l;
962
    int16_t tmpIt   [64*(32+HTAPS_MAX)];
963
    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
964
    int16_t *tmpI= tmpIt;
965
    uint8_t *tmp2= tmp2t[0];
966
    const uint8_t *hpel[11];
967
    assert(dx<16 && dy<16);
968
    r= brane[dx + 16*dy]&15;
969
    l= brane[dx + 16*dy]>>4;
970

    
971
    b= needs[l] | needs[r];
972
    if(p && !p->diag_mc)
973
        b= 15;
974

    
975
    if(b&5){
976
        for(y=0; y < b_h+HTAPS_MAX-1; y++){
977
            for(x=0; x < b_w; x++){
978
                int a_1=src[x + HTAPS_MAX/2-4];
979
                int a0= src[x + HTAPS_MAX/2-3];
980
                int a1= src[x + HTAPS_MAX/2-2];
981
                int a2= src[x + HTAPS_MAX/2-1];
982
                int a3= src[x + HTAPS_MAX/2+0];
983
                int a4= src[x + HTAPS_MAX/2+1];
984
                int a5= src[x + HTAPS_MAX/2+2];
985
                int a6= src[x + HTAPS_MAX/2+3];
986
                int am=0;
987
                if(!p || p->fast_mc){
988
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
989
                    tmpI[x]= am;
990
                    am= (am+16)>>5;
991
                }else{
992
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
993
                    tmpI[x]= am;
994
                    am= (am+32)>>6;
995
                }
996

    
997
                if(am&(~255)) am= ~(am>>31);
998
                tmp2[x]= am;
999
            }
1000
            tmpI+= 64;
1001
            tmp2+= stride;
1002
            src += stride;
1003
        }
1004
        src -= stride*y;
1005
    }
1006
    src += HTAPS_MAX/2 - 1;
1007
    tmp2= tmp2t[1];
1008

    
1009
    if(b&2){
1010
        for(y=0; y < b_h; y++){
1011
            for(x=0; x < b_w+1; x++){
1012
                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1013
                int a0= src[x + (HTAPS_MAX/2-3)*stride];
1014
                int a1= src[x + (HTAPS_MAX/2-2)*stride];
1015
                int a2= src[x + (HTAPS_MAX/2-1)*stride];
1016
                int a3= src[x + (HTAPS_MAX/2+0)*stride];
1017
                int a4= src[x + (HTAPS_MAX/2+1)*stride];
1018
                int a5= src[x + (HTAPS_MAX/2+2)*stride];
1019
                int a6= src[x + (HTAPS_MAX/2+3)*stride];
1020
                int am=0;
1021
                if(!p || p->fast_mc)
1022
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1023
                else
1024
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
1025

    
1026
                if(am&(~255)) am= ~(am>>31);
1027
                tmp2[x]= am;
1028
            }
1029
            src += stride;
1030
            tmp2+= stride;
1031
        }
1032
        src -= stride*y;
1033
    }
1034
    src += stride*(HTAPS_MAX/2 - 1);
1035
    tmp2= tmp2t[2];
1036
    tmpI= tmpIt;
1037
    if(b&4){
1038
        for(y=0; y < b_h; y++){
1039
            for(x=0; x < b_w; x++){
1040
                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1041
                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1042
                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1043
                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1044
                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1045
                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1046
                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1047
                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
1048
                int am=0;
1049
                if(!p || p->fast_mc)
1050
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1051
                else
1052
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
1053
                if(am&(~255)) am= ~(am>>31);
1054
                tmp2[x]= am;
1055
            }
1056
            tmpI+= 64;
1057
            tmp2+= stride;
1058
        }
1059
    }
1060

    
1061
    hpel[ 0]= src;
1062
    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
1063
    hpel[ 2]= src + 1;
1064

    
1065
    hpel[ 4]= tmp2t[1];
1066
    hpel[ 5]= tmp2t[2];
1067
    hpel[ 6]= tmp2t[1] + 1;
1068

    
1069
    hpel[ 8]= src + stride;
1070
    hpel[ 9]= hpel[1] + stride;
1071
    hpel[10]= hpel[8] + 1;
1072

    
1073
    if(b==15){
1074
        const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
1075
        const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1076
        const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1077
        const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
1078
        dx&=7;
1079
        dy&=7;
1080
        for(y=0; y < b_h; y++){
1081
            for(x=0; x < b_w; x++){
1082
                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1083
                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
1084
            }
1085
            src1+=stride;
1086
            src2+=stride;
1087
            src3+=stride;
1088
            src4+=stride;
1089
            dst +=stride;
1090
        }
1091
    }else{
1092
        const uint8_t *src1= hpel[l];
1093
        const uint8_t *src2= hpel[r];
1094
        int a= weight[((dx&7) + (8*(dy&7)))];
1095
        int b= 8-a;
1096
        for(y=0; y < b_h; y++){
1097
            for(x=0; x < b_w; x++){
1098
                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
1099
            }
1100
            src1+=stride;
1101
            src2+=stride;
1102
            dst +=stride;
1103
        }
1104
    }
1105
}
1106

    
1107
#define mca(dx,dy,b_w)\
1108
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
1109
    assert(h==b_w);\
1110
    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
1111
}
1112

    
1113
mca( 0, 0,16)
1114
mca( 8, 0,16)
1115
mca( 0, 8,16)
1116
mca( 8, 8,16)
1117
mca( 0, 0,8)
1118
mca( 8, 0,8)
1119
mca( 0, 8,8)
1120
mca( 8, 8,8)
1121

    
1122
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1123
    if(block->type & BLOCK_INTRA){
1124
        int x, y;
1125
        const int color = block->color[plane_index];
1126
        const int color4= color*0x01010101;
1127
        if(b_w==32){
1128
            for(y=0; y < b_h; y++){
1129
                *(uint32_t*)&dst[0 + y*stride]= color4;
1130
                *(uint32_t*)&dst[4 + y*stride]= color4;
1131
                *(uint32_t*)&dst[8 + y*stride]= color4;
1132
                *(uint32_t*)&dst[12+ y*stride]= color4;
1133
                *(uint32_t*)&dst[16+ y*stride]= color4;
1134
                *(uint32_t*)&dst[20+ y*stride]= color4;
1135
                *(uint32_t*)&dst[24+ y*stride]= color4;
1136
                *(uint32_t*)&dst[28+ y*stride]= color4;
1137
            }
1138
        }else if(b_w==16){
1139
            for(y=0; y < b_h; y++){
1140
                *(uint32_t*)&dst[0 + y*stride]= color4;
1141
                *(uint32_t*)&dst[4 + y*stride]= color4;
1142
                *(uint32_t*)&dst[8 + y*stride]= color4;
1143
                *(uint32_t*)&dst[12+ y*stride]= color4;
1144
            }
1145
        }else if(b_w==8){
1146
            for(y=0; y < b_h; y++){
1147
                *(uint32_t*)&dst[0 + y*stride]= color4;
1148
                *(uint32_t*)&dst[4 + y*stride]= color4;
1149
            }
1150
        }else if(b_w==4){
1151
            for(y=0; y < b_h; y++){
1152
                *(uint32_t*)&dst[0 + y*stride]= color4;
1153
            }
1154
        }else{
1155
            for(y=0; y < b_h; y++){
1156
                for(x=0; x < b_w; x++){
1157
                    dst[x + y*stride]= color;
1158
                }
1159
            }
1160
        }
1161
    }else{
1162
        uint8_t *src= s->last_picture[block->ref].data[plane_index];
1163
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
1164
        int mx= block->mx*scale;
1165
        int my= block->my*scale;
1166
        const int dx= mx&15;
1167
        const int dy= my&15;
1168
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
1169
        sx += (mx>>4) - (HTAPS_MAX/2-1);
1170
        sy += (my>>4) - (HTAPS_MAX/2-1);
1171
        src += sx + sy*stride;
1172
        if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1173
           || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
1174
            s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
1175
            src= tmp + MB_SIZE;
1176
        }
1177
//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1178
//        assert(!(b_w&(b_w-1)));
1179
        assert(b_w>1 && b_h>1);
1180
        assert((tab_index>=0 && tab_index<4) || b_w==32);
1181
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1182
            mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
1183
        else if(b_w==32){
1184
            int y;
1185
            for(y=0; y<b_h; y+=16){
1186
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1187
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1188
            }
1189
        }else if(b_w==b_h)
1190
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
1191
        else if(b_w==2*b_h){
1192
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
1193
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
1194
        }else{
1195
            assert(2*b_w==b_h);
1196
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
1197
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
1198
        }
1199
    }
1200
}
1201

    
1202
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
1203
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1204
    int y, x;
1205
    IDWTELEM * dst;
1206
    for(y=0; y<b_h; y++){
1207
        //FIXME ugly misuse of obmc_stride
1208
        const uint8_t *obmc1= obmc + y*obmc_stride;
1209
        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1210
        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1211
        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1212
        dst = slice_buffer_get_line(sb, src_y + y);
1213
        for(x=0; x<b_w; x++){
1214
            int v=   obmc1[x] * block[3][x + y*src_stride]
1215
                    +obmc2[x] * block[2][x + y*src_stride]
1216
                    +obmc3[x] * block[1][x + y*src_stride]
1217
                    +obmc4[x] * block[0][x + y*src_stride];
1218

    
1219
            v <<= 8 - LOG2_OBMC_MAX;
1220
            if(FRAC_BITS != 8){
1221
                v >>= 8 - FRAC_BITS;
1222
            }
1223
            if(add){
1224
                v += dst[x + src_x];
1225
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1226
                if(v&(~255)) v= ~(v>>31);
1227
                dst8[x + y*src_stride] = v;
1228
            }else{
1229
                dst[x + src_x] -= v;
1230
            }
1231
        }
1232
    }
1233
}
1234

    
1235
//FIXME name cleanup (b_w, block_w, b_width stuff)
1236
static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
1237
    const int b_width = s->b_width  << s->block_max_depth;
1238
    const int b_height= s->b_height << s->block_max_depth;
1239
    const int b_stride= b_width;
1240
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
1241
    BlockNode *rt= lt+1;
1242
    BlockNode *lb= lt+b_stride;
1243
    BlockNode *rb= lb+1;
1244
    uint8_t *block[4];
1245
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
1246
    uint8_t *tmp = s->scratchbuf;
1247
    uint8_t *ptmp;
1248
    int x,y;
1249

    
1250
    if(b_x<0){
1251
        lt= rt;
1252
        lb= rb;
1253
    }else if(b_x + 1 >= b_width){
1254
        rt= lt;
1255
        rb= lb;
1256
    }
1257
    if(b_y<0){
1258
        lt= lb;
1259
        rt= rb;
1260
    }else if(b_y + 1 >= b_height){
1261
        lb= lt;
1262
        rb= rt;
1263
    }
1264

    
1265
    if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
1266
        obmc -= src_x;
1267
        b_w += src_x;
1268
        if(!sliced && !offset_dst)
1269
            dst -= src_x;
1270
        src_x=0;
1271
    }else if(src_x + b_w > w){
1272
        b_w = w - src_x;
1273
    }
1274
    if(src_y<0){
1275
        obmc -= src_y*obmc_stride;
1276
        b_h += src_y;
1277
        if(!sliced && !offset_dst)
1278
            dst -= src_y*dst_stride;
1279
        src_y=0;
1280
    }else if(src_y + b_h> h){
1281
        b_h = h - src_y;
1282
    }
1283

    
1284
    if(b_w<=0 || b_h<=0) return;
1285

    
1286
    assert(src_stride > 2*MB_SIZE + 5);
1287

    
1288
    if(!sliced && offset_dst)
1289
        dst += src_x + src_y*dst_stride;
1290
    dst8+= src_x + src_y*src_stride;
1291
//    src += src_x + src_y*src_stride;
1292

    
1293
    ptmp= tmp + 3*tmp_step;
1294
    block[0]= ptmp;
1295
    ptmp+=tmp_step;
1296
    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1297

    
1298
    if(same_block(lt, rt)){
1299
        block[1]= block[0];
1300
    }else{
1301
        block[1]= ptmp;
1302
        ptmp+=tmp_step;
1303
        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1304
    }
1305

    
1306
    if(same_block(lt, lb)){
1307
        block[2]= block[0];
1308
    }else if(same_block(rt, lb)){
1309
        block[2]= block[1];
1310
    }else{
1311
        block[2]= ptmp;
1312
        ptmp+=tmp_step;
1313
        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1314
    }
1315

    
1316
    if(same_block(lt, rb) ){
1317
        block[3]= block[0];
1318
    }else if(same_block(rt, rb)){
1319
        block[3]= block[1];
1320
    }else if(same_block(lb, rb)){
1321
        block[3]= block[2];
1322
    }else{
1323
        block[3]= ptmp;
1324
        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
1325
    }
1326
#if 0
1327
    for(y=0; y<b_h; y++){
1328
        for(x=0; x<b_w; x++){
1329
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
1330
            if(add) dst[x + y*dst_stride] += v;
1331
            else    dst[x + y*dst_stride] -= v;
1332
        }
1333
    }
1334
    for(y=0; y<b_h; y++){
1335
        uint8_t *obmc2= obmc + (obmc_stride>>1);
1336
        for(x=0; x<b_w; x++){
1337
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
1338
            if(add) dst[x + y*dst_stride] += v;
1339
            else    dst[x + y*dst_stride] -= v;
1340
        }
1341
    }
1342
    for(y=0; y<b_h; y++){
1343
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1344
        for(x=0; x<b_w; x++){
1345
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
1346
            if(add) dst[x + y*dst_stride] += v;
1347
            else    dst[x + y*dst_stride] -= v;
1348
        }
1349
    }
1350
    for(y=0; y<b_h; y++){
1351
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1352
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1353
        for(x=0; x<b_w; x++){
1354
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
1355
            if(add) dst[x + y*dst_stride] += v;
1356
            else    dst[x + y*dst_stride] -= v;
1357
        }
1358
    }
1359
#else
1360
    if(sliced){
1361
        s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
1362
    }else{
1363
        for(y=0; y<b_h; y++){
1364
            //FIXME ugly misuse of obmc_stride
1365
            const uint8_t *obmc1= obmc + y*obmc_stride;
1366
            const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1367
            const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1368
            const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1369
            for(x=0; x<b_w; x++){
1370
                int v=   obmc1[x] * block[3][x + y*src_stride]
1371
                        +obmc2[x] * block[2][x + y*src_stride]
1372
                        +obmc3[x] * block[1][x + y*src_stride]
1373
                        +obmc4[x] * block[0][x + y*src_stride];
1374

    
1375
                v <<= 8 - LOG2_OBMC_MAX;
1376
                if(FRAC_BITS != 8){
1377
                    v >>= 8 - FRAC_BITS;
1378
                }
1379
                if(add){
1380
                    v += dst[x + y*dst_stride];
1381
                    v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1382
                    if(v&(~255)) v= ~(v>>31);
1383
                    dst8[x + y*src_stride] = v;
1384
                }else{
1385
                    dst[x + y*dst_stride] -= v;
1386
                }
1387
            }
1388
        }
1389
    }
1390
#endif /* 0 */
1391
}
1392

    
1393
static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
1394
    Plane *p= &s->plane[plane_index];
1395
    const int mb_w= s->b_width  << s->block_max_depth;
1396
    const int mb_h= s->b_height << s->block_max_depth;
1397
    int x, y, mb_x;
1398
    int block_size = MB_SIZE >> s->block_max_depth;
1399
    int block_w    = plane_index ? block_size/2 : block_size;
1400
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1401
    int obmc_stride= plane_index ? block_size : 2*block_size;
1402
    int ref_stride= s->current_picture.linesize[plane_index];
1403
    uint8_t *dst8= s->current_picture.data[plane_index];
1404
    int w= p->width;
1405
    int h= p->height;
1406

    
1407
    if(s->keyframe || (s->avctx->debug&512)){
1408
        if(mb_y==mb_h)
1409
            return;
1410

    
1411
        if(add){
1412
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1413
//                DWTELEM * line = slice_buffer_get_line(sb, y);
1414
                IDWTELEM * line = sb->line[y];
1415
                for(x=0; x<w; x++){
1416
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1417
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1418
                    v >>= FRAC_BITS;
1419
                    if(v&(~255)) v= ~(v>>31);
1420
                    dst8[x + y*ref_stride]= v;
1421
                }
1422
            }
1423
        }else{
1424
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1425
//                DWTELEM * line = slice_buffer_get_line(sb, y);
1426
                IDWTELEM * line = sb->line[y];
1427
                for(x=0; x<w; x++){
1428
                    line[x] -= 128 << FRAC_BITS;
1429
//                    buf[x + y*w]-= 128<<FRAC_BITS;
1430
                }
1431
            }
1432
        }
1433

    
1434
        return;
1435
    }
1436

    
1437
    for(mb_x=0; mb_x<=mb_w; mb_x++){
1438
        add_yblock(s, 1, sb, old_buffer, dst8, obmc,
1439
                   block_w*mb_x - block_w/2,
1440
                   block_w*mb_y - block_w/2,
1441
                   block_w, block_w,
1442
                   w, h,
1443
                   w, ref_stride, obmc_stride,
1444
                   mb_x - 1, mb_y - 1,
1445
                   add, 0, plane_index);
1446
    }
1447
}
1448

    
1449
static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
1450
    Plane *p= &s->plane[plane_index];
1451
    const int mb_w= s->b_width  << s->block_max_depth;
1452
    const int mb_h= s->b_height << s->block_max_depth;
1453
    int x, y, mb_x;
1454
    int block_size = MB_SIZE >> s->block_max_depth;
1455
    int block_w    = plane_index ? block_size/2 : block_size;
1456
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1457
    const int obmc_stride= plane_index ? block_size : 2*block_size;
1458
    int ref_stride= s->current_picture.linesize[plane_index];
1459
    uint8_t *dst8= s->current_picture.data[plane_index];
1460
    int w= p->width;
1461
    int h= p->height;
1462

    
1463
    if(s->keyframe || (s->avctx->debug&512)){
1464
        if(mb_y==mb_h)
1465
            return;
1466

    
1467
        if(add){
1468
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1469
                for(x=0; x<w; x++){
1470
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1471
                    v >>= FRAC_BITS;
1472
                    if(v&(~255)) v= ~(v>>31);
1473
                    dst8[x + y*ref_stride]= v;
1474
                }
1475
            }
1476
        }else{
1477
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1478
                for(x=0; x<w; x++){
1479
                    buf[x + y*w]-= 128<<FRAC_BITS;
1480
                }
1481
            }
1482
        }
1483

    
1484
        return;
1485
    }
1486

    
1487
    for(mb_x=0; mb_x<=mb_w; mb_x++){
1488
        add_yblock(s, 0, NULL, buf, dst8, obmc,
1489
                   block_w*mb_x - block_w/2,
1490
                   block_w*mb_y - block_w/2,
1491
                   block_w, block_w,
1492
                   w, h,
1493
                   w, ref_stride, obmc_stride,
1494
                   mb_x - 1, mb_y - 1,
1495
                   add, 1, plane_index);
1496
    }
1497
}
1498

    
1499
static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
1500
    const int mb_h= s->b_height << s->block_max_depth;
1501
    int mb_y;
1502
    for(mb_y=0; mb_y<=mb_h; mb_y++)
1503
        predict_slice(s, buf, plane_index, add, mb_y);
1504
}
1505

    
1506
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
1507
    const int w= b->width;
1508
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1509
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1510
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1511
    int x,y;
1512

    
1513
    if(s->qlog == LOSSLESS_QLOG) return;
1514

    
1515
    for(y=start_y; y<end_y; y++){
1516
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1517
        IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1518
        for(x=0; x<w; x++){
1519
            int i= line[x];
1520
            if(i<0){
1521
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1522
            }else if(i>0){
1523
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
1524
            }
1525
        }
1526
    }
1527
}
1528

    
1529
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
1530
    const int w= b->width;
1531
    int x,y;
1532

    
1533
    IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
1534
    IDWTELEM * prev;
1535

    
1536
    if (start_y != 0)
1537
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1538

    
1539
    for(y=start_y; y<end_y; y++){
1540
        prev = line;
1541
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1542
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1543
        for(x=0; x<w; x++){
1544
            if(x){
1545
                if(use_median){
1546
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
1547
                    else  line[x] += line[x - 1];
1548
                }else{
1549
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
1550
                    else  line[x] += line[x - 1];
1551
                }
1552
            }else{
1553
                if(y) line[x] += prev[x];
1554
            }
1555
        }
1556
    }
1557
}
1558

    
1559
static void decode_qlogs(SnowContext *s){
1560
    int plane_index, level, orientation;
1561

    
1562
    for(plane_index=0; plane_index<3; plane_index++){
1563
        for(level=0; level<s->spatial_decomposition_count; level++){
1564
            for(orientation=level ? 1:0; orientation<4; orientation++){
1565
                int q;
1566
                if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1567
                else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1568
                else                    q= get_symbol(&s->c, s->header_state, 1);
1569
                s->plane[plane_index].band[level][orientation].qlog= q;
1570
            }
1571
        }
1572
    }
1573
}
1574

    
1575
#define GET_S(dst, check) \
1576
    tmp= get_symbol(&s->c, s->header_state, 0);\
1577
    if(!(check)){\
1578
        av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
1579
        return -1;\
1580
    }\
1581
    dst= tmp;
1582

    
1583
static int decode_header(SnowContext *s){
1584
    int plane_index, tmp;
1585
    uint8_t kstate[32];
1586

    
1587
    memset(kstate, MID_STATE, sizeof(kstate));
1588

    
1589
    s->keyframe= get_rac(&s->c, kstate);
1590
    if(s->keyframe || s->always_reset){
1591
        reset_contexts(s);
1592
        s->spatial_decomposition_type=
1593
        s->qlog=
1594
        s->qbias=
1595
        s->mv_scale=
1596
        s->block_max_depth= 0;
1597
    }
1598
    if(s->keyframe){
1599
        GET_S(s->version, tmp <= 0U)
1600
        s->always_reset= get_rac(&s->c, s->header_state);
1601
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1602
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1603
        GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1604
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1605
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1606
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1607
        s->spatial_scalability= get_rac(&s->c, s->header_state);
1608
//        s->rate_scalability= get_rac(&s->c, s->header_state);
1609
        GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
1610
        s->max_ref_frames++;
1611

    
1612
        decode_qlogs(s);
1613
    }
1614

    
1615
    if(!s->keyframe){
1616
        if(get_rac(&s->c, s->header_state)){
1617
            for(plane_index=0; plane_index<2; plane_index++){
1618
                int htaps, i, sum=0;
1619
                Plane *p= &s->plane[plane_index];
1620
                p->diag_mc= get_rac(&s->c, s->header_state);
1621
                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
1622
                if((unsigned)htaps > HTAPS_MAX || htaps==0)
1623
                    return -1;
1624
                p->htaps= htaps;
1625
                for(i= htaps/2; i; i--){
1626
                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
1627
                    sum += p->hcoeff[i];
1628
                }
1629
                p->hcoeff[0]= 32-sum;
1630
            }
1631
            s->plane[2].diag_mc= s->plane[1].diag_mc;
1632
            s->plane[2].htaps  = s->plane[1].htaps;
1633
            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
1634
        }
1635
        if(get_rac(&s->c, s->header_state)){
1636
            GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1637
            decode_qlogs(s);
1638
        }
1639
    }
1640

    
1641
    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
1642
    if(s->spatial_decomposition_type > 1U){
1643
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1644
        return -1;
1645
    }
1646
    if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
1647
             s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
1648
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
1649
        return -1;
1650
    }
1651

    
1652
    s->qlog           += get_symbol(&s->c, s->header_state, 1);
1653
    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
1654
    s->qbias          += get_symbol(&s->c, s->header_state, 1);
1655
    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
1656
    if(s->block_max_depth > 1 || s->block_max_depth < 0){
1657
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
1658
        s->block_max_depth= 0;
1659
        return -1;
1660
    }
1661

    
1662
    return 0;
1663
}
1664

    
1665
static void init_qexp(void){
1666
    int i;
1667
    double v=128;
1668

    
1669
    for(i=0; i<QROOT; i++){
1670
        qexp[i]= lrintf(v);
1671
        v *= pow(2, 1.0 / QROOT);
1672
    }
1673
}
1674

    
1675
static av_cold int common_init(AVCodecContext *avctx){
1676
    SnowContext *s = avctx->priv_data;
1677
    int width, height;
1678
    int i, j;
1679

    
1680
    s->avctx= avctx;
1681
    s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
1682

    
1683
    dsputil_init(&s->dsp, avctx);
1684
    ff_dwt_init(&s->dwt);
1685

    
1686
#define mcf(dx,dy)\
1687
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
1688
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
1689
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
1690
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
1691
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
1692
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
1693

    
1694
    mcf( 0, 0)
1695
    mcf( 4, 0)
1696
    mcf( 8, 0)
1697
    mcf(12, 0)
1698
    mcf( 0, 4)
1699
    mcf( 4, 4)
1700
    mcf( 8, 4)
1701
    mcf(12, 4)
1702
    mcf( 0, 8)
1703
    mcf( 4, 8)
1704
    mcf( 8, 8)
1705
    mcf(12, 8)
1706
    mcf( 0,12)
1707
    mcf( 4,12)
1708
    mcf( 8,12)
1709
    mcf(12,12)
1710

    
1711
#define mcfh(dx,dy)\
1712
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
1713
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
1714
        mc_block_hpel ## dx ## dy ## 16;\
1715
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
1716
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
1717
        mc_block_hpel ## dx ## dy ## 8;
1718

    
1719
    mcfh(0, 0)
1720
    mcfh(8, 0)
1721
    mcfh(0, 8)
1722
    mcfh(8, 8)
1723

    
1724
    if(!qexp[0])
1725
        init_qexp();
1726

    
1727
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
1728

    
1729
    width= s->avctx->width;
1730
    height= s->avctx->height;
1731

    
1732
    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
1733
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
1734

    
1735
    for(i=0; i<MAX_REF_FRAMES; i++)
1736
        for(j=0; j<MAX_REF_FRAMES; j++)
1737
            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
1738

    
1739
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
1740
    s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
1741

    
1742
    return 0;
1743
}
1744

    
1745
static int common_init_after_header(AVCodecContext *avctx){
1746
    SnowContext *s = avctx->priv_data;
1747
    int plane_index, level, orientation;
1748

    
1749
    for(plane_index=0; plane_index<3; plane_index++){
1750
        int w= s->avctx->width;
1751
        int h= s->avctx->height;
1752

    
1753
        if(plane_index){
1754
            w>>= s->chroma_h_shift;
1755
            h>>= s->chroma_v_shift;
1756
        }
1757
        s->plane[plane_index].width = w;
1758
        s->plane[plane_index].height= h;
1759

    
1760
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
1761
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
1762
                SubBand *b= &s->plane[plane_index].band[level][orientation];
1763

    
1764
                b->buf= s->spatial_dwt_buffer;
1765
                b->level= level;
1766
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
1767
                b->width = (w + !(orientation&1))>>1;
1768
                b->height= (h + !(orientation>1))>>1;
1769

    
1770
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
1771
                b->buf_x_offset = 0;
1772
                b->buf_y_offset = 0;
1773

    
1774
                if(orientation&1){
1775
                    b->buf += (w+1)>>1;
1776
                    b->buf_x_offset = (w+1)>>1;
1777
                }
1778
                if(orientation>1){
1779
                    b->buf += b->stride>>1;
1780
                    b->buf_y_offset = b->stride_line >> 1;
1781
                }
1782
                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
1783

    
1784
                if(level)
1785
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
1786
                //FIXME avoid this realloc
1787
                av_freep(&b->x_coeff);
1788
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
1789
            }
1790
            w= (w+1)>>1;
1791
            h= (h+1)>>1;
1792
        }
1793
    }
1794

    
1795
    return 0;
1796
}
1797

    
1798
#define QUANTIZE2 0
1799

    
1800
#if QUANTIZE2==1
1801
#define Q2_STEP 8
1802

    
1803
static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
1804
    SubBand *b= &p->band[level][orientation];
1805
    int x, y;
1806
    int xo=0;
1807
    int yo=0;
1808
    int step= 1 << (s->spatial_decomposition_count - level);
1809

    
1810
    if(orientation&1)
1811
        xo= step>>1;
1812
    if(orientation&2)
1813
        yo= step>>1;
1814

    
1815
    //FIXME bias for nonzero ?
1816
    //FIXME optimize
1817
    memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
1818
    for(y=0; y<p->height; y++){
1819
        for(x=0; x<p->width; x++){
1820
            int sx= (x-xo + step/2) / step / Q2_STEP;
1821
            int sy= (y-yo + step/2) / step / Q2_STEP;
1822
            int v= r0[x + y*p->width] - r1[x + y*p->width];
1823
            assert(sx>=0 && sy>=0 && sx < score_stride);
1824
            v= ((v+8)>>4)<<4;
1825
            score[sx + sy*score_stride] += v*v;
1826
            assert(score[sx + sy*score_stride] >= 0);
1827
        }
1828
    }
1829
}
1830

    
1831
static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
1832
    int level, orientation;
1833

    
1834
    for(level=0; level<s->spatial_decomposition_count; level++){
1835
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
1836
            SubBand *b= &p->band[level][orientation];
1837
            IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
1838

    
1839
            dequantize(s, b, dst, b->stride);
1840
        }
1841
    }
1842
}
1843

    
1844
static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
1845
    int level, orientation, ys, xs, x, y, pass;
1846
    IDWTELEM best_dequant[height * stride];
1847
    IDWTELEM idwt2_buffer[height * stride];
1848
    const int score_stride= (width + 10)/Q2_STEP;
1849
    int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1850
    int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1851
    int threshold= (s->m.lambda * s->m.lambda) >> 6;
1852

    
1853
    //FIXME pass the copy cleanly ?
1854

    
1855
//    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
1856
    ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
1857

    
1858
    for(level=0; level<s->spatial_decomposition_count; level++){
1859
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
1860
            SubBand *b= &p->band[level][orientation];
1861
            IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1862
             DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
1863
            assert(src == b->buf); // code does not depend on this but it is true currently
1864

    
1865
            quantize(s, b, dst, src, b->stride, s->qbias);
1866
        }
1867
    }
1868
    for(pass=0; pass<1; pass++){
1869
        if(s->qbias == 0) //keyframe
1870
            continue;
1871
        for(level=0; level<s->spatial_decomposition_count; level++){
1872
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
1873
                SubBand *b= &p->band[level][orientation];
1874
                IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
1875
                IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1876

    
1877
                for(ys= 0; ys<Q2_STEP; ys++){
1878
                    for(xs= 0; xs<Q2_STEP; xs++){
1879
                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1880
                        dequantize_all(s, p, idwt2_buffer, width, height);
1881
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1882
                        find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1883
                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1884
                        for(y=ys; y<b->height; y+= Q2_STEP){
1885
                            for(x=xs; x<b->width; x+= Q2_STEP){
1886
                                if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
1887
                                if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
1888
                                //FIXME try more than just --
1889
                            }
1890
                        }
1891
                        dequantize_all(s, p, idwt2_buffer, width, height);
1892
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1893
                        find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1894
                        for(y=ys; y<b->height; y+= Q2_STEP){
1895
                            for(x=xs; x<b->width; x+= Q2_STEP){
1896
                                int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
1897
                                if(score[score_idx] <= best_score[score_idx] + threshold){
1898
                                    best_score[score_idx]= score[score_idx];
1899
                                    if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
1900
                                    if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
1901
                                    //FIXME copy instead
1902
                                }
1903
                            }
1904
                        }
1905
                    }
1906
                }
1907
            }
1908
        }
1909
    }
1910
    memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
1911
}
1912

    
1913
#endif /* QUANTIZE2==1 */
1914

    
1915
#define USE_HALFPEL_PLANE 0
1916

    
1917
static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
1918
    int p,x,y;
1919

    
1920
    assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
1921

    
1922
    for(p=0; p<3; p++){
1923
        int is_chroma= !!p;
1924
        int w= s->avctx->width  >>is_chroma;
1925
        int h= s->avctx->height >>is_chroma;
1926
        int ls= frame->linesize[p];
1927
        uint8_t *src= frame->data[p];
1928

    
1929
        halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1930
        halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1931
        halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1932

    
1933
        halfpel[0][p]= src;
1934
        for(y=0; y<h; y++){
1935
            for(x=0; x<w; x++){
1936
                int i= y*ls + x;
1937

    
1938
                halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
1939
            }
1940
        }
1941
        for(y=0; y<h; y++){
1942
            for(x=0; x<w; x++){
1943
                int i= y*ls + x;
1944

    
1945
                halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1946
            }
1947
        }
1948
        src= halfpel[1][p];
1949
        for(y=0; y<h; y++){
1950
            for(x=0; x<w; x++){
1951
                int i= y*ls + x;
1952

    
1953
                halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1954
            }
1955
        }
1956

    
1957
//FIXME border!
1958
    }
1959
}
1960

    
1961
static void release_buffer(AVCodecContext *avctx){
1962
    SnowContext *s = avctx->priv_data;
1963
    int i;
1964

    
1965
    if(s->last_picture[s->max_ref_frames-1].data[0]){
1966
        avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
1967
        for(i=0; i<9; i++)
1968
            if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
1969
                av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
1970
    }
1971
}
1972

    
1973
static int frame_start(SnowContext *s){
1974
   AVFrame tmp;
1975
   int w= s->avctx->width; //FIXME round up to x16 ?
1976
   int h= s->avctx->height;
1977

    
1978
    if(s->current_picture.data[0]){
1979
        s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
1980
        s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
1981
        s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
1982
    }
1983

    
1984
    release_buffer(s->avctx);
1985

    
1986
    tmp= s->last_picture[s->max_ref_frames-1];
1987
    memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
1988
    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
1989
    if(USE_HALFPEL_PLANE && s->current_picture.data[0])
1990
        halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
1991
    s->last_picture[0]= s->current_picture;
1992
    s->current_picture= tmp;
1993

    
1994
    if(s->keyframe){
1995
        s->ref_frames= 0;
1996
    }else{
1997
        int i;
1998
        for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
1999
            if(i && s->last_picture[i-1].key_frame)
2000
                break;
2001
        s->ref_frames= i;
2002
        if(s->ref_frames==0){
2003
            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
2004
            return -1;
2005
        }
2006
    }
2007

    
2008
    s->current_picture.reference= 1;
2009
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2010
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2011
        return -1;
2012
    }
2013

    
2014
    s->current_picture.key_frame= s->keyframe;
2015

    
2016
    return 0;
2017
}
2018

    
2019
static av_cold void common_end(SnowContext *s){
2020
    int plane_index, level, orientation, i;
2021

    
2022
    av_freep(&s->spatial_dwt_buffer);
2023
    av_freep(&s->spatial_idwt_buffer);
2024

    
2025
    s->m.me.temp= NULL;
2026
    av_freep(&s->m.me.scratchpad);
2027
    av_freep(&s->m.me.map);
2028
    av_freep(&s->m.me.score_map);
2029
    av_freep(&s->m.obmc_scratchpad);
2030

    
2031
    av_freep(&s->block);
2032
    av_freep(&s->scratchbuf);
2033

    
2034
    for(i=0; i<MAX_REF_FRAMES; i++){
2035
        av_freep(&s->ref_mvs[i]);
2036
        av_freep(&s->ref_scores[i]);
2037
        if(s->last_picture[i].data[0])
2038
            s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
2039
    }
2040

    
2041
    for(plane_index=0; plane_index<3; plane_index++){
2042
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2043
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2044
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2045

    
2046
                av_freep(&b->x_coeff);
2047
            }
2048
        }
2049
    }
2050
    if (s->mconly_picture.data[0])
2051
        s->avctx->release_buffer(s->avctx, &s->mconly_picture);
2052
    if (s->current_picture.data[0])
2053
        s->avctx->release_buffer(s->avctx, &s->current_picture);
2054
}
2055

    
2056
static av_cold int decode_init(AVCodecContext *avctx)
2057
{
2058
    avctx->pix_fmt= PIX_FMT_YUV420P;
2059

    
2060
    common_init(avctx);
2061

    
2062
    return 0;
2063
}
2064

    
2065
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
2066
    const uint8_t *buf = avpkt->data;
2067
    int buf_size = avpkt->size;
2068
    SnowContext *s = avctx->priv_data;
2069
    RangeCoder * const c= &s->c;
2070
    int bytes_read;
2071
    AVFrame *picture = data;
2072
    int level, orientation, plane_index;
2073

    
2074
    ff_init_range_decoder(c, buf, buf_size);
2075
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2076

    
2077
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2078
    if(decode_header(s)<0)
2079
        return -1;
2080
    common_init_after_header(avctx);
2081

    
2082
    // realloc slice buffer for the case that spatial_decomposition_count changed
2083
    ff_slice_buffer_destroy(&s->sb);
2084
    ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
2085

    
2086
    for(plane_index=0; plane_index<3; plane_index++){
2087
        Plane *p= &s->plane[plane_index];
2088
        p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
2089
                                              && p->hcoeff[1]==-10
2090
                                              && p->hcoeff[2]==2;
2091
    }
2092

    
2093
    alloc_blocks(s);
2094

    
2095
    if(frame_start(s) < 0)
2096
        return -1;
2097
    //keyframe flag duplication mess FIXME
2098
    if(avctx->debug&FF_DEBUG_PICT_INFO)
2099
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2100

    
2101
    decode_blocks(s);
2102

    
2103
    for(plane_index=0; plane_index<3; plane_index++){
2104
        Plane *p= &s->plane[plane_index];
2105
        int w= p->width;
2106
        int h= p->height;
2107
        int x, y;
2108
        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
2109

    
2110
        if(s->avctx->debug&2048){
2111
            memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2112
            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2113

    
2114
            for(y=0; y<h; y++){
2115
                for(x=0; x<w; x++){
2116
                    int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2117
                    s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2118
                }
2119
            }
2120
        }
2121

    
2122
        {
2123
        for(level=0; level<s->spatial_decomposition_count; level++){
2124
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2125
                SubBand *b= &p->band[level][orientation];
2126
                unpack_coeffs(s, b, b->parent, orientation);
2127
            }
2128
        }
2129
        }
2130

    
2131
        {
2132
        const int mb_h= s->b_height << s->block_max_depth;
2133
        const int block_size = MB_SIZE >> s->block_max_depth;
2134
        const int block_w    = plane_index ? block_size/2 : block_size;
2135
        int mb_y;
2136
        DWTCompose cs[MAX_DECOMPOSITIONS];
2137
        int yd=0, yq=0;
2138
        int y;
2139
        int end_y;
2140

    
2141
        ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
2142
        for(mb_y=0; mb_y<=mb_h; mb_y++){
2143

    
2144
            int slice_starty = block_w*mb_y;
2145
            int slice_h = block_w*(mb_y+1);
2146
            if (!(s->keyframe || s->avctx->debug&512)){
2147
                slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
2148
                slice_h -= (block_w >> 1);
2149
            }
2150

    
2151
            for(level=0; level<s->spatial_decomposition_count; level++){
2152
                for(orientation=level ? 1 : 0; orientation<4; orientation++){
2153
                    SubBand *b= &p->band[level][orientation];
2154
                    int start_y;
2155
                    int end_y;
2156
                    int our_mb_start = mb_y;
2157
                    int our_mb_end = (mb_y + 1);
2158
                    const int extra= 3;
2159
                    start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
2160
                    end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2161
                    if (!(s->keyframe || s->avctx->debug&512)){
2162
                        start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2163
                        end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2164
                    }
2165
                    start_y = FFMIN(b->height, start_y);
2166
                    end_y = FFMIN(b->height, end_y);
2167

    
2168
                    if (start_y != end_y){
2169
                        if (orientation == 0){
2170
                            SubBand * correlate_band = &p->band[0][0];
2171
                            int correlate_end_y = FFMIN(b->height, end_y + 1);
2172
                            int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
2173
                            decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
2174
                            correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
2175
                            dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2176
                        }
2177
                        else
2178
                            decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
2179
                    }
2180
                }
2181
            }
2182

    
2183
            for(; yd<slice_h; yd+=4){
2184
                ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
2185
            }
2186

    
2187
            if(s->qlog == LOSSLESS_QLOG){
2188
                for(; yq<slice_h && yq<h; yq++){
2189
                    IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2190
                    for(x=0; x<w; x++){
2191
                        line[x] <<= FRAC_BITS;
2192
                    }
2193
                }
2194
            }
2195

    
2196
            predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
2197

    
2198
            y = FFMIN(p->height, slice_starty);
2199
            end_y = FFMIN(p->height, slice_h);
2200
            while(y < end_y)
2201
                ff_slice_buffer_release(&s->sb, y++);
2202
        }
2203

    
2204
        ff_slice_buffer_flush(&s->sb);
2205
        }
2206

    
2207
    }
2208

    
2209
    emms_c();
2210

    
2211
    release_buffer(avctx);
2212

    
2213
    if(!(s->avctx->debug&2048))
2214
        *picture= s->current_picture;
2215
    else
2216
        *picture= s->mconly_picture;
2217

    
2218
    *data_size = sizeof(AVFrame);
2219

    
2220
    bytes_read= c->bytestream - c->bytestream_start;
2221
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
2222

    
2223
    return bytes_read;
2224
}
2225

    
2226
static av_cold int decode_end(AVCodecContext *avctx)
2227
{
2228
    SnowContext *s = avctx->priv_data;
2229

    
2230
    ff_slice_buffer_destroy(&s->sb);
2231

    
2232
    common_end(s);
2233

    
2234
    return 0;
2235
}
2236

    
2237
AVCodec ff_snow_decoder = {
2238
    "snow",
2239
    AVMEDIA_TYPE_VIDEO,
2240
    CODEC_ID_SNOW,
2241
    sizeof(SnowContext),
2242
    decode_init,
2243
    NULL,
2244
    decode_end,
2245
    decode_frame,
2246
    CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2247
    NULL,
2248
    .long_name = NULL_IF_CONFIG_SMALL("Snow"),
2249
};
2250

    
2251
#if CONFIG_SNOW_ENCODER
2252
static av_cold int encode_init(AVCodecContext *avctx)
2253
{
2254
    SnowContext *s = avctx->priv_data;
2255
    int plane_index;
2256

    
2257
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2258
        av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2259
               "Use vstrict=-2 / -strict -2 to use it anyway.\n");
2260
        return -1;
2261
    }
2262

    
2263
    if(avctx->prediction_method == DWT_97
2264
       && (avctx->flags & CODEC_FLAG_QSCALE)
2265
       && avctx->global_quality == 0){
2266
        av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
2267
        return -1;
2268
    }
2269

    
2270
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2271

    
2272
    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2273
    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
2274

    
2275
    for(plane_index=0; plane_index<3; plane_index++){
2276
        s->plane[plane_index].diag_mc= 1;
2277
        s->plane[plane_index].htaps= 6;
2278
        s->plane[plane_index].hcoeff[0]=  40;
2279
        s->plane[plane_index].hcoeff[1]= -10;
2280
        s->plane[plane_index].hcoeff[2]=   2;
2281
        s->plane[plane_index].fast_mc= 1;
2282
    }
2283

    
2284
    common_init(avctx);
2285
    alloc_blocks(s);
2286

    
2287
    s->version=0;
2288

    
2289
    s->m.avctx   = avctx;
2290
    s->m.flags   = avctx->flags;
2291
    s->m.bit_rate= avctx->bit_rate;
2292

    
2293
    s->m.me.temp      =
2294
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2295
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2296
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2297
    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2298
    h263_encode_init(&s->m); //mv_penalty
2299

    
2300
    s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
2301

    
2302
    if(avctx->flags&CODEC_FLAG_PASS1){
2303
        if(!avctx->stats_out)
2304
            avctx->stats_out = av_mallocz(256);
2305
    }
2306
    if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2307
        if(ff_rate_control_init(&s->m) < 0)
2308
            return -1;
2309
    }
2310
    s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2311

    
2312
    avctx->coded_frame= &s->current_picture;
2313
    switch(avctx->pix_fmt){
2314
//    case PIX_FMT_YUV444P:
2315
//    case PIX_FMT_YUV422P:
2316
    case PIX_FMT_YUV420P:
2317
    case PIX_FMT_GRAY8:
2318
//    case PIX_FMT_YUV411P:
2319
//    case PIX_FMT_YUV410P:
2320
        s->colorspace_type= 0;
2321
        break;
2322
/*    case PIX_FMT_RGB32:
2323
        s->colorspace= 1;
2324
        break;*/
2325
    default:
2326
        av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2327
        return -1;
2328
    }
2329
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2330
    s->chroma_h_shift= 1;
2331
    s->chroma_v_shift= 1;
2332

    
2333
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
2334
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
2335

    
2336
    s->avctx->get_buffer(s->avctx, &s->input_picture);
2337

    
2338
    if(s->avctx->me_method == ME_ITER){
2339
        int i;
2340
        int size= s->b_width * s->b_height << 2*s->block_max_depth;
2341
        for(i=0; i<s->max_ref_frames; i++){
2342
            s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
2343
            s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
2344
        }
2345
    }
2346

    
2347
    return 0;
2348
}
2349

    
2350
//near copy & paste from dsputil, FIXME
2351
static int pix_sum(uint8_t * pix, int line_size, int w)
2352
{
2353
    int s, i, j;
2354

    
2355
    s = 0;
2356
    for (i = 0; i < w; i++) {
2357
        for (j = 0; j < w; j++) {
2358
            s += pix[0];
2359
            pix ++;
2360
        }
2361
        pix += line_size - w;
2362
    }
2363
    return s;
2364
}
2365

    
2366
//near copy & paste from dsputil, FIXME
2367
static int pix_norm1(uint8_t * pix, int line_size, int w)
2368
{
2369
    int s, i, j;
2370
    uint32_t *sq = ff_squareTbl + 256;
2371

    
2372
    s = 0;
2373
    for (i = 0; i < w; i++) {
2374
        for (j = 0; j < w; j ++) {
2375
            s += sq[pix[0]];
2376
            pix ++;
2377
        }
2378
        pix += line_size - w;
2379
    }
2380
    return s;
2381
}
2382

    
2383
//FIXME copy&paste
2384
#define P_LEFT P[1]
2385
#define P_TOP P[2]
2386
#define P_TOPRIGHT P[3]
2387
#define P_MEDIAN P[4]
2388
#define P_MV1 P[9]
2389
#define FLAG_QPEL   1 //must be 1
2390

    
2391
static int encode_q_branch(SnowContext *s, int level, int x, int y){
2392
    uint8_t p_buffer[1024];
2393
    uint8_t i_buffer[1024];
2394
    uint8_t p_state[sizeof(s->block_state)];
2395
    uint8_t i_state[sizeof(s->block_state)];
2396
    RangeCoder pc, ic;
2397
    uint8_t *pbbak= s->c.bytestream;
2398
    uint8_t *pbbak_start= s->c.bytestream_start;
2399
    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
2400
    const int w= s->b_width  << s->block_max_depth;
2401
    const int h= s->b_height << s->block_max_depth;
2402
    const int rem_depth= s->block_max_depth - level;
2403
    const int index= (x + y*w) << rem_depth;
2404
    const int block_w= 1<<(LOG2_MB_SIZE - level);
2405
    int trx= (x+1)<<rem_depth;
2406
    int try= (y+1)<<rem_depth;
2407
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2408
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2409
    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2410
    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2411
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2412
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2413
    int pl = left->color[0];
2414
    int pcb= left->color[1];
2415
    int pcr= left->color[2];
2416
    int pmx, pmy;
2417
    int mx=0, my=0;
2418
    int l,cr,cb;
2419
    const int stride= s->current_picture.linesize[0];
2420
    const int uvstride= s->current_picture.linesize[1];
2421
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
2422
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2423
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2424
    int P[10][2];
2425
    int16_t last_mv[3][2];
2426
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2427
    const int shift= 1+qpel;
2428
    MotionEstContext *c= &s->m.me;
2429
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2430
    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2431
    int my_context= av_log2(2*FFABS(left->my - top->my));
2432
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2433
    int ref, best_ref, ref_score, ref_mx, ref_my;
2434

    
2435
    assert(sizeof(s->block_state) >= 256);
2436
    if(s->keyframe){
2437
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2438
        return 0;
2439
    }
2440

    
2441
//    clip predictors / edge ?
2442

    
2443
    P_LEFT[0]= left->mx;
2444
    P_LEFT[1]= left->my;
2445
    P_TOP [0]= top->mx;
2446
    P_TOP [1]= top->my;
2447
    P_TOPRIGHT[0]= tr->mx;
2448
    P_TOPRIGHT[1]= tr->my;
2449

    
2450
    last_mv[0][0]= s->block[index].mx;
2451
    last_mv[0][1]= s->block[index].my;
2452
    last_mv[1][0]= right->mx;
2453
    last_mv[1][1]= right->my;
2454
    last_mv[2][0]= bottom->mx;
2455
    last_mv[2][1]= bottom->my;
2456

    
2457
    s->m.mb_stride=2;
2458
    s->m.mb_x=
2459
    s->m.mb_y= 0;
2460
    c->skip= 0;
2461

    
2462
    assert(c->  stride ==   stride);
2463
    assert(c->uvstride == uvstride);
2464

    
2465
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2466
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2467
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2468
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2469

    
2470
    c->xmin = - x*block_w - 16+3;
2471
    c->ymin = - y*block_w - 16+3;
2472
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2473
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2474

    
2475
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2476
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
2477
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2478
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2479
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2480
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2481
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2482

    
2483
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2484
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2485

    
2486
    if (!y) {
2487
        c->pred_x= P_LEFT[0];
2488
        c->pred_y= P_LEFT[1];
2489
    } else {
2490
        c->pred_x = P_MEDIAN[0];
2491
        c->pred_y = P_MEDIAN[1];
2492
    }
2493

    
2494
    score= INT_MAX;
2495
    best_ref= 0;
2496
    for(ref=0; ref<s->ref_frames; ref++){
2497
        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2498

    
2499
        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2500
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2501

    
2502
        assert(ref_mx >= c->xmin);
2503
        assert(ref_mx <= c->xmax);
2504
        assert(ref_my >= c->ymin);
2505
        assert(ref_my <= c->ymax);
2506

    
2507
        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2508
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2509
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2510
        if(s->ref_mvs[ref]){
2511
            s->ref_mvs[ref][index][0]= ref_mx;
2512
            s->ref_mvs[ref][index][1]= ref_my;
2513
            s->ref_scores[ref][index]= ref_score;
2514
        }
2515
        if(score > ref_score){
2516
            score= ref_score;
2517
            best_ref= ref;
2518
            mx= ref_mx;
2519
            my= ref_my;
2520
        }
2521
    }
2522
    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
2523

    
2524
  //  subpel search
2525
    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
2526
    pc= s->c;
2527
    pc.bytestream_start=
2528
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2529
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2530

    
2531
    if(level!=s->block_max_depth)
2532
        put_rac(&pc, &p_state[4 + s_context], 1);
2533
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2534
    if(s->ref_frames > 1)
2535
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2536
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2537
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2538
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2539
    p_len= pc.bytestream - pc.bytestream_start;
2540
    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
2541

    
2542
    block_s= block_w*block_w;
2543
    sum = pix_sum(current_data[0], stride, block_w);
2544
    l= (sum + block_s/2)/block_s;
2545
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2546

    
2547
    block_s= block_w*block_w>>2;
2548
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
2549
    cb= (sum + block_s/2)/block_s;
2550
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2551
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
2552
    cr= (sum + block_s/2)/block_s;
2553
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2554

    
2555
    ic= s->c;
2556
    ic.bytestream_start=
2557
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2558
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2559
    if(level!=s->block_max_depth)
2560
        put_rac(&ic, &i_state[4 + s_context], 1);
2561
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2562
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2563
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2564
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2565
    i_len= ic.bytestream - ic.bytestream_start;
2566
    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2567

    
2568
//    assert(score==256*256*256*64-1);
2569
    assert(iscore < 255*255*256 + s->lambda2*10);
2570
    assert(iscore >= 0);
2571
    assert(l>=0 && l<=255);
2572
    assert(pl>=0 && pl<=255);
2573

    
2574
    if(level==0){
2575
        int varc= iscore >> 8;
2576
        int vard= score >> 8;
2577
        if (vard <= 64 || vard < varc)
2578
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2579
        else
2580
            c->scene_change_score+= s->m.qscale;
2581
    }
2582

    
2583
    if(level!=s->block_max_depth){
2584
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2585
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2586
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2587
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2588
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2589
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2590

    
2591
        if(score2 < score && score2 < iscore)
2592
            return score2;
2593
    }
2594

    
2595
    if(iscore < score){
2596
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2597
        memcpy(pbbak, i_buffer, i_len);
2598
        s->c= ic;
2599
        s->c.bytestream_start= pbbak_start;
2600
        s->c.bytestream= pbbak + i_len;
2601
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2602
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2603
        return iscore;
2604
    }else{
2605
        memcpy(pbbak, p_buffer, p_len);
2606
        s->c= pc;
2607
        s->c.bytestream_start= pbbak_start;
2608
        s->c.bytestream= pbbak + p_len;
2609
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2610
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2611
        return score;
2612
    }
2613
}
2614

    
2615
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2616
    const int w= s->b_width  << s->block_max_depth;
2617
    const int rem_depth= s->block_max_depth - level;
2618
    const int index= (x + y*w) << rem_depth;
2619
    int trx= (x+1)<<rem_depth;
2620
    BlockNode *b= &s->block[index];
2621
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2622
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2623
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2624
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2625
    int pl = left->color[0];
2626
    int pcb= left->color[1];
2627
    int pcr= left->color[2];
2628
    int pmx, pmy;
2629
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2630
    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2631
    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2632
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2633

    
2634
    if(s->keyframe){
2635
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2636
        return;
2637
    }
2638

    
2639
    if(level!=s->block_max_depth){
2640
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2641
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
2642
        }else{
2643
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
2644
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2645
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2646
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2647
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2648
            return;
2649
        }
2650
    }
2651
    if(b->type & BLOCK_INTRA){
2652
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2653
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2654
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2655
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2656
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2657
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2658
    }else{
2659
        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2660
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2661
        if(s->ref_frames > 1)
2662
            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2663
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2664
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2665
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2666
    }
2667
}
2668

    
2669
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2670
    int i, x2, y2;
2671
    Plane *p= &s->plane[plane_index];
2672
    const int block_size = MB_SIZE >> s->block_max_depth;
2673
    const int block_w    = plane_index ? block_size/2 : block_size;
2674
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2675
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2676
    const int ref_stride= s->current_picture.linesize[plane_index];
2677
    uint8_t *src= s-> input_picture.data[plane_index];
2678
    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2679
    const int b_stride = s->b_width << s->block_max_depth;
2680
    const int w= p->width;
2681
    const int h= p->height;
2682
    int index= mb_x + mb_y*b_stride;
2683
    BlockNode *b= &s->block[index];
2684
    BlockNode backup= *b;
2685
    int ab=0;
2686
    int aa=0;
2687

    
2688
    b->type|= BLOCK_INTRA;
2689
    b->color[plane_index]= 0;
2690
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2691

    
2692
    for(i=0; i<4; i++){
2693
        int mb_x2= mb_x + (i &1) - 1;
2694
        int mb_y2= mb_y + (i>>1) - 1;
2695
        int x= block_w*mb_x2 + block_w/2;
2696
        int y= block_w*mb_y2 + block_w/2;
2697

    
2698
        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2699
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2700

    
2701
        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2702
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2703
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2704
                int obmc_v= obmc[index];
2705
                int d;
2706
                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2707
                if(x<0) obmc_v += obmc[index + block_w];
2708
                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2709
                if(x+block_w>w) obmc_v += obmc[index - block_w];
2710
                //FIXME precalculate this or simplify it somehow else
2711

    
2712
                d = -dst[index] + (1<<(FRAC_BITS-1));
2713
                dst[index] = d;
2714
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2715
                aa += obmc_v * obmc_v; //FIXME precalculate this
2716
            }
2717
        }
2718
    }
2719
    *b= backup;
2720

    
2721
    return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2722
}
2723

    
2724
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2725
    const int b_stride = s->b_width << s->block_max_depth;
2726
    const int b_height = s->b_height<< s->block_max_depth;
2727
    int index= x + y*b_stride;
2728
    const BlockNode *b     = &s->block[index];
2729
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2730
    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2731
    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2732
    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2733
    int dmx, dmy;
2734
//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2735
//  int my_context= av_log2(2*FFABS(left->my - top->my));
2736

    
2737
    if(x<0 || x>=b_stride || y>=b_height)
2738
        return 0;
2739
/*
2740
1            0      0
2741
01X          1-2    1
2742
001XX        3-6    2-3
2743
0001XXX      7-14   4-7
2744
00001XXXX   15-30   8-15
2745
*/
2746
//FIXME try accurate rate
2747
//FIXME intra and inter predictors if surrounding blocks are not the same type
2748
    if(b->type & BLOCK_INTRA){
2749
        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2750
                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
2751
                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
2752
    }else{
2753
        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2754
        dmx-= b->mx;
2755
        dmy-= b->my;
2756
        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2757
                    + av_log2(2*FFABS(dmy))
2758
                    + av_log2(2*b->ref));
2759
    }
2760
}
2761

    
2762
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2763
    Plane *p= &s->plane[plane_index];
2764
    const int block_size = MB_SIZE >> s->block_max_depth;
2765
    const int block_w    = plane_index ? block_size/2 : block_size;
2766
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2767
    const int ref_stride= s->current_picture.linesize[plane_index];
2768
    uint8_t *dst= s->current_picture.data[plane_index];
2769
    uint8_t *src= s->  input_picture.data[plane_index];
2770
    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2771
    uint8_t *cur = s->scratchbuf;
2772
    uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2773
    const int b_stride = s->b_width << s->block_max_depth;
2774
    const int b_height = s->b_height<< s->block_max_depth;
2775
    const int w= p->width;
2776
    const int h= p->height;
2777
    int distortion;
2778
    int rate= 0;
2779
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2780
    int sx= block_w*mb_x - block_w/2;
2781
    int sy= block_w*mb_y - block_w/2;
2782
    int x0= FFMAX(0,-sx);
2783
    int y0= FFMAX(0,-sy);
2784
    int x1= FFMIN(block_w*2, w-sx);
2785
    int y1= FFMIN(block_w*2, h-sy);
2786
    int i,x,y;
2787

    
2788
    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2789

    
2790
    for(y=y0; y<y1; y++){
2791
        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2792
        const IDWTELEM *pred1 = pred + y*obmc_stride;
2793
        uint8_t *cur1 = cur + y*ref_stride;
2794
        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2795
        for(x=x0; x<x1; x++){
2796
#if FRAC_BITS >= LOG2_OBMC_MAX
2797
            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2798
#else
2799
            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2800
#endif
2801
            v = (v + pred1[x]) >> FRAC_BITS;
2802
            if(v&(~255)) v= ~(v>>31);
2803
            dst1[x] = v;
2804
        }
2805
    }
2806

    
2807
    /* copy the regions where obmc[] = (uint8_t)256 */
2808
    if(LOG2_OBMC_MAX == 8
2809
        && (mb_x == 0 || mb_x == b_stride-1)
2810
        && (mb_y == 0 || mb_y == b_height-1)){
2811
        if(mb_x == 0)
2812
            x1 = block_w;
2813
        else
2814
            x0 = block_w;
2815
        if(mb_y == 0)
2816
            y1 = block_w;
2817
        else
2818
            y0 = block_w;
2819
        for(y=y0; y<y1; y++)
2820
            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2821
    }
2822

    
2823
    if(block_w==16){
2824
        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2825
        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2826
        /* FIXME cmps overlap but do not cover the wavelet's whole support.
2827
         * So improving the score of one block is not strictly guaranteed
2828
         * to improve the score of the whole frame, thus iterative motion
2829
         * estimation does not always converge. */
2830
        if(s->avctx->me_cmp == FF_CMP_W97)
2831
            distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2832
        else if(s->avctx->me_cmp == FF_CMP_W53)
2833
            distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2834
        else{
2835
            distortion = 0;
2836
            for(i=0; i<4; i++){
2837
                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2838
                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2839
            }
2840
        }
2841
    }else{
2842
        assert(block_w==8);
2843
        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2844
    }
2845

    
2846
    if(plane_index==0){
2847
        for(i=0; i<4; i++){
2848
/* ..RRr
2849
 * .RXx.
2850
 * rxx..
2851
 */
2852
            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2853
        }
2854
        if(mb_x == b_stride-2)
2855
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2856
    }
2857
    return distortion + rate*penalty_factor;
2858
}
2859

    
2860
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2861
    int i, y2;
2862
    Plane *p= &s->plane[plane_index];
2863
    const int block_size = MB_SIZE >> s->block_max_depth;
2864
    const int block_w    = plane_index ? block_size/2 : block_size;
2865
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2866
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2867
    const int ref_stride= s->current_picture.linesize[plane_index];
2868
    uint8_t *dst= s->current_picture.data[plane_index];
2869
    uint8_t *src= s-> input_picture.data[plane_index];
2870
    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2871
    // const has only been removed from zero_dst to suppress a warning
2872
    static IDWTELEM zero_dst[4096]; //FIXME
2873
    const int b_stride = s->b_width << s->block_max_depth;
2874
    const int w= p->width;
2875
    const int h= p->height;
2876
    int distortion= 0;
2877
    int rate= 0;
2878
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2879

    
2880
    for(i=0; i<9; i++){
2881
        int mb_x2= mb_x + (i%3) - 1;
2882
        int mb_y2= mb_y + (i/3) - 1;
2883
        int x= block_w*mb_x2 + block_w/2;
2884
        int y= block_w*mb_y2 + block_w/2;
2885

    
2886
        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2887
                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2888

    
2889
        //FIXME find a cleaner/simpler way to skip the outside stuff
2890
        for(y2= y; y2<0; y2++)
2891
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2892
        for(y2= h; y2<y+block_w; y2++)
2893
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2894
        if(x<0){
2895
            for(y2= y; y2<y+block_w; y2++)
2896
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2897
        }
2898
        if(x+block_w > w){
2899
            for(y2= y; y2<y+block_w; y2++)
2900
                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2901
        }
2902

    
2903
        assert(block_w== 8 || block_w==16);
2904
        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2905
    }
2906

    
2907
    if(plane_index==0){
2908
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2909
        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2910

    
2911
/* ..RRRr
2912
 * .RXXx.
2913
 * .RXXx.
2914
 * rxxx.
2915
 */
2916
        if(merged)
2917
            rate = get_block_bits(s, mb_x, mb_y, 2);
2918
        for(i=merged?4:0; i<9; i++){
2919
            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2920
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2921
        }
2922
    }
2923
    return distortion + rate*penalty_factor;
2924
}
2925

    
2926
static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2927
    const int w= b->width;
2928
    const int h= b->height;
2929
    int x, y;
2930

    
2931
    if(1){
2932
        int run=0;
2933
        int runs[w*h];
2934
        int run_index=0;
2935
        int max_index;
2936

    
2937
        for(y=0; y<h; y++){
2938
            for(x=0; x<w; x++){
2939
                int v, p=0;
2940
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
2941
                v= src[x + y*stride];
2942

    
2943
                if(y){
2944
                    t= src[x + (y-1)*stride];
2945
                    if(x){
2946
                        lt= src[x - 1 + (y-1)*stride];
2947
                    }
2948
                    if(x + 1 < w){
2949
                        rt= src[x + 1 + (y-1)*stride];
2950
                    }
2951
                }
2952
                if(x){
2953
                    l= src[x - 1 + y*stride];
2954
                    /*if(x > 1){
2955
                        if(orientation==1) ll= src[y + (x-2)*stride];
2956
                        else               ll= src[x - 2 + y*stride];
2957
                    }*/
2958
                }
2959
                if(parent){
2960
                    int px= x>>1;
2961
                    int py= y>>1;
2962
                    if(px<b->parent->width && py<b->parent->height)
2963
                        p= parent[px + py*2*stride];
2964
                }
2965
                if(!(/*ll|*/l|lt|t|rt|p)){
2966
                    if(v){
2967
                        runs[run_index++]= run;
2968
                        run=0;
2969
                    }else{
2970
                        run++;
2971
                    }
2972
                }
2973
            }
2974
        }
2975
        max_index= run_index;
2976
        runs[run_index++]= run;
2977
        run_index=0;
2978
        run= runs[run_index++];
2979

    
2980
        put_symbol2(&s->c, b->state[30], max_index, 0);
2981
        if(run_index <= max_index)
2982
            put_symbol2(&s->c, b->state[1], run, 3);
2983

    
2984
        for(y=0; y<h; y++){
2985
            if(s->c.bytestream_end - s->c.bytestream < w*40){
2986
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2987
                return -1;
2988
            }
2989
            for(x=0; x<w; x++){
2990
                int v, p=0;
2991
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
2992
                v= src[x + y*stride];
2993

    
2994
                if(y){
2995
                    t= src[x + (y-1)*stride];
2996
                    if(x){
2997
                        lt= src[x - 1 + (y-1)*stride];
2998
                    }
2999
                    if(x + 1 < w){
3000
                        rt= src[x + 1 + (y-1)*stride];
3001
                    }
3002
                }
3003
                if(x){
3004
                    l= src[x - 1 + y*stride];
3005
                    /*if(x > 1){
3006
                        if(orientation==1) ll= src[y + (x-2)*stride];
3007
                        else               ll= src[x - 2 + y*stride];
3008
                    }*/
3009
                }
3010
                if(parent){
3011
                    int px= x>>1;
3012
                    int py= y>>1;
3013
                    if(px<b->parent->width && py<b->parent->height)
3014
                        p= parent[px + py*2*stride];
3015
                }
3016
                if(/*ll|*/l|lt|t|rt|p){
3017
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3018

    
3019
                    put_rac(&s->c, &b->state[0][context], !!v);
3020
                }else{
3021
                    if(!run){
3022
                        run= runs[run_index++];
3023

    
3024
                        if(run_index <= max_index)
3025
                            put_symbol2(&s->c, b->state[1], run, 3);
3026
                        assert(v);
3027
                    }else{
3028
                        run--;
3029
                        assert(!v);
3030
                    }
3031
                }
3032
                if(v){
3033
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3034
                    int l2= 2*FFABS(l) + (l<0);
3035
                    int t2= 2*FFABS(t) + (t<0);
3036

    
3037
                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
3038
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
3039
                }
3040
            }
3041
        }
3042
    }
3043
    return 0;
3044
}
3045

    
3046
static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3047
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
3048
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
3049
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
3050
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
3051
}
3052

    
3053
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3054
    const int b_stride= s->b_width << s->block_max_depth;
3055
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3056
    BlockNode backup= *block;
3057
    int rd, index, value;
3058

    
3059
    assert(mb_x>=0 && mb_y>=0);
3060
    assert(mb_x<b_stride);
3061

    
3062
    if(intra){
3063
        block->color[0] = p[0];
3064
        block->color[1] = p[1];
3065
        block->color[2] = p[2];
3066
        block->type |= BLOCK_INTRA;
3067
    }else{
3068
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3069
        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3070
        if(s->me_cache[index] == value)
3071
            return 0;
3072
        s->me_cache[index]= value;
3073

    
3074
        block->mx= p[0];
3075
        block->my= p[1];
3076
        block->type &= ~BLOCK_INTRA;
3077
    }
3078

    
3079
    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3080

    
3081
//FIXME chroma
3082
    if(rd < *best_rd){
3083
        *best_rd= rd;
3084
        return 1;
3085
    }else{
3086
        *block= backup;
3087
        return 0;
3088
    }
3089
}
3090

    
3091
/* special case for int[2] args we discard afterwards,
3092
 * fixes compilation problem with gcc 2.95 */
3093
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3094
    int p[2] = {p0, p1};
3095
    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3096
}
3097

    
3098
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3099
    const int b_stride= s->b_width << s->block_max_depth;
3100
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3101
    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3102
    int rd, index, value;
3103

    
3104
    assert(mb_x>=0 && mb_y>=0);
3105
    assert(mb_x<b_stride);
3106
    assert(((mb_x|mb_y)&1) == 0);
3107

    
3108
    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3109
    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3110
    if(s->me_cache[index] == value)
3111
        return 0;
3112
    s->me_cache[index]= value;
3113

    
3114
    block->mx= p0;
3115
    block->my= p1;
3116
    block->ref= ref;
3117
    block->type &= ~BLOCK_INTRA;
3118
    block[1]= block[b_stride]= block[b_stride+1]= *block;
3119

    
3120
    rd= get_4block_rd(s, mb_x, mb_y, 0);
3121

    
3122
//FIXME chroma
3123
    if(rd < *best_rd){
3124
        *best_rd= rd;
3125
        return 1;
3126
    }else{
3127
        block[0]= backup[0];
3128
        block[1]= backup[1];
3129
        block[b_stride]= backup[2];
3130
        block[b_stride+1]= backup[3];
3131
        return 0;
3132
    }
3133
}
3134

    
3135
static void iterative_me(SnowContext *s){
3136
    int pass, mb_x, mb_y;
3137
    const int b_width = s->b_width  << s->block_max_depth;
3138
    const int b_height= s->b_height << s->block_max_depth;
3139
    const int b_stride= b_width;
3140
    int color[3];
3141

    
3142
    {
3143
        RangeCoder r = s->c;
3144
        uint8_t state[sizeof(s->block_state)];
3145
        memcpy(state, s->block_state, sizeof(s->block_state));
3146
        for(mb_y= 0; mb_y<s->b_height; mb_y++)
3147
            for(mb_x= 0; mb_x<s->b_width; mb_x++)
3148
                encode_q_branch(s, 0, mb_x, mb_y);
3149
        s->c = r;
3150
        memcpy(s->block_state, state, sizeof(s->block_state));
3151
    }
3152

    
3153
    for(pass=0; pass<25; pass++){
3154
        int change= 0;
3155

    
3156
        for(mb_y= 0; mb_y<b_height; mb_y++){
3157
            for(mb_x= 0; mb_x<b_width; mb_x++){
3158
                int dia_change, i, j, ref;
3159
                int best_rd= INT_MAX, ref_rd;
3160
                BlockNode backup, ref_b;
3161
                const int index= mb_x + mb_y * b_stride;
3162
                BlockNode *block= &s->block[index];
3163
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3164
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3165
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3166
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3167
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3168
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3169
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3170
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3171
                const int b_w= (MB_SIZE >> s->block_max_depth);
3172
                uint8_t obmc_edged[b_w*2][b_w*2];
3173

    
3174
                if(pass && (block->type & BLOCK_OPT))
3175
                    continue;
3176
                block->type |= BLOCK_OPT;
3177

    
3178
                backup= *block;
3179

    
3180
                if(!s->me_cache_generation)
3181
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3182
                s->me_cache_generation += 1<<22;
3183

    
3184
                //FIXME precalculate
3185
                {
3186
                    int x, y;
3187
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3188
                    if(mb_x==0)
3189
                        for(y=0; y<b_w*2; y++)
3190
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3191
                    if(mb_x==b_stride-1)
3192
                        for(y=0; y<b_w*2; y++)
3193
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3194
                    if(mb_y==0){
3195
                        for(x=0; x<b_w*2; x++)
3196
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3197
                        for(y=1; y<b_w; y++)
3198
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3199
                    }
3200
                    if(mb_y==b_height-1){
3201
                        for(x=0; x<b_w*2; x++)
3202
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3203
                        for(y=b_w; y<b_w*2-1; y++)
3204
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3205
                    }
3206
                }
3207

    
3208
                //skip stuff outside the picture
3209
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3210
                    uint8_t *src= s->  input_picture.data[0];
3211
                    uint8_t *dst= s->current_picture.data[0];
3212
                    const int stride= s->current_picture.linesize[0];
3213
                    const int block_w= MB_SIZE >> s->block_max_depth;
3214
                    const int sx= block_w*mb_x - block_w/2;
3215
                    const int sy= block_w*mb_y - block_w/2;
3216
                    const int w= s->plane[0].width;
3217
                    const int h= s->plane[0].height;
3218
                    int y;
3219

    
3220
                    for(y=sy; y<0; y++)
3221
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3222
                    for(y=h; y<sy+block_w*2; y++)
3223
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3224
                    if(sx<0){
3225
                        for(y=sy; y<sy+block_w*2; y++)
3226
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3227
                    }
3228
                    if(sx+block_w*2 > w){
3229
                        for(y=sy; y<sy+block_w*2; y++)
3230
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3231
                    }
3232
                }
3233

    
3234
                // intra(black) = neighbors' contribution to the current block
3235
                for(i=0; i<3; i++)
3236
                    color[i]= get_dc(s, mb_x, mb_y, i);
3237

    
3238
                // get previous score (cannot be cached due to OBMC)
3239
                if(pass > 0 && (block->type&BLOCK_INTRA)){
3240
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
3241
                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3242
                }else
3243
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3244

    
3245
                ref_b= *block;
3246
                ref_rd= best_rd;
3247
                for(ref=0; ref < s->ref_frames; ref++){
3248
                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3249
                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3250
                        continue;
3251
                    block->ref= ref;
3252
                    best_rd= INT_MAX;
3253

    
3254
                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3255
                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3256
                    if(tb)
3257
                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3258
                    if(lb)
3259
                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3260
                    if(rb)
3261
                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3262
                    if(bb)
3263
                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3264

    
3265
                    /* fullpel ME */
3266
                    //FIXME avoid subpel interpolation / round to nearest integer
3267
                    do{
3268
                        dia_change=0;
3269
                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3270
                            for(j=0; j<i; j++){
3271
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3272
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3273
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3274
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3275
                            }
3276
                        }
3277
                    }while(dia_change);
3278
                    /* subpel ME */
3279
                    do{
3280
                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3281
                        dia_change=0;
3282
                        for(i=0; i<8; i++)
3283
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3284
                    }while(dia_change);
3285
                    //FIXME or try the standard 2 pass qpel or similar
3286

    
3287
                    mvr[0][0]= block->mx;
3288
                    mvr[0][1]= block->my;
3289
                    if(ref_rd > best_rd){
3290
                        ref_rd= best_rd;
3291
                        ref_b= *block;
3292
                    }
3293
                }
3294
                best_rd= ref_rd;
3295
                *block= ref_b;
3296
#if 1
3297
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3298
                //FIXME RD style color selection
3299
#endif
3300
                if(!same_block(block, &backup)){
3301
                    if(tb ) tb ->type &= ~BLOCK_OPT;
3302
                    if(lb ) lb ->type &= ~BLOCK_OPT;
3303
                    if(rb ) rb ->type &= ~BLOCK_OPT;
3304
                    if(bb ) bb ->type &= ~BLOCK_OPT;
3305
                    if(tlb) tlb->type &= ~BLOCK_OPT;
3306
                    if(trb) trb->type &= ~BLOCK_OPT;
3307
                    if(blb) blb->type &= ~BLOCK_OPT;
3308
                    if(brb) brb->type &= ~BLOCK_OPT;
3309
                    change ++;
3310
                }
3311
            }
3312
        }
3313
        av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3314
        if(!change)
3315
            break;
3316
    }
3317

    
3318
    if(s->block_max_depth == 1){
3319
        int change= 0;
3320
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
3321
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
3322
                int i;
3323
                int best_rd, init_rd;
3324
                const int index= mb_x + mb_y * b_stride;
3325
                BlockNode *b[4];
3326

    
3327
                b[0]= &s->block[index];
3328
                b[1]= b[0]+1;
3329
                b[2]= b[0]+b_stride;
3330
                b[3]= b[2]+1;
3331
                if(same_block(b[0], b[1]) &&
3332
                   same_block(b[0], b[2]) &&
3333
                   same_block(b[0], b[3]))
3334
                    continue;
3335

    
3336
                if(!s->me_cache_generation)
3337
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3338
                s->me_cache_generation += 1<<22;
3339

    
3340
                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3341

    
3342
                //FIXME more multiref search?
3343
                check_4block_inter(s, mb_x, mb_y,
3344
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3345
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3346

    
3347
                for(i=0; i<4; i++)
3348
                    if(!(b[i]->type&BLOCK_INTRA))
3349
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3350

    
3351
                if(init_rd != best_rd)
3352
                    change++;
3353
            }
3354
        }
3355
        av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3356
    }
3357
}
3358

    
3359
static void encode_blocks(SnowContext *s, int search){
3360
    int x, y;
3361
    int w= s->b_width;
3362
    int h= s->b_height;
3363

    
3364
    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
3365
        iterative_me(s);
3366

    
3367
    for(y=0; y<h; y++){
3368
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
3369
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3370
            return;
3371
        }
3372
        for(x=0; x<w; x++){
3373
            if(s->avctx->me_method == ME_ITER || !search)
3374
                encode_q_branch2(s, 0, x, y);
3375
            else
3376
                encode_q_branch (s, 0, x, y);
3377
        }
3378
    }
3379
}
3380

    
3381
static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3382
    const int w= b->width;
3383
    const int h= b->height;
3384
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3385
    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3386
    int x,y, thres1, thres2;
3387

    
3388
    if(s->qlog == LOSSLESS_QLOG){
3389
        for(y=0; y<h; y++)
3390
            for(x=0; x<w; x++)
3391
                dst[x + y*stride]= src[x + y*stride];
3392
        return;
3393
    }
3394

    
3395
    bias= bias ? 0 : (3*qmul)>>3;
3396
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3397
    thres2= 2*thres1;
3398

    
3399
    if(!bias){
3400
        for(y=0; y<h; y++){
3401
            for(x=0; x<w; x++){
3402
                int i= src[x + y*stride];
3403

    
3404
                if((unsigned)(i+thres1) > thres2){
3405
                    if(i>=0){
3406
                        i<<= QEXPSHIFT;
3407
                        i/= qmul; //FIXME optimize
3408
                        dst[x + y*stride]=  i;
3409
                    }else{
3410
                        i= -i;
3411
                        i<<= QEXPSHIFT;
3412
                        i/= qmul; //FIXME optimize
3413
                        dst[x + y*stride]= -i;
3414
                    }
3415
                }else
3416
                    dst[x + y*stride]= 0;
3417
            }
3418
        }
3419
    }else{
3420
        for(y=0; y<h; y++){
3421
            for(x=0; x<w; x++){
3422
                int i= src[x + y*stride];
3423

    
3424
                if((unsigned)(i+thres1) > thres2){
3425
                    if(i>=0){
3426
                        i<<= QEXPSHIFT;
3427
                        i= (i + bias) / qmul; //FIXME optimize
3428
                        dst[x + y*stride]=  i;
3429
                    }else{
3430
                        i= -i;
3431
                        i<<= QEXPSHIFT;
3432
                        i= (i + bias) / qmul; //FIXME optimize
3433
                        dst[x + y*stride]= -i;
3434
                    }
3435
                }else
3436
                    dst[x + y*stride]= 0;
3437
            }
3438
        }
3439
    }
3440
}
3441

    
3442
static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3443
    const int w= b->width;
3444
    const int h= b->height;
3445
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3446
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3447
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3448
    int x,y;
3449

    
3450
    if(s->qlog == LOSSLESS_QLOG) return;
3451

    
3452
    for(y=0; y<h; y++){
3453
        for(x=0; x<w; x++){
3454
            int i= src[x + y*stride];
3455
            if(i<0){
3456
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3457
            }else if(i>0){
3458
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3459
            }
3460
        }
3461
    }
3462
}
3463

    
3464
static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3465
    const int w= b->width;
3466
    const int h= b->height;
3467
    int x,y;
3468

    
3469
    for(y=h-1; y>=0; y--){
3470
        for(x=w-1; x>=0; x--){
3471
            int i= x + y*stride;
3472

    
3473
            if(x){
3474
                if(use_median){
3475
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3476
                    else  src[i] -= src[i - 1];
3477
                }else{
3478
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3479
                    else  src[i] -= src[i - 1];
3480
                }
3481
            }else{
3482
                if(y) src[i] -= src[i - stride];
3483
            }
3484
        }
3485
    }
3486
}
3487

    
3488
static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3489
    const int w= b->width;
3490
    const int h= b->height;
3491
    int x,y;
3492

    
3493
    for(y=0; y<h; y++){
3494
        for(x=0; x<w; x++){
3495
            int i= x + y*stride;
3496

    
3497
            if(x){
3498
                if(use_median){
3499
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3500
                    else  src[i] += src[i - 1];
3501
                }else{
3502
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3503
                    else  src[i] += src[i - 1];
3504
                }
3505
            }else{
3506
                if(y) src[i] += src[i - stride];
3507
            }
3508
        }
3509
    }
3510
}
3511

    
3512
static void encode_qlogs(SnowContext *s){
3513
    int plane_index, level, orientation;
3514

    
3515
    for(plane_index=0; plane_index<2; plane_index++){
3516
        for(level=0; level<s->spatial_decomposition_count; level++){
3517
            for(orientation=level ? 1:0; orientation<4; orientation++){
3518
                if(orientation==2) continue;
3519
                put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3520
            }
3521
        }
3522
    }
3523
}
3524

    
3525
static void encode_header(SnowContext *s){
3526
    int plane_index, i;
3527
    uint8_t kstate[32];
3528

    
3529
    memset(kstate, MID_STATE, sizeof(kstate));
3530

    
3531
    put_rac(&s->c, kstate, s->keyframe);
3532
    if(s->keyframe || s->always_reset){
3533
        reset_contexts(s);
3534
        s->last_spatial_decomposition_type=
3535
        s->last_qlog=
3536
        s->last_qbias=
3537
        s->last_mv_scale=
3538
        s->last_block_max_depth= 0;
3539
        for(plane_index=0; plane_index<2; plane_index++){
3540
            Plane *p= &s->plane[plane_index];
3541
            p->last_htaps=0;
3542
            p->last_diag_mc=0;
3543
            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3544
        }
3545
    }
3546
    if(s->keyframe){
3547
        put_symbol(&s->c, s->header_state, s->version, 0);
3548
        put_rac(&s->c, s->header_state, s->always_reset);
3549
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3550
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3551
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3552
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3553
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3554
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3555
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3556
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3557
        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3558

    
3559
        encode_qlogs(s);
3560
    }
3561

    
3562
    if(!s->keyframe){
3563
        int update_mc=0;
3564
        for(plane_index=0; plane_index<2; plane_index++){
3565
            Plane *p= &s->plane[plane_index];
3566
            update_mc |= p->last_htaps   != p->htaps;
3567
            update_mc |= p->last_diag_mc != p->diag_mc;
3568
            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3569
        }
3570
        put_rac(&s->c, s->header_state, update_mc);
3571
        if(update_mc){
3572
            for(plane_index=0; plane_index<2; plane_index++){
3573
                Plane *p= &s->plane[plane_index];
3574
                put_rac(&s->c, s->header_state, p->diag_mc);
3575
                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3576
                for(i= p->htaps/2; i; i--)
3577
                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3578
            }
3579
        }
3580
        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3581
            put_rac(&s->c, s->header_state, 1);
3582
            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3583
            encode_qlogs(s);
3584
        }else
3585
            put_rac(&s->c, s->header_state, 0);
3586
    }
3587

    
3588
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3589
    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3590
    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3591
    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3592
    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3593

    
3594
}
3595

    
3596
static void update_last_header_values(SnowContext *s){
3597
    int plane_index;
3598

    
3599
    if(!s->keyframe){
3600
        for(plane_index=0; plane_index<2; plane_index++){
3601
            Plane *p= &s->plane[plane_index];
3602
            p->last_diag_mc= p->diag_mc;
3603
            p->last_htaps  = p->htaps;
3604
            memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3605
        }
3606
    }
3607

    
3608
    s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
3609
    s->last_qlog                        = s->qlog;
3610
    s->last_qbias                       = s->qbias;
3611
    s->last_mv_scale                    = s->mv_scale;
3612
    s->last_block_max_depth             = s->block_max_depth;
3613
    s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3614
}
3615

    
3616
static int qscale2qlog(int qscale){
3617
    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3618
           + 61*QROOT/8; //<64 >60
3619
}
3620

    
3621
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3622
{
3623
    /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3624
     * FIXME we know exact mv bits at this point,
3625
     * but ratecontrol isn't set up to include them. */
3626
    uint32_t coef_sum= 0;
3627
    int level, orientation, delta_qlog;
3628

    
3629
    for(level=0; level<s->spatial_decomposition_count; level++){
3630
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3631
            SubBand *b= &s->plane[0].band[level][orientation];
3632
            IDWTELEM *buf= b->ibuf;
3633
            const int w= b->width;
3634
            const int h= b->height;
3635
            const int stride= b->stride;
3636
            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3637
            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3638
            const int qdiv= (1<<16)/qmul;
3639
            int x, y;
3640
            //FIXME this is ugly
3641
            for(y=0; y<h; y++)
3642
                for(x=0; x<w; x++)
3643
                    buf[x+y*stride]= b->buf[x+y*stride];
3644
            if(orientation==0)
3645
                decorrelate(s, b, buf, stride, 1, 0);
3646
            for(y=0; y<h; y++)
3647
                for(x=0; x<w; x++)
3648
                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3649
        }
3650
    }
3651

    
3652
    /* ugly, ratecontrol just takes a sqrt again */
3653
    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3654
    assert(coef_sum < INT_MAX);
3655

    
3656
    if(pict->pict_type == FF_I_TYPE){
3657
        s->m.current_picture.mb_var_sum= coef_sum;
3658
        s->m.current_picture.mc_mb_var_sum= 0;
3659
    }else{
3660
        s->m.current_picture.mc_mb_var_sum= coef_sum;
3661
        s->m.current_picture.mb_var_sum= 0;
3662
    }
3663

    
3664
    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3665
    if (pict->quality < 0)
3666
        return INT_MIN;
3667
    s->lambda= pict->quality * 3/2;
3668
    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3669
    s->qlog+= delta_qlog;
3670
    return delta_qlog;
3671
}
3672

    
3673
static void calculate_visual_weight(SnowContext *s, Plane *p){
3674
    int width = p->width;
3675
    int height= p->height;
3676
    int level, orientation, x, y;
3677

    
3678
    for(level=0; level<s->spatial_decomposition_count; level++){
3679
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3680
            SubBand *b= &p->band[level][orientation];
3681
            IDWTELEM *ibuf= b->ibuf;
3682
            int64_t error=0;
3683

    
3684
            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3685
            ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3686
            ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3687
            for(y=0; y<height; y++){
3688
                for(x=0; x<width; x++){
3689
                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3690
                    error += d*d;
3691
                }
3692
            }
3693

    
3694
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3695
        }
3696
    }
3697
}
3698

    
3699
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3700
    SnowContext *s = avctx->priv_data;
3701
    RangeCoder * const c= &s->c;
3702
    AVFrame *pict = data;
3703
    const int width= s->avctx->width;
3704
    const int height= s->avctx->height;
3705
    int level, orientation, plane_index, i, y;
3706
    uint8_t rc_header_bak[sizeof(s->header_state)];
3707
    uint8_t rc_block_bak[sizeof(s->block_state)];
3708

    
3709
    ff_init_range_encoder(c, buf, buf_size);
3710
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3711

    
3712
    for(i=0; i<3; i++){
3713
        int shift= !!i;
3714
        for(y=0; y<(height>>shift); y++)
3715
            memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3716
                   &pict->data[i][y * pict->linesize[i]],
3717
                   width>>shift);
3718
    }
3719
    s->new_picture = *pict;
3720

    
3721
    s->m.picture_number= avctx->frame_number;
3722
    if(avctx->flags&CODEC_FLAG_PASS2){
3723
        s->m.pict_type =
3724
        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3725
        s->keyframe= pict->pict_type==FF_I_TYPE;
3726
        if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3727
            pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3728
            if (pict->quality < 0)
3729
                return -1;
3730
        }
3731
    }else{
3732
        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3733
        s->m.pict_type=
3734
        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3735
    }
3736

    
3737
    if(s->pass1_rc && avctx->frame_number == 0)
3738
        pict->quality= 2*FF_QP2LAMBDA;
3739
    if(pict->quality){
3740
        s->qlog= qscale2qlog(pict->quality);
3741
        s->lambda = pict->quality * 3/2;
3742
    }
3743
    if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3744
        s->qlog= LOSSLESS_QLOG;
3745
        s->lambda = 0;
3746
    }//else keep previous frame's qlog until after motion estimation
3747

    
3748
    frame_start(s);
3749

    
3750
    s->m.current_picture_ptr= &s->m.current_picture;
3751
    s->m.last_picture.pts= s->m.current_picture.pts;
3752
    s->m.current_picture.pts= pict->pts;
3753
    if(pict->pict_type == FF_P_TYPE){
3754
        int block_width = (width +15)>>4;
3755
        int block_height= (height+15)>>4;
3756
        int stride= s->current_picture.linesize[0];
3757

    
3758
        assert(s->current_picture.data[0]);
3759
        assert(s->last_picture[0].data[0]);
3760

    
3761
        s->m.avctx= s->avctx;
3762
        s->m.current_picture.data[0]= s->current_picture.data[0];
3763
        s->m.   last_picture.data[0]= s->last_picture[0].data[0];
3764
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3765
        s->m.   last_picture_ptr= &s->m.   last_picture;
3766
        s->m.linesize=
3767
        s->m.   last_picture.linesize[0]=
3768
        s->m.    new_picture.linesize[0]=
3769
        s->m.current_picture.linesize[0]= stride;
3770
        s->m.uvlinesize= s->current_picture.linesize[1];
3771
        s->m.width = width;
3772
        s->m.height= height;
3773
        s->m.mb_width = block_width;
3774
        s->m.mb_height= block_height;
3775
        s->m.mb_stride=   s->m.mb_width+1;
3776
        s->m.b8_stride= 2*s->m.mb_width+1;
3777
        s->m.f_code=1;
3778
        s->m.pict_type= pict->pict_type;
3779
        s->m.me_method= s->avctx->me_method;
3780
        s->m.me.scene_change_score=0;
3781
        s->m.flags= s->avctx->flags;
3782
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3783
        s->m.out_format= FMT_H263;
3784
        s->m.unrestricted_mv= 1;
3785

    
3786
        s->m.lambda = s->lambda;
3787
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3788
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3789

    
3790
        s->m.dsp= s->dsp; //move
3791
        ff_init_me(&s->m);
3792
        s->dsp= s->m.dsp;
3793
    }
3794

    
3795
    if(s->pass1_rc){
3796
        memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
3797
        memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
3798
    }
3799

    
3800
redo_frame:
3801

    
3802
    if(pict->pict_type == FF_I_TYPE)
3803
        s->spatial_decomposition_count= 5;
3804
    else
3805
        s->spatial_decomposition_count= 5;
3806

    
3807
    s->m.pict_type = pict->pict_type;
3808
    s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
3809

    
3810
    common_init_after_header(avctx);
3811

    
3812
    if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3813
        for(plane_index=0; plane_index<3; plane_index++){
3814
            calculate_visual_weight(s, &s->plane[plane_index]);
3815
        }
3816
    }
3817

    
3818
    encode_header(s);
3819
    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3820
    encode_blocks(s, 1);
3821
    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3822

    
3823
    for(plane_index=0; plane_index<3; plane_index++){
3824
        Plane *p= &s->plane[plane_index];
3825
        int w= p->width;
3826
        int h= p->height;
3827
        int x, y;
3828
//        int bits= put_bits_count(&s->c.pb);
3829

    
3830
        if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
3831
            //FIXME optimize
3832
            if(pict->data[plane_index]) //FIXME gray hack
3833
                for(y=0; y<h; y++){
3834
                    for(x=0; x<w; x++){
3835
                        s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3836
                    }
3837
                }
3838
            predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
3839

    
3840
            if(   plane_index==0
3841
               && pict->pict_type == FF_P_TYPE
3842
               && !(avctx->flags&CODEC_FLAG_PASS2)
3843
               && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3844
                ff_init_range_encoder(c, buf, buf_size);
3845
                ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3846
                pict->pict_type= FF_I_TYPE;
3847
                s->keyframe=1;
3848
                s->current_picture.key_frame=1;
3849
                goto redo_frame;
3850
            }
3851

    
3852
            if(s->qlog == LOSSLESS_QLOG){
3853
                for(y=0; y<h; y++){
3854
                    for(x=0; x<w; x++){
3855
                        s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3856
                    }
3857
                }
3858
            }else{
3859
                for(y=0; y<h; y++){
3860
                    for(x=0; x<w; x++){
3861
                        s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
3862
                    }
3863
                }
3864
            }
3865

    
3866
            /*  if(QUANTIZE2)
3867
                dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
3868
            else*/
3869
                ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3870

    
3871
            if(s->pass1_rc && plane_index==0){
3872
                int delta_qlog = ratecontrol_1pass(s, pict);
3873
                if (delta_qlog <= INT_MIN)
3874
                    return -1;
3875
                if(delta_qlog){
3876
                    //reordering qlog in the bitstream would eliminate this reset
3877
                    ff_init_range_encoder(c, buf, buf_size);
3878
                    memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
3879
                    memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
3880
                    encode_header(s);
3881
                    encode_blocks(s, 0);
3882
                }
3883
            }
3884

    
3885
            for(level=0; level<s->spatial_decomposition_count; level++){
3886
                for(orientation=level ? 1 : 0; orientation<4; orientation++){
3887
                    SubBand *b= &p->band[level][orientation];
3888

    
3889
                    if(!QUANTIZE2)
3890
                        quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
3891
                    if(orientation==0)
3892
                        decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
3893
                    encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
3894
                    assert(b->parent==NULL || b->parent->stride == b->stride*2);
3895
                    if(orientation==0)
3896
                        correlate(s, b, b->ibuf, b->stride, 1, 0);
3897
                }
3898
            }
3899

    
3900
            for(level=0; level<s->spatial_decomposition_count; level++){
3901
                for(orientation=level ? 1 : 0; orientation<4; orientation++){
3902
                    SubBand *b= &p->band[level][orientation];
3903

    
3904
                    dequantize(s, b, b->ibuf, b->stride);
3905
                }
3906
            }
3907

    
3908
            ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3909
            if(s->qlog == LOSSLESS_QLOG){
3910
                for(y=0; y<h; y++){
3911
                    for(x=0; x<w; x++){
3912
                        s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
3913
                    }
3914
                }
3915
            }
3916
            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3917
        }else{
3918
            //ME/MC only
3919
            if(pict->pict_type == FF_I_TYPE){
3920
                for(y=0; y<h; y++){
3921
                    for(x=0; x<w; x++){
3922
                        s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
3923
                            pict->data[plane_index][y*pict->linesize[plane_index] + x];
3924
                    }
3925
                }
3926
            }else{
3927
                memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
3928
                predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3929
            }
3930
        }
3931
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3932
            int64_t error= 0;
3933

    
3934
            if(pict->data[plane_index]) //FIXME gray hack
3935
                for(y=0; y<h; y++){
3936
                    for(x=0; x<w; x++){
3937
                        int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3938
                        error += d*d;
3939
                    }
3940
                }
3941
            s->avctx->error[plane_index] += error;
3942
            s->current_picture.error[plane_index] = error;
3943
        }
3944

    
3945
    }
3946

    
3947
    update_last_header_values(s);
3948

    
3949
    release_buffer(avctx);
3950

    
3951
    s->current_picture.coded_picture_number = avctx->frame_number;
3952
    s->current_picture.pict_type = pict->pict_type;
3953
    s->current_picture.quality = pict->quality;
3954
    s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3955
    s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
3956
    s->m.current_picture.display_picture_number =
3957
    s->m.current_picture.coded_picture_number = avctx->frame_number;
3958
    s->m.current_picture.quality = pict->quality;
3959
    s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3960
    if(s->pass1_rc)
3961
        if (ff_rate_estimate_qscale(&s->m, 0) < 0)
3962
            return -1;
3963
    if(avctx->flags&CODEC_FLAG_PASS1)
3964
        ff_write_pass1_stats(&s->m);
3965
    s->m.last_pict_type = s->m.pict_type;
3966
    avctx->frame_bits = s->m.frame_bits;
3967
    avctx->mv_bits = s->m.mv_bits;
3968
    avctx->misc_bits = s->m.misc_bits;
3969
    avctx->p_tex_bits = s->m.p_tex_bits;
3970

    
3971
    emms_c();
3972

    
3973
    return ff_rac_terminate(c);
3974
}
3975

    
3976
static av_cold int encode_end(AVCodecContext *avctx)
3977
{
3978
    SnowContext *s = avctx->priv_data;
3979

    
3980
    common_end(s);
3981
    if (s->input_picture.data[0])
3982
        avctx->release_buffer(avctx, &s->input_picture);
3983
    av_free(avctx->stats_out);
3984

    
3985
    return 0;
3986
}
3987

    
3988
AVCodec ff_snow_encoder = {
3989
    "snow",
3990
    AVMEDIA_TYPE_VIDEO,
3991
    CODEC_ID_SNOW,
3992
    sizeof(SnowContext),
3993
    encode_init,
3994
    encode_frame,
3995
    encode_end,
3996
    .long_name = NULL_IF_CONFIG_SMALL("Snow"),
3997
};
3998
#endif
3999

    
4000

    
4001
#ifdef TEST
4002
#undef malloc
4003
#undef free
4004
#undef printf
4005

    
4006
#include "libavutil/lfg.h"
4007

    
4008
int main(void){
4009
    int width=256;
4010
    int height=256;
4011
    int buffer[2][width*height];
4012
    SnowContext s;
4013
    int i;
4014
    AVLFG prng;
4015
    s.spatial_decomposit