Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 05aec7bb

History | View | Annotate | Download (159 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

    
21
#include "libavutil/intmath.h"
22
#include "avcodec.h"
23
#include "dsputil.h"
24
#include "dwt.h"
25
#include "snow.h"
26

    
27
#include "rangecoder.h"
28
#include "mathops.h"
29

    
30
#include "mpegvideo.h"
31
#include "h263.h"
32

    
33
#undef NDEBUG
34
#include <assert.h>
35

    
36
static const int8_t quant3[256]={
37
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
53
};
54
static const int8_t quant3b[256]={
55
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
};
72
static const int8_t quant3bA[256]={
73
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89
};
90
static const int8_t quant5[256]={
91
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
107
};
108
static const int8_t quant7[256]={
109
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
111
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
115
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
120
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
123
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
124
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
125
};
126
static const int8_t quant9[256]={
127
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
128
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
142
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
143
};
144
static const int8_t quant11[256]={
145
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
146
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
147
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
159
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
160
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
161
};
162
static const int8_t quant13[256]={
163
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
164
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
165
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
166
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
169
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
173
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
174
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
176
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
177
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
178
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
179
};
180

    
181
#if 0 //64*cubic
182
static const uint8_t obmc32[1024]={
183
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
184
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
185
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
186
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
187
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
188
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
189
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
190
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
191
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
192
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
193
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
194
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
195
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
196
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
197
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
198
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
199
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
200
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
201
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
202
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
203
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
204
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
205
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
206
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
207
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
208
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
209
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
210
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
211
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
212
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
213
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
214
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
215
//error:0.000022
216
};
217
static const uint8_t obmc16[256]={
218
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
219
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
220
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
221
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
222
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
223
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
224
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
225
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
226
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
227
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
228
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
229
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
230
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
231
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
232
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
233
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
234
//error:0.000033
235
};
236
#elif 1 // 64*linear
237
static const uint8_t obmc32[1024]={
238
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
239
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
240
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
241
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
242
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
243
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
244
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
245
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
246
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
247
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
248
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
249
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
250
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
251
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
252
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
253
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
254
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
255
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
256
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
257
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
258
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
259
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
260
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
261
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
262
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
263
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
264
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
265
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
266
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
267
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
268
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
269
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
270
 //error:0.000020
271
};
272
static const uint8_t obmc16[256]={
273
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
274
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
275
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
276
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
277
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
278
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
281
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
282
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
283
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
284
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
285
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
286
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
287
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
288
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
289
//error:0.000015
290
};
291
#else //64*cos
292
static const uint8_t obmc32[1024]={
293
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
294
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
295
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
296
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
297
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
298
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
299
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
300
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
301
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
302
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
303
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
304
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
305
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
306
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
307
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
308
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
309
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
310
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
311
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
312
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
313
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
314
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
315
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
316
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
317
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
318
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
319
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
320
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
321
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
322
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
323
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
324
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
325
//error:0.000022
326
};
327
static const uint8_t obmc16[256]={
328
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
329
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
330
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
331
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
332
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
333
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
334
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
335
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
336
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
337
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
338
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
339
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
340
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
341
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
342
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
343
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
344
//error:0.000022
345
};
346
#endif /* 0 */
347

    
348
//linear *64
349
static const uint8_t obmc8[64]={
350
  4, 12, 20, 28, 28, 20, 12,  4,
351
 12, 36, 60, 84, 84, 60, 36, 12,
352
 20, 60,100,140,140,100, 60, 20,
353
 28, 84,140,196,196,140, 84, 28,
354
 28, 84,140,196,196,140, 84, 28,
355
 20, 60,100,140,140,100, 60, 20,
356
 12, 36, 60, 84, 84, 60, 36, 12,
357
  4, 12, 20, 28, 28, 20, 12,  4,
358
//error:0.000000
359
};
360

    
361
//linear *64
362
static const uint8_t obmc4[16]={
363
 16, 48, 48, 16,
364
 48,144,144, 48,
365
 48,144,144, 48,
366
 16, 48, 48, 16,
367
//error:0.000000
368
};
369

    
370
static const uint8_t * const obmc_tab[4]={
371
    obmc32, obmc16, obmc8, obmc4
372
};
373

    
374
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
375

    
376
typedef struct BlockNode{
377
    int16_t mx;
378
    int16_t my;
379
    uint8_t ref;
380
    uint8_t color[3];
381
    uint8_t type;
382
//#define TYPE_SPLIT    1
383
#define BLOCK_INTRA   1
384
#define BLOCK_OPT     2
385
//#define TYPE_NOCOLOR  4
386
    uint8_t level; //FIXME merge into type?
387
}BlockNode;
388

    
389
static const BlockNode null_block= { //FIXME add border maybe
390
    .color= {128,128,128},
391
    .mx= 0,
392
    .my= 0,
393
    .ref= 0,
394
    .type= 0,
395
    .level= 0,
396
};
397

    
398
#define LOG2_MB_SIZE 4
399
#define MB_SIZE (1<<LOG2_MB_SIZE)
400
#define ENCODER_EXTRA_BITS 4
401
#define HTAPS_MAX 8
402

    
403
typedef struct x_and_coeff{
404
    int16_t x;
405
    uint16_t coeff;
406
} x_and_coeff;
407

    
408
typedef struct SubBand{
409
    int level;
410
    int stride;
411
    int width;
412
    int height;
413
    int qlog;        ///< log(qscale)/log[2^(1/6)]
414
    DWTELEM *buf;
415
    IDWTELEM *ibuf;
416
    int buf_x_offset;
417
    int buf_y_offset;
418
    int stride_line; ///< Stride measured in lines, not pixels.
419
    x_and_coeff * x_coeff;
420
    struct SubBand *parent;
421
    uint8_t state[/*7*2*/ 7 + 512][32];
422
}SubBand;
423

    
424
typedef struct Plane{
425
    int width;
426
    int height;
427
    SubBand band[MAX_DECOMPOSITIONS][4];
428

    
429
    int htaps;
430
    int8_t hcoeff[HTAPS_MAX/2];
431
    int diag_mc;
432
    int fast_mc;
433

    
434
    int last_htaps;
435
    int8_t last_hcoeff[HTAPS_MAX/2];
436
    int last_diag_mc;
437
}Plane;
438

    
439
typedef struct SnowContext{
440

    
441
    AVCodecContext *avctx;
442
    RangeCoder c;
443
    DSPContext dsp;
444
    DWTContext dwt;
445
    AVFrame new_picture;
446
    AVFrame input_picture;              ///< new_picture with the internal linesizes
447
    AVFrame current_picture;
448
    AVFrame last_picture[MAX_REF_FRAMES];
449
    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
450
    AVFrame mconly_picture;
451
//     uint8_t q_context[16];
452
    uint8_t header_state[32];
453
    uint8_t block_state[128 + 32*128];
454
    int keyframe;
455
    int always_reset;
456
    int version;
457
    int spatial_decomposition_type;
458
    int last_spatial_decomposition_type;
459
    int temporal_decomposition_type;
460
    int spatial_decomposition_count;
461
    int last_spatial_decomposition_count;
462
    int temporal_decomposition_count;
463
    int max_ref_frames;
464
    int ref_frames;
465
    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
466
    uint32_t *ref_scores[MAX_REF_FRAMES];
467
    DWTELEM *spatial_dwt_buffer;
468
    IDWTELEM *spatial_idwt_buffer;
469
    int colorspace_type;
470
    int chroma_h_shift;
471
    int chroma_v_shift;
472
    int spatial_scalability;
473
    int qlog;
474
    int last_qlog;
475
    int lambda;
476
    int lambda2;
477
    int pass1_rc;
478
    int mv_scale;
479
    int last_mv_scale;
480
    int qbias;
481
    int last_qbias;
482
#define QBIAS_SHIFT 3
483
    int b_width;
484
    int b_height;
485
    int block_max_depth;
486
    int last_block_max_depth;
487
    Plane plane[MAX_PLANES];
488
    BlockNode *block;
489
#define ME_CACHE_SIZE 1024
490
    int me_cache[ME_CACHE_SIZE];
491
    int me_cache_generation;
492
    slice_buffer sb;
493

    
494
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
495

    
496
    uint8_t *scratchbuf;
497
}SnowContext;
498

    
499
#ifdef __sgi
500
// Avoid a name clash on SGI IRIX
501
#undef qexp
502
#endif
503
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
504
static uint8_t qexp[QROOT];
505

    
506
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
507
    int i;
508

    
509
    if(v){
510
        const int a= FFABS(v);
511
        const int e= av_log2(a);
512
#if 1
513
        const int el= FFMIN(e, 10);
514
        put_rac(c, state+0, 0);
515

    
516
        for(i=0; i<el; i++){
517
            put_rac(c, state+1+i, 1);  //1..10
518
        }
519
        for(; i<e; i++){
520
            put_rac(c, state+1+9, 1);  //1..10
521
        }
522
        put_rac(c, state+1+FFMIN(i,9), 0);
523

    
524
        for(i=e-1; i>=el; i--){
525
            put_rac(c, state+22+9, (a>>i)&1); //22..31
526
        }
527
        for(; i>=0; i--){
528
            put_rac(c, state+22+i, (a>>i)&1); //22..31
529
        }
530

    
531
        if(is_signed)
532
            put_rac(c, state+11 + el, v < 0); //11..21
533
#else
534

    
535
        put_rac(c, state+0, 0);
536
        if(e<=9){
537
            for(i=0; i<e; i++){
538
                put_rac(c, state+1+i, 1);  //1..10
539
            }
540
            put_rac(c, state+1+i, 0);
541

    
542
            for(i=e-1; i>=0; i--){
543
                put_rac(c, state+22+i, (a>>i)&1); //22..31
544
            }
545

    
546
            if(is_signed)
547
                put_rac(c, state+11 + e, v < 0); //11..21
548
        }else{
549
            for(i=0; i<e; i++){
550
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
551
            }
552
            put_rac(c, state+1+9, 0);
553

    
554
            for(i=e-1; i>=0; i--){
555
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
556
            }
557

    
558
            if(is_signed)
559
                put_rac(c, state+11 + 10, v < 0); //11..21
560
        }
561
#endif /* 1 */
562
    }else{
563
        put_rac(c, state+0, 1);
564
    }
565
}
566

    
567
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
568
    if(get_rac(c, state+0))
569
        return 0;
570
    else{
571
        int i, e, a;
572
        e= 0;
573
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
574
            e++;
575
        }
576

    
577
        a= 1;
578
        for(i=e-1; i>=0; i--){
579
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
580
        }
581

    
582
        e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
583
        return (a^e)-e;
584
    }
585
}
586

    
587
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
588
    int i;
589
    int r= log2>=0 ? 1<<log2 : 1;
590

    
591
    assert(v>=0);
592
    assert(log2>=-4);
593

    
594
    while(v >= r){
595
        put_rac(c, state+4+log2, 1);
596
        v -= r;
597
        log2++;
598
        if(log2>0) r+=r;
599
    }
600
    put_rac(c, state+4+log2, 0);
601

    
602
    for(i=log2-1; i>=0; i--){
603
        put_rac(c, state+31-i, (v>>i)&1);
604
    }
605
}
606

    
607
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
608
    int i;
609
    int r= log2>=0 ? 1<<log2 : 1;
610
    int v=0;
611

    
612
    assert(log2>=-4);
613

    
614
    while(get_rac(c, state+4+log2)){
615
        v+= r;
616
        log2++;
617
        if(log2>0) r+=r;
618
    }
619

    
620
    for(i=log2-1; i>=0; i--){
621
        v+= get_rac(c, state+31-i)<<i;
622
    }
623

    
624
    return v;
625
}
626

    
627
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
628
    const int w= b->width;
629
    const int h= b->height;
630
    int x,y;
631

    
632
    int run, runs;
633
    x_and_coeff *xc= b->x_coeff;
634
    x_and_coeff *prev_xc= NULL;
635
    x_and_coeff *prev2_xc= xc;
636
    x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
637
    x_and_coeff *prev_parent_xc= parent_xc;
638

    
639
    runs= get_symbol2(&s->c, b->state[30], 0);
640
    if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
641
    else           run= INT_MAX;
642

    
643
    for(y=0; y<h; y++){
644
        int v=0;
645
        int lt=0, t=0, rt=0;
646

    
647
        if(y && prev_xc->x == 0){
648
            rt= prev_xc->coeff;
649
        }
650
        for(x=0; x<w; x++){
651
            int p=0;
652
            const int l= v;
653

    
654
            lt= t; t= rt;
655

    
656
            if(y){
657
                if(prev_xc->x <= x)
658
                    prev_xc++;
659
                if(prev_xc->x == x + 1)
660
                    rt= prev_xc->coeff;
661
                else
662
                    rt=0;
663
            }
664
            if(parent_xc){
665
                if(x>>1 > parent_xc->x){
666
                    parent_xc++;
667
                }
668
                if(x>>1 == parent_xc->x){
669
                    p= parent_xc->coeff;
670
                }
671
            }
672
            if(/*ll|*/l|lt|t|rt|p){
673
                int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
674

    
675
                v=get_rac(&s->c, &b->state[0][context]);
676
                if(v){
677
                    v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
678
                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
679

    
680
                    xc->x=x;
681
                    (xc++)->coeff= v;
682
                }
683
            }else{
684
                if(!run){
685
                    if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
686
                    else           run= INT_MAX;
687
                    v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
688
                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
689

    
690
                    xc->x=x;
691
                    (xc++)->coeff= v;
692
                }else{
693
                    int max_run;
694
                    run--;
695
                    v=0;
696

    
697
                    if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
698
                    else  max_run= FFMIN(run, w-x-1);
699
                    if(parent_xc)
700
                        max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
701
                    x+= max_run;
702
                    run-= max_run;
703
                }
704
            }
705
        }
706
        (xc++)->x= w+1; //end marker
707
        prev_xc= prev2_xc;
708
        prev2_xc= xc;
709

    
710
        if(parent_xc){
711
            if(y&1){
712
                while(parent_xc->x != parent->width+1)
713
                    parent_xc++;
714
                parent_xc++;
715
                prev_parent_xc= parent_xc;
716
            }else{
717
                parent_xc= prev_parent_xc;
718
            }
719
        }
720
    }
721

    
722
    (xc++)->x= w+1; //end marker
723
}
724

    
725
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
726
    const int w= b->width;
727
    int y;
728
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
729
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
730
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
731
    int new_index = 0;
732

    
733
    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
734
        qadd= 0;
735
        qmul= 1<<QEXPSHIFT;
736
    }
737

    
738
    /* If we are on the second or later slice, restore our index. */
739
    if (start_y != 0)
740
        new_index = save_state[0];
741

    
742

    
743
    for(y=start_y; y<h; y++){
744
        int x = 0;
745
        int v;
746
        IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
747
        memset(line, 0, b->width*sizeof(IDWTELEM));
748
        v = b->x_coeff[new_index].coeff;
749
        x = b->x_coeff[new_index++].x;
750
        while(x < w){
751
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
752
            register int u= -(v&1);
753
            line[x] = (t^u) - u;
754

    
755
            v = b->x_coeff[new_index].coeff;
756
            x = b->x_coeff[new_index++].x;
757
        }
758
    }
759

    
760
    /* Save our variables for the next slice. */
761
    save_state[0] = new_index;
762

    
763
    return;
764
}
765

    
766
static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
767
    int plane_index, level, orientation;
768

    
769
    for(plane_index=0; plane_index<3; plane_index++){
770
        for(level=0; level<MAX_DECOMPOSITIONS; level++){
771
            for(orientation=level ? 1:0; orientation<4; orientation++){
772
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
773
            }
774
        }
775
    }
776
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
777
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
778
}
779

    
780
static int alloc_blocks(SnowContext *s){
781
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
782
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
783

    
784
    s->b_width = w;
785
    s->b_height= h;
786

    
787
    av_free(s->block);
788
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
789
    return 0;
790
}
791

    
792
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
793
    uint8_t *bytestream= d->bytestream;
794
    uint8_t *bytestream_start= d->bytestream_start;
795
    *d= *s;
796
    d->bytestream= bytestream;
797
    d->bytestream_start= bytestream_start;
798
}
799

    
800
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
801
    const int w= s->b_width << s->block_max_depth;
802
    const int rem_depth= s->block_max_depth - level;
803
    const int index= (x + y*w) << rem_depth;
804
    const int block_w= 1<<rem_depth;
805
    BlockNode block;
806
    int i,j;
807

    
808
    block.color[0]= l;
809
    block.color[1]= cb;
810
    block.color[2]= cr;
811
    block.mx= mx;
812
    block.my= my;
813
    block.ref= ref;
814
    block.type= type;
815
    block.level= level;
816

    
817
    for(j=0; j<block_w; j++){
818
        for(i=0; i<block_w; i++){
819
            s->block[index + i + j*w]= block;
820
        }
821
    }
822
}
823

    
824
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
825
    const int offset[3]= {
826
          y*c->  stride + x,
827
        ((y*c->uvstride + x)>>1),
828
        ((y*c->uvstride + x)>>1),
829
    };
830
    int i;
831
    for(i=0; i<3; i++){
832
        c->src[0][i]= src [i];
833
        c->ref[0][i]= ref [i] + offset[i];
834
    }
835
    assert(!ref_index);
836
}
837

    
838
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
839
                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
840
    if(s->ref_frames == 1){
841
        *mx = mid_pred(left->mx, top->mx, tr->mx);
842
        *my = mid_pred(left->my, top->my, tr->my);
843
    }else{
844
        const int *scale = scale_mv_ref[ref];
845
        *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
846
                       (top ->mx * scale[top ->ref] + 128) >>8,
847
                       (tr  ->mx * scale[tr  ->ref] + 128) >>8);
848
        *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
849
                       (top ->my * scale[top ->ref] + 128) >>8,
850
                       (tr  ->my * scale[tr  ->ref] + 128) >>8);
851
    }
852
}
853

    
854
static av_always_inline int same_block(BlockNode *a, BlockNode *b){
855
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
856
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
857
    }else{
858
        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
859
    }
860
}
861

    
862
static void decode_q_branch(SnowContext *s, int level, int x, int y){
863
    const int w= s->b_width << s->block_max_depth;
864
    const int rem_depth= s->block_max_depth - level;
865
    const int index= (x + y*w) << rem_depth;
866
    int trx= (x+1)<<rem_depth;
867
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
868
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
869
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
870
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
871
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
872

    
873
    if(s->keyframe){
874
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
875
        return;
876
    }
877

    
878
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
879
        int type, mx, my;
880
        int l = left->color[0];
881
        int cb= left->color[1];
882
        int cr= left->color[2];
883
        int ref = 0;
884
        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
885
        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
886
        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
887

    
888
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
889

    
890
        if(type){
891
            pred_mv(s, &mx, &my, 0, left, top, tr);
892
            l += get_symbol(&s->c, &s->block_state[32], 1);
893
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
894
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
895
        }else{
896
            if(s->ref_frames > 1)
897
                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
898
            pred_mv(s, &mx, &my, ref, left, top, tr);
899
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
900
            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
901
        }
902
        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
903
    }else{
904
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
905
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
906
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
907
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
908
    }
909
}
910

    
911
static void decode_blocks(SnowContext *s){
912
    int x, y;
913
    int w= s->b_width;
914
    int h= s->b_height;
915

    
916
    for(y=0; y<h; y++){
917
        for(x=0; x<w; x++){
918
            decode_q_branch(s, 0, x, y);
919
        }
920
    }
921
}
922

    
923
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
924
    static const uint8_t weight[64]={
925
    8,7,6,5,4,3,2,1,
926
    7,7,0,0,0,0,0,1,
927
    6,0,6,0,0,0,2,0,
928
    5,0,0,5,0,3,0,0,
929
    4,0,0,0,4,0,0,0,
930
    3,0,0,5,0,3,0,0,
931
    2,0,6,0,0,0,2,0,
932
    1,7,0,0,0,0,0,1,
933
    };
934

    
935
    static const uint8_t brane[256]={
936
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
937
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
938
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
939
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
940
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
941
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
942
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
943
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
944
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
945
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
946
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
947
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
948
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
949
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
950
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
951
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
952
    };
953

    
954
    static const uint8_t needs[16]={
955
    0,1,0,0,
956
    2,4,2,0,
957
    0,1,0,0,
958
    15
959
    };
960

    
961
    int x, y, b, r, l;
962
    int16_t tmpIt   [64*(32+HTAPS_MAX)];
963
    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
964
    int16_t *tmpI= tmpIt;
965
    uint8_t *tmp2= tmp2t[0];
966
    const uint8_t *hpel[11];
967
    assert(dx<16 && dy<16);
968
    r= brane[dx + 16*dy]&15;
969
    l= brane[dx + 16*dy]>>4;
970

    
971
    b= needs[l] | needs[r];
972
    if(p && !p->diag_mc)
973
        b= 15;
974

    
975
    if(b&5){
976
        for(y=0; y < b_h+HTAPS_MAX-1; y++){
977
            for(x=0; x < b_w; x++){
978
                int a_1=src[x + HTAPS_MAX/2-4];
979
                int a0= src[x + HTAPS_MAX/2-3];
980
                int a1= src[x + HTAPS_MAX/2-2];
981
                int a2= src[x + HTAPS_MAX/2-1];
982
                int a3= src[x + HTAPS_MAX/2+0];
983
                int a4= src[x + HTAPS_MAX/2+1];
984
                int a5= src[x + HTAPS_MAX/2+2];
985
                int a6= src[x + HTAPS_MAX/2+3];
986
                int am=0;
987
                if(!p || p->fast_mc){
988
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
989
                    tmpI[x]= am;
990
                    am= (am+16)>>5;
991
                }else{
992
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
993
                    tmpI[x]= am;
994
                    am= (am+32)>>6;
995
                }
996

    
997
                if(am&(~255)) am= ~(am>>31);
998
                tmp2[x]= am;
999
            }
1000
            tmpI+= 64;
1001
            tmp2+= stride;
1002
            src += stride;
1003
        }
1004
        src -= stride*y;
1005
    }
1006
    src += HTAPS_MAX/2 - 1;
1007
    tmp2= tmp2t[1];
1008

    
1009
    if(b&2){
1010
        for(y=0; y < b_h; y++){
1011
            for(x=0; x < b_w+1; x++){
1012
                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1013
                int a0= src[x + (HTAPS_MAX/2-3)*stride];
1014
                int a1= src[x + (HTAPS_MAX/2-2)*stride];
1015
                int a2= src[x + (HTAPS_MAX/2-1)*stride];
1016
                int a3= src[x + (HTAPS_MAX/2+0)*stride];
1017
                int a4= src[x + (HTAPS_MAX/2+1)*stride];
1018
                int a5= src[x + (HTAPS_MAX/2+2)*stride];
1019
                int a6= src[x + (HTAPS_MAX/2+3)*stride];
1020
                int am=0;
1021
                if(!p || p->fast_mc)
1022
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1023
                else
1024
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
1025

    
1026
                if(am&(~255)) am= ~(am>>31);
1027
                tmp2[x]= am;
1028
            }
1029
            src += stride;
1030
            tmp2+= stride;
1031
        }
1032
        src -= stride*y;
1033
    }
1034
    src += stride*(HTAPS_MAX/2 - 1);
1035
    tmp2= tmp2t[2];
1036
    tmpI= tmpIt;
1037
    if(b&4){
1038
        for(y=0; y < b_h; y++){
1039
            for(x=0; x < b_w; x++){
1040
                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1041
                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1042
                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1043
                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1044
                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1045
                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1046
                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1047
                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
1048
                int am=0;
1049
                if(!p || p->fast_mc)
1050
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1051
                else
1052
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
1053
                if(am&(~255)) am= ~(am>>31);
1054
                tmp2[x]= am;
1055
            }
1056
            tmpI+= 64;
1057
            tmp2+= stride;
1058
        }
1059
    }
1060

    
1061
    hpel[ 0]= src;
1062
    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
1063
    hpel[ 2]= src + 1;
1064

    
1065
    hpel[ 4]= tmp2t[1];
1066
    hpel[ 5]= tmp2t[2];
1067
    hpel[ 6]= tmp2t[1] + 1;
1068

    
1069
    hpel[ 8]= src + stride;
1070
    hpel[ 9]= hpel[1] + stride;
1071
    hpel[10]= hpel[8] + 1;
1072

    
1073
    if(b==15){
1074
        const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
1075
        const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1076
        const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1077
        const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
1078
        dx&=7;
1079
        dy&=7;
1080
        for(y=0; y < b_h; y++){
1081
            for(x=0; x < b_w; x++){
1082
                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1083
                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
1084
            }
1085
            src1+=stride;
1086
            src2+=stride;
1087
            src3+=stride;
1088
            src4+=stride;
1089
            dst +=stride;
1090
        }
1091
    }else{
1092
        const uint8_t *src1= hpel[l];
1093
        const uint8_t *src2= hpel[r];
1094
        int a= weight[((dx&7) + (8*(dy&7)))];
1095
        int b= 8-a;
1096
        for(y=0; y < b_h; y++){
1097
            for(x=0; x < b_w; x++){
1098
                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
1099
            }
1100
            src1+=stride;
1101
            src2+=stride;
1102
            dst +=stride;
1103
        }
1104
    }
1105
}
1106

    
1107
#define mca(dx,dy,b_w)\
1108
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
1109
    uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
1110
    assert(h==b_w);\
1111
    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
1112
}
1113

    
1114
mca( 0, 0,16)
1115
mca( 8, 0,16)
1116
mca( 0, 8,16)
1117
mca( 8, 8,16)
1118
mca( 0, 0,8)
1119
mca( 8, 0,8)
1120
mca( 0, 8,8)
1121
mca( 8, 8,8)
1122

    
1123
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1124
    if(block->type & BLOCK_INTRA){
1125
        int x, y;
1126
        const int color = block->color[plane_index];
1127
        const int color4= color*0x01010101;
1128
        if(b_w==32){
1129
            for(y=0; y < b_h; y++){
1130
                *(uint32_t*)&dst[0 + y*stride]= color4;
1131
                *(uint32_t*)&dst[4 + y*stride]= color4;
1132
                *(uint32_t*)&dst[8 + y*stride]= color4;
1133
                *(uint32_t*)&dst[12+ y*stride]= color4;
1134
                *(uint32_t*)&dst[16+ y*stride]= color4;
1135
                *(uint32_t*)&dst[20+ y*stride]= color4;
1136
                *(uint32_t*)&dst[24+ y*stride]= color4;
1137
                *(uint32_t*)&dst[28+ y*stride]= color4;
1138
            }
1139
        }else if(b_w==16){
1140
            for(y=0; y < b_h; y++){
1141
                *(uint32_t*)&dst[0 + y*stride]= color4;
1142
                *(uint32_t*)&dst[4 + y*stride]= color4;
1143
                *(uint32_t*)&dst[8 + y*stride]= color4;
1144
                *(uint32_t*)&dst[12+ y*stride]= color4;
1145
            }
1146
        }else if(b_w==8){
1147
            for(y=0; y < b_h; y++){
1148
                *(uint32_t*)&dst[0 + y*stride]= color4;
1149
                *(uint32_t*)&dst[4 + y*stride]= color4;
1150
            }
1151
        }else if(b_w==4){
1152
            for(y=0; y < b_h; y++){
1153
                *(uint32_t*)&dst[0 + y*stride]= color4;
1154
            }
1155
        }else{
1156
            for(y=0; y < b_h; y++){
1157
                for(x=0; x < b_w; x++){
1158
                    dst[x + y*stride]= color;
1159
                }
1160
            }
1161
        }
1162
    }else{
1163
        uint8_t *src= s->last_picture[block->ref].data[plane_index];
1164
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
1165
        int mx= block->mx*scale;
1166
        int my= block->my*scale;
1167
        const int dx= mx&15;
1168
        const int dy= my&15;
1169
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
1170
        sx += (mx>>4) - (HTAPS_MAX/2-1);
1171
        sy += (my>>4) - (HTAPS_MAX/2-1);
1172
        src += sx + sy*stride;
1173
        if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1174
           || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
1175
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
1176
            src= tmp + MB_SIZE;
1177
        }
1178
//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1179
//        assert(!(b_w&(b_w-1)));
1180
        assert(b_w>1 && b_h>1);
1181
        assert((tab_index>=0 && tab_index<4) || b_w==32);
1182
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1183
            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
1184
        else if(b_w==32){
1185
            int y;
1186
            for(y=0; y<b_h; y+=16){
1187
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1188
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1189
            }
1190
        }else if(b_w==b_h)
1191
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
1192
        else if(b_w==2*b_h){
1193
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
1194
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
1195
        }else{
1196
            assert(2*b_w==b_h);
1197
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
1198
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
1199
        }
1200
    }
1201
}
1202

    
1203
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
1204
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1205
    int y, x;
1206
    IDWTELEM * dst;
1207
    for(y=0; y<b_h; y++){
1208
        //FIXME ugly misuse of obmc_stride
1209
        const uint8_t *obmc1= obmc + y*obmc_stride;
1210
        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1211
        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1212
        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1213
        dst = slice_buffer_get_line(sb, src_y + y);
1214
        for(x=0; x<b_w; x++){
1215
            int v=   obmc1[x] * block[3][x + y*src_stride]
1216
                    +obmc2[x] * block[2][x + y*src_stride]
1217
                    +obmc3[x] * block[1][x + y*src_stride]
1218
                    +obmc4[x] * block[0][x + y*src_stride];
1219

    
1220
            v <<= 8 - LOG2_OBMC_MAX;
1221
            if(FRAC_BITS != 8){
1222
                v >>= 8 - FRAC_BITS;
1223
            }
1224
            if(add){
1225
                v += dst[x + src_x];
1226
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1227
                if(v&(~255)) v= ~(v>>31);
1228
                dst8[x + y*src_stride] = v;
1229
            }else{
1230
                dst[x + src_x] -= v;
1231
            }
1232
        }
1233
    }
1234
}
1235

    
1236
//FIXME name cleanup (b_w, block_w, b_width stuff)
1237
static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
1238
    const int b_width = s->b_width  << s->block_max_depth;
1239
    const int b_height= s->b_height << s->block_max_depth;
1240
    const int b_stride= b_width;
1241
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
1242
    BlockNode *rt= lt+1;
1243
    BlockNode *lb= lt+b_stride;
1244
    BlockNode *rb= lb+1;
1245
    uint8_t *block[4];
1246
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
1247
    uint8_t *tmp = s->scratchbuf;
1248
    uint8_t *ptmp;
1249
    int x,y;
1250

    
1251
    if(b_x<0){
1252
        lt= rt;
1253
        lb= rb;
1254
    }else if(b_x + 1 >= b_width){
1255
        rt= lt;
1256
        rb= lb;
1257
    }
1258
    if(b_y<0){
1259
        lt= lb;
1260
        rt= rb;
1261
    }else if(b_y + 1 >= b_height){
1262
        lb= lt;
1263
        rb= rt;
1264
    }
1265

    
1266
    if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
1267
        obmc -= src_x;
1268
        b_w += src_x;
1269
        if(!sliced && !offset_dst)
1270
            dst -= src_x;
1271
        src_x=0;
1272
    }else if(src_x + b_w > w){
1273
        b_w = w - src_x;
1274
    }
1275
    if(src_y<0){
1276
        obmc -= src_y*obmc_stride;
1277
        b_h += src_y;
1278
        if(!sliced && !offset_dst)
1279
            dst -= src_y*dst_stride;
1280
        src_y=0;
1281
    }else if(src_y + b_h> h){
1282
        b_h = h - src_y;
1283
    }
1284

    
1285
    if(b_w<=0 || b_h<=0) return;
1286

    
1287
    assert(src_stride > 2*MB_SIZE + 5);
1288

    
1289
    if(!sliced && offset_dst)
1290
        dst += src_x + src_y*dst_stride;
1291
    dst8+= src_x + src_y*src_stride;
1292
//    src += src_x + src_y*src_stride;
1293

    
1294
    ptmp= tmp + 3*tmp_step;
1295
    block[0]= ptmp;
1296
    ptmp+=tmp_step;
1297
    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1298

    
1299
    if(same_block(lt, rt)){
1300
        block[1]= block[0];
1301
    }else{
1302
        block[1]= ptmp;
1303
        ptmp+=tmp_step;
1304
        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1305
    }
1306

    
1307
    if(same_block(lt, lb)){
1308
        block[2]= block[0];
1309
    }else if(same_block(rt, lb)){
1310
        block[2]= block[1];
1311
    }else{
1312
        block[2]= ptmp;
1313
        ptmp+=tmp_step;
1314
        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1315
    }
1316

    
1317
    if(same_block(lt, rb) ){
1318
        block[3]= block[0];
1319
    }else if(same_block(rt, rb)){
1320
        block[3]= block[1];
1321
    }else if(same_block(lb, rb)){
1322
        block[3]= block[2];
1323
    }else{
1324
        block[3]= ptmp;
1325
        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
1326
    }
1327
#if 0
1328
    for(y=0; y<b_h; y++){
1329
        for(x=0; x<b_w; x++){
1330
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
1331
            if(add) dst[x + y*dst_stride] += v;
1332
            else    dst[x + y*dst_stride] -= v;
1333
        }
1334
    }
1335
    for(y=0; y<b_h; y++){
1336
        uint8_t *obmc2= obmc + (obmc_stride>>1);
1337
        for(x=0; x<b_w; x++){
1338
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
1339
            if(add) dst[x + y*dst_stride] += v;
1340
            else    dst[x + y*dst_stride] -= v;
1341
        }
1342
    }
1343
    for(y=0; y<b_h; y++){
1344
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1345
        for(x=0; x<b_w; x++){
1346
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
1347
            if(add) dst[x + y*dst_stride] += v;
1348
            else    dst[x + y*dst_stride] -= v;
1349
        }
1350
    }
1351
    for(y=0; y<b_h; y++){
1352
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1353
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1354
        for(x=0; x<b_w; x++){
1355
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
1356
            if(add) dst[x + y*dst_stride] += v;
1357
            else    dst[x + y*dst_stride] -= v;
1358
        }
1359
    }
1360
#else
1361
    if(sliced){
1362
        s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
1363
    }else{
1364
        for(y=0; y<b_h; y++){
1365
            //FIXME ugly misuse of obmc_stride
1366
            const uint8_t *obmc1= obmc + y*obmc_stride;
1367
            const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1368
            const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1369
            const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1370
            for(x=0; x<b_w; x++){
1371
                int v=   obmc1[x] * block[3][x + y*src_stride]
1372
                        +obmc2[x] * block[2][x + y*src_stride]
1373
                        +obmc3[x] * block[1][x + y*src_stride]
1374
                        +obmc4[x] * block[0][x + y*src_stride];
1375

    
1376
                v <<= 8 - LOG2_OBMC_MAX;
1377
                if(FRAC_BITS != 8){
1378
                    v >>= 8 - FRAC_BITS;
1379
                }
1380
                if(add){
1381
                    v += dst[x + y*dst_stride];
1382
                    v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1383
                    if(v&(~255)) v= ~(v>>31);
1384
                    dst8[x + y*src_stride] = v;
1385
                }else{
1386
                    dst[x + y*dst_stride] -= v;
1387
                }
1388
            }
1389
        }
1390
    }
1391
#endif /* 0 */
1392
}
1393

    
1394
static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
1395
    Plane *p= &s->plane[plane_index];
1396
    const int mb_w= s->b_width  << s->block_max_depth;
1397
    const int mb_h= s->b_height << s->block_max_depth;
1398
    int x, y, mb_x;
1399
    int block_size = MB_SIZE >> s->block_max_depth;
1400
    int block_w    = plane_index ? block_size/2 : block_size;
1401
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1402
    int obmc_stride= plane_index ? block_size : 2*block_size;
1403
    int ref_stride= s->current_picture.linesize[plane_index];
1404
    uint8_t *dst8= s->current_picture.data[plane_index];
1405
    int w= p->width;
1406
    int h= p->height;
1407

    
1408
    if(s->keyframe || (s->avctx->debug&512)){
1409
        if(mb_y==mb_h)
1410
            return;
1411

    
1412
        if(add){
1413
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1414
//                DWTELEM * line = slice_buffer_get_line(sb, y);
1415
                IDWTELEM * line = sb->line[y];
1416
                for(x=0; x<w; x++){
1417
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1418
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1419
                    v >>= FRAC_BITS;
1420
                    if(v&(~255)) v= ~(v>>31);
1421
                    dst8[x + y*ref_stride]= v;
1422
                }
1423
            }
1424
        }else{
1425
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1426
//                DWTELEM * line = slice_buffer_get_line(sb, y);
1427
                IDWTELEM * line = sb->line[y];
1428
                for(x=0; x<w; x++){
1429
                    line[x] -= 128 << FRAC_BITS;
1430
//                    buf[x + y*w]-= 128<<FRAC_BITS;
1431
                }
1432
            }
1433
        }
1434

    
1435
        return;
1436
    }
1437

    
1438
    for(mb_x=0; mb_x<=mb_w; mb_x++){
1439
        add_yblock(s, 1, sb, old_buffer, dst8, obmc,
1440
                   block_w*mb_x - block_w/2,
1441
                   block_w*mb_y - block_w/2,
1442
                   block_w, block_w,
1443
                   w, h,
1444
                   w, ref_stride, obmc_stride,
1445
                   mb_x - 1, mb_y - 1,
1446
                   add, 0, plane_index);
1447
    }
1448
}
1449

    
1450
static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
1451
    Plane *p= &s->plane[plane_index];
1452
    const int mb_w= s->b_width  << s->block_max_depth;
1453
    const int mb_h= s->b_height << s->block_max_depth;
1454
    int x, y, mb_x;
1455
    int block_size = MB_SIZE >> s->block_max_depth;
1456
    int block_w    = plane_index ? block_size/2 : block_size;
1457
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1458
    const int obmc_stride= plane_index ? block_size : 2*block_size;
1459
    int ref_stride= s->current_picture.linesize[plane_index];
1460
    uint8_t *dst8= s->current_picture.data[plane_index];
1461
    int w= p->width;
1462
    int h= p->height;
1463

    
1464
    if(s->keyframe || (s->avctx->debug&512)){
1465
        if(mb_y==mb_h)
1466
            return;
1467

    
1468
        if(add){
1469
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1470
                for(x=0; x<w; x++){
1471
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1472
                    v >>= FRAC_BITS;
1473
                    if(v&(~255)) v= ~(v>>31);
1474
                    dst8[x + y*ref_stride]= v;
1475
                }
1476
            }
1477
        }else{
1478
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1479
                for(x=0; x<w; x++){
1480
                    buf[x + y*w]-= 128<<FRAC_BITS;
1481
                }
1482
            }
1483
        }
1484

    
1485
        return;
1486
    }
1487

    
1488
    for(mb_x=0; mb_x<=mb_w; mb_x++){
1489
        add_yblock(s, 0, NULL, buf, dst8, obmc,
1490
                   block_w*mb_x - block_w/2,
1491
                   block_w*mb_y - block_w/2,
1492
                   block_w, block_w,
1493
                   w, h,
1494
                   w, ref_stride, obmc_stride,
1495
                   mb_x - 1, mb_y - 1,
1496
                   add, 1, plane_index);
1497
    }
1498
}
1499

    
1500
static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
1501
    const int mb_h= s->b_height << s->block_max_depth;
1502
    int mb_y;
1503
    for(mb_y=0; mb_y<=mb_h; mb_y++)
1504
        predict_slice(s, buf, plane_index, add, mb_y);
1505
}
1506

    
1507
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
1508
    const int w= b->width;
1509
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1510
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1511
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1512
    int x,y;
1513

    
1514
    if(s->qlog == LOSSLESS_QLOG) return;
1515

    
1516
    for(y=start_y; y<end_y; y++){
1517
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1518
        IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1519
        for(x=0; x<w; x++){
1520
            int i= line[x];
1521
            if(i<0){
1522
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1523
            }else if(i>0){
1524
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
1525
            }
1526
        }
1527
    }
1528
}
1529

    
1530
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
1531
    const int w= b->width;
1532
    int x,y;
1533

    
1534
    IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
1535
    IDWTELEM * prev;
1536

    
1537
    if (start_y != 0)
1538
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1539

    
1540
    for(y=start_y; y<end_y; y++){
1541
        prev = line;
1542
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1543
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1544
        for(x=0; x<w; x++){
1545
            if(x){
1546
                if(use_median){
1547
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
1548
                    else  line[x] += line[x - 1];
1549
                }else{
1550
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
1551
                    else  line[x] += line[x - 1];
1552
                }
1553
            }else{
1554
                if(y) line[x] += prev[x];
1555
            }
1556
        }
1557
    }
1558
}
1559

    
1560
static void decode_qlogs(SnowContext *s){
1561
    int plane_index, level, orientation;
1562

    
1563
    for(plane_index=0; plane_index<3; plane_index++){
1564
        for(level=0; level<s->spatial_decomposition_count; level++){
1565
            for(orientation=level ? 1:0; orientation<4; orientation++){
1566
                int q;
1567
                if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1568
                else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1569
                else                    q= get_symbol(&s->c, s->header_state, 1);
1570
                s->plane[plane_index].band[level][orientation].qlog= q;
1571
            }
1572
        }
1573
    }
1574
}
1575

    
1576
#define GET_S(dst, check) \
1577
    tmp= get_symbol(&s->c, s->header_state, 0);\
1578
    if(!(check)){\
1579
        av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
1580
        return -1;\
1581
    }\
1582
    dst= tmp;
1583

    
1584
static int decode_header(SnowContext *s){
1585
    int plane_index, tmp;
1586
    uint8_t kstate[32];
1587

    
1588
    memset(kstate, MID_STATE, sizeof(kstate));
1589

    
1590
    s->keyframe= get_rac(&s->c, kstate);
1591
    if(s->keyframe || s->always_reset){
1592
        reset_contexts(s);
1593
        s->spatial_decomposition_type=
1594
        s->qlog=
1595
        s->qbias=
1596
        s->mv_scale=
1597
        s->block_max_depth= 0;
1598
    }
1599
    if(s->keyframe){
1600
        GET_S(s->version, tmp <= 0U)
1601
        s->always_reset= get_rac(&s->c, s->header_state);
1602
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1603
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1604
        GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1605
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1606
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1607
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1608
        s->spatial_scalability= get_rac(&s->c, s->header_state);
1609
//        s->rate_scalability= get_rac(&s->c, s->header_state);
1610
        GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
1611
        s->max_ref_frames++;
1612

    
1613
        decode_qlogs(s);
1614
    }
1615

    
1616
    if(!s->keyframe){
1617
        if(get_rac(&s->c, s->header_state)){
1618
            for(plane_index=0; plane_index<2; plane_index++){
1619
                int htaps, i, sum=0;
1620
                Plane *p= &s->plane[plane_index];
1621
                p->diag_mc= get_rac(&s->c, s->header_state);
1622
                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
1623
                if((unsigned)htaps > HTAPS_MAX || htaps==0)
1624
                    return -1;
1625
                p->htaps= htaps;
1626
                for(i= htaps/2; i; i--){
1627
                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
1628
                    sum += p->hcoeff[i];
1629
                }
1630
                p->hcoeff[0]= 32-sum;
1631
            }
1632
            s->plane[2].diag_mc= s->plane[1].diag_mc;
1633
            s->plane[2].htaps  = s->plane[1].htaps;
1634
            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
1635
        }
1636
        if(get_rac(&s->c, s->header_state)){
1637
            GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1638
            decode_qlogs(s);
1639
        }
1640
    }
1641

    
1642
    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
1643
    if(s->spatial_decomposition_type > 1U){
1644
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1645
        return -1;
1646
    }
1647
    if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
1648
             s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
1649
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
1650
        return -1;
1651
    }
1652

    
1653
    s->qlog           += get_symbol(&s->c, s->header_state, 1);
1654
    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
1655
    s->qbias          += get_symbol(&s->c, s->header_state, 1);
1656
    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
1657
    if(s->block_max_depth > 1 || s->block_max_depth < 0){
1658
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
1659
        s->block_max_depth= 0;
1660
        return -1;
1661
    }
1662

    
1663
    return 0;
1664
}
1665

    
1666
static void init_qexp(void){
1667
    int i;
1668
    double v=128;
1669

    
1670
    for(i=0; i<QROOT; i++){
1671
        qexp[i]= lrintf(v);
1672
        v *= pow(2, 1.0 / QROOT);
1673
    }
1674
}
1675

    
1676
static av_cold int common_init(AVCodecContext *avctx){
1677
    SnowContext *s = avctx->priv_data;
1678
    int width, height;
1679
    int i, j;
1680

    
1681
    s->avctx= avctx;
1682
    s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
1683

    
1684
    dsputil_init(&s->dsp, avctx);
1685
    ff_dwt_init(&s->dwt);
1686

    
1687
#define mcf(dx,dy)\
1688
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
1689
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
1690
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
1691
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
1692
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
1693
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
1694

    
1695
    mcf( 0, 0)
1696
    mcf( 4, 0)
1697
    mcf( 8, 0)
1698
    mcf(12, 0)
1699
    mcf( 0, 4)
1700
    mcf( 4, 4)
1701
    mcf( 8, 4)
1702
    mcf(12, 4)
1703
    mcf( 0, 8)
1704
    mcf( 4, 8)
1705
    mcf( 8, 8)
1706
    mcf(12, 8)
1707
    mcf( 0,12)
1708
    mcf( 4,12)
1709
    mcf( 8,12)
1710
    mcf(12,12)
1711

    
1712
#define mcfh(dx,dy)\
1713
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
1714
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
1715
        mc_block_hpel ## dx ## dy ## 16;\
1716
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
1717
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
1718
        mc_block_hpel ## dx ## dy ## 8;
1719

    
1720
    mcfh(0, 0)
1721
    mcfh(8, 0)
1722
    mcfh(0, 8)
1723
    mcfh(8, 8)
1724

    
1725
    if(!qexp[0])
1726
        init_qexp();
1727

    
1728
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
1729

    
1730
    width= s->avctx->width;
1731
    height= s->avctx->height;
1732

    
1733
    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
1734
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
1735

    
1736
    for(i=0; i<MAX_REF_FRAMES; i++)
1737
        for(j=0; j<MAX_REF_FRAMES; j++)
1738
            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
1739

    
1740
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
1741
    s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
1742

    
1743
    return 0;
1744
}
1745

    
1746
static int common_init_after_header(AVCodecContext *avctx){
1747
    SnowContext *s = avctx->priv_data;
1748
    int plane_index, level, orientation;
1749

    
1750
    for(plane_index=0; plane_index<3; plane_index++){
1751
        int w= s->avctx->width;
1752
        int h= s->avctx->height;
1753

    
1754
        if(plane_index){
1755
            w>>= s->chroma_h_shift;
1756
            h>>= s->chroma_v_shift;
1757
        }
1758
        s->plane[plane_index].width = w;
1759
        s->plane[plane_index].height= h;
1760

    
1761
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
1762
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
1763
                SubBand *b= &s->plane[plane_index].band[level][orientation];
1764

    
1765
                b->buf= s->spatial_dwt_buffer;
1766
                b->level= level;
1767
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
1768
                b->width = (w + !(orientation&1))>>1;
1769
                b->height= (h + !(orientation>1))>>1;
1770

    
1771
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
1772
                b->buf_x_offset = 0;
1773
                b->buf_y_offset = 0;
1774

    
1775
                if(orientation&1){
1776
                    b->buf += (w+1)>>1;
1777
                    b->buf_x_offset = (w+1)>>1;
1778
                }
1779
                if(orientation>1){
1780
                    b->buf += b->stride>>1;
1781
                    b->buf_y_offset = b->stride_line >> 1;
1782
                }
1783
                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
1784

    
1785
                if(level)
1786
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
1787
                //FIXME avoid this realloc
1788
                av_freep(&b->x_coeff);
1789
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
1790
            }
1791
            w= (w+1)>>1;
1792
            h= (h+1)>>1;
1793
        }
1794
    }
1795

    
1796
    return 0;
1797
}
1798

    
1799
#define QUANTIZE2 0
1800

    
1801
#if QUANTIZE2==1
1802
#define Q2_STEP 8
1803

    
1804
static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
1805
    SubBand *b= &p->band[level][orientation];
1806
    int x, y;
1807
    int xo=0;
1808
    int yo=0;
1809
    int step= 1 << (s->spatial_decomposition_count - level);
1810

    
1811
    if(orientation&1)
1812
        xo= step>>1;
1813
    if(orientation&2)
1814
        yo= step>>1;
1815

    
1816
    //FIXME bias for nonzero ?
1817
    //FIXME optimize
1818
    memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
1819
    for(y=0; y<p->height; y++){
1820
        for(x=0; x<p->width; x++){
1821
            int sx= (x-xo + step/2) / step / Q2_STEP;
1822
            int sy= (y-yo + step/2) / step / Q2_STEP;
1823
            int v= r0[x + y*p->width] - r1[x + y*p->width];
1824
            assert(sx>=0 && sy>=0 && sx < score_stride);
1825
            v= ((v+8)>>4)<<4;
1826
            score[sx + sy*score_stride] += v*v;
1827
            assert(score[sx + sy*score_stride] >= 0);
1828
        }
1829
    }
1830
}
1831

    
1832
static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
1833
    int level, orientation;
1834

    
1835
    for(level=0; level<s->spatial_decomposition_count; level++){
1836
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
1837
            SubBand *b= &p->band[level][orientation];
1838
            IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
1839

    
1840
            dequantize(s, b, dst, b->stride);
1841
        }
1842
    }
1843
}
1844

    
1845
static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
1846
    int level, orientation, ys, xs, x, y, pass;
1847
    IDWTELEM best_dequant[height * stride];
1848
    IDWTELEM idwt2_buffer[height * stride];
1849
    const int score_stride= (width + 10)/Q2_STEP;
1850
    int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1851
    int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1852
    int threshold= (s->m.lambda * s->m.lambda) >> 6;
1853

    
1854
    //FIXME pass the copy cleanly ?
1855

    
1856
//    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
1857
    ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
1858

    
1859
    for(level=0; level<s->spatial_decomposition_count; level++){
1860
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
1861
            SubBand *b= &p->band[level][orientation];
1862
            IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1863
             DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
1864
            assert(src == b->buf); // code does not depend on this but it is true currently
1865

    
1866
            quantize(s, b, dst, src, b->stride, s->qbias);
1867
        }
1868
    }
1869
    for(pass=0; pass<1; pass++){
1870
        if(s->qbias == 0) //keyframe
1871
            continue;
1872
        for(level=0; level<s->spatial_decomposition_count; level++){
1873
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
1874
                SubBand *b= &p->band[level][orientation];
1875
                IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
1876
                IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1877

    
1878
                for(ys= 0; ys<Q2_STEP; ys++){
1879
                    for(xs= 0; xs<Q2_STEP; xs++){
1880
                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1881
                        dequantize_all(s, p, idwt2_buffer, width, height);
1882
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1883
                        find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1884
                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1885
                        for(y=ys; y<b->height; y+= Q2_STEP){
1886
                            for(x=xs; x<b->width; x+= Q2_STEP){
1887
                                if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
1888
                                if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
1889
                                //FIXME try more than just --
1890
                            }
1891
                        }
1892
                        dequantize_all(s, p, idwt2_buffer, width, height);
1893
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1894
                        find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1895
                        for(y=ys; y<b->height; y+= Q2_STEP){
1896
                            for(x=xs; x<b->width; x+= Q2_STEP){
1897
                                int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
1898
                                if(score[score_idx] <= best_score[score_idx] + threshold){
1899
                                    best_score[score_idx]= score[score_idx];
1900
                                    if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
1901
                                    if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
1902
                                    //FIXME copy instead
1903
                                }
1904
                            }
1905
                        }
1906
                    }
1907
                }
1908
            }
1909
        }
1910
    }
1911
    memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
1912
}
1913

    
1914
#endif /* QUANTIZE2==1 */
1915

    
1916
#define USE_HALFPEL_PLANE 0
1917

    
1918
static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
1919
    int p,x,y;
1920

    
1921
    assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
1922

    
1923
    for(p=0; p<3; p++){
1924
        int is_chroma= !!p;
1925
        int w= s->avctx->width  >>is_chroma;
1926
        int h= s->avctx->height >>is_chroma;
1927
        int ls= frame->linesize[p];
1928
        uint8_t *src= frame->data[p];
1929

    
1930
        halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1931
        halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1932
        halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1933

    
1934
        halfpel[0][p]= src;
1935
        for(y=0; y<h; y++){
1936
            for(x=0; x<w; x++){
1937
                int i= y*ls + x;
1938

    
1939
                halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
1940
            }
1941
        }
1942
        for(y=0; y<h; y++){
1943
            for(x=0; x<w; x++){
1944
                int i= y*ls + x;
1945

    
1946
                halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1947
            }
1948
        }
1949
        src= halfpel[1][p];
1950
        for(y=0; y<h; y++){
1951
            for(x=0; x<w; x++){
1952
                int i= y*ls + x;
1953

    
1954
                halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1955
            }
1956
        }
1957

    
1958
//FIXME border!
1959
    }
1960
}
1961

    
1962
static void release_buffer(AVCodecContext *avctx){
1963
    SnowContext *s = avctx->priv_data;
1964
    int i;
1965

    
1966
    if(s->last_picture[s->max_ref_frames-1].data[0]){
1967
        avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
1968
        for(i=0; i<9; i++)
1969
            if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
1970
                av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
1971
    }
1972
}
1973

    
1974
static int frame_start(SnowContext *s){
1975
   AVFrame tmp;
1976
   int w= s->avctx->width; //FIXME round up to x16 ?
1977
   int h= s->avctx->height;
1978

    
1979
    if(s->current_picture.data[0]){
1980
        s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
1981
        s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
1982
        s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
1983
    }
1984

    
1985
    release_buffer(s->avctx);
1986

    
1987
    tmp= s->last_picture[s->max_ref_frames-1];
1988
    memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
1989
    memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
1990
    if(USE_HALFPEL_PLANE && s->current_picture.data[0])
1991
        halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
1992
    s->last_picture[0]= s->current_picture;
1993
    s->current_picture= tmp;
1994

    
1995
    if(s->keyframe){
1996
        s->ref_frames= 0;
1997
    }else{
1998
        int i;
1999
        for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
2000
            if(i && s->last_picture[i-1].key_frame)
2001
                break;
2002
        s->ref_frames= i;
2003
        if(s->ref_frames==0){
2004
            av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
2005
            return -1;
2006
        }
2007
    }
2008

    
2009
    s->current_picture.reference= 1;
2010
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2011
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2012
        return -1;
2013
    }
2014

    
2015
    s->current_picture.key_frame= s->keyframe;
2016

    
2017
    return 0;
2018
}
2019

    
2020
static av_cold void common_end(SnowContext *s){
2021
    int plane_index, level, orientation, i;
2022

    
2023
    av_freep(&s->spatial_dwt_buffer);
2024
    av_freep(&s->spatial_idwt_buffer);
2025

    
2026
    s->m.me.temp= NULL;
2027
    av_freep(&s->m.me.scratchpad);
2028
    av_freep(&s->m.me.map);
2029
    av_freep(&s->m.me.score_map);
2030
    av_freep(&s->m.obmc_scratchpad);
2031

    
2032
    av_freep(&s->block);
2033
    av_freep(&s->scratchbuf);
2034

    
2035
    for(i=0; i<MAX_REF_FRAMES; i++){
2036
        av_freep(&s->ref_mvs[i]);
2037
        av_freep(&s->ref_scores[i]);
2038
        if(s->last_picture[i].data[0])
2039
            s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
2040
    }
2041

    
2042
    for(plane_index=0; plane_index<3; plane_index++){
2043
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2044
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2045
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2046

    
2047
                av_freep(&b->x_coeff);
2048
            }
2049
        }
2050
    }
2051
    if (s->mconly_picture.data[0])
2052
        s->avctx->release_buffer(s->avctx, &s->mconly_picture);
2053
    if (s->current_picture.data[0])
2054
        s->avctx->release_buffer(s->avctx, &s->current_picture);
2055
}
2056

    
2057
static av_cold int decode_init(AVCodecContext *avctx)
2058
{
2059
    avctx->pix_fmt= PIX_FMT_YUV420P;
2060

    
2061
    common_init(avctx);
2062

    
2063
    return 0;
2064
}
2065

    
2066
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
2067
    const uint8_t *buf = avpkt->data;
2068
    int buf_size = avpkt->size;
2069
    SnowContext *s = avctx->priv_data;
2070
    RangeCoder * const c= &s->c;
2071
    int bytes_read;
2072
    AVFrame *picture = data;
2073
    int level, orientation, plane_index;
2074

    
2075
    ff_init_range_decoder(c, buf, buf_size);
2076
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2077

    
2078
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2079
    if(decode_header(s)<0)
2080
        return -1;
2081
    common_init_after_header(avctx);
2082

    
2083
    // realloc slice buffer for the case that spatial_decomposition_count changed
2084
    slice_buffer_destroy(&s->sb);
2085
    slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
2086

    
2087
    for(plane_index=0; plane_index<3; plane_index++){
2088
        Plane *p= &s->plane[plane_index];
2089
        p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
2090
                                              && p->hcoeff[1]==-10
2091
                                              && p->hcoeff[2]==2;
2092
    }
2093

    
2094
    alloc_blocks(s);
2095

    
2096
    if(frame_start(s) < 0)
2097
        return -1;
2098
    //keyframe flag duplication mess FIXME
2099
    if(avctx->debug&FF_DEBUG_PICT_INFO)
2100
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2101

    
2102
    decode_blocks(s);
2103

    
2104
    for(plane_index=0; plane_index<3; plane_index++){
2105
        Plane *p= &s->plane[plane_index];
2106
        int w= p->width;
2107
        int h= p->height;
2108
        int x, y;
2109
        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
2110

    
2111
        if(s->avctx->debug&2048){
2112
            memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2113
            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2114

    
2115
            for(y=0; y<h; y++){
2116
                for(x=0; x<w; x++){
2117
                    int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2118
                    s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2119
                }
2120
            }
2121
        }
2122

    
2123
        {
2124
        for(level=0; level<s->spatial_decomposition_count; level++){
2125
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2126
                SubBand *b= &p->band[level][orientation];
2127
                unpack_coeffs(s, b, b->parent, orientation);
2128
            }
2129
        }
2130
        }
2131

    
2132
        {
2133
        const int mb_h= s->b_height << s->block_max_depth;
2134
        const int block_size = MB_SIZE >> s->block_max_depth;
2135
        const int block_w    = plane_index ? block_size/2 : block_size;
2136
        int mb_y;
2137
        DWTCompose cs[MAX_DECOMPOSITIONS];
2138
        int yd=0, yq=0;
2139
        int y;
2140
        int end_y;
2141

    
2142
        ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
2143
        for(mb_y=0; mb_y<=mb_h; mb_y++){
2144

    
2145
            int slice_starty = block_w*mb_y;
2146
            int slice_h = block_w*(mb_y+1);
2147
            if (!(s->keyframe || s->avctx->debug&512)){
2148
                slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
2149
                slice_h -= (block_w >> 1);
2150
            }
2151

    
2152
            for(level=0; level<s->spatial_decomposition_count; level++){
2153
                for(orientation=level ? 1 : 0; orientation<4; orientation++){
2154
                    SubBand *b= &p->band[level][orientation];
2155
                    int start_y;
2156
                    int end_y;
2157
                    int our_mb_start = mb_y;
2158
                    int our_mb_end = (mb_y + 1);
2159
                    const int extra= 3;
2160
                    start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
2161
                    end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2162
                    if (!(s->keyframe || s->avctx->debug&512)){
2163
                        start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2164
                        end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2165
                    }
2166
                    start_y = FFMIN(b->height, start_y);
2167
                    end_y = FFMIN(b->height, end_y);
2168

    
2169
                    if (start_y != end_y){
2170
                        if (orientation == 0){
2171
                            SubBand * correlate_band = &p->band[0][0];
2172
                            int correlate_end_y = FFMIN(b->height, end_y + 1);
2173
                            int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
2174
                            decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
2175
                            correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
2176
                            dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2177
                        }
2178
                        else
2179
                            decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
2180
                    }
2181
                }
2182
            }
2183

    
2184
            for(; yd<slice_h; yd+=4){
2185
                ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
2186
            }
2187

    
2188
            if(s->qlog == LOSSLESS_QLOG){
2189
                for(; yq<slice_h && yq<h; yq++){
2190
                    IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2191
                    for(x=0; x<w; x++){
2192
                        line[x] <<= FRAC_BITS;
2193
                    }
2194
                }
2195
            }
2196

    
2197
            predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
2198

    
2199
            y = FFMIN(p->height, slice_starty);
2200
            end_y = FFMIN(p->height, slice_h);
2201
            while(y < end_y)
2202
                slice_buffer_release(&s->sb, y++);
2203
        }
2204

    
2205
        slice_buffer_flush(&s->sb);
2206
        }
2207

    
2208
    }
2209

    
2210
    emms_c();
2211

    
2212
    release_buffer(avctx);
2213

    
2214
    if(!(s->avctx->debug&2048))
2215
        *picture= s->current_picture;
2216
    else
2217
        *picture= s->mconly_picture;
2218

    
2219
    *data_size = sizeof(AVFrame);
2220

    
2221
    bytes_read= c->bytestream - c->bytestream_start;
2222
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
2223

    
2224
    return bytes_read;
2225
}
2226

    
2227
static av_cold int decode_end(AVCodecContext *avctx)
2228
{
2229
    SnowContext *s = avctx->priv_data;
2230

    
2231
    slice_buffer_destroy(&s->sb);
2232

    
2233
    common_end(s);
2234

    
2235
    return 0;
2236
}
2237

    
2238
AVCodec snow_decoder = {
2239
    "snow",
2240
    CODEC_TYPE_VIDEO,
2241
    CODEC_ID_SNOW,
2242
    sizeof(SnowContext),
2243
    decode_init,
2244
    NULL,
2245
    decode_end,
2246
    decode_frame,
2247
    CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2248
    NULL,
2249
    .long_name = NULL_IF_CONFIG_SMALL("Snow"),
2250
};
2251

    
2252
#if CONFIG_SNOW_ENCODER
2253
static av_cold int encode_init(AVCodecContext *avctx)
2254
{
2255
    SnowContext *s = avctx->priv_data;
2256
    int plane_index;
2257

    
2258
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2259
        av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2260
               "Use vstrict=-2 / -strict -2 to use it anyway.\n");
2261
        return -1;
2262
    }
2263

    
2264
    if(avctx->prediction_method == DWT_97
2265
       && (avctx->flags & CODEC_FLAG_QSCALE)
2266
       && avctx->global_quality == 0){
2267
        av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
2268
        return -1;
2269
    }
2270

    
2271
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2272

    
2273
    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2274
    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
2275

    
2276
    for(plane_index=0; plane_index<3; plane_index++){
2277
        s->plane[plane_index].diag_mc= 1;
2278
        s->plane[plane_index].htaps= 6;
2279
        s->plane[plane_index].hcoeff[0]=  40;
2280
        s->plane[plane_index].hcoeff[1]= -10;
2281
        s->plane[plane_index].hcoeff[2]=   2;
2282
        s->plane[plane_index].fast_mc= 1;
2283
    }
2284

    
2285
    common_init(avctx);
2286
    alloc_blocks(s);
2287

    
2288
    s->version=0;
2289

    
2290
    s->m.avctx   = avctx;
2291
    s->m.flags   = avctx->flags;
2292
    s->m.bit_rate= avctx->bit_rate;
2293

    
2294
    s->m.me.temp      =
2295
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2296
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2297
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2298
    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2299
    h263_encode_init(&s->m); //mv_penalty
2300

    
2301
    s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
2302

    
2303
    if(avctx->flags&CODEC_FLAG_PASS1){
2304
        if(!avctx->stats_out)
2305
            avctx->stats_out = av_mallocz(256);
2306
    }
2307
    if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2308
        if(ff_rate_control_init(&s->m) < 0)
2309
            return -1;
2310
    }
2311
    s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2312

    
2313
    avctx->coded_frame= &s->current_picture;
2314
    switch(avctx->pix_fmt){
2315
//    case PIX_FMT_YUV444P:
2316
//    case PIX_FMT_YUV422P:
2317
    case PIX_FMT_YUV420P:
2318
    case PIX_FMT_GRAY8:
2319
//    case PIX_FMT_YUV411P:
2320
//    case PIX_FMT_YUV410P:
2321
        s->colorspace_type= 0;
2322
        break;
2323
/*    case PIX_FMT_RGB32:
2324
        s->colorspace= 1;
2325
        break;*/
2326
    default:
2327
        av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2328
        return -1;
2329
    }
2330
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2331
    s->chroma_h_shift= 1;
2332
    s->chroma_v_shift= 1;
2333

    
2334
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
2335
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
2336

    
2337
    s->avctx->get_buffer(s->avctx, &s->input_picture);
2338

    
2339
    if(s->avctx->me_method == ME_ITER){
2340
        int i;
2341
        int size= s->b_width * s->b_height << 2*s->block_max_depth;
2342
        for(i=0; i<s->max_ref_frames; i++){
2343
            s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
2344
            s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
2345
        }
2346
    }
2347

    
2348
    return 0;
2349
}
2350

    
2351
//near copy & paste from dsputil, FIXME
2352
static int pix_sum(uint8_t * pix, int line_size, int w)
2353
{
2354
    int s, i, j;
2355

    
2356
    s = 0;
2357
    for (i = 0; i < w; i++) {
2358
        for (j = 0; j < w; j++) {
2359
            s += pix[0];
2360
            pix ++;
2361
        }
2362
        pix += line_size - w;
2363
    }
2364
    return s;
2365
}
2366

    
2367
//near copy & paste from dsputil, FIXME
2368
static int pix_norm1(uint8_t * pix, int line_size, int w)
2369
{
2370
    int s, i, j;
2371
    uint32_t *sq = ff_squareTbl + 256;
2372

    
2373
    s = 0;
2374
    for (i = 0; i < w; i++) {
2375
        for (j = 0; j < w; j ++) {
2376
            s += sq[pix[0]];
2377
            pix ++;
2378
        }
2379
        pix += line_size - w;
2380
    }
2381
    return s;
2382
}
2383

    
2384
//FIXME copy&paste
2385
#define P_LEFT P[1]
2386
#define P_TOP P[2]
2387
#define P_TOPRIGHT P[3]
2388
#define P_MEDIAN P[4]
2389
#define P_MV1 P[9]
2390
#define FLAG_QPEL   1 //must be 1
2391

    
2392
static int encode_q_branch(SnowContext *s, int level, int x, int y){
2393
    uint8_t p_buffer[1024];
2394
    uint8_t i_buffer[1024];
2395
    uint8_t p_state[sizeof(s->block_state)];
2396
    uint8_t i_state[sizeof(s->block_state)];
2397
    RangeCoder pc, ic;
2398
    uint8_t *pbbak= s->c.bytestream;
2399
    uint8_t *pbbak_start= s->c.bytestream_start;
2400
    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
2401
    const int w= s->b_width  << s->block_max_depth;
2402
    const int h= s->b_height << s->block_max_depth;
2403
    const int rem_depth= s->block_max_depth - level;
2404
    const int index= (x + y*w) << rem_depth;
2405
    const int block_w= 1<<(LOG2_MB_SIZE - level);
2406
    int trx= (x+1)<<rem_depth;
2407
    int try= (y+1)<<rem_depth;
2408
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2409
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2410
    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2411
    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2412
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2413
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2414
    int pl = left->color[0];
2415
    int pcb= left->color[1];
2416
    int pcr= left->color[2];
2417
    int pmx, pmy;
2418
    int mx=0, my=0;
2419
    int l,cr,cb;
2420
    const int stride= s->current_picture.linesize[0];
2421
    const int uvstride= s->current_picture.linesize[1];
2422
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
2423
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2424
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2425
    int P[10][2];
2426
    int16_t last_mv[3][2];
2427
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2428
    const int shift= 1+qpel;
2429
    MotionEstContext *c= &s->m.me;
2430
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2431
    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2432
    int my_context= av_log2(2*FFABS(left->my - top->my));
2433
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2434
    int ref, best_ref, ref_score, ref_mx, ref_my;
2435

    
2436
    assert(sizeof(s->block_state) >= 256);
2437
    if(s->keyframe){
2438
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2439
        return 0;
2440
    }
2441

    
2442
//    clip predictors / edge ?
2443

    
2444
    P_LEFT[0]= left->mx;
2445
    P_LEFT[1]= left->my;
2446
    P_TOP [0]= top->mx;
2447
    P_TOP [1]= top->my;
2448
    P_TOPRIGHT[0]= tr->mx;
2449
    P_TOPRIGHT[1]= tr->my;
2450

    
2451
    last_mv[0][0]= s->block[index].mx;
2452
    last_mv[0][1]= s->block[index].my;
2453
    last_mv[1][0]= right->mx;
2454
    last_mv[1][1]= right->my;
2455
    last_mv[2][0]= bottom->mx;
2456
    last_mv[2][1]= bottom->my;
2457

    
2458
    s->m.mb_stride=2;
2459
    s->m.mb_x=
2460
    s->m.mb_y= 0;
2461
    c->skip= 0;
2462

    
2463
    assert(c->  stride ==   stride);
2464
    assert(c->uvstride == uvstride);
2465

    
2466
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2467
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2468
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2469
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2470

    
2471
    c->xmin = - x*block_w - 16+3;
2472
    c->ymin = - y*block_w - 16+3;
2473
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2474
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2475

    
2476
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2477
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
2478
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2479
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2480
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2481
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2482
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2483

    
2484
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2485
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2486

    
2487
    if (!y) {
2488
        c->pred_x= P_LEFT[0];
2489
        c->pred_y= P_LEFT[1];
2490
    } else {
2491
        c->pred_x = P_MEDIAN[0];
2492
        c->pred_y = P_MEDIAN[1];
2493
    }
2494

    
2495
    score= INT_MAX;
2496
    best_ref= 0;
2497
    for(ref=0; ref<s->ref_frames; ref++){
2498
        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2499

    
2500
        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2501
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2502

    
2503
        assert(ref_mx >= c->xmin);
2504
        assert(ref_mx <= c->xmax);
2505
        assert(ref_my >= c->ymin);
2506
        assert(ref_my <= c->ymax);
2507

    
2508
        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2509
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2510
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2511
        if(s->ref_mvs[ref]){
2512
            s->ref_mvs[ref][index][0]= ref_mx;
2513
            s->ref_mvs[ref][index][1]= ref_my;
2514
            s->ref_scores[ref][index]= ref_score;
2515
        }
2516
        if(score > ref_score){
2517
            score= ref_score;
2518
            best_ref= ref;
2519
            mx= ref_mx;
2520
            my= ref_my;
2521
        }
2522
    }
2523
    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
2524

    
2525
  //  subpel search
2526
    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
2527
    pc= s->c;
2528
    pc.bytestream_start=
2529
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2530
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2531

    
2532
    if(level!=s->block_max_depth)
2533
        put_rac(&pc, &p_state[4 + s_context], 1);
2534
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2535
    if(s->ref_frames > 1)
2536
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2537
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2538
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2539
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2540
    p_len= pc.bytestream - pc.bytestream_start;
2541
    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
2542

    
2543
    block_s= block_w*block_w;
2544
    sum = pix_sum(current_data[0], stride, block_w);
2545
    l= (sum + block_s/2)/block_s;
2546
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2547

    
2548
    block_s= block_w*block_w>>2;
2549
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
2550
    cb= (sum + block_s/2)/block_s;
2551
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2552
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
2553
    cr= (sum + block_s/2)/block_s;
2554
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2555

    
2556
    ic= s->c;
2557
    ic.bytestream_start=
2558
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2559
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2560
    if(level!=s->block_max_depth)
2561
        put_rac(&ic, &i_state[4 + s_context], 1);
2562
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2563
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2564
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2565
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2566
    i_len= ic.bytestream - ic.bytestream_start;
2567
    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2568

    
2569
//    assert(score==256*256*256*64-1);
2570
    assert(iscore < 255*255*256 + s->lambda2*10);
2571
    assert(iscore >= 0);
2572
    assert(l>=0 && l<=255);
2573
    assert(pl>=0 && pl<=255);
2574

    
2575
    if(level==0){
2576
        int varc= iscore >> 8;
2577
        int vard= score >> 8;
2578
        if (vard <= 64 || vard < varc)
2579
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2580
        else
2581
            c->scene_change_score+= s->m.qscale;
2582
    }
2583

    
2584
    if(level!=s->block_max_depth){
2585
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2586
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2587
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2588
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2589
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2590
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2591

    
2592
        if(score2 < score && score2 < iscore)
2593
            return score2;
2594
    }
2595

    
2596
    if(iscore < score){
2597
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2598
        memcpy(pbbak, i_buffer, i_len);
2599
        s->c= ic;
2600
        s->c.bytestream_start= pbbak_start;
2601
        s->c.bytestream= pbbak + i_len;
2602
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2603
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2604
        return iscore;
2605
    }else{
2606
        memcpy(pbbak, p_buffer, p_len);
2607
        s->c= pc;
2608
        s->c.bytestream_start= pbbak_start;
2609
        s->c.bytestream= pbbak + p_len;
2610
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2611
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2612
        return score;
2613
    }
2614
}
2615

    
2616
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2617
    const int w= s->b_width  << s->block_max_depth;
2618
    const int rem_depth= s->block_max_depth - level;
2619
    const int index= (x + y*w) << rem_depth;
2620
    int trx= (x+1)<<rem_depth;
2621
    BlockNode *b= &s->block[index];
2622
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2623
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2624
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2625
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2626
    int pl = left->color[0];
2627
    int pcb= left->color[1];
2628
    int pcr= left->color[2];
2629
    int pmx, pmy;
2630
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2631
    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2632
    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2633
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2634

    
2635
    if(s->keyframe){
2636
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2637
        return;
2638
    }
2639

    
2640
    if(level!=s->block_max_depth){
2641
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2642
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
2643
        }else{
2644
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
2645
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2646
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2647
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2648
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2649
            return;
2650
        }
2651
    }
2652
    if(b->type & BLOCK_INTRA){
2653
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2654
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2655
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2656
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2657
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2658
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2659
    }else{
2660
        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2661
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2662
        if(s->ref_frames > 1)
2663
            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2664
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2665
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2666
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2667
    }
2668
}
2669

    
2670
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2671
    int i, x2, y2;
2672
    Plane *p= &s->plane[plane_index];
2673
    const int block_size = MB_SIZE >> s->block_max_depth;
2674
    const int block_w    = plane_index ? block_size/2 : block_size;
2675
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2676
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2677
    const int ref_stride= s->current_picture.linesize[plane_index];
2678
    uint8_t *src= s-> input_picture.data[plane_index];
2679
    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2680
    const int b_stride = s->b_width << s->block_max_depth;
2681
    const int w= p->width;
2682
    const int h= p->height;
2683
    int index= mb_x + mb_y*b_stride;
2684
    BlockNode *b= &s->block[index];
2685
    BlockNode backup= *b;
2686
    int ab=0;
2687
    int aa=0;
2688

    
2689
    b->type|= BLOCK_INTRA;
2690
    b->color[plane_index]= 0;
2691
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2692

    
2693
    for(i=0; i<4; i++){
2694
        int mb_x2= mb_x + (i &1) - 1;
2695
        int mb_y2= mb_y + (i>>1) - 1;
2696
        int x= block_w*mb_x2 + block_w/2;
2697
        int y= block_w*mb_y2 + block_w/2;
2698

    
2699
        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2700
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2701

    
2702
        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2703
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2704
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2705
                int obmc_v= obmc[index];
2706
                int d;
2707
                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2708
                if(x<0) obmc_v += obmc[index + block_w];
2709
                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2710
                if(x+block_w>w) obmc_v += obmc[index - block_w];
2711
                //FIXME precalculate this or simplify it somehow else
2712

    
2713
                d = -dst[index] + (1<<(FRAC_BITS-1));
2714
                dst[index] = d;
2715
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2716
                aa += obmc_v * obmc_v; //FIXME precalculate this
2717
            }
2718
        }
2719
    }
2720
    *b= backup;
2721

    
2722
    return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2723
}
2724

    
2725
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2726
    const int b_stride = s->b_width << s->block_max_depth;
2727
    const int b_height = s->b_height<< s->block_max_depth;
2728
    int index= x + y*b_stride;
2729
    const BlockNode *b     = &s->block[index];
2730
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2731
    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2732
    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2733
    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2734
    int dmx, dmy;
2735
//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2736
//  int my_context= av_log2(2*FFABS(left->my - top->my));
2737

    
2738
    if(x<0 || x>=b_stride || y>=b_height)
2739
        return 0;
2740
/*
2741
1            0      0
2742
01X          1-2    1
2743
001XX        3-6    2-3
2744
0001XXX      7-14   4-7
2745
00001XXXX   15-30   8-15
2746
*/
2747
//FIXME try accurate rate
2748
//FIXME intra and inter predictors if surrounding blocks are not the same type
2749
    if(b->type & BLOCK_INTRA){
2750
        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2751
                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
2752
                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
2753
    }else{
2754
        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2755
        dmx-= b->mx;
2756
        dmy-= b->my;
2757
        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2758
                    + av_log2(2*FFABS(dmy))
2759
                    + av_log2(2*b->ref));
2760
    }
2761
}
2762

    
2763
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2764
    Plane *p= &s->plane[plane_index];
2765
    const int block_size = MB_SIZE >> s->block_max_depth;
2766
    const int block_w    = plane_index ? block_size/2 : block_size;
2767
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2768
    const int ref_stride= s->current_picture.linesize[plane_index];
2769
    uint8_t *dst= s->current_picture.data[plane_index];
2770
    uint8_t *src= s->  input_picture.data[plane_index];
2771
    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2772
    uint8_t *cur = s->scratchbuf;
2773
    uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2774
    const int b_stride = s->b_width << s->block_max_depth;
2775
    const int b_height = s->b_height<< s->block_max_depth;
2776
    const int w= p->width;
2777
    const int h= p->height;
2778
    int distortion;
2779
    int rate= 0;
2780
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2781
    int sx= block_w*mb_x - block_w/2;
2782
    int sy= block_w*mb_y - block_w/2;
2783
    int x0= FFMAX(0,-sx);
2784
    int y0= FFMAX(0,-sy);
2785
    int x1= FFMIN(block_w*2, w-sx);
2786
    int y1= FFMIN(block_w*2, h-sy);
2787
    int i,x,y;
2788

    
2789
    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2790

    
2791
    for(y=y0; y<y1; y++){
2792
        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2793
        const IDWTELEM *pred1 = pred + y*obmc_stride;
2794
        uint8_t *cur1 = cur + y*ref_stride;
2795
        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2796
        for(x=x0; x<x1; x++){
2797
#if FRAC_BITS >= LOG2_OBMC_MAX
2798
            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2799
#else
2800
            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2801
#endif
2802
            v = (v + pred1[x]) >> FRAC_BITS;
2803
            if(v&(~255)) v= ~(v>>31);
2804
            dst1[x] = v;
2805
        }
2806
    }
2807

    
2808
    /* copy the regions where obmc[] = (uint8_t)256 */
2809
    if(LOG2_OBMC_MAX == 8
2810
        && (mb_x == 0 || mb_x == b_stride-1)
2811
        && (mb_y == 0 || mb_y == b_height-1)){
2812
        if(mb_x == 0)
2813
            x1 = block_w;
2814
        else
2815
            x0 = block_w;
2816
        if(mb_y == 0)
2817
            y1 = block_w;
2818
        else
2819
            y0 = block_w;
2820
        for(y=y0; y<y1; y++)
2821
            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2822
    }
2823

    
2824
    if(block_w==16){
2825
        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2826
        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2827
        /* FIXME cmps overlap but do not cover the wavelet's whole support.
2828
         * So improving the score of one block is not strictly guaranteed
2829
         * to improve the score of the whole frame, thus iterative motion
2830
         * estimation does not always converge. */
2831
        if(s->avctx->me_cmp == FF_CMP_W97)
2832
            distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2833
        else if(s->avctx->me_cmp == FF_CMP_W53)
2834
            distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2835
        else{
2836
            distortion = 0;
2837
            for(i=0; i<4; i++){
2838
                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2839
                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2840
            }
2841
        }
2842
    }else{
2843
        assert(block_w==8);
2844
        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2845
    }
2846

    
2847
    if(plane_index==0){
2848
        for(i=0; i<4; i++){
2849
/* ..RRr
2850
 * .RXx.
2851
 * rxx..
2852
 */
2853
            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2854
        }
2855
        if(mb_x == b_stride-2)
2856
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2857
    }
2858
    return distortion + rate*penalty_factor;
2859
}
2860

    
2861
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2862
    int i, y2;
2863
    Plane *p= &s->plane[plane_index];
2864
    const int block_size = MB_SIZE >> s->block_max_depth;
2865
    const int block_w    = plane_index ? block_size/2 : block_size;
2866
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2867
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2868
    const int ref_stride= s->current_picture.linesize[plane_index];
2869
    uint8_t *dst= s->current_picture.data[plane_index];
2870
    uint8_t *src= s-> input_picture.data[plane_index];
2871
    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2872
    // const has only been removed from zero_dst to suppress a warning
2873
    static IDWTELEM zero_dst[4096]; //FIXME
2874
    const int b_stride = s->b_width << s->block_max_depth;
2875
    const int w= p->width;
2876
    const int h= p->height;
2877
    int distortion= 0;
2878
    int rate= 0;
2879
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2880

    
2881
    for(i=0; i<9; i++){
2882
        int mb_x2= mb_x + (i%3) - 1;
2883
        int mb_y2= mb_y + (i/3) - 1;
2884
        int x= block_w*mb_x2 + block_w/2;
2885
        int y= block_w*mb_y2 + block_w/2;
2886

    
2887
        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2888
                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2889

    
2890
        //FIXME find a cleaner/simpler way to skip the outside stuff
2891
        for(y2= y; y2<0; y2++)
2892
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2893
        for(y2= h; y2<y+block_w; y2++)
2894
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2895
        if(x<0){
2896
            for(y2= y; y2<y+block_w; y2++)
2897
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2898
        }
2899
        if(x+block_w > w){
2900
            for(y2= y; y2<y+block_w; y2++)
2901
                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2902
        }
2903

    
2904
        assert(block_w== 8 || block_w==16);
2905
        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2906
    }
2907

    
2908
    if(plane_index==0){
2909
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2910
        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2911

    
2912
/* ..RRRr
2913
 * .RXXx.
2914
 * .RXXx.
2915
 * rxxx.
2916
 */
2917
        if(merged)
2918
            rate = get_block_bits(s, mb_x, mb_y, 2);
2919
        for(i=merged?4:0; i<9; i++){
2920
            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2921
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2922
        }
2923
    }
2924
    return distortion + rate*penalty_factor;
2925
}
2926

    
2927
static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2928
    const int w= b->width;
2929
    const int h= b->height;
2930
    int x, y;
2931

    
2932
    if(1){
2933
        int run=0;
2934
        int runs[w*h];
2935
        int run_index=0;
2936
        int max_index;
2937

    
2938
        for(y=0; y<h; y++){
2939
            for(x=0; x<w; x++){
2940
                int v, p=0;
2941
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
2942
                v= src[x + y*stride];
2943

    
2944
                if(y){
2945
                    t= src[x + (y-1)*stride];
2946
                    if(x){
2947
                        lt= src[x - 1 + (y-1)*stride];
2948
                    }
2949
                    if(x + 1 < w){
2950
                        rt= src[x + 1 + (y-1)*stride];
2951
                    }
2952
                }
2953
                if(x){
2954
                    l= src[x - 1 + y*stride];
2955
                    /*if(x > 1){
2956
                        if(orientation==1) ll= src[y + (x-2)*stride];
2957
                        else               ll= src[x - 2 + y*stride];
2958
                    }*/
2959
                }
2960
                if(parent){
2961
                    int px= x>>1;
2962
                    int py= y>>1;
2963
                    if(px<b->parent->width && py<b->parent->height)
2964
                        p= parent[px + py*2*stride];
2965
                }
2966
                if(!(/*ll|*/l|lt|t|rt|p)){
2967
                    if(v){
2968
                        runs[run_index++]= run;
2969
                        run=0;
2970
                    }else{
2971
                        run++;
2972
                    }
2973
                }
2974
            }
2975
        }
2976
        max_index= run_index;
2977
        runs[run_index++]= run;
2978
        run_index=0;
2979
        run= runs[run_index++];
2980

    
2981
        put_symbol2(&s->c, b->state[30], max_index, 0);
2982
        if(run_index <= max_index)
2983
            put_symbol2(&s->c, b->state[1], run, 3);
2984

    
2985
        for(y=0; y<h; y++){
2986
            if(s->c.bytestream_end - s->c.bytestream < w*40){
2987
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2988
                return -1;
2989
            }
2990
            for(x=0; x<w; x++){
2991
                int v, p=0;
2992
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
2993
                v= src[x + y*stride];
2994

    
2995
                if(y){
2996
                    t= src[x + (y-1)*stride];
2997
                    if(x){
2998
                        lt= src[x - 1 + (y-1)*stride];
2999
                    }
3000
                    if(x + 1 < w){
3001
                        rt= src[x + 1 + (y-1)*stride];
3002
                    }
3003
                }
3004
                if(x){
3005
                    l= src[x - 1 + y*stride];
3006
                    /*if(x > 1){
3007
                        if(orientation==1) ll= src[y + (x-2)*stride];
3008
                        else               ll= src[x - 2 + y*stride];
3009
                    }*/
3010
                }
3011
                if(parent){
3012
                    int px= x>>1;
3013
                    int py= y>>1;
3014
                    if(px<b->parent->width && py<b->parent->height)
3015
                        p= parent[px + py*2*stride];
3016
                }
3017
                if(/*ll|*/l|lt|t|rt|p){
3018
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3019

    
3020
                    put_rac(&s->c, &b->state[0][context], !!v);
3021
                }else{
3022
                    if(!run){
3023
                        run= runs[run_index++];
3024

    
3025
                        if(run_index <= max_index)
3026
                            put_symbol2(&s->c, b->state[1], run, 3);
3027
                        assert(v);
3028
                    }else{
3029
                        run--;
3030
                        assert(!v);
3031
                    }
3032
                }
3033
                if(v){
3034
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3035
                    int l2= 2*FFABS(l) + (l<0);
3036
                    int t2= 2*FFABS(t) + (t<0);
3037

    
3038
                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
3039
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
3040
                }
3041
            }
3042
        }
3043
    }
3044
    return 0;
3045
}
3046

    
3047
static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3048
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
3049
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
3050
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
3051
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
3052
}
3053

    
3054
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3055
    const int b_stride= s->b_width << s->block_max_depth;
3056
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3057
    BlockNode backup= *block;
3058
    int rd, index, value;
3059

    
3060
    assert(mb_x>=0 && mb_y>=0);
3061
    assert(mb_x<b_stride);
3062

    
3063
    if(intra){
3064
        block->color[0] = p[0];
3065
        block->color[1] = p[1];
3066
        block->color[2] = p[2];
3067
        block->type |= BLOCK_INTRA;
3068
    }else{
3069
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3070
        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3071
        if(s->me_cache[index] == value)
3072
            return 0;
3073
        s->me_cache[index]= value;
3074

    
3075
        block->mx= p[0];
3076
        block->my= p[1];
3077
        block->type &= ~BLOCK_INTRA;
3078
    }
3079

    
3080
    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3081

    
3082
//FIXME chroma
3083
    if(rd < *best_rd){
3084
        *best_rd= rd;
3085
        return 1;
3086
    }else{
3087
        *block= backup;
3088
        return 0;
3089
    }
3090
}
3091

    
3092
/* special case for int[2] args we discard afterwards,
3093
 * fixes compilation problem with gcc 2.95 */
3094
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3095
    int p[2] = {p0, p1};
3096
    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3097
}
3098

    
3099
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3100
    const int b_stride= s->b_width << s->block_max_depth;
3101
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3102
    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3103
    int rd, index, value;
3104

    
3105
    assert(mb_x>=0 && mb_y>=0);
3106
    assert(mb_x<b_stride);
3107
    assert(((mb_x|mb_y)&1) == 0);
3108

    
3109
    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3110
    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3111
    if(s->me_cache[index] == value)
3112
        return 0;
3113
    s->me_cache[index]= value;
3114

    
3115
    block->mx= p0;
3116
    block->my= p1;
3117
    block->ref= ref;
3118
    block->type &= ~BLOCK_INTRA;
3119
    block[1]= block[b_stride]= block[b_stride+1]= *block;
3120

    
3121
    rd= get_4block_rd(s, mb_x, mb_y, 0);
3122

    
3123
//FIXME chroma
3124
    if(rd < *best_rd){
3125
        *best_rd= rd;
3126
        return 1;
3127
    }else{
3128
        block[0]= backup[0];
3129
        block[1]= backup[1];
3130
        block[b_stride]= backup[2];
3131
        block[b_stride+1]= backup[3];
3132
        return 0;
3133
    }
3134
}
3135

    
3136
static void iterative_me(SnowContext *s){
3137
    int pass, mb_x, mb_y;
3138
    const int b_width = s->b_width  << s->block_max_depth;
3139
    const int b_height= s->b_height << s->block_max_depth;
3140
    const int b_stride= b_width;
3141
    int color[3];
3142

    
3143
    {
3144
        RangeCoder r = s->c;
3145
        uint8_t state[sizeof(s->block_state)];
3146
        memcpy(state, s->block_state, sizeof(s->block_state));
3147
        for(mb_y= 0; mb_y<s->b_height; mb_y++)
3148
            for(mb_x= 0; mb_x<s->b_width; mb_x++)
3149
                encode_q_branch(s, 0, mb_x, mb_y);
3150
        s->c = r;
3151
        memcpy(s->block_state, state, sizeof(s->block_state));
3152
    }
3153

    
3154
    for(pass=0; pass<25; pass++){
3155
        int change= 0;
3156

    
3157
        for(mb_y= 0; mb_y<b_height; mb_y++){
3158
            for(mb_x= 0; mb_x<b_width; mb_x++){
3159
                int dia_change, i, j, ref;
3160
                int best_rd= INT_MAX, ref_rd;
3161
                BlockNode backup, ref_b;
3162
                const int index= mb_x + mb_y * b_stride;
3163
                BlockNode *block= &s->block[index];
3164
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3165
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3166
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3167
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3168
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3169
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3170
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3171
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3172
                const int b_w= (MB_SIZE >> s->block_max_depth);
3173
                uint8_t obmc_edged[b_w*2][b_w*2];
3174

    
3175
                if(pass && (block->type & BLOCK_OPT))
3176
                    continue;
3177
                block->type |= BLOCK_OPT;
3178

    
3179
                backup= *block;
3180

    
3181
                if(!s->me_cache_generation)
3182
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3183
                s->me_cache_generation += 1<<22;
3184

    
3185
                //FIXME precalculate
3186
                {
3187
                    int x, y;
3188
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3189
                    if(mb_x==0)
3190
                        for(y=0; y<b_w*2; y++)
3191
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3192
                    if(mb_x==b_stride-1)
3193
                        for(y=0; y<b_w*2; y++)
3194
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3195
                    if(mb_y==0){
3196
                        for(x=0; x<b_w*2; x++)
3197
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3198
                        for(y=1; y<b_w; y++)
3199
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3200
                    }
3201
                    if(mb_y==b_height-1){
3202
                        for(x=0; x<b_w*2; x++)
3203
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3204
                        for(y=b_w; y<b_w*2-1; y++)
3205
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3206
                    }
3207
                }
3208

    
3209
                //skip stuff outside the picture
3210
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3211
                    uint8_t *src= s->  input_picture.data[0];
3212
                    uint8_t *dst= s->current_picture.data[0];
3213
                    const int stride= s->current_picture.linesize[0];
3214
                    const int block_w= MB_SIZE >> s->block_max_depth;
3215
                    const int sx= block_w*mb_x - block_w/2;
3216
                    const int sy= block_w*mb_y - block_w/2;
3217
                    const int w= s->plane[0].width;
3218
                    const int h= s->plane[0].height;
3219
                    int y;
3220

    
3221
                    for(y=sy; y<0; y++)
3222
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3223
                    for(y=h; y<sy+block_w*2; y++)
3224
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3225
                    if(sx<0){
3226
                        for(y=sy; y<sy+block_w*2; y++)
3227
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3228
                    }
3229
                    if(sx+block_w*2 > w){
3230
                        for(y=sy; y<sy+block_w*2; y++)
3231
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3232
                    }
3233
                }
3234

    
3235
                // intra(black) = neighbors' contribution to the current block
3236
                for(i=0; i<3; i++)
3237
                    color[i]= get_dc(s, mb_x, mb_y, i);
3238

    
3239
                // get previous score (cannot be cached due to OBMC)
3240
                if(pass > 0 && (block->type&BLOCK_INTRA)){
3241
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
3242
                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3243
                }else
3244
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3245

    
3246
                ref_b= *block;
3247
                ref_rd= best_rd;
3248
                for(ref=0; ref < s->ref_frames; ref++){
3249
                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3250
                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3251
                        continue;
3252
                    block->ref= ref;
3253
                    best_rd= INT_MAX;
3254

    
3255
                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3256
                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3257
                    if(tb)
3258
                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3259
                    if(lb)
3260
                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3261
                    if(rb)
3262
                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3263
                    if(bb)
3264
                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3265

    
3266
                    /* fullpel ME */
3267
                    //FIXME avoid subpel interpolation / round to nearest integer
3268
                    do{
3269
                        dia_change=0;
3270
                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3271
                            for(j=0; j<i; j++){
3272
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3273
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3274
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3275
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3276
                            }
3277
                        }
3278
                    }while(dia_change);
3279
                    /* subpel ME */
3280
                    do{
3281
                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3282
                        dia_change=0;
3283
                        for(i=0; i<8; i++)
3284
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3285
                    }while(dia_change);
3286
                    //FIXME or try the standard 2 pass qpel or similar
3287

    
3288
                    mvr[0][0]= block->mx;
3289
                    mvr[0][1]= block->my;
3290
                    if(ref_rd > best_rd){
3291
                        ref_rd= best_rd;
3292
                        ref_b= *block;
3293
                    }
3294
                }
3295
                best_rd= ref_rd;
3296
                *block= ref_b;
3297
#if 1
3298
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3299
                //FIXME RD style color selection
3300
#endif
3301
                if(!same_block(block, &backup)){
3302
                    if(tb ) tb ->type &= ~BLOCK_OPT;
3303
                    if(lb ) lb ->type &= ~BLOCK_OPT;
3304
                    if(rb ) rb ->type &= ~BLOCK_OPT;
3305
                    if(bb ) bb ->type &= ~BLOCK_OPT;
3306
                    if(tlb) tlb->type &= ~BLOCK_OPT;
3307
                    if(trb) trb->type &= ~BLOCK_OPT;
3308
                    if(blb) blb->type &= ~BLOCK_OPT;
3309
                    if(brb) brb->type &= ~BLOCK_OPT;
3310
                    change ++;
3311
                }
3312
            }
3313
        }
3314
        av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3315
        if(!change)
3316
            break;
3317
    }
3318

    
3319
    if(s->block_max_depth == 1){
3320
        int change= 0;
3321
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
3322
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
3323
                int i;
3324
                int best_rd, init_rd;
3325
                const int index= mb_x + mb_y * b_stride;
3326
                BlockNode *b[4];
3327

    
3328
                b[0]= &s->block[index];
3329
                b[1]= b[0]+1;
3330
                b[2]= b[0]+b_stride;
3331
                b[3]= b[2]+1;
3332
                if(same_block(b[0], b[1]) &&
3333
                   same_block(b[0], b[2]) &&
3334
                   same_block(b[0], b[3]))
3335
                    continue;
3336

    
3337
                if(!s->me_cache_generation)
3338
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3339
                s->me_cache_generation += 1<<22;
3340

    
3341
                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3342

    
3343
                //FIXME more multiref search?
3344
                check_4block_inter(s, mb_x, mb_y,
3345
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3346
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3347

    
3348
                for(i=0; i<4; i++)
3349
                    if(!(b[i]->type&BLOCK_INTRA))
3350
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3351

    
3352
                if(init_rd != best_rd)
3353
                    change++;
3354
            }
3355
        }
3356
        av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3357
    }
3358
}
3359

    
3360
static void encode_blocks(SnowContext *s, int search){
3361
    int x, y;
3362
    int w= s->b_width;
3363
    int h= s->b_height;
3364

    
3365
    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
3366
        iterative_me(s);
3367

    
3368
    for(y=0; y<h; y++){
3369
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
3370
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3371
            return;
3372
        }
3373
        for(x=0; x<w; x++){
3374
            if(s->avctx->me_method == ME_ITER || !search)
3375
                encode_q_branch2(s, 0, x, y);
3376
            else
3377
                encode_q_branch (s, 0, x, y);
3378
        }
3379
    }
3380
}
3381

    
3382
static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3383
    const int w= b->width;
3384
    const int h= b->height;
3385
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3386
    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3387
    int x,y, thres1, thres2;
3388

    
3389
    if(s->qlog == LOSSLESS_QLOG){
3390
        for(y=0; y<h; y++)
3391
            for(x=0; x<w; x++)
3392
                dst[x + y*stride]= src[x + y*stride];
3393
        return;
3394
    }
3395

    
3396
    bias= bias ? 0 : (3*qmul)>>3;
3397
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3398
    thres2= 2*thres1;
3399

    
3400
    if(!bias){
3401
        for(y=0; y<h; y++){
3402
            for(x=0; x<w; x++){
3403
                int i= src[x + y*stride];
3404

    
3405
                if((unsigned)(i+thres1) > thres2){
3406
                    if(i>=0){
3407
                        i<<= QEXPSHIFT;
3408
                        i/= qmul; //FIXME optimize
3409
                        dst[x + y*stride]=  i;
3410
                    }else{
3411
                        i= -i;
3412
                        i<<= QEXPSHIFT;
3413
                        i/= qmul; //FIXME optimize
3414
                        dst[x + y*stride]= -i;
3415
                    }
3416
                }else
3417
                    dst[x + y*stride]= 0;
3418
            }
3419
        }
3420
    }else{
3421
        for(y=0; y<h; y++){
3422
            for(x=0; x<w; x++){
3423
                int i= src[x + y*stride];
3424

    
3425
                if((unsigned)(i+thres1) > thres2){
3426
                    if(i>=0){
3427
                        i<<= QEXPSHIFT;
3428
                        i= (i + bias) / qmul; //FIXME optimize
3429
                        dst[x + y*stride]=  i;
3430
                    }else{
3431
                        i= -i;
3432
                        i<<= QEXPSHIFT;
3433
                        i= (i + bias) / qmul; //FIXME optimize
3434
                        dst[x + y*stride]= -i;
3435
                    }
3436
                }else
3437
                    dst[x + y*stride]= 0;
3438
            }
3439
        }
3440
    }
3441
}
3442

    
3443
static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3444
    const int w= b->width;
3445
    const int h= b->height;
3446
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3447
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3448
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3449
    int x,y;
3450

    
3451
    if(s->qlog == LOSSLESS_QLOG) return;
3452

    
3453
    for(y=0; y<h; y++){
3454
        for(x=0; x<w; x++){
3455
            int i= src[x + y*stride];
3456
            if(i<0){
3457
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3458
            }else if(i>0){
3459
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3460
            }
3461
        }
3462
    }
3463
}
3464

    
3465
static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3466
    const int w= b->width;
3467
    const int h= b->height;
3468
    int x,y;
3469

    
3470
    for(y=h-1; y>=0; y--){
3471
        for(x=w-1; x>=0; x--){
3472
            int i= x + y*stride;
3473

    
3474
            if(x){
3475
                if(use_median){
3476
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3477
                    else  src[i] -= src[i - 1];
3478
                }else{
3479
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3480
                    else  src[i] -= src[i - 1];
3481
                }
3482
            }else{
3483
                if(y) src[i] -= src[i - stride];
3484
            }
3485
        }
3486
    }
3487
}
3488

    
3489
static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3490
    const int w= b->width;
3491
    const int h= b->height;
3492
    int x,y;
3493

    
3494
    for(y=0; y<h; y++){
3495
        for(x=0; x<w; x++){
3496
            int i= x + y*stride;
3497

    
3498
            if(x){
3499
                if(use_median){
3500
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3501
                    else  src[i] += src[i - 1];
3502
                }else{
3503
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3504
                    else  src[i] += src[i - 1];
3505
                }
3506
            }else{
3507
                if(y) src[i] += src[i - stride];
3508
            }
3509
        }
3510
    }
3511
}
3512

    
3513
static void encode_qlogs(SnowContext *s){
3514
    int plane_index, level, orientation;
3515

    
3516
    for(plane_index=0; plane_index<2; plane_index++){
3517
        for(level=0; level<s->spatial_decomposition_count; level++){
3518
            for(orientation=level ? 1:0; orientation<4; orientation++){
3519
                if(orientation==2) continue;
3520
                put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3521
            }
3522
        }
3523
    }
3524
}
3525

    
3526
static void encode_header(SnowContext *s){
3527
    int plane_index, i;
3528
    uint8_t kstate[32];
3529

    
3530
    memset(kstate, MID_STATE, sizeof(kstate));
3531

    
3532
    put_rac(&s->c, kstate, s->keyframe);
3533
    if(s->keyframe || s->always_reset){
3534
        reset_contexts(s);
3535
        s->last_spatial_decomposition_type=
3536
        s->last_qlog=
3537
        s->last_qbias=
3538
        s->last_mv_scale=
3539
        s->last_block_max_depth= 0;
3540
        for(plane_index=0; plane_index<2; plane_index++){
3541
            Plane *p= &s->plane[plane_index];
3542
            p->last_htaps=0;
3543
            p->last_diag_mc=0;
3544
            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3545
        }
3546
    }
3547
    if(s->keyframe){
3548
        put_symbol(&s->c, s->header_state, s->version, 0);
3549
        put_rac(&s->c, s->header_state, s->always_reset);
3550
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3551
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3552
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3553
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3554
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3555
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3556
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3557
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3558
        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3559

    
3560
        encode_qlogs(s);
3561
    }
3562

    
3563
    if(!s->keyframe){
3564
        int update_mc=0;
3565
        for(plane_index=0; plane_index<2; plane_index++){
3566
            Plane *p= &s->plane[plane_index];
3567
            update_mc |= p->last_htaps   != p->htaps;
3568
            update_mc |= p->last_diag_mc != p->diag_mc;
3569
            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3570
        }
3571
        put_rac(&s->c, s->header_state, update_mc);
3572
        if(update_mc){
3573
            for(plane_index=0; plane_index<2; plane_index++){
3574
                Plane *p= &s->plane[plane_index];
3575
                put_rac(&s->c, s->header_state, p->diag_mc);
3576
                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3577
                for(i= p->htaps/2; i; i--)
3578
                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3579
            }
3580
        }
3581
        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3582
            put_rac(&s->c, s->header_state, 1);
3583
            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3584
            encode_qlogs(s);
3585
        }else
3586
            put_rac(&s->c, s->header_state, 0);
3587
    }
3588

    
3589
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3590
    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3591
    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3592
    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3593
    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3594

    
3595
}
3596

    
3597
static void update_last_header_values(SnowContext *s){
3598
    int plane_index;
3599

    
3600
    if(!s->keyframe){
3601
        for(plane_index=0; plane_index<2; plane_index++){
3602
            Plane *p= &s->plane[plane_index];
3603
            p->last_diag_mc= p->diag_mc;
3604
            p->last_htaps  = p->htaps;
3605
            memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3606
        }
3607
    }
3608

    
3609
    s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
3610
    s->last_qlog                        = s->qlog;
3611
    s->last_qbias                       = s->qbias;
3612
    s->last_mv_scale                    = s->mv_scale;
3613
    s->last_block_max_depth             = s->block_max_depth;
3614
    s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3615
}
3616

    
3617
static int qscale2qlog(int qscale){
3618
    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3619
           + 61*QROOT/8; //<64 >60
3620
}
3621

    
3622
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3623
{
3624
    /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3625
     * FIXME we know exact mv bits at this point,
3626
     * but ratecontrol isn't set up to include them. */
3627
    uint32_t coef_sum= 0;
3628
    int level, orientation, delta_qlog;
3629

    
3630
    for(level=0; level<s->spatial_decomposition_count; level++){
3631
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3632
            SubBand *b= &s->plane[0].band[level][orientation];
3633
            IDWTELEM *buf= b->ibuf;
3634
            const int w= b->width;
3635
            const int h= b->height;
3636
            const int stride= b->stride;
3637
            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3638
            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3639
            const int qdiv= (1<<16)/qmul;
3640
            int x, y;
3641
            //FIXME this is ugly
3642
            for(y=0; y<h; y++)
3643
                for(x=0; x<w; x++)
3644
                    buf[x+y*stride]= b->buf[x+y*stride];
3645
            if(orientation==0)
3646
                decorrelate(s, b, buf, stride, 1, 0);
3647
            for(y=0; y<h; y++)
3648
                for(x=0; x<w; x++)
3649
                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3650
        }
3651
    }
3652

    
3653
    /* ugly, ratecontrol just takes a sqrt again */
3654
    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3655
    assert(coef_sum < INT_MAX);
3656

    
3657
    if(pict->pict_type == FF_I_TYPE){
3658
        s->m.current_picture.mb_var_sum= coef_sum;
3659
        s->m.current_picture.mc_mb_var_sum= 0;
3660
    }else{
3661
        s->m.current_picture.mc_mb_var_sum= coef_sum;
3662
        s->m.current_picture.mb_var_sum= 0;
3663
    }
3664

    
3665
    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3666
    if (pict->quality < 0)
3667
        return INT_MIN;
3668
    s->lambda= pict->quality * 3/2;
3669
    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3670
    s->qlog+= delta_qlog;
3671
    return delta_qlog;
3672
}
3673

    
3674
static void calculate_visual_weight(SnowContext *s, Plane *p){
3675
    int width = p->width;
3676
    int height= p->height;
3677
    int level, orientation, x, y;
3678

    
3679
    for(level=0; level<s->spatial_decomposition_count; level++){
3680
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3681
            SubBand *b= &p->band[level][orientation];
3682
            IDWTELEM *ibuf= b->ibuf;
3683
            int64_t error=0;
3684

    
3685
            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3686
            ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3687
            ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3688
            for(y=0; y<height; y++){
3689
                for(x=0; x<width; x++){
3690
                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3691
                    error += d*d;
3692
                }
3693
            }
3694

    
3695
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3696
        }
3697
    }
3698
}
3699

    
3700
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3701
    SnowContext *s = avctx->priv_data;
3702
    RangeCoder * const c= &s->c;
3703
    AVFrame *pict = data;
3704
    const int width= s->avctx->width;
3705
    const int height= s->avctx->height;
3706
    int level, orientation, plane_index, i, y;
3707
    uint8_t rc_header_bak[sizeof(s->header_state)];
3708
    uint8_t rc_block_bak[sizeof(s->block_state)];
3709

    
3710
    ff_init_range_encoder(c, buf, buf_size);
3711
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3712

    
3713
    for(i=0; i<3; i++){
3714
        int shift= !!i;
3715
        for(y=0; y<(height>>shift); y++)
3716
            memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3717
                   &pict->data[i][y * pict->linesize[i]],
3718
                   width>>shift);
3719
    }
3720
    s->new_picture = *pict;
3721

    
3722
    s->m.picture_number= avctx->frame_number;
3723
    if(avctx->flags&CODEC_FLAG_PASS2){
3724
        s->m.pict_type =
3725
        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3726
        s->keyframe= pict->pict_type==FF_I_TYPE;
3727
        if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3728
            pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3729
            if (pict->quality < 0)
3730
                return -1;
3731
        }
3732
    }else{
3733
        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3734
        s->m.pict_type=
3735
        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3736
    }
3737

    
3738
    if(s->pass1_rc && avctx->frame_number == 0)
3739
        pict->quality= 2*FF_QP2LAMBDA;
3740
    if(pict->quality){
3741
        s->qlog= qscale2qlog(pict->quality);
3742
        s->lambda = pict->quality * 3/2;
3743
    }
3744
    if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3745
        s->qlog= LOSSLESS_QLOG;
3746
        s->lambda = 0;
3747
    }//else keep previous frame's qlog until after motion estimation
3748

    
3749
    frame_start(s);
3750

    
3751
    s->m.current_picture_ptr= &s->m.current_picture;
3752
    s->m.last_picture.pts= s->m.current_picture.pts;
3753
    s->m.current_picture.pts= pict->pts;
3754
    if(pict->pict_type == FF_P_TYPE){
3755
        int block_width = (width +15)>>4;
3756
        int block_height= (height+15)>>4;
3757
        int stride= s->current_picture.linesize[0];
3758

    
3759
        assert(s->current_picture.data[0]);
3760
        assert(s->last_picture[0].data[0]);
3761

    
3762
        s->m.avctx= s->avctx;
3763
        s->m.current_picture.data[0]= s->current_picture.data[0];
3764
        s->m.   last_picture.data[0]= s->last_picture[0].data[0];
3765
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3766
        s->m.   last_picture_ptr= &s->m.   last_picture;
3767
        s->m.linesize=
3768
        s->m.   last_picture.linesize[0]=
3769
        s->m.    new_picture.linesize[0]=
3770
        s->m.current_picture.linesize[0]= stride;
3771
        s->m.uvlinesize= s->current_picture.linesize[1];
3772
        s->m.width = width;
3773
        s->m.height= height;
3774
        s->m.mb_width = block_width;
3775
        s->m.mb_height= block_height;
3776
        s->m.mb_stride=   s->m.mb_width+1;
3777
        s->m.b8_stride= 2*s->m.mb_width+1;
3778
        s->m.f_code=1;
3779
        s->m.pict_type= pict->pict_type;
3780
        s->m.me_method= s->avctx->me_method;
3781
        s->m.me.scene_change_score=0;
3782
        s->m.flags= s->avctx->flags;
3783
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3784
        s->m.out_format= FMT_H263;
3785
        s->m.unrestricted_mv= 1;
3786

    
3787
        s->m.lambda = s->lambda;
3788
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3789
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3790

    
3791
        s->m.dsp= s->dsp; //move
3792
        ff_init_me(&s->m);
3793
        s->dsp= s->m.dsp;
3794
    }
3795

    
3796
    if(s->pass1_rc){
3797
        memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
3798
        memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
3799
    }
3800

    
3801
redo_frame:
3802

    
3803
    if(pict->pict_type == FF_I_TYPE)
3804
        s->spatial_decomposition_count= 5;
3805
    else
3806
        s->spatial_decomposition_count= 5;
3807

    
3808
    s->m.pict_type = pict->pict_type;
3809
    s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
3810

    
3811
    common_init_after_header(avctx);
3812

    
3813
    if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3814
        for(plane_index=0; plane_index<3; plane_index++){
3815
            calculate_visual_weight(s, &s->plane[plane_index]);
3816
        }
3817
    }
3818

    
3819
    encode_header(s);
3820
    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3821
    encode_blocks(s, 1);
3822
    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3823

    
3824
    for(plane_index=0; plane_index<3; plane_index++){
3825
        Plane *p= &s->plane[plane_index];
3826
        int w= p->width;
3827
        int h= p->height;
3828
        int x, y;
3829
//        int bits= put_bits_count(&s->c.pb);
3830

    
3831
        if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
3832
            //FIXME optimize
3833
            if(pict->data[plane_index]) //FIXME gray hack
3834
                for(y=0; y<h; y++){
3835
                    for(x=0; x<w; x++){
3836
                        s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3837
                    }
3838
                }
3839
            predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
3840

    
3841
            if(   plane_index==0
3842
               && pict->pict_type == FF_P_TYPE
3843
               && !(avctx->flags&CODEC_FLAG_PASS2)
3844
               && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3845
                ff_init_range_encoder(c, buf, buf_size);
3846
                ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3847
                pict->pict_type= FF_I_TYPE;
3848
                s->keyframe=1;
3849
                s->current_picture.key_frame=1;
3850
                goto redo_frame;
3851
            }
3852

    
3853
            if(s->qlog == LOSSLESS_QLOG){
3854
                for(y=0; y<h; y++){
3855
                    for(x=0; x<w; x++){
3856
                        s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3857
                    }
3858
                }
3859
            }else{
3860
                for(y=0; y<h; y++){
3861
                    for(x=0; x<w; x++){
3862
                        s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
3863
                    }
3864
                }
3865
            }
3866

    
3867
            /*  if(QUANTIZE2)
3868
                dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
3869
            else*/
3870
                ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3871

    
3872
            if(s->pass1_rc && plane_index==0){
3873
                int delta_qlog = ratecontrol_1pass(s, pict);
3874
                if (delta_qlog <= INT_MIN)
3875
                    return -1;
3876
                if(delta_qlog){
3877
                    //reordering qlog in the bitstream would eliminate this reset
3878
                    ff_init_range_encoder(c, buf, buf_size);
3879
                    memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
3880
                    memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
3881
                    encode_header(s);
3882
                    encode_blocks(s, 0);
3883
                }
3884
            }
3885

    
3886
            for(level=0; level<s->spatial_decomposition_count; level++){
3887
                for(orientation=level ? 1 : 0; orientation<4; orientation++){
3888
                    SubBand *b= &p->band[level][orientation];
3889

    
3890
                    if(!QUANTIZE2)
3891
                        quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
3892
                    if(orientation==0)
3893
                        decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
3894
                    encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
3895
                    assert(b->parent==NULL || b->parent->stride == b->stride*2);
3896
                    if(orientation==0)
3897
                        correlate(s, b, b->ibuf, b->stride, 1, 0);
3898
                }
3899
            }
3900

    
3901
            for(level=0; level<s->spatial_decomposition_count; level++){
3902
                for(orientation=level ? 1 : 0; orientation<4; orientation++){
3903
                    SubBand *b= &p->band[level][orientation];
3904

    
3905
                    dequantize(s, b, b->ibuf, b->stride);
3906
                }
3907
            }
3908

    
3909
            ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3910
            if(s->qlog == LOSSLESS_QLOG){
3911
                for(y=0; y<h; y++){
3912
                    for(x=0; x<w; x++){
3913
                        s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
3914
                    }
3915
                }
3916
            }
3917
            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3918
        }else{
3919
            //ME/MC only
3920
            if(pict->pict_type == FF_I_TYPE){
3921
                for(y=0; y<h; y++){
3922
                    for(x=0; x<w; x++){
3923
                        s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
3924
                            pict->data[plane_index][y*pict->linesize[plane_index] + x];
3925
                    }
3926
                }
3927
            }else{
3928
                memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
3929
                predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3930
            }
3931
        }
3932
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3933
            int64_t error= 0;
3934

    
3935
            if(pict->data[plane_index]) //FIXME gray hack
3936
                for(y=0; y<h; y++){
3937
                    for(x=0; x<w; x++){
3938
                        int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3939
                        error += d*d;
3940
                    }
3941
                }
3942
            s->avctx->error[plane_index] += error;
3943
            s->current_picture.error[plane_index] = error;
3944
        }
3945

    
3946
    }
3947

    
3948
    update_last_header_values(s);
3949

    
3950
    release_buffer(avctx);
3951

    
3952
    s->current_picture.coded_picture_number = avctx->frame_number;
3953
    s->current_picture.pict_type = pict->pict_type;
3954
    s->current_picture.quality = pict->quality;
3955
    s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3956
    s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
3957
    s->m.current_picture.display_picture_number =
3958
    s->m.current_picture.coded_picture_number = avctx->frame_number;
3959
    s->m.current_picture.quality = pict->quality;
3960
    s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3961
    if(s->pass1_rc)
3962
        if (ff_rate_estimate_qscale(&s->m, 0) < 0)
3963
            return -1;
3964
    if(avctx->flags&CODEC_FLAG_PASS1)
3965
        ff_write_pass1_stats(&s->m);
3966
    s->m.last_pict_type = s->m.pict_type;
3967
    avctx->frame_bits = s->m.frame_bits;
3968
    avctx->mv_bits = s->m.mv_bits;
3969
    avctx->misc_bits = s->m.misc_bits;
3970
    avctx->p_tex_bits = s->m.p_tex_bits;
3971

    
3972
    emms_c();
3973

    
3974
    return ff_rac_terminate(c);
3975
}
3976

    
3977
static av_cold int encode_end(AVCodecContext *avctx)
3978
{
3979
    SnowContext *s = avctx->priv_data;
3980

    
3981
    common_end(s);
3982
    if (s->input_picture.data[0])
3983
        avctx->release_buffer(avctx, &s->input_picture);
3984
    av_free(avctx->stats_out);
3985

    
3986
    return 0;
3987
}
3988

    
3989
AVCodec snow_encoder = {
3990
    "snow",
3991
    CODEC_TYPE_VIDEO,
3992
    CODEC_ID_SNOW,
3993
    sizeof(SnowContext),
3994
    encode_init,
3995
    encode_frame,
3996
    encode_end,
3997
    .long_name = NULL_IF_CONFIG_SMALL("Snow"),
3998
};
3999
#endif
4000

    
4001

    
4002
#ifdef TEST
4003
#undef malloc
4004
#undef free
4005
#undef printf
4006

    
4007
#include "libavutil/lfg.h"
4008

    
4009
int main(void){
4010
    int width=256;
4011
    int height=256;
4012
    int buffer[2][width*height];
4013
    SnowContext s;
4014
    int i;
4015