Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ ef3dfbd4

History | View | Annotate | Download (180 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

    
21
#include "avcodec.h"
22
#include "dsputil.h"
23
#include "snow.h"
24

    
25
#include "rangecoder.h"
26

    
27
#include "mpegvideo.h"
28

    
29
#undef NDEBUG
30
#include <assert.h>
31

    
32
static const int8_t quant3[256]={
33
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
49
};
50
static const int8_t quant3b[256]={
51
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
};
68
static const int8_t quant3bA[256]={
69
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
};
86
static const int8_t quant5[256]={
87
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
103
};
104
static const int8_t quant7[256]={
105
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
121
};
122
static const int8_t quant9[256]={
123
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
139
};
140
static const int8_t quant11[256]={
141
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
157
};
158
static const int8_t quant13[256]={
159
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
175
};
176

    
177
#if 0 //64*cubic
178
static const uint8_t obmc32[1024]={
179
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
180
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
181
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
182
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
183
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
184
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
185
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
186
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
187
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
188
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
189
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
190
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
191
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
192
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
193
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
194
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
195
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
196
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
197
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
198
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
199
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
200
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
201
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
202
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
203
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
204
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
205
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
206
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
207
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
208
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
209
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
210
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
211
//error:0.000022
212
};
213
static const uint8_t obmc16[256]={
214
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
215
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
216
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
217
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
218
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
219
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
220
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
221
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
222
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
223
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
224
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
225
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
226
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
227
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
228
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
229
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
230
//error:0.000033
231
};
232
#elif 1 // 64*linear
233
static const uint8_t obmc32[1024]={
234
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
235
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
236
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
237
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
238
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
239
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
240
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
241
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
242
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
243
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
244
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
245
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
246
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
247
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
248
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
249
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
250
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
251
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
252
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
253
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
254
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
255
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
256
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
257
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
258
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
259
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
260
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
261
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
262
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
263
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
264
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
265
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
266
 //error:0.000020
267
};
268
static const uint8_t obmc16[256]={
269
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
270
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
271
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
272
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
273
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
274
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
281
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
282
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
283
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
284
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
285
//error:0.000015
286
};
287
#else //64*cos
288
static const uint8_t obmc32[1024]={
289
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
290
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
291
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
292
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
293
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
294
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
295
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
296
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
297
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
298
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
299
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
300
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
301
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
302
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
303
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
304
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
305
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
306
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
307
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
308
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
309
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
310
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
311
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
312
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
313
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
314
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
315
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
316
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
317
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
318
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
319
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
320
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
321
//error:0.000022
322
};
323
static const uint8_t obmc16[256]={
324
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
325
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
326
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
327
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
328
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
329
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
330
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
331
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
332
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
333
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
334
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
335
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
336
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
337
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
338
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
339
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
340
//error:0.000022
341
};
342
#endif /* 0 */
343

    
344
//linear *64
345
static const uint8_t obmc8[64]={
346
  4, 12, 20, 28, 28, 20, 12,  4,
347
 12, 36, 60, 84, 84, 60, 36, 12,
348
 20, 60,100,140,140,100, 60, 20,
349
 28, 84,140,196,196,140, 84, 28,
350
 28, 84,140,196,196,140, 84, 28,
351
 20, 60,100,140,140,100, 60, 20,
352
 12, 36, 60, 84, 84, 60, 36, 12,
353
  4, 12, 20, 28, 28, 20, 12,  4,
354
//error:0.000000
355
};
356

    
357
//linear *64
358
static const uint8_t obmc4[16]={
359
 16, 48, 48, 16,
360
 48,144,144, 48,
361
 48,144,144, 48,
362
 16, 48, 48, 16,
363
//error:0.000000
364
};
365

    
366
static const uint8_t *obmc_tab[4]={
367
    obmc32, obmc16, obmc8, obmc4
368
};
369

    
370
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
371

    
372
typedef struct BlockNode{
373
    int16_t mx;
374
    int16_t my;
375
    uint8_t ref;
376
    uint8_t color[3];
377
    uint8_t type;
378
//#define TYPE_SPLIT    1
379
#define BLOCK_INTRA   1
380
#define BLOCK_OPT     2
381
//#define TYPE_NOCOLOR  4
382
    uint8_t level; //FIXME merge into type?
383
}BlockNode;
384

    
385
static const BlockNode null_block= { //FIXME add border maybe
386
    .color= {128,128,128},
387
    .mx= 0,
388
    .my= 0,
389
    .ref= 0,
390
    .type= 0,
391
    .level= 0,
392
};
393

    
394
#define LOG2_MB_SIZE 4
395
#define MB_SIZE (1<<LOG2_MB_SIZE)
396
#define ENCODER_EXTRA_BITS 4
397
#define HTAPS_MAX 8
398

    
399
typedef struct x_and_coeff{
400
    int16_t x;
401
    uint16_t coeff;
402
} x_and_coeff;
403

    
404
typedef struct SubBand{
405
    int level;
406
    int stride;
407
    int width;
408
    int height;
409
    int qlog;        ///< log(qscale)/log[2^(1/6)]
410
    DWTELEM *buf;
411
    IDWTELEM *ibuf;
412
    int buf_x_offset;
413
    int buf_y_offset;
414
    int stride_line; ///< Stride measured in lines, not pixels.
415
    x_and_coeff * x_coeff;
416
    struct SubBand *parent;
417
    uint8_t state[/*7*2*/ 7 + 512][32];
418
}SubBand;
419

    
420
typedef struct Plane{
421
    int width;
422
    int height;
423
    SubBand band[MAX_DECOMPOSITIONS][4];
424

    
425
    int htaps;
426
    int8_t hcoeff[HTAPS_MAX/2];
427
    int diag_mc;
428
    int fast_mc;
429

    
430
    int last_htaps;
431
    int8_t last_hcoeff[HTAPS_MAX/2];
432
    int last_diag_mc;
433
}Plane;
434

    
435
typedef struct SnowContext{
436
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
437

    
438
    AVCodecContext *avctx;
439
    RangeCoder c;
440
    DSPContext dsp;
441
    AVFrame new_picture;
442
    AVFrame input_picture;              ///< new_picture with the internal linesizes
443
    AVFrame current_picture;
444
    AVFrame last_picture[MAX_REF_FRAMES];
445
    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446
    AVFrame mconly_picture;
447
//     uint8_t q_context[16];
448
    uint8_t header_state[32];
449
    uint8_t block_state[128 + 32*128];
450
    int keyframe;
451
    int always_reset;
452
    int version;
453
    int spatial_decomposition_type;
454
    int last_spatial_decomposition_type;
455
    int temporal_decomposition_type;
456
    int spatial_decomposition_count;
457
    int last_spatial_decomposition_count;
458
    int temporal_decomposition_count;
459
    int max_ref_frames;
460
    int ref_frames;
461
    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
462
    uint32_t *ref_scores[MAX_REF_FRAMES];
463
    DWTELEM *spatial_dwt_buffer;
464
    IDWTELEM *spatial_idwt_buffer;
465
    int colorspace_type;
466
    int chroma_h_shift;
467
    int chroma_v_shift;
468
    int spatial_scalability;
469
    int qlog;
470
    int last_qlog;
471
    int lambda;
472
    int lambda2;
473
    int pass1_rc;
474
    int mv_scale;
475
    int last_mv_scale;
476
    int qbias;
477
    int last_qbias;
478
#define QBIAS_SHIFT 3
479
    int b_width;
480
    int b_height;
481
    int block_max_depth;
482
    int last_block_max_depth;
483
    Plane plane[MAX_PLANES];
484
    BlockNode *block;
485
#define ME_CACHE_SIZE 1024
486
    int me_cache[ME_CACHE_SIZE];
487
    int me_cache_generation;
488
    slice_buffer sb;
489

    
490
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
491
}SnowContext;
492

    
493
typedef struct {
494
    IDWTELEM *b0;
495
    IDWTELEM *b1;
496
    IDWTELEM *b2;
497
    IDWTELEM *b3;
498
    int y;
499
} dwt_compose_t;
500

    
501
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
502
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
503

    
504
static void iterative_me(SnowContext *s);
505

    
506
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
507
{
508
    int i;
509

    
510
    buf->base_buffer = base_buffer;
511
    buf->line_count = line_count;
512
    buf->line_width = line_width;
513
    buf->data_count = max_allocated_lines;
514
    buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
515
    buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
516

    
517
    for(i = 0; i < max_allocated_lines; i++){
518
        buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
519
    }
520

    
521
    buf->data_stack_top = max_allocated_lines - 1;
522
}
523

    
524
static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
525
{
526
    int offset;
527
    IDWTELEM * buffer;
528

    
529
    assert(buf->data_stack_top >= 0);
530
//  assert(!buf->line[line]);
531
    if (buf->line[line])
532
        return buf->line[line];
533

    
534
    offset = buf->line_width * line;
535
    buffer = buf->data_stack[buf->data_stack_top];
536
    buf->data_stack_top--;
537
    buf->line[line] = buffer;
538

    
539
    return buffer;
540
}
541

    
542
static void slice_buffer_release(slice_buffer * buf, int line)
543
{
544
    int offset;
545
    IDWTELEM * buffer;
546

    
547
    assert(line >= 0 && line < buf->line_count);
548
    assert(buf->line[line]);
549

    
550
    offset = buf->line_width * line;
551
    buffer = buf->line[line];
552
    buf->data_stack_top++;
553
    buf->data_stack[buf->data_stack_top] = buffer;
554
    buf->line[line] = NULL;
555
}
556

    
557
static void slice_buffer_flush(slice_buffer * buf)
558
{
559
    int i;
560
    for(i = 0; i < buf->line_count; i++){
561
        if (buf->line[i])
562
            slice_buffer_release(buf, i);
563
    }
564
}
565

    
566
static void slice_buffer_destroy(slice_buffer * buf)
567
{
568
    int i;
569
    slice_buffer_flush(buf);
570

    
571
    for(i = buf->data_count - 1; i >= 0; i--){
572
        av_freep(&buf->data_stack[i]);
573
    }
574
    av_freep(&buf->data_stack);
575
    av_freep(&buf->line);
576
}
577

    
578
#ifdef __sgi
579
// Avoid a name clash on SGI IRIX
580
#undef qexp
581
#endif
582
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
583
static uint8_t qexp[QROOT];
584

    
585
static inline int mirror(int v, int m){
586
    while((unsigned)v > (unsigned)m){
587
        v=-v;
588
        if(v<0) v+= 2*m;
589
    }
590
    return v;
591
}
592

    
593
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
594
    int i;
595

    
596
    if(v){
597
        const int a= FFABS(v);
598
        const int e= av_log2(a);
599
#if 1
600
        const int el= FFMIN(e, 10);
601
        put_rac(c, state+0, 0);
602

    
603
        for(i=0; i<el; i++){
604
            put_rac(c, state+1+i, 1);  //1..10
605
        }
606
        for(; i<e; i++){
607
            put_rac(c, state+1+9, 1);  //1..10
608
        }
609
        put_rac(c, state+1+FFMIN(i,9), 0);
610

    
611
        for(i=e-1; i>=el; i--){
612
            put_rac(c, state+22+9, (a>>i)&1); //22..31
613
        }
614
        for(; i>=0; i--){
615
            put_rac(c, state+22+i, (a>>i)&1); //22..31
616
        }
617

    
618
        if(is_signed)
619
            put_rac(c, state+11 + el, v < 0); //11..21
620
#else
621

    
622
        put_rac(c, state+0, 0);
623
        if(e<=9){
624
            for(i=0; i<e; i++){
625
                put_rac(c, state+1+i, 1);  //1..10
626
            }
627
            put_rac(c, state+1+i, 0);
628

    
629
            for(i=e-1; i>=0; i--){
630
                put_rac(c, state+22+i, (a>>i)&1); //22..31
631
            }
632

    
633
            if(is_signed)
634
                put_rac(c, state+11 + e, v < 0); //11..21
635
        }else{
636
            for(i=0; i<e; i++){
637
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
638
            }
639
            put_rac(c, state+1+FFMIN(i,9), 0);
640

    
641
            for(i=e-1; i>=0; i--){
642
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
643
            }
644

    
645
            if(is_signed)
646
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
647
        }
648
#endif /* 1 */
649
    }else{
650
        put_rac(c, state+0, 1);
651
    }
652
}
653

    
654
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
655
    if(get_rac(c, state+0))
656
        return 0;
657
    else{
658
        int i, e, a;
659
        e= 0;
660
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
661
            e++;
662
        }
663

    
664
        a= 1;
665
        for(i=e-1; i>=0; i--){
666
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
667
        }
668

    
669
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
670
            return -a;
671
        else
672
            return a;
673
    }
674
}
675

    
676
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
677
    int i;
678
    int r= log2>=0 ? 1<<log2 : 1;
679

    
680
    assert(v>=0);
681
    assert(log2>=-4);
682

    
683
    while(v >= r){
684
        put_rac(c, state+4+log2, 1);
685
        v -= r;
686
        log2++;
687
        if(log2>0) r+=r;
688
    }
689
    put_rac(c, state+4+log2, 0);
690

    
691
    for(i=log2-1; i>=0; i--){
692
        put_rac(c, state+31-i, (v>>i)&1);
693
    }
694
}
695

    
696
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
697
    int i;
698
    int r= log2>=0 ? 1<<log2 : 1;
699
    int v=0;
700

    
701
    assert(log2>=-4);
702

    
703
    while(get_rac(c, state+4+log2)){
704
        v+= r;
705
        log2++;
706
        if(log2>0) r+=r;
707
    }
708

    
709
    for(i=log2-1; i>=0; i--){
710
        v+= get_rac(c, state+31-i)<<i;
711
    }
712

    
713
    return v;
714
}
715

    
716
static av_always_inline void
717
lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
718
     int dst_step, int src_step, int ref_step,
719
     int width, int mul, int add, int shift,
720
     int highpass, int inverse){
721
    const int mirror_left= !highpass;
722
    const int mirror_right= (width&1) ^ highpass;
723
    const int w= (width>>1) - 1 + (highpass & width);
724
    int i;
725

    
726
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
727
    if(mirror_left){
728
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
729
        dst += dst_step;
730
        src += src_step;
731
    }
732

    
733
    for(i=0; i<w; i++){
734
        dst[i*dst_step] =
735
            LIFT(src[i*src_step],
736
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
737
                 inverse);
738
    }
739

    
740
    if(mirror_right){
741
        dst[w*dst_step] =
742
            LIFT(src[w*src_step],
743
                 ((mul*2*ref[w*ref_step]+add)>>shift),
744
                 inverse);
745
    }
746
}
747

    
748
static av_always_inline void
749
inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
750
         int dst_step, int src_step, int ref_step,
751
         int width, int mul, int add, int shift,
752
         int highpass, int inverse){
753
    const int mirror_left= !highpass;
754
    const int mirror_right= (width&1) ^ highpass;
755
    const int w= (width>>1) - 1 + (highpass & width);
756
    int i;
757

    
758
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
759
    if(mirror_left){
760
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
761
        dst += dst_step;
762
        src += src_step;
763
    }
764

    
765
    for(i=0; i<w; i++){
766
        dst[i*dst_step] =
767
            LIFT(src[i*src_step],
768
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
769
                 inverse);
770
    }
771

    
772
    if(mirror_right){
773
        dst[w*dst_step] =
774
            LIFT(src[w*src_step],
775
                 ((mul*2*ref[w*ref_step]+add)>>shift),
776
                 inverse);
777
    }
778
}
779

    
780
#ifndef liftS
781
static av_always_inline void
782
liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
783
      int dst_step, int src_step, int ref_step,
784
      int width, int mul, int add, int shift,
785
      int highpass, int inverse){
786
    const int mirror_left= !highpass;
787
    const int mirror_right= (width&1) ^ highpass;
788
    const int w= (width>>1) - 1 + (highpass & width);
789
    int i;
790

    
791
    assert(shift == 4);
792
#define LIFTS(src, ref, inv) \
793
        ((inv) ? \
794
            (src) + (((ref) + 4*(src))>>shift): \
795
            -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
796
    if(mirror_left){
797
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
798
        dst += dst_step;
799
        src += src_step;
800
    }
801

    
802
    for(i=0; i<w; i++){
803
        dst[i*dst_step] =
804
            LIFTS(src[i*src_step],
805
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
806
                  inverse);
807
    }
808

    
809
    if(mirror_right){
810
        dst[w*dst_step] =
811
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
812
    }
813
}
814
static av_always_inline void
815
inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
816
          int dst_step, int src_step, int ref_step,
817
          int width, int mul, int add, int shift,
818
          int highpass, int inverse){
819
    const int mirror_left= !highpass;
820
    const int mirror_right= (width&1) ^ highpass;
821
    const int w= (width>>1) - 1 + (highpass & width);
822
    int i;
823

    
824
    assert(shift == 4);
825
#define LIFTS(src, ref, inv) \
826
    ((inv) ? \
827
        (src) + (((ref) + 4*(src))>>shift): \
828
        -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
829
    if(mirror_left){
830
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
831
        dst += dst_step;
832
        src += src_step;
833
    }
834

    
835
    for(i=0; i<w; i++){
836
        dst[i*dst_step] =
837
            LIFTS(src[i*src_step],
838
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
839
                  inverse);
840
    }
841

    
842
    if(mirror_right){
843
        dst[w*dst_step] =
844
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
845
    }
846
}
847
#endif /* ! liftS */
848

    
849
static void horizontal_decompose53i(DWTELEM *b, int width){
850
    DWTELEM temp[width];
851
    const int width2= width>>1;
852
    int x;
853
    const int w2= (width+1)>>1;
854

    
855
    for(x=0; x<width2; x++){
856
        temp[x   ]= b[2*x    ];
857
        temp[x+w2]= b[2*x + 1];
858
    }
859
    if(width&1)
860
        temp[x   ]= b[2*x    ];
861
#if 0
862
    {
863
    int A1,A2,A3,A4;
864
    A2= temp[1       ];
865
    A4= temp[0       ];
866
    A1= temp[0+width2];
867
    A1 -= (A2 + A4)>>1;
868
    A4 += (A1 + 1)>>1;
869
    b[0+width2] = A1;
870
    b[0       ] = A4;
871
    for(x=1; x+1<width2; x+=2){
872
        A3= temp[x+width2];
873
        A4= temp[x+1     ];
874
        A3 -= (A2 + A4)>>1;
875
        A2 += (A1 + A3 + 2)>>2;
876
        b[x+width2] = A3;
877
        b[x       ] = A2;
878

879
        A1= temp[x+1+width2];
880
        A2= temp[x+2       ];
881
        A1 -= (A2 + A4)>>1;
882
        A4 += (A1 + A3 + 2)>>2;
883
        b[x+1+width2] = A1;
884
        b[x+1       ] = A4;
885
    }
886
    A3= temp[width-1];
887
    A3 -= A2;
888
    A2 += (A1 + A3 + 2)>>2;
889
    b[width -1] = A3;
890
    b[width2-1] = A2;
891
    }
892
#else
893
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
894
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
895
#endif /* 0 */
896
}
897

    
898
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
899
    int i;
900

    
901
    for(i=0; i<width; i++){
902
        b1[i] -= (b0[i] + b2[i])>>1;
903
    }
904
}
905

    
906
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
907
    int i;
908

    
909
    for(i=0; i<width; i++){
910
        b1[i] += (b0[i] + b2[i] + 2)>>2;
911
    }
912
}
913

    
914
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
915
    int y;
916
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
917
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
918

    
919
    for(y=-2; y<height; y+=2){
920
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
921
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
922

    
923
        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
924
        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
925

    
926
        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
927
        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
928

    
929
        b0=b2;
930
        b1=b3;
931
    }
932
}
933

    
934
static void horizontal_decompose97i(DWTELEM *b, int width){
935
    DWTELEM temp[width];
936
    const int w2= (width+1)>>1;
937

    
938
    lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
939
    liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
940
    lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
941
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
942
}
943

    
944

    
945
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
946
    int i;
947

    
948
    for(i=0; i<width; i++){
949
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
950
    }
951
}
952

    
953
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
954
    int i;
955

    
956
    for(i=0; i<width; i++){
957
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
958
    }
959
}
960

    
961
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
962
    int i;
963

    
964
    for(i=0; i<width; i++){
965
#ifdef liftS
966
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
967
#else
968
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
969
#endif
970
    }
971
}
972

    
973
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
974
    int i;
975

    
976
    for(i=0; i<width; i++){
977
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
978
    }
979
}
980

    
981
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
982
    int y;
983
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
984
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
985
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
986
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
987

    
988
    for(y=-4; y<height; y+=2){
989
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
990
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
991

    
992
        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
993
        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
994

    
995
        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
996
        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
997
        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
998
        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
999

    
1000
        b0=b2;
1001
        b1=b3;
1002
        b2=b4;
1003
        b3=b5;
1004
    }
1005
}
1006

    
1007
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1008
    int level;
1009

    
1010
    for(level=0; level<decomposition_count; level++){
1011
        switch(type){
1012
        case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1013
        case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1014
        }
1015
    }
1016
}
1017

    
1018
static void horizontal_compose53i(IDWTELEM *b, int width){
1019
    IDWTELEM temp[width];
1020
    const int width2= width>>1;
1021
    const int w2= (width+1)>>1;
1022
    int x;
1023

    
1024
#if 0
1025
    int A1,A2,A3,A4;
1026
    A2= temp[1       ];
1027
    A4= temp[0       ];
1028
    A1= temp[0+width2];
1029
    A1 -= (A2 + A4)>>1;
1030
    A4 += (A1 + 1)>>1;
1031
    b[0+width2] = A1;
1032
    b[0       ] = A4;
1033
    for(x=1; x+1<width2; x+=2){
1034
        A3= temp[x+width2];
1035
        A4= temp[x+1     ];
1036
        A3 -= (A2 + A4)>>1;
1037
        A2 += (A1 + A3 + 2)>>2;
1038
        b[x+width2] = A3;
1039
        b[x       ] = A2;
1040

1041
        A1= temp[x+1+width2];
1042
        A2= temp[x+2       ];
1043
        A1 -= (A2 + A4)>>1;
1044
        A4 += (A1 + A3 + 2)>>2;
1045
        b[x+1+width2] = A1;
1046
        b[x+1       ] = A4;
1047
    }
1048
    A3= temp[width-1];
1049
    A3 -= A2;
1050
    A2 += (A1 + A3 + 2)>>2;
1051
    b[width -1] = A3;
1052
    b[width2-1] = A2;
1053
#else
1054
    inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1055
    inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1056
#endif /* 0 */
1057
    for(x=0; x<width2; x++){
1058
        b[2*x    ]= temp[x   ];
1059
        b[2*x + 1]= temp[x+w2];
1060
    }
1061
    if(width&1)
1062
        b[2*x    ]= temp[x   ];
1063
}
1064

    
1065
static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1066
    int i;
1067

    
1068
    for(i=0; i<width; i++){
1069
        b1[i] += (b0[i] + b2[i])>>1;
1070
    }
1071
}
1072

    
1073
static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1074
    int i;
1075

    
1076
    for(i=0; i<width; i++){
1077
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1078
    }
1079
}
1080

    
1081
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1082
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1083
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1084
    cs->y = -1;
1085
}
1086

    
1087
static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1088
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1089
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1090
    cs->y = -1;
1091
}
1092

    
1093
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1094
    int y= cs->y;
1095

    
1096
    IDWTELEM *b0= cs->b0;
1097
    IDWTELEM *b1= cs->b1;
1098
    IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1099
    IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1100

    
1101
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1102
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1103

    
1104
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1105
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1106

    
1107
    cs->b0 = b2;
1108
    cs->b1 = b3;
1109
    cs->y += 2;
1110
}
1111

    
1112
static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1113
    int y= cs->y;
1114
    IDWTELEM *b0= cs->b0;
1115
    IDWTELEM *b1= cs->b1;
1116
    IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1117
    IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1118

    
1119
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1120
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1121

    
1122
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1123
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1124

    
1125
    cs->b0 = b2;
1126
    cs->b1 = b3;
1127
    cs->y += 2;
1128
}
1129

    
1130
static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1131
    dwt_compose_t cs;
1132
    spatial_compose53i_init(&cs, buffer, height, stride);
1133
    while(cs.y <= height)
1134
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1135
}
1136

    
1137

    
1138
void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1139
    IDWTELEM temp[width];
1140
    const int w2= (width+1)>>1;
1141

    
1142
    inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1143
    inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1144
    inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
1145
    inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
1146
}
1147

    
1148
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1149
    int i;
1150

    
1151
    for(i=0; i<width; i++){
1152
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1153
    }
1154
}
1155

    
1156
static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1157
    int i;
1158

    
1159
    for(i=0; i<width; i++){
1160
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1161
    }
1162
}
1163

    
1164
static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1165
    int i;
1166

    
1167
    for(i=0; i<width; i++){
1168
#ifdef liftS
1169
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1170
#else
1171
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1172
#endif
1173
    }
1174
}
1175

    
1176
static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1177
    int i;
1178

    
1179
    for(i=0; i<width; i++){
1180
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1181
    }
1182
}
1183

    
1184
void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1185
    int i;
1186

    
1187
    for(i=0; i<width; i++){
1188
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1189
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1190
#ifdef liftS
1191
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1192
#else
1193
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1194
#endif
1195
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1196
    }
1197
}
1198

    
1199
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1200
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1201
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1202
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1203
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1204
    cs->y = -3;
1205
}
1206

    
1207
static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1208
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1209
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1210
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1211
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1212
    cs->y = -3;
1213
}
1214

    
1215
static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1216
    int y = cs->y;
1217

    
1218
    IDWTELEM *b0= cs->b0;
1219
    IDWTELEM *b1= cs->b1;
1220
    IDWTELEM *b2= cs->b2;
1221
    IDWTELEM *b3= cs->b3;
1222
    IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1223
    IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1224

    
1225
    if(y>0 && y+4<height){
1226
        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1227
    }else{
1228
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1229
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1230
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1231
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1232
    }
1233

    
1234
        if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1235
        if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1236

    
1237
    cs->b0=b2;
1238
    cs->b1=b3;
1239
    cs->b2=b4;
1240
    cs->b3=b5;
1241
    cs->y += 2;
1242
}
1243

    
1244
static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1245
    int y = cs->y;
1246
    IDWTELEM *b0= cs->b0;
1247
    IDWTELEM *b1= cs->b1;
1248
    IDWTELEM *b2= cs->b2;
1249
    IDWTELEM *b3= cs->b3;
1250
    IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1251
    IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1252

    
1253
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1254
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1255
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1256
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1257

    
1258
        if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1259
        if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1260

    
1261
    cs->b0=b2;
1262
    cs->b1=b3;
1263
    cs->b2=b4;
1264
    cs->b3=b5;
1265
    cs->y += 2;
1266
}
1267

    
1268
static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1269
    dwt_compose_t cs;
1270
    spatial_compose97i_init(&cs, buffer, height, stride);
1271
    while(cs.y <= height)
1272
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1273
}
1274

    
1275
static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1276
    int level;
1277
    for(level=decomposition_count-1; level>=0; level--){
1278
        switch(type){
1279
        case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1280
        case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1281
        }
1282
    }
1283
}
1284

    
1285
static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1286
    int level;
1287
    for(level=decomposition_count-1; level>=0; level--){
1288
        switch(type){
1289
        case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1290
        case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1291
        }
1292
    }
1293
}
1294

    
1295
static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1296
    const int support = type==1 ? 3 : 5;
1297
    int level;
1298
    if(type==2) return;
1299

    
1300
    for(level=decomposition_count-1; level>=0; level--){
1301
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1302
            switch(type){
1303
            case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1304
                break;
1305
            case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1306
                break;
1307
            }
1308
        }
1309
    }
1310
}
1311

    
1312
static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1313
    const int support = type==1 ? 3 : 5;
1314
    int level;
1315
    if(type==2) return;
1316

    
1317
    for(level=decomposition_count-1; level>=0; level--){
1318
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1319
            switch(type){
1320
            case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1321
                break;
1322
            case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1323
                break;
1324
            }
1325
        }
1326
    }
1327
}
1328

    
1329
static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1330
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1331
        int y;
1332
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1333
        for(y=0; y<height; y+=4)
1334
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1335
}
1336

    
1337
static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1338
    const int w= b->width;
1339
    const int h= b->height;
1340
    int x, y;
1341

    
1342
    if(1){
1343
        int run=0;
1344
        int runs[w*h];
1345
        int run_index=0;
1346
        int max_index;
1347

    
1348
        for(y=0; y<h; y++){
1349
            for(x=0; x<w; x++){
1350
                int v, p=0;
1351
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1352
                v= src[x + y*stride];
1353

    
1354
                if(y){
1355
                    t= src[x + (y-1)*stride];
1356
                    if(x){
1357
                        lt= src[x - 1 + (y-1)*stride];
1358
                    }
1359
                    if(x + 1 < w){
1360
                        rt= src[x + 1 + (y-1)*stride];
1361
                    }
1362
                }
1363
                if(x){
1364
                    l= src[x - 1 + y*stride];
1365
                    /*if(x > 1){
1366
                        if(orientation==1) ll= src[y + (x-2)*stride];
1367
                        else               ll= src[x - 2 + y*stride];
1368
                    }*/
1369
                }
1370
                if(parent){
1371
                    int px= x>>1;
1372
                    int py= y>>1;
1373
                    if(px<b->parent->width && py<b->parent->height)
1374
                        p= parent[px + py*2*stride];
1375
                }
1376
                if(!(/*ll|*/l|lt|t|rt|p)){
1377
                    if(v){
1378
                        runs[run_index++]= run;
1379
                        run=0;
1380
                    }else{
1381
                        run++;
1382
                    }
1383
                }
1384
            }
1385
        }
1386
        max_index= run_index;
1387
        runs[run_index++]= run;
1388
        run_index=0;
1389
        run= runs[run_index++];
1390

    
1391
        put_symbol2(&s->c, b->state[30], max_index, 0);
1392
        if(run_index <= max_index)
1393
            put_symbol2(&s->c, b->state[1], run, 3);
1394

    
1395
        for(y=0; y<h; y++){
1396
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1397
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1398
                return -1;
1399
            }
1400
            for(x=0; x<w; x++){
1401
                int v, p=0;
1402
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1403
                v= src[x + y*stride];
1404

    
1405
                if(y){
1406
                    t= src[x + (y-1)*stride];
1407
                    if(x){
1408
                        lt= src[x - 1 + (y-1)*stride];
1409
                    }
1410
                    if(x + 1 < w){
1411
                        rt= src[x + 1 + (y-1)*stride];
1412
                    }
1413
                }
1414
                if(x){
1415
                    l= src[x - 1 + y*stride];
1416
                    /*if(x > 1){
1417
                        if(orientation==1) ll= src[y + (x-2)*stride];
1418
                        else               ll= src[x - 2 + y*stride];
1419
                    }*/
1420
                }
1421
                if(parent){
1422
                    int px= x>>1;
1423
                    int py= y>>1;
1424
                    if(px<b->parent->width && py<b->parent->height)
1425
                        p= parent[px + py*2*stride];
1426
                }
1427
                if(/*ll|*/l|lt|t|rt|p){
1428
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1429

    
1430
                    put_rac(&s->c, &b->state[0][context], !!v);
1431
                }else{
1432
                    if(!run){
1433
                        run= runs[run_index++];
1434

    
1435
                        if(run_index <= max_index)
1436
                            put_symbol2(&s->c, b->state[1], run, 3);
1437
                        assert(v);
1438
                    }else{
1439
                        run--;
1440
                        assert(!v);
1441
                    }
1442
                }
1443
                if(v){
1444
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1445
                    int l2= 2*FFABS(l) + (l<0);
1446
                    int t2= 2*FFABS(t) + (t<0);
1447

    
1448
                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1449
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1450
                }
1451
            }
1452
        }
1453
    }
1454
    return 0;
1455
}
1456

    
1457
static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1458
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1459
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1460
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1461
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1462
}
1463

    
1464
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1465
    const int w= b->width;
1466
    const int h= b->height;
1467
    int x,y;
1468

    
1469
    if(1){
1470
        int run, runs;
1471
        x_and_coeff *xc= b->x_coeff;
1472
        x_and_coeff *prev_xc= NULL;
1473
        x_and_coeff *prev2_xc= xc;
1474
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1475
        x_and_coeff *prev_parent_xc= parent_xc;
1476

    
1477
        runs= get_symbol2(&s->c, b->state[30], 0);
1478
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1479
        else           run= INT_MAX;
1480

    
1481
        for(y=0; y<h; y++){
1482
            int v=0;
1483
            int lt=0, t=0, rt=0;
1484

    
1485
            if(y && prev_xc->x == 0){
1486
                rt= prev_xc->coeff;
1487
            }
1488
            for(x=0; x<w; x++){
1489
                int p=0;
1490
                const int l= v;
1491

    
1492
                lt= t; t= rt;
1493

    
1494
                if(y){
1495
                    if(prev_xc->x <= x)
1496
                        prev_xc++;
1497
                    if(prev_xc->x == x + 1)
1498
                        rt= prev_xc->coeff;
1499
                    else
1500
                        rt=0;
1501
                }
1502
                if(parent_xc){
1503
                    if(x>>1 > parent_xc->x){
1504
                        parent_xc++;
1505
                    }
1506
                    if(x>>1 == parent_xc->x){
1507
                        p= parent_xc->coeff;
1508
                    }
1509
                }
1510
                if(/*ll|*/l|lt|t|rt|p){
1511
                    int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1512

    
1513
                    v=get_rac(&s->c, &b->state[0][context]);
1514
                    if(v){
1515
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1516
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1517

    
1518
                        xc->x=x;
1519
                        (xc++)->coeff= v;
1520
                    }
1521
                }else{
1522
                    if(!run){
1523
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1524
                        else           run= INT_MAX;
1525
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1526
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1527

    
1528
                        xc->x=x;
1529
                        (xc++)->coeff= v;
1530
                    }else{
1531
                        int max_run;
1532
                        run--;
1533
                        v=0;
1534

    
1535
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1536
                        else  max_run= FFMIN(run, w-x-1);
1537
                        if(parent_xc)
1538
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1539
                        x+= max_run;
1540
                        run-= max_run;
1541
                    }
1542
                }
1543
            }
1544
            (xc++)->x= w+1; //end marker
1545
            prev_xc= prev2_xc;
1546
            prev2_xc= xc;
1547

    
1548
            if(parent_xc){
1549
                if(y&1){
1550
                    while(parent_xc->x != parent->width+1)
1551
                        parent_xc++;
1552
                    parent_xc++;
1553
                    prev_parent_xc= parent_xc;
1554
                }else{
1555
                    parent_xc= prev_parent_xc;
1556
                }
1557
            }
1558
        }
1559

    
1560
        (xc++)->x= w+1; //end marker
1561
    }
1562
}
1563

    
1564
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1565
    const int w= b->width;
1566
    int y;
1567
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1568
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1569
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1570
    int new_index = 0;
1571

    
1572
    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1573
        qadd= 0;
1574
        qmul= 1<<QEXPSHIFT;
1575
    }
1576

    
1577
    /* If we are on the second or later slice, restore our index. */
1578
    if (start_y != 0)
1579
        new_index = save_state[0];
1580

    
1581

    
1582
    for(y=start_y; y<h; y++){
1583
        int x = 0;
1584
        int v;
1585
        IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1586
        memset(line, 0, b->width*sizeof(IDWTELEM));
1587
        v = b->x_coeff[new_index].coeff;
1588
        x = b->x_coeff[new_index++].x;
1589
        while(x < w){
1590
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1591
            register int u= -(v&1);
1592
            line[x] = (t^u) - u;
1593

    
1594
            v = b->x_coeff[new_index].coeff;
1595
            x = b->x_coeff[new_index++].x;
1596
        }
1597
    }
1598

    
1599
    /* Save our variables for the next slice. */
1600
    save_state[0] = new_index;
1601

    
1602
    return;
1603
}
1604

    
1605
static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1606
    int plane_index, level, orientation;
1607

    
1608
    for(plane_index=0; plane_index<3; plane_index++){
1609
        for(level=0; level<MAX_DECOMPOSITIONS; level++){
1610
            for(orientation=level ? 1:0; orientation<4; orientation++){
1611
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1612
            }
1613
        }
1614
    }
1615
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1616
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1617
}
1618

    
1619
static int alloc_blocks(SnowContext *s){
1620
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1621
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1622

    
1623
    s->b_width = w;
1624
    s->b_height= h;
1625

    
1626
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1627
    return 0;
1628
}
1629

    
1630
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1631
    uint8_t *bytestream= d->bytestream;
1632
    uint8_t *bytestream_start= d->bytestream_start;
1633
    *d= *s;
1634
    d->bytestream= bytestream;
1635
    d->bytestream_start= bytestream_start;
1636
}
1637

    
1638
//near copy & paste from dsputil, FIXME
1639
static int pix_sum(uint8_t * pix, int line_size, int w)
1640
{
1641
    int s, i, j;
1642

    
1643
    s = 0;
1644
    for (i = 0; i < w; i++) {
1645
        for (j = 0; j < w; j++) {
1646
            s += pix[0];
1647
            pix ++;
1648
        }
1649
        pix += line_size - w;
1650
    }
1651
    return s;
1652
}
1653

    
1654
//near copy & paste from dsputil, FIXME
1655
static int pix_norm1(uint8_t * pix, int line_size, int w)
1656
{
1657
    int s, i, j;
1658
    uint32_t *sq = ff_squareTbl + 256;
1659

    
1660
    s = 0;
1661
    for (i = 0; i < w; i++) {
1662
        for (j = 0; j < w; j ++) {
1663
            s += sq[pix[0]];
1664
            pix ++;
1665
        }
1666
        pix += line_size - w;
1667
    }
1668
    return s;
1669
}
1670

    
1671
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1672
    const int w= s->b_width << s->block_max_depth;
1673
    const int rem_depth= s->block_max_depth - level;
1674
    const int index= (x + y*w) << rem_depth;
1675
    const int block_w= 1<<rem_depth;
1676
    BlockNode block;
1677
    int i,j;
1678

    
1679
    block.color[0]= l;
1680
    block.color[1]= cb;
1681
    block.color[2]= cr;
1682
    block.mx= mx;
1683
    block.my= my;
1684
    block.ref= ref;
1685
    block.type= type;
1686
    block.level= level;
1687

    
1688
    for(j=0; j<block_w; j++){
1689
        for(i=0; i<block_w; i++){
1690
            s->block[index + i + j*w]= block;
1691
        }
1692
    }
1693
}
1694

    
1695
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1696
    const int offset[3]= {
1697
          y*c->  stride + x,
1698
        ((y*c->uvstride + x)>>1),
1699
        ((y*c->uvstride + x)>>1),
1700
    };
1701
    int i;
1702
    for(i=0; i<3; i++){
1703
        c->src[0][i]= src [i];
1704
        c->ref[0][i]= ref [i] + offset[i];
1705
    }
1706
    assert(!ref_index);
1707
}
1708

    
1709
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1710
                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1711
    if(s->ref_frames == 1){
1712
        *mx = mid_pred(left->mx, top->mx, tr->mx);
1713
        *my = mid_pred(left->my, top->my, tr->my);
1714
    }else{
1715
        const int *scale = scale_mv_ref[ref];
1716
        *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1717
                       (top ->mx * scale[top ->ref] + 128) >>8,
1718
                       (tr  ->mx * scale[tr  ->ref] + 128) >>8);
1719
        *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1720
                       (top ->my * scale[top ->ref] + 128) >>8,
1721
                       (tr  ->my * scale[tr  ->ref] + 128) >>8);
1722
    }
1723
}
1724

    
1725
//FIXME copy&paste
1726
#define P_LEFT P[1]
1727
#define P_TOP P[2]
1728
#define P_TOPRIGHT P[3]
1729
#define P_MEDIAN P[4]
1730
#define P_MV1 P[9]
1731
#define FLAG_QPEL   1 //must be 1
1732

    
1733
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1734
    uint8_t p_buffer[1024];
1735
    uint8_t i_buffer[1024];
1736
    uint8_t p_state[sizeof(s->block_state)];
1737
    uint8_t i_state[sizeof(s->block_state)];
1738
    RangeCoder pc, ic;
1739
    uint8_t *pbbak= s->c.bytestream;
1740
    uint8_t *pbbak_start= s->c.bytestream_start;
1741
    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1742
    const int w= s->b_width  << s->block_max_depth;
1743
    const int h= s->b_height << s->block_max_depth;
1744
    const int rem_depth= s->block_max_depth - level;
1745
    const int index= (x + y*w) << rem_depth;
1746
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1747
    int trx= (x+1)<<rem_depth;
1748
    int try= (y+1)<<rem_depth;
1749
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1750
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1751
    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1752
    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1753
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1754
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1755
    int pl = left->color[0];
1756
    int pcb= left->color[1];
1757
    int pcr= left->color[2];
1758
    int pmx, pmy;
1759
    int mx=0, my=0;
1760
    int l,cr,cb;
1761
    const int stride= s->current_picture.linesize[0];
1762
    const int uvstride= s->current_picture.linesize[1];
1763
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
1764
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1765
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1766
    int P[10][2];
1767
    int16_t last_mv[3][2];
1768
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1769
    const int shift= 1+qpel;
1770
    MotionEstContext *c= &s->m.me;
1771
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1772
    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1773
    int my_context= av_log2(2*FFABS(left->my - top->my));
1774
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1775
    int ref, best_ref, ref_score, ref_mx, ref_my;
1776

    
1777
    assert(sizeof(s->block_state) >= 256);
1778
    if(s->keyframe){
1779
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1780
        return 0;
1781
    }
1782

    
1783
//    clip predictors / edge ?
1784

    
1785
    P_LEFT[0]= left->mx;
1786
    P_LEFT[1]= left->my;
1787
    P_TOP [0]= top->mx;
1788
    P_TOP [1]= top->my;
1789
    P_TOPRIGHT[0]= tr->mx;
1790
    P_TOPRIGHT[1]= tr->my;
1791

    
1792
    last_mv[0][0]= s->block[index].mx;
1793
    last_mv[0][1]= s->block[index].my;
1794
    last_mv[1][0]= right->mx;
1795
    last_mv[1][1]= right->my;
1796
    last_mv[2][0]= bottom->mx;
1797
    last_mv[2][1]= bottom->my;
1798

    
1799
    s->m.mb_stride=2;
1800
    s->m.mb_x=
1801
    s->m.mb_y= 0;
1802
    c->skip= 0;
1803

    
1804
    assert(c->  stride ==   stride);
1805
    assert(c->uvstride == uvstride);
1806

    
1807
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1808
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1809
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1810
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1811

    
1812
    c->xmin = - x*block_w - 16+2;
1813
    c->ymin = - y*block_w - 16+2;
1814
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1815
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1816

    
1817
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1818
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
1819
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1820
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1821
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1822
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1823
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1824

    
1825
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1826
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1827

    
1828
    if (!y) {
1829
        c->pred_x= P_LEFT[0];
1830
        c->pred_y= P_LEFT[1];
1831
    } else {
1832
        c->pred_x = P_MEDIAN[0];
1833
        c->pred_y = P_MEDIAN[1];
1834
    }
1835

    
1836
    score= INT_MAX;
1837
    best_ref= 0;
1838
    for(ref=0; ref<s->ref_frames; ref++){
1839
        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1840

    
1841
        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1842
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1843

    
1844
        assert(ref_mx >= c->xmin);
1845
        assert(ref_mx <= c->xmax);
1846
        assert(ref_my >= c->ymin);
1847
        assert(ref_my <= c->ymax);
1848

    
1849
        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1850
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1851
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1852
        if(s->ref_mvs[ref]){
1853
            s->ref_mvs[ref][index][0]= ref_mx;
1854
            s->ref_mvs[ref][index][1]= ref_my;
1855
            s->ref_scores[ref][index]= ref_score;
1856
        }
1857
        if(score > ref_score){
1858
            score= ref_score;
1859
            best_ref= ref;
1860
            mx= ref_mx;
1861
            my= ref_my;
1862
        }
1863
    }
1864
    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1865

    
1866
  //  subpel search
1867
    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1868
    pc= s->c;
1869
    pc.bytestream_start=
1870
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1871
    memcpy(p_state, s->block_state, sizeof(s->block_state));
1872

    
1873
    if(level!=s->block_max_depth)
1874
        put_rac(&pc, &p_state[4 + s_context], 1);
1875
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1876
    if(s->ref_frames > 1)
1877
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1878
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1879
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1880
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1881
    p_len= pc.bytestream - pc.bytestream_start;
1882
    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1883

    
1884
    block_s= block_w*block_w;
1885
    sum = pix_sum(current_data[0], stride, block_w);
1886
    l= (sum + block_s/2)/block_s;
1887
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1888

    
1889
    block_s= block_w*block_w>>2;
1890
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
1891
    cb= (sum + block_s/2)/block_s;
1892
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1893
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
1894
    cr= (sum + block_s/2)/block_s;
1895
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1896

    
1897
    ic= s->c;
1898
    ic.bytestream_start=
1899
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1900
    memcpy(i_state, s->block_state, sizeof(s->block_state));
1901
    if(level!=s->block_max_depth)
1902
        put_rac(&ic, &i_state[4 + s_context], 1);
1903
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1904
    put_symbol(&ic, &i_state[32],  l-pl , 1);
1905
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
1906
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
1907
    i_len= ic.bytestream - ic.bytestream_start;
1908
    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1909

    
1910
//    assert(score==256*256*256*64-1);
1911
    assert(iscore < 255*255*256 + s->lambda2*10);
1912
    assert(iscore >= 0);
1913
    assert(l>=0 && l<=255);
1914
    assert(pl>=0 && pl<=255);
1915

    
1916
    if(level==0){
1917
        int varc= iscore >> 8;
1918
        int vard= score >> 8;
1919
        if (vard <= 64 || vard < varc)
1920
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1921
        else
1922
            c->scene_change_score+= s->m.qscale;
1923
    }
1924

    
1925
    if(level!=s->block_max_depth){
1926
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
1927
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1928
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1929
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1930
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1931
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1932

    
1933
        if(score2 < score && score2 < iscore)
1934
            return score2;
1935
    }
1936

    
1937
    if(iscore < score){
1938
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1939
        memcpy(pbbak, i_buffer, i_len);
1940
        s->c= ic;
1941
        s->c.bytestream_start= pbbak_start;
1942
        s->c.bytestream= pbbak + i_len;
1943
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1944
        memcpy(s->block_state, i_state, sizeof(s->block_state));
1945
        return iscore;
1946
    }else{
1947
        memcpy(pbbak, p_buffer, p_len);
1948
        s->c= pc;
1949
        s->c.bytestream_start= pbbak_start;
1950
        s->c.bytestream= pbbak + p_len;
1951
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1952
        memcpy(s->block_state, p_state, sizeof(s->block_state));
1953
        return score;
1954
    }
1955
}
1956

    
1957
static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1958
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1959
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1960
    }else{
1961
        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1962
    }
1963
}
1964

    
1965
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1966
    const int w= s->b_width  << s->block_max_depth;
1967
    const int rem_depth= s->block_max_depth - level;
1968
    const int index= (x + y*w) << rem_depth;
1969
    int trx= (x+1)<<rem_depth;
1970
    BlockNode *b= &s->block[index];
1971
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1972
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1973
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1974
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1975
    int pl = left->color[0];
1976
    int pcb= left->color[1];
1977
    int pcr= left->color[2];
1978
    int pmx, pmy;
1979
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1980
    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1981
    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1982
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1983

    
1984
    if(s->keyframe){
1985
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1986
        return;
1987
    }
1988

    
1989
    if(level!=s->block_max_depth){
1990
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1991
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
1992
        }else{
1993
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
1994
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1995
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1996
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
1997
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
1998
            return;
1999
        }
2000
    }
2001
    if(b->type & BLOCK_INTRA){
2002
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2003
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2004
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2005
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2006
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2007
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2008
    }else{
2009
        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2010
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2011
        if(s->ref_frames > 1)
2012
            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2013
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2014
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2015
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2016
    }
2017
}
2018

    
2019
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2020
    const int w= s->b_width << s->block_max_depth;
2021
    const int rem_depth= s->block_max_depth - level;
2022
    const int index= (x + y*w) << rem_depth;
2023
    int trx= (x+1)<<rem_depth;
2024
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2025
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2026
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2027
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2028
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2029

    
2030
    if(s->keyframe){
2031
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2032
        return;
2033
    }
2034

    
2035
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2036
        int type, mx, my;
2037
        int l = left->color[0];
2038
        int cb= left->color[1];
2039
        int cr= left->color[2];
2040
        int ref = 0;
2041
        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2042
        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2043
        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2044

    
2045
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2046

    
2047
        if(type){
2048
            pred_mv(s, &mx, &my, 0, left, top, tr);
2049
            l += get_symbol(&s->c, &s->block_state[32], 1);
2050
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2051
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2052
        }else{
2053
            if(s->ref_frames > 1)
2054
                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2055
            pred_mv(s, &mx, &my, ref, left, top, tr);
2056
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2057
            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2058
        }
2059
        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2060
    }else{
2061
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2062
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2063
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2064
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2065
    }
2066
}
2067

    
2068
static void encode_blocks(SnowContext *s, int search){
2069
    int x, y;
2070
    int w= s->b_width;
2071
    int h= s->b_height;
2072

    
2073
    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2074
        iterative_me(s);
2075

    
2076
    for(y=0; y<h; y++){
2077
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2078
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2079
            return;
2080
        }
2081
        for(x=0; x<w; x++){
2082
            if(s->avctx->me_method == ME_ITER || !search)
2083
                encode_q_branch2(s, 0, x, y);
2084
            else
2085
                encode_q_branch (s, 0, x, y);
2086
        }
2087
    }
2088
}
2089

    
2090
static void decode_blocks(SnowContext *s){
2091
    int x, y;
2092
    int w= s->b_width;
2093
    int h= s->b_height;
2094

    
2095
    for(y=0; y<h; y++){
2096
        for(x=0; x<w; x++){
2097
            decode_q_branch(s, 0, x, y);
2098
        }
2099
    }
2100
}
2101

    
2102
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2103
    const static uint8_t weight[64]={
2104
    8,7,6,5,4,3,2,1,
2105
    7,7,0,0,0,0,0,1,
2106
    6,0,6,0,0,0,2,0,
2107
    5,0,0,5,0,3,0,0,
2108
    4,0,0,0,4,0,0,0,
2109
    3,0,0,5,0,3,0,0,
2110
    2,0,6,0,0,0,2,0,
2111
    1,7,0,0,0,0,0,1,
2112
    };
2113

    
2114
    const static uint8_t brane[256]={
2115
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2116
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2117
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2118
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2119
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2120
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2121
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2122
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2123
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2124
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2125
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2126
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2127
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2128
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2129
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2130
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2131
    };
2132

    
2133
    const static uint8_t needs[16]={
2134
    0,1,0,0,
2135
    2,4,2,0,
2136
    0,1,0,0,
2137
    15
2138
    };
2139

    
2140
    int x, y, b, r, l;
2141
    int16_t tmpIt   [64*(32+HTAPS_MAX)];
2142
    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2143
    int16_t *tmpI= tmpIt;
2144
    uint8_t *tmp2= tmp2t[0];
2145
    const uint8_t *hpel[11];
2146
    assert(dx<16 && dy<16);
2147
    r= brane[dx + 16*dy]&15;
2148
    l= brane[dx + 16*dy]>>4;
2149

    
2150
    b= needs[l] | needs[r];
2151
    if(p && !p->diag_mc)
2152
        b= 15;
2153

    
2154
    if(b&5){
2155
        for(y=0; y < b_h+HTAPS_MAX-1; y++){
2156
            for(x=0; x < b_w; x++){
2157
                int a_1=src[x + HTAPS_MAX/2-4];
2158
                int a0= src[x + HTAPS_MAX/2-3];
2159
                int a1= src[x + HTAPS_MAX/2-2];
2160
                int a2= src[x + HTAPS_MAX/2-1];
2161
                int a3= src[x + HTAPS_MAX/2+0];
2162
                int a4= src[x + HTAPS_MAX/2+1];
2163
                int a5= src[x + HTAPS_MAX/2+2];
2164
                int a6= src[x + HTAPS_MAX/2+3];
2165
                int am=0;
2166
                if(!p || p->fast_mc){
2167
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2168
                    tmpI[x]= am;
2169
                    am= (am+16)>>5;
2170
                }else{
2171
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2172
                    tmpI[x]= am;
2173
                    am= (am+32)>>6;
2174
                }
2175

    
2176
                if(am&(~255)) am= ~(am>>31);
2177
                tmp2[x]= am;
2178
            }
2179
            tmpI+= 64;
2180
            tmp2+= stride;
2181
            src += stride;
2182
        }
2183
        src -= stride*y;
2184
    }
2185
    src += HTAPS_MAX/2 - 1;
2186
    tmp2= tmp2t[1];
2187

    
2188
    if(b&2){
2189
        for(y=0; y < b_h; y++){
2190
            for(x=0; x < b_w+1; x++){
2191
                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2192
                int a0= src[x + (HTAPS_MAX/2-3)*stride];
2193
                int a1= src[x + (HTAPS_MAX/2-2)*stride];
2194
                int a2= src[x + (HTAPS_MAX/2-1)*stride];
2195
                int a3= src[x + (HTAPS_MAX/2+0)*stride];
2196
                int a4= src[x + (HTAPS_MAX/2+1)*stride];
2197
                int a5= src[x + (HTAPS_MAX/2+2)*stride];
2198
                int a6= src[x + (HTAPS_MAX/2+3)*stride];
2199
                int am=0;
2200
                if(!p || p->fast_mc)
2201
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2202
                else
2203
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2204

    
2205
                if(am&(~255)) am= ~(am>>31);
2206
                tmp2[x]= am;
2207
            }
2208
            src += stride;
2209
            tmp2+= stride;
2210
        }
2211
        src -= stride*y;
2212
    }
2213
    src += stride*(HTAPS_MAX/2 - 1);
2214
    tmp2= tmp2t[2];
2215
    tmpI= tmpIt;
2216
    if(b&4){
2217
        for(y=0; y < b_h; y++){
2218
            for(x=0; x < b_w; x++){
2219
                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2220
                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2221
                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2222
                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2223
                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2224
                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2225
                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2226
                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2227
                int am=0;
2228
                if(!p || p->fast_mc)
2229
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2230
                else
2231
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2232
                if(am&(~255)) am= ~(am>>31);
2233
                tmp2[x]= am;
2234
            }
2235
            tmpI+= 64;
2236
            tmp2+= stride;
2237
        }
2238
    }
2239

    
2240
    hpel[ 0]= src;
2241
    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2242
    hpel[ 2]= src + 1;
2243

    
2244
    hpel[ 4]= tmp2t[1];
2245
    hpel[ 5]= tmp2t[2];
2246
    hpel[ 6]= tmp2t[1] + 1;
2247

    
2248
    hpel[ 8]= src + stride;
2249
    hpel[ 9]= hpel[1] + stride;
2250
    hpel[10]= hpel[8] + 1;
2251

    
2252
    if(b==15){
2253
        const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
2254
        const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2255
        const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2256
        const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2257
        dx&=7;
2258
        dy&=7;
2259
        for(y=0; y < b_h; y++){
2260
            for(x=0; x < b_w; x++){
2261
                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2262
                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
2263
            }
2264
            src1+=stride;
2265
            src2+=stride;
2266
            src3+=stride;
2267
            src4+=stride;
2268
            dst +=stride;
2269
        }
2270
    }else{
2271
        const uint8_t *src1= hpel[l];
2272
        const uint8_t *src2= hpel[r];
2273
        int a= weight[((dx&7) + (8*(dy&7)))];
2274
        int b= 8-a;
2275
        for(y=0; y < b_h; y++){
2276
            for(x=0; x < b_w; x++){
2277
                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2278
            }
2279
            src1+=stride;
2280
            src2+=stride;
2281
            dst +=stride;
2282
        }
2283
    }
2284
}
2285

    
2286
#define mca(dx,dy,b_w)\
2287
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2288
    uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2289
    assert(h==b_w);\
2290
    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2291
}
2292

    
2293
mca( 0, 0,16)
2294
mca( 8, 0,16)
2295
mca( 0, 8,16)
2296
mca( 8, 8,16)
2297
mca( 0, 0,8)
2298
mca( 8, 0,8)
2299
mca( 0, 8,8)
2300
mca( 8, 8,8)
2301

    
2302
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2303
    if(block->type & BLOCK_INTRA){
2304
        int x, y;
2305
        const int color = block->color[plane_index];
2306
        const int color4= color*0x01010101;
2307
        if(b_w==32){
2308
            for(y=0; y < b_h; y++){
2309
                *(uint32_t*)&dst[0 + y*stride]= color4;
2310
                *(uint32_t*)&dst[4 + y*stride]= color4;
2311
                *(uint32_t*)&dst[8 + y*stride]= color4;
2312
                *(uint32_t*)&dst[12+ y*stride]= color4;
2313
                *(uint32_t*)&dst[16+ y*stride]= color4;
2314
                *(uint32_t*)&dst[20+ y*stride]= color4;
2315
                *(uint32_t*)&dst[24+ y*stride]= color4;
2316
                *(uint32_t*)&dst[28+ y*stride]= color4;
2317
            }
2318
        }else if(b_w==16){
2319
            for(y=0; y < b_h; y++){
2320
                *(uint32_t*)&dst[0 + y*stride]= color4;
2321
                *(uint32_t*)&dst[4 + y*stride]= color4;
2322
                *(uint32_t*)&dst[8 + y*stride]= color4;
2323
                *(uint32_t*)&dst[12+ y*stride]= color4;
2324
            }
2325
        }else if(b_w==8){
2326
            for(y=0; y < b_h; y++){
2327
                *(uint32_t*)&dst[0 + y*stride]= color4;
2328
                *(uint32_t*)&dst[4 + y*stride]= color4;
2329
            }
2330
        }else if(b_w==4){
2331
            for(y=0; y < b_h; y++){
2332
                *(uint32_t*)&dst[0 + y*stride]= color4;
2333
            }
2334
        }else{
2335
            for(y=0; y < b_h; y++){
2336
                for(x=0; x < b_w; x++){
2337
                    dst[x + y*stride]= color;
2338
                }
2339
            }
2340
        }
2341
    }else{
2342
        uint8_t *src= s->last_picture[block->ref].data[plane_index];
2343
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2344
        int mx= block->mx*scale;
2345
        int my= block->my*scale;
2346
        const int dx= mx&15;
2347
        const int dy= my&15;
2348
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2349
        sx += (mx>>4) - (HTAPS_MAX/2-1);
2350
        sy += (my>>4) - (HTAPS_MAX/2-1);
2351
        src += sx + sy*stride;
2352
        if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2353
           || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2354
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2355
            src= tmp + MB_SIZE;
2356
        }
2357
//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2358
//        assert(!(b_w&(b_w-1)));
2359
        assert(b_w>1 && b_h>1);
2360
        assert((tab_index>=0 && tab_index<4) || b_w==32);
2361
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2362
            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2363
        else if(b_w==32){
2364
            int y;
2365
            for(y=0; y<b_h; y+=16){
2366
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2367
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2368
            }
2369
        }else if(b_w==b_h)
2370
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2371
        else if(b_w==2*b_h){
2372
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
2373
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2374
        }else{
2375
            assert(2*b_w==b_h);
2376
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
2377
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2378
        }
2379
    }
2380
}
2381

    
2382
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2383
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2384
    int y, x;
2385
    IDWTELEM * dst;
2386
    for(y=0; y<b_h; y++){
2387
        //FIXME ugly misuse of obmc_stride
2388
        const uint8_t *obmc1= obmc + y*obmc_stride;
2389
        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2390
        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2391
        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2392
        dst = slice_buffer_get_line(sb, src_y + y);
2393
        for(x=0; x<b_w; x++){
2394
            int v=   obmc1[x] * block[3][x + y*src_stride]
2395
                    +obmc2[x] * block[2][x + y*src_stride]
2396
                    +obmc3[x] * block[1][x + y*src_stride]
2397
                    +obmc4[x] * block[0][x + y*src_stride];
2398

    
2399
            v <<= 8 - LOG2_OBMC_MAX;
2400
            if(FRAC_BITS != 8){
2401
                v >>= 8 - FRAC_BITS;
2402
            }
2403
            if(add){
2404
                v += dst[x + src_x];
2405
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2406
                if(v&(~255)) v= ~(v>>31);
2407
                dst8[x + y*src_stride] = v;
2408
            }else{
2409
                dst[x + src_x] -= v;
2410
            }
2411
        }
2412
    }
2413
}
2414

    
2415
//FIXME name cleanup (b_w, block_w, b_width stuff)
2416
static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2417
    const int b_width = s->b_width  << s->block_max_depth;
2418
    const int b_height= s->b_height << s->block_max_depth;
2419
    const int b_stride= b_width;
2420
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2421
    BlockNode *rt= lt+1;
2422
    BlockNode *lb= lt+b_stride;
2423
    BlockNode *rb= lb+1;
2424
    uint8_t *block[4];
2425
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2426
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2427
    uint8_t *ptmp;
2428
    int x,y;
2429

    
2430
    if(b_x<0){
2431
        lt= rt;
2432
        lb= rb;
2433
    }else if(b_x + 1 >= b_width){
2434
        rt= lt;
2435
        rb= lb;
2436
    }
2437
    if(b_y<0){
2438
        lt= lb;
2439
        rt= rb;
2440
    }else if(b_y + 1 >= b_height){
2441
        lb= lt;
2442
        rb= rt;
2443
    }
2444

    
2445
    if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2446
        obmc -= src_x;
2447
        b_w += src_x;
2448
        if(!sliced && !offset_dst)
2449
            dst -= src_x;
2450
        src_x=0;
2451
    }else if(src_x + b_w > w){
2452
        b_w = w - src_x;
2453
    }
2454
    if(src_y<0){
2455
        obmc -= src_y*obmc_stride;
2456
        b_h += src_y;
2457
        if(!sliced && !offset_dst)
2458
            dst -= src_y*dst_stride;
2459
        src_y=0;
2460
    }else if(src_y + b_h> h){
2461
        b_h = h - src_y;
2462
    }
2463

    
2464
    if(b_w<=0 || b_h<=0) return;
2465

    
2466
    assert(src_stride > 2*MB_SIZE + 5);
2467

    
2468
    if(!sliced && offset_dst)
2469
        dst += src_x + src_y*dst_stride;
2470
    dst8+= src_x + src_y*src_stride;
2471
//    src += src_x + src_y*src_stride;
2472

    
2473
    ptmp= tmp + 3*tmp_step;
2474
    block[0]= ptmp;
2475
    ptmp+=tmp_step;
2476
    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2477

    
2478
    if(same_block(lt, rt)){
2479
        block[1]= block[0];
2480
    }else{
2481
        block[1]= ptmp;
2482
        ptmp+=tmp_step;
2483
        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2484
    }
2485

    
2486
    if(same_block(lt, lb)){
2487
        block[2]= block[0];
2488
    }else if(same_block(rt, lb)){
2489
        block[2]= block[1];
2490
    }else{
2491
        block[2]= ptmp;
2492
        ptmp+=tmp_step;
2493
        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2494
    }
2495

    
2496
    if(same_block(lt, rb) ){
2497
        block[3]= block[0];
2498
    }else if(same_block(rt, rb)){
2499
        block[3]= block[1];
2500
    }else if(same_block(lb, rb)){
2501
        block[3]= block[2];
2502
    }else{
2503
        block[3]= ptmp;
2504
        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2505
    }
2506
#if 0
2507
    for(y=0; y<b_h; y++){
2508
        for(x=0; x<b_w; x++){
2509
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2510
            if(add) dst[x + y*dst_stride] += v;
2511
            else    dst[x + y*dst_stride] -= v;
2512
        }
2513
    }
2514
    for(y=0; y<b_h; y++){
2515
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2516
        for(x=0; x<b_w; x++){
2517
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2518
            if(add) dst[x + y*dst_stride] += v;
2519
            else    dst[x + y*dst_stride] -= v;
2520
        }
2521
    }
2522
    for(y=0; y<b_h; y++){
2523
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2524
        for(x=0; x<b_w; x++){
2525
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2526
            if(add) dst[x + y*dst_stride] += v;
2527
            else    dst[x + y*dst_stride] -= v;
2528
        }
2529
    }
2530
    for(y=0; y<b_h; y++){
2531
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2532
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2533
        for(x=0; x<b_w; x++){
2534
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2535
            if(add) dst[x + y*dst_stride] += v;
2536
            else    dst[x + y*dst_stride] -= v;
2537
        }
2538
    }
2539
#else
2540
    if(sliced){
2541
        s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2542
    }else
2543
    for(y=0; y<b_h; y++){
2544
        //FIXME ugly misuse of obmc_stride
2545
        const uint8_t *obmc1= obmc + y*obmc_stride;
2546
        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2547
        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2548
        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2549
        for(x=0; x<b_w; x++){
2550
            int v=   obmc1[x] * block[3][x + y*src_stride]
2551
                    +obmc2[x] * block[2][x + y*src_stride]
2552
                    +obmc3[x] * block[1][x + y*src_stride]
2553
                    +obmc4[x] * block[0][x + y*src_stride];
2554

    
2555
            v <<= 8 - LOG2_OBMC_MAX;
2556
            if(FRAC_BITS != 8){
2557
                v >>= 8 - FRAC_BITS;
2558
            }
2559
            if(add){
2560
                v += dst[x + y*dst_stride];
2561
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2562
                if(v&(~255)) v= ~(v>>31);
2563
                dst8[x + y*src_stride] = v;
2564
            }else{
2565
                dst[x + y*dst_stride] -= v;
2566
            }
2567
        }
2568
    }
2569
#endif /* 0 */
2570
}
2571

    
2572
static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2573
    Plane *p= &s->plane[plane_index];
2574
    const int mb_w= s->b_width  << s->block_max_depth;
2575
    const int mb_h= s->b_height << s->block_max_depth;
2576
    int x, y, mb_x;
2577
    int block_size = MB_SIZE >> s->block_max_depth;
2578
    int block_w    = plane_index ? block_size/2 : block_size;
2579
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2580
    int obmc_stride= plane_index ? block_size : 2*block_size;
2581
    int ref_stride= s->current_picture.linesize[plane_index];
2582
    uint8_t *dst8= s->current_picture.data[plane_index];
2583
    int w= p->width;
2584
    int h= p->height;
2585

    
2586
    if(s->keyframe || (s->avctx->debug&512)){
2587
        if(mb_y==mb_h)
2588
            return;
2589

    
2590
        if(add){
2591
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2592
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2593
                IDWTELEM * line = sb->line[y];
2594
                for(x=0; x<w; x++){
2595
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2596
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2597
                    v >>= FRAC_BITS;
2598
                    if(v&(~255)) v= ~(v>>31);
2599
                    dst8[x + y*ref_stride]= v;
2600
                }
2601
            }
2602
        }else{
2603
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2604
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2605
                IDWTELEM * line = sb->line[y];
2606
                for(x=0; x<w; x++){
2607
                    line[x] -= 128 << FRAC_BITS;
2608
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2609
                }
2610
            }
2611
        }
2612

    
2613
        return;
2614
    }
2615

    
2616
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2617
            add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2618
                       block_w*mb_x - block_w/2,
2619
                       block_w*mb_y - block_w/2,
2620
                       block_w, block_w,
2621
                       w, h,
2622
                       w, ref_stride, obmc_stride,
2623
                       mb_x - 1, mb_y - 1,
2624
                       add, 0, plane_index);
2625
        }
2626
}
2627

    
2628
static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2629
    Plane *p= &s->plane[plane_index];
2630
    const int mb_w= s->b_width  << s->block_max_depth;
2631
    const int mb_h= s->b_height << s->block_max_depth;
2632
    int x, y, mb_x;
2633
    int block_size = MB_SIZE >> s->block_max_depth;
2634
    int block_w    = plane_index ? block_size/2 : block_size;
2635
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2636
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2637
    int ref_stride= s->current_picture.linesize[plane_index];
2638
    uint8_t *dst8= s->current_picture.data[plane_index];
2639
    int w= p->width;
2640
    int h= p->height;
2641

    
2642
    if(s->keyframe || (s->avctx->debug&512)){
2643
        if(mb_y==mb_h)
2644
            return;
2645

    
2646
        if(add){
2647
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2648
                for(x=0; x<w; x++){
2649
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2650
                    v >>= FRAC_BITS;
2651
                    if(v&(~255)) v= ~(v>>31);
2652
                    dst8[x + y*ref_stride]= v;
2653
                }
2654
            }
2655
        }else{
2656
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2657
                for(x=0; x<w; x++){
2658
                    buf[x + y*w]-= 128<<FRAC_BITS;
2659
                }
2660
            }
2661
        }
2662

    
2663
        return;
2664
    }
2665

    
2666
    for(mb_x=0; mb_x<=mb_w; mb_x++){
2667
        add_yblock(s, 0, NULL, buf, dst8, obmc,
2668
                   block_w*mb_x - block_w/2,
2669
                   block_w*mb_y - block_w/2,
2670
                   block_w, block_w,
2671
                   w, h,
2672
                   w, ref_stride, obmc_stride,
2673
                   mb_x - 1, mb_y - 1,
2674
                   add, 1, plane_index);
2675
    }
2676
}
2677

    
2678
static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2679
    const int mb_h= s->b_height << s->block_max_depth;
2680
    int mb_y;
2681
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2682
        predict_slice(s, buf, plane_index, add, mb_y);
2683
}
2684

    
2685
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2686
    int i, x2, y2;
2687
    Plane *p= &s->plane[plane_index];
2688
    const int block_size = MB_SIZE >> s->block_max_depth;
2689
    const int block_w    = plane_index ? block_size/2 : block_size;
2690
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2691
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2692
    const int ref_stride= s->current_picture.linesize[plane_index];
2693
    uint8_t *src= s-> input_picture.data[plane_index];
2694
    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2695
    const int b_stride = s->b_width << s->block_max_depth;
2696
    const int w= p->width;
2697
    const int h= p->height;
2698
    int index= mb_x + mb_y*b_stride;
2699
    BlockNode *b= &s->block[index];
2700
    BlockNode backup= *b;
2701
    int ab=0;
2702
    int aa=0;
2703

    
2704
    b->type|= BLOCK_INTRA;
2705
    b->color[plane_index]= 0;
2706
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2707

    
2708
    for(i=0; i<4; i++){
2709
        int mb_x2= mb_x + (i &1) - 1;
2710
        int mb_y2= mb_y + (i>>1) - 1;
2711
        int x= block_w*mb_x2 + block_w/2;
2712
        int y= block_w*mb_y2 + block_w/2;
2713

    
2714
        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2715
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2716

    
2717
        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2718
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2719
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2720
                int obmc_v= obmc[index];
2721
                int d;
2722
                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2723
                if(x<0) obmc_v += obmc[index + block_w];
2724
                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2725
                if(x+block_w>w) obmc_v += obmc[index - block_w];
2726
                //FIXME precalculate this or simplify it somehow else
2727

    
2728
                d = -dst[index] + (1<<(FRAC_BITS-1));
2729
                dst[index] = d;
2730
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2731
                aa += obmc_v * obmc_v; //FIXME precalculate this
2732
            }
2733
        }
2734
    }
2735
    *b= backup;
2736

    
2737
    return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2738
}
2739

    
2740
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2741
    const int b_stride = s->b_width << s->block_max_depth;
2742
    const int b_height = s->b_height<< s->block_max_depth;
2743
    int index= x + y*b_stride;
2744
    const BlockNode *b     = &s->block[index];
2745
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2746
    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2747
    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2748
    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2749
    int dmx, dmy;
2750
//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2751
//  int my_context= av_log2(2*FFABS(left->my - top->my));
2752

    
2753
    if(x<0 || x>=b_stride || y>=b_height)
2754
        return 0;
2755
/*
2756
1            0      0
2757
01X          1-2    1
2758
001XX        3-6    2-3
2759
0001XXX      7-14   4-7
2760
00001XXXX   15-30   8-15
2761
*/
2762
//FIXME try accurate rate
2763
//FIXME intra and inter predictors if surrounding blocks are not the same type
2764
    if(b->type & BLOCK_INTRA){
2765
        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2766
                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
2767
                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
2768
    }else{
2769
        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2770
        dmx-= b->mx;
2771
        dmy-= b->my;
2772
        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2773
                    + av_log2(2*FFABS(dmy))
2774
                    + av_log2(2*b->ref));
2775
    }
2776
}
2777

    
2778
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2779
    Plane *p= &s->plane[plane_index];
2780
    const int block_size = MB_SIZE >> s->block_max_depth;
2781
    const int block_w    = plane_index ? block_size/2 : block_size;
2782
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2783
    const int ref_stride= s->current_picture.linesize[plane_index];
2784
    uint8_t *dst= s->current_picture.data[plane_index];
2785
    uint8_t *src= s->  input_picture.data[plane_index];
2786
    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2787
    uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2788
    uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2789
    const int b_stride = s->b_width << s->block_max_depth;
2790
    const int b_height = s->b_height<< s->block_max_depth;
2791
    const int w= p->width;
2792
    const int h= p->height;
2793
    int distortion;
2794
    int rate= 0;
2795
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2796
    int sx= block_w*mb_x - block_w/2;
2797
    int sy= block_w*mb_y - block_w/2;
2798
    int x0= FFMAX(0,-sx);
2799
    int y0= FFMAX(0,-sy);
2800
    int x1= FFMIN(block_w*2, w-sx);
2801
    int y1= FFMIN(block_w*2, h-sy);
2802
    int i,x,y;
2803

    
2804
    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2805

    
2806
    for(y=y0; y<y1; y++){
2807
        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2808
        const IDWTELEM *pred1 = pred + y*obmc_stride;
2809
        uint8_t *cur1 = cur + y*ref_stride;
2810
        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2811
        for(x=x0; x<x1; x++){
2812
#if FRAC_BITS >= LOG2_OBMC_MAX
2813
            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2814
#else
2815
            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2816
#endif
2817
            v = (v + pred1[x]) >> FRAC_BITS;
2818
            if(v&(~255)) v= ~(v>>31);
2819
            dst1[x] = v;
2820
        }
2821
    }
2822

    
2823
    /* copy the regions where obmc[] = (uint8_t)256 */
2824
    if(LOG2_OBMC_MAX == 8
2825
        && (mb_x == 0 || mb_x == b_stride-1)
2826
        && (mb_y == 0 || mb_y == b_height-1)){
2827
        if(mb_x == 0)
2828
            x1 = block_w;
2829
        else
2830
            x0 = block_w;
2831
        if(mb_y == 0)
2832
            y1 = block_w;
2833
        else
2834
            y0 = block_w;
2835
        for(y=y0; y<y1; y++)
2836
            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2837
    }
2838

    
2839
    if(block_w==16){
2840
        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2841
        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2842
        /* FIXME cmps overlap but do not cover the wavelet's whole support.
2843
         * So improving the score of one block is not strictly guaranteed
2844
         * to improve the score of the whole frame, thus iterative motion
2845
         * estimation does not always converge. */
2846
        if(s->avctx->me_cmp == FF_CMP_W97)
2847
            distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2848
        else if(s->avctx->me_cmp == FF_CMP_W53)
2849
            distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2850
        else{
2851
            distortion = 0;
2852
            for(i=0; i<4; i++){
2853
                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2854
                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2855
            }
2856
        }
2857
    }else{
2858
        assert(block_w==8);
2859
        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2860
    }
2861

    
2862
    if(plane_index==0){
2863
        for(i=0; i<4; i++){
2864
/* ..RRr
2865
 * .RXx.
2866
 * rxx..
2867
 */
2868
            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2869
        }
2870
        if(mb_x == b_stride-2)
2871
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2872
    }
2873
    return distortion + rate*penalty_factor;
2874
}
2875

    
2876
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2877
    int i, y2;
2878
    Plane *p= &s->plane[plane_index];
2879
    const int block_size = MB_SIZE >> s->block_max_depth;
2880
    const int block_w    = plane_index ? block_size/2 : block_size;
2881
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2882
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2883
    const int ref_stride= s->current_picture.linesize[plane_index];
2884
    uint8_t *dst= s->current_picture.data[plane_index];
2885
    uint8_t *src= s-> input_picture.data[plane_index];
2886
    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2887
    // const has only been removed from zero_dst to suppress a warning
2888
    static IDWTELEM zero_dst[4096]; //FIXME
2889
    const int b_stride = s->b_width << s->block_max_depth;
2890
    const int w= p->width;
2891
    const int h= p->height;
2892
    int distortion= 0;
2893
    int rate= 0;
2894
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2895

    
2896
    for(i=0; i<9; i++){
2897
        int mb_x2= mb_x + (i%3) - 1;
2898
        int mb_y2= mb_y + (i/3) - 1;
2899
        int x= block_w*mb_x2 + block_w/2;
2900
        int y= block_w*mb_y2 + block_w/2;
2901

    
2902
        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2903
                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2904

    
2905
        //FIXME find a cleaner/simpler way to skip the outside stuff
2906
        for(y2= y; y2<0; y2++)
2907
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2908
        for(y2= h; y2<y+block_w; y2++)
2909
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2910
        if(x<0){
2911
            for(y2= y; y2<y+block_w; y2++)
2912
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2913
        }
2914
        if(x+block_w > w){
2915
            for(y2= y; y2<y+block_w; y2++)
2916
                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2917
        }
2918

    
2919
        assert(block_w== 8 || block_w==16);
2920
        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2921
    }
2922

    
2923
    if(plane_index==0){
2924
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2925
        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2926

    
2927
/* ..RRRr
2928
 * .RXXx.
2929
 * .RXXx.
2930
 * rxxx.
2931
 */
2932
        if(merged)
2933
            rate = get_block_bits(s, mb_x, mb_y, 2);
2934
        for(i=merged?4:0; i<9; i++){
2935
            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2936
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2937
        }
2938
    }
2939
    return distortion + rate*penalty_factor;
2940
}
2941

    
2942
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2943
    const int b_stride= s->b_width << s->block_max_depth;
2944
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2945
    BlockNode backup= *block;
2946
    int rd, index, value;
2947

    
2948
    assert(mb_x>=0 && mb_y>=0);
2949
    assert(mb_x<b_stride);
2950

    
2951
    if(intra){
2952
        block->color[0] = p[0];
2953
        block->color[1] = p[1];
2954
        block->color[2] = p[2];
2955
        block->type |= BLOCK_INTRA;
2956
    }else{
2957
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2958
        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2959
        if(s->me_cache[index] == value)
2960
            return 0;
2961
        s->me_cache[index]= value;
2962

    
2963
        block->mx= p[0];
2964
        block->my= p[1];
2965
        block->type &= ~BLOCK_INTRA;
2966
    }
2967

    
2968
    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2969

    
2970
//FIXME chroma
2971
    if(rd < *best_rd){
2972
        *best_rd= rd;
2973
        return 1;
2974
    }else{
2975
        *block= backup;
2976
        return 0;
2977
    }
2978
}
2979

    
2980
/* special case for int[2] args we discard afterwards,
2981
 * fixes compilation problem with gcc 2.95 */
2982
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2983
    int p[2] = {p0, p1};
2984
    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2985
}
2986

    
2987
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2988
    const int b_stride= s->b_width << s->block_max_depth;
2989
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2990
    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2991
    int rd, index, value;
2992

    
2993
    assert(mb_x>=0 && mb_y>=0);
2994
    assert(mb_x<b_stride);
2995
    assert(((mb_x|mb_y)&1) == 0);
2996

    
2997
    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
2998
    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
2999
    if(s->me_cache[index] == value)
3000
        return 0;
3001
    s->me_cache[index]= value;
3002

    
3003
    block->mx= p0;
3004
    block->my= p1;
3005
    block->ref= ref;
3006
    block->type &= ~BLOCK_INTRA;
3007
    block[1]= block[b_stride]= block[b_stride+1]= *block;
3008

    
3009
    rd= get_4block_rd(s, mb_x, mb_y, 0);
3010

    
3011
//FIXME chroma
3012
    if(rd < *best_rd){
3013
        *best_rd= rd;
3014
        return 1;
3015
    }else{
3016
        block[0]= backup[0];
3017
        block[1]= backup[1];
3018
        block[b_stride]= backup[2];
3019
        block[b_stride+1]= backup[3];
3020
        return 0;
3021
    }
3022
}
3023

    
3024
static void iterative_me(SnowContext *s){
3025
    int pass, mb_x, mb_y;
3026
    const int b_width = s->b_width  << s->block_max_depth;
3027
    const int b_height= s->b_height << s->block_max_depth;
3028
    const int b_stride= b_width;
3029
    int color[3];
3030

    
3031
    {
3032
        RangeCoder r = s->c;
3033
        uint8_t state[sizeof(s->block_state)];
3034
        memcpy(state, s->block_state, sizeof(s->block_state));
3035
        for(mb_y= 0; mb_y<s->b_height; mb_y++)
3036
            for(mb_x= 0; mb_x<s->b_width; mb_x++)
3037
                encode_q_branch(s, 0, mb_x, mb_y);
3038
        s->c = r;
3039
        memcpy(s->block_state, state, sizeof(s->block_state));
3040
    }
3041

    
3042
    for(pass=0; pass<25; pass++){
3043
        int change= 0;
3044

    
3045
        for(mb_y= 0; mb_y<b_height; mb_y++){
3046
            for(mb_x= 0; mb_x<b_width; mb_x++){
3047
                int dia_change, i, j, ref;
3048
                int best_rd= INT_MAX, ref_rd;
3049
                BlockNode backup, ref_b;
3050
                const int index= mb_x + mb_y * b_stride;
3051
                BlockNode *block= &s->block[index];
3052
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3053
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3054
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3055
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3056
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3057
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3058
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3059
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3060
                const int b_w= (MB_SIZE >> s->block_max_depth);
3061
                uint8_t obmc_edged[b_w*2][b_w*2];
3062

    
3063
                if(pass && (block->type & BLOCK_OPT))
3064
                    continue;
3065
                block->type |= BLOCK_OPT;
3066

    
3067
                backup= *block;
3068

    
3069
                if(!s->me_cache_generation)
3070
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3071
                s->me_cache_generation += 1<<22;
3072

    
3073
                //FIXME precalculate
3074
                {
3075
                    int x, y;
3076
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3077
                    if(mb_x==0)
3078
                        for(y=0; y<b_w*2; y++)
3079
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3080
                    if(mb_x==b_stride-1)
3081
                        for(y=0; y<b_w*2; y++)
3082
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3083
                    if(mb_y==0){
3084
                        for(x=0; x<b_w*2; x++)
3085
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3086
                        for(y=1; y<b_w; y++)
3087
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3088
                    }
3089
                    if(mb_y==b_height-1){
3090
                        for(x=0; x<b_w*2; x++)
3091
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3092
                        for(y=b_w; y<b_w*2-1; y++)
3093
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3094
                    }
3095
                }
3096

    
3097
                //skip stuff outside the picture
3098
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3099
                    uint8_t *src= s->  input_picture.data[0];
3100
                    uint8_t *dst= s->current_picture.data[0];
3101
                    const int stride= s->current_picture.linesize[0];
3102
                    const int block_w= MB_SIZE >> s->block_max_depth;
3103
                    const int sx= block_w*mb_x - block_w/2;
3104
                    const int sy= block_w*mb_y - block_w/2;
3105
                    const int w= s->plane[0].width;
3106
                    const int h= s->plane[0].height;
3107
                    int y;
3108

    
3109
                    for(y=sy; y<0; y++)
3110
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3111
                    for(y=h; y<sy+block_w*2; y++)
3112
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3113
                    if(sx<0){
3114
                        for(y=sy; y<sy+block_w*2; y++)
3115
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3116
                    }
3117
                    if(sx+block_w*2 > w){
3118
                        for(y=sy; y<sy+block_w*2; y++)
3119
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3120
                    }
3121
                }
3122

    
3123
                // intra(black) = neighbors' contribution to the current block
3124
                for(i=0; i<3; i++)
3125
                    color[i]= get_dc(s, mb_x, mb_y, i);
3126

    
3127
                // get previous score (cannot be cached due to OBMC)
3128
                if(pass > 0 && (block->type&BLOCK_INTRA)){
3129
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
3130
                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3131
                }else
3132
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3133

    
3134
                ref_b= *block;
3135
                ref_rd= best_rd;
3136
                for(ref=0; ref < s->ref_frames; ref++){
3137
                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3138
                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3139
                        continue;
3140
                    block->ref= ref;
3141
                    best_rd= INT_MAX;
3142

    
3143
                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3144
                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3145
                    if(tb)
3146
                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3147
                    if(lb)
3148
                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3149
                    if(rb)
3150
                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3151
                    if(bb)
3152
                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3153

    
3154
                    /* fullpel ME */
3155
                    //FIXME avoid subpel interpolation / round to nearest integer
3156
                    do{
3157
                        dia_change=0;
3158
                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3159
                            for(j=0; j<i; j++){
3160
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3161
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3162
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3163
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3164
                            }
3165
                        }
3166
                    }while(dia_change);
3167
                    /* subpel ME */
3168
                    do{
3169
                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3170
                        dia_change=0;
3171
                        for(i=0; i<8; i++)
3172
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3173
                    }while(dia_change);
3174
                    //FIXME or try the standard 2 pass qpel or similar
3175

    
3176
                    mvr[0][0]= block->mx;
3177
                    mvr[0][1]= block->my;
3178
                    if(ref_rd > best_rd){
3179
                        ref_rd= best_rd;
3180
                        ref_b= *block;
3181
                    }
3182
                }
3183
                best_rd= ref_rd;
3184
                *block= ref_b;
3185
#if 1
3186
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3187
                //FIXME RD style color selection
3188
#endif
3189
                if(!same_block(block, &backup)){
3190
                    if(tb ) tb ->type &= ~BLOCK_OPT;
3191
                    if(lb ) lb ->type &= ~BLOCK_OPT;
3192
                    if(rb ) rb ->type &= ~BLOCK_OPT;
3193
                    if(bb ) bb ->type &= ~BLOCK_OPT;
3194
                    if(tlb) tlb->type &= ~BLOCK_OPT;
3195
                    if(trb) trb->type &= ~BLOCK_OPT;
3196
                    if(blb) blb->type &= ~BLOCK_OPT;
3197
                    if(brb) brb->type &= ~BLOCK_OPT;
3198
                    change ++;
3199
                }
3200
            }
3201
        }
3202
        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3203
        if(!change)
3204
            break;
3205
    }
3206

    
3207
    if(s->block_max_depth == 1){
3208
        int change= 0;
3209
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
3210
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
3211
                int i;
3212
                int best_rd, init_rd;
3213
                const int index= mb_x + mb_y * b_stride;
3214
                BlockNode *b[4];
3215

    
3216
                b[0]= &s->block[index];
3217
                b[1]= b[0]+1;
3218
                b[2]= b[0]+b_stride;
3219
                b[3]= b[2]+1;
3220
                if(same_block(b[0], b[1]) &&
3221
                   same_block(b[0], b[2]) &&
3222
                   same_block(b[0], b[3]))
3223
                    continue;
3224

    
3225
                if(!s->me_cache_generation)
3226
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3227
                s->me_cache_generation += 1<<22;
3228

    
3229
                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3230

    
3231
                //FIXME more multiref search?
3232
                check_4block_inter(s, mb_x, mb_y,
3233
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3234
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3235

    
3236
                for(i=0; i<4; i++)
3237
                    if(!(b[i]->type&BLOCK_INTRA))
3238
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3239

    
3240
                if(init_rd != best_rd)
3241
                    change++;
3242
            }
3243
        }
3244
        av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3245
    }
3246
}
3247

    
3248
static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3249
    const int w= b->width;
3250
    const int h= b->height;
3251
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3252
    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3253
    int x,y, thres1, thres2;
3254

    
3255
    if(s->qlog == LOSSLESS_QLOG){
3256
        for(y=0; y<h; y++)
3257
            for(x=0; x<w; x++)
3258
                dst[x + y*stride]= src[x + y*stride];
3259
        return;
3260
    }
3261

    
3262
    bias= bias ? 0 : (3*qmul)>>3;
3263
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3264
    thres2= 2*thres1;
3265

    
3266
    if(!bias){
3267
        for(y=0; y<h; y++){
3268
            for(x=0; x<w; x++){
3269
                int i= src[x + y*stride];
3270

    
3271
                if((unsigned)(i+thres1) > thres2){
3272
                    if(i>=0){
3273
                        i<<= QEXPSHIFT;
3274
                        i/= qmul; //FIXME optimize
3275
                        dst[x + y*stride]=  i;
3276
                    }else{
3277
                        i= -i;
3278
                        i<<= QEXPSHIFT;
3279
                        i/= qmul; //FIXME optimize
3280
                        dst[x + y*stride]= -i;
3281
                    }
3282
                }else
3283
                    dst[x + y*stride]= 0;
3284
            }
3285
        }
3286
    }else{
3287
        for(y=0; y<h; y++){
3288
            for(x=0; x<w; x++){
3289
                int i= src[x + y*stride];
3290

    
3291
                if((unsigned)(i+thres1) > thres2){
3292
                    if(i>=0){
3293
                        i<<= QEXPSHIFT;
3294
                        i= (i + bias) / qmul; //FIXME optimize
3295
                        dst[x + y*stride]=  i;
3296
                    }else{
3297
                        i= -i;
3298
                        i<<= QEXPSHIFT;
3299
                        i= (i + bias) / qmul; //FIXME optimize
3300
                        dst[x + y*stride]= -i;
3301
                    }
3302
                }else
3303
                    dst[x + y*stride]= 0;
3304
            }
3305
        }
3306
    }
3307
}
3308

    
3309
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3310
    const int w= b->width;
3311
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3312
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3313
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3314
    int x,y;
3315

    
3316
    if(s->qlog == LOSSLESS_QLOG) return;
3317

    
3318
    for(y=start_y; y<end_y; y++){
3319
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3320
        IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3321
        for(x=0; x<w; x++){
3322
            int i= line[x];
3323
            if(i<0){
3324
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3325
            }else if(i>0){
3326
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3327
            }
3328
        }
3329
    }
3330
}
3331

    
3332
static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3333
    const int w= b->width;
3334
    const int h= b->height;
3335
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3336
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3337
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3338
    int x,y;
3339

    
3340
    if(s->qlog == LOSSLESS_QLOG) return;
3341

    
3342
    for(y=0; y<h; y++){
3343
        for(x=0; x<w; x++){
3344
            int i= src[x + y*stride];
3345
            if(i<0){
3346
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3347
            }else if(i>0){
3348
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3349
            }
3350
        }
3351
    }
3352
}
3353

    
3354
static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3355
    const int w= b->width;
3356
    const int h= b->height;
3357
    int x,y;
3358

    
3359
    for(y=h-1; y>=0; y--){
3360
        for(x=w-1; x>=0; x--){
3361
            int i= x + y*stride;
3362

    
3363
            if(x){
3364
                if(use_median){
3365
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3366
                    else  src[i] -= src[i - 1];
3367
                }else{
3368
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3369
                    else  src[i] -= src[i - 1];
3370
                }
3371
            }else{
3372
                if(y) src[i] -= src[i - stride];
3373
            }
3374
        }
3375
    }
3376
}
3377

    
3378
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3379
    const int w= b->width;
3380
    int x,y;
3381

    
3382
    IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3383
    IDWTELEM * prev;
3384

    
3385
    if (start_y != 0)
3386
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3387

    
3388
    for(y=start_y; y<end_y; y++){
3389
        prev = line;
3390
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3391
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3392
        for(x=0; x<w; x++){
3393
            if(x){
3394
                if(use_median){
3395
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3396
                    else  line[x] += line[x - 1];
3397
                }else{
3398
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3399
                    else  line[x] += line[x - 1];
3400
                }
3401
            }else{
3402
                if(y) line[x] += prev[x];
3403
            }
3404
        }
3405
    }
3406
}
3407

    
3408
static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3409
    const int w= b->width;
3410
    const int h= b->height;
3411
    int x,y;
3412

    
3413
    for(y=0; y<h; y++){
3414
        for(x=0; x<w; x++){
3415
            int i= x + y*stride;
3416

    
3417
            if(x){
3418
                if(use_median){
3419
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3420
                    else  src[i] += src[i - 1];
3421
                }else{
3422
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3423
                    else  src[i] += src[i - 1];
3424
                }
3425
            }else{
3426
                if(y) src[i] += src[i - stride];
3427
            }
3428
        }
3429
    }
3430
}
3431

    
3432
static void encode_qlogs(SnowContext *s){
3433
    int plane_index, level, orientation;
3434

    
3435
    for(plane_index=0; plane_index<2; plane_index++){
3436
        for(level=0; level<s->spatial_decomposition_count; level++){
3437
            for(orientation=level ? 1:0; orientation<4; orientation++){
3438
                if(orientation==2) continue;
3439
                put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3440
            }
3441
        }
3442
    }
3443
}
3444

    
3445
static void encode_header(SnowContext *s){
3446
    int plane_index, i;
3447
    uint8_t kstate[32];
3448

    
3449
    memset(kstate, MID_STATE, sizeof(kstate));
3450

    
3451
    put_rac(&s->c, kstate, s->keyframe);
3452
    if(s->keyframe || s->always_reset){
3453
        reset_contexts(s);
3454
        s->last_spatial_decomposition_type=
3455
        s->last_qlog=
3456
        s->last_qbias=
3457
        s->last_mv_scale=
3458
        s->last_block_max_depth= 0;
3459
        for(plane_index=0; plane_index<2; plane_index++){
3460
            Plane *p= &s->plane[plane_index];
3461
            p->last_htaps=0;
3462
            p->last_diag_mc=0;
3463
            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3464
        }
3465
    }
3466
    if(s->keyframe){
3467
        put_symbol(&s->c, s->header_state, s->version, 0);
3468
        put_rac(&s->c, s->header_state, s->always_reset);
3469
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3470
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3471
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3472
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3473
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3474
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3475
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3476
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3477
        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3478

    
3479
        encode_qlogs(s);
3480
    }
3481

    
3482
    if(!s->keyframe){
3483
        int update_mc=0;
3484
        for(plane_index=0; plane_index<2; plane_index++){
3485
            Plane *p= &s->plane[plane_index];
3486
            update_mc |= p->last_htaps   != p->htaps;
3487
            update_mc |= p->last_diag_mc != p->diag_mc;
3488
            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3489
        }
3490
        put_rac(&s->c, s->header_state, update_mc);
3491
        if(update_mc){
3492
            for(plane_index=0; plane_index<2; plane_index++){
3493
                Plane *p= &s->plane[plane_index];
3494
                put_rac(&s->c, s->header_state, p->diag_mc);
3495
                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3496
                for(i= p->htaps/2; i; i--)
3497
                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3498
            }
3499
        }
3500
        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3501
            put_rac(&s->c, s->header_state, 1);
3502
            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3503
            encode_qlogs(s);
3504
        }else
3505
            put_rac(&s->c, s->header_state, 0);
3506
    }
3507

    
3508
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3509
    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3510
    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3511
    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3512
    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3513

    
3514
}
3515

    
3516
static void update_last_header_values(SnowContext *s){
3517
    int plane_index;
3518

    
3519
    if(!s->keyframe){
3520
        for(plane_index=0; plane_index<2; plane_index++){
3521
            Plane *p= &s->plane[plane_index];
3522
            p->last_diag_mc= p->diag_mc;
3523
            p->last_htaps  = p->htaps;
3524
            memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3525
        }
3526
    }
3527

    
3528
    s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
3529
    s->last_qlog                        = s->qlog;
3530
    s->last_qbias                       = s->qbias;
3531
    s->last_mv_scale                    = s->mv_scale;
3532
    s->last_block_max_depth             = s->block_max_depth;
3533
    s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3534
}
3535

    
3536
static void decode_qlogs(SnowContext *s){
3537
    int plane_index, level, orientation;
3538

    
3539
    for(plane_index=0; plane_index<3; plane_index++){
3540
        for(level=0; level<s->spatial_decomposition_count; level++){
3541
            for(orientation=level ? 1:0; orientation<4; orientation++){
3542
                int q;
3543
                if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3544
                else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3545
                else                    q= get_symbol(&s->c, s->header_state, 1);
3546
                s->plane[plane_index].band[level][orientation].qlog= q;
3547
            }
3548
        }
3549
    }
3550
}
3551

    
3552
static int decode_header(SnowContext *s){
3553
    int plane_index;
3554
    uint8_t kstate[32];
3555

    
3556
    memset(kstate, MID_STATE, sizeof(kstate));
3557

    
3558
    s->keyframe= get_rac(&s->c, kstate);
3559
    if(s->keyframe || s->always_reset){
3560
        reset_contexts(s);
3561
        s->spatial_decomposition_type=
3562
        s->qlog=
3563
        s->qbias=
3564
        s->mv_scale=
3565
        s->block_max_depth= 0;
3566
    }
3567
    if(s->keyframe){
3568
        s->version= get_symbol(&s->c, s->header_state, 0);
3569
        if(s->version>0){
3570
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3571
            return -1;
3572
        }
3573
        s->always_reset= get_rac(&s->c, s->header_state);
3574
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3575
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3576
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3577
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3578
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3579
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3580
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3581
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3582
        s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3583

    
3584
        decode_qlogs(s);
3585
    }
3586

    
3587
    if(!s->keyframe){
3588
        if(get_rac(&s->c, s->header_state)){
3589
            for(plane_index=0; plane_index<2; plane_index++){
3590
                int htaps, i, sum=0;
3591
                Plane *p= &s->plane[plane_index];
3592
                p->diag_mc= get_rac(&s->c, s->header_state);
3593
                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3594
                if((unsigned)htaps > HTAPS_MAX || htaps==0)
3595
                    return -1;
3596
                p->htaps= htaps;
3597
                for(i= htaps/2; i; i--){
3598
                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3599
                    sum += p->hcoeff[i];
3600
                }
3601
                p->hcoeff[0]= 32-sum;
3602
            }
3603
            s->plane[2].diag_mc= s->plane[1].diag_mc;
3604
            s->plane[2].htaps  = s->plane[1].htaps;
3605
            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3606
        }
3607
        if(get_rac(&s->c, s->header_state)){
3608
            s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3609
            decode_qlogs(s);
3610
        }
3611
    }
3612

    
3613
    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3614
    if(s->spatial_decomposition_type > 1){
3615
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3616
        return -1;
3617
    }
3618

    
3619
    s->qlog           += get_symbol(&s->c, s->header_state, 1);
3620
    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
3621
    s->qbias          += get_symbol(&s->c, s->header_state, 1);
3622
    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3623
    if(s->block_max_depth > 1 || s->block_max_depth < 0){
3624
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3625
        s->block_max_depth= 0;
3626
        return -1;
3627
    }
3628

    
3629
    return 0;
3630
}
3631

    
3632
static void init_qexp(void){
3633
    int i;
3634
    double v=128;
3635

    
3636
    for(i=0; i<QROOT; i++){
3637
        qexp[i]= lrintf(v);
3638
        v *= pow(2, 1.0 / QROOT);
3639
    }
3640
}
3641

    
3642
static int common_init(AVCodecContext *avctx){
3643
    SnowContext *s = avctx->priv_data;
3644
    int width, height;
3645
    int i, j;
3646

    
3647
    s->avctx= avctx;
3648

    
3649
    dsputil_init(&s->dsp, avctx);
3650

    
3651
#define mcf(dx,dy)\
3652
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3653
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3654
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3655
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3656
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3657
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3658

    
3659
    mcf( 0, 0)
3660
    mcf( 4, 0)
3661
    mcf( 8, 0)
3662
    mcf(12, 0)
3663
    mcf( 0, 4)
3664
    mcf( 4, 4)
3665
    mcf( 8, 4)
3666
    mcf(12, 4)
3667
    mcf( 0, 8)
3668
    mcf( 4, 8)
3669
    mcf( 8, 8)
3670
    mcf(12, 8)
3671
    mcf( 0,12)
3672
    mcf( 4,12)
3673
    mcf( 8,12)
3674
    mcf(12,12)
3675

    
3676
#define mcfh(dx,dy)\
3677
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3678
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3679
        mc_block_hpel ## dx ## dy ## 16;\
3680
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3681
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3682
        mc_block_hpel ## dx ## dy ## 8;
3683

    
3684
    mcfh(0, 0)
3685
    mcfh(8, 0)
3686
    mcfh(0, 8)
3687
    mcfh(8, 8)
3688

    
3689
    if(!qexp[0])
3690
        init_qexp();
3691

    
3692
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3693

    
3694
    width= s->avctx->width;
3695
    height= s->avctx->height;
3696

    
3697
    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3698
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3699

    
3700
    for(i=0; i<MAX_REF_FRAMES; i++)
3701
        for(j=0; j<MAX_REF_FRAMES; j++)
3702
            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3703

    
3704
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3705

    
3706
    return 0;
3707
}
3708

    
3709
static int common_init_after_header(AVCodecContext *avctx){
3710
    SnowContext *s = avctx->priv_data;
3711
    int plane_index, level, orientation;
3712

    
3713
    for(plane_index=0; plane_index<3; plane_index++){
3714
        int w= s->avctx->width;
3715
        int h= s->avctx->height;
3716

    
3717
        if(plane_index){
3718
            w>>= s->chroma_h_shift;
3719
            h>>= s->chroma_v_shift;
3720
        }
3721
        s->plane[plane_index].width = w;
3722
        s->plane[plane_index].height= h;
3723

    
3724
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3725
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3726
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3727

    
3728
                b->buf= s->spatial_dwt_buffer;
3729
                b->level= level;
3730
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3731
                b->width = (w + !(orientation&1))>>1;
3732
                b->height= (h + !(orientation>1))>>1;
3733

    
3734
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3735
                b->buf_x_offset = 0;
3736
                b->buf_y_offset = 0;
3737

    
3738
                if(orientation&1){
3739
                    b->buf += (w+1)>>1;
3740
                    b->buf_x_offset = (w+1)>>1;
3741
                }
3742
                if(orientation>1){
3743
                    b->buf += b->stride>>1;
3744
                    b->buf_y_offset = b->stride_line >> 1;
3745
                }
3746
                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3747

    
3748
                if(level)
3749
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3750
                //FIXME avoid this realloc
3751
                av_freep(&b->x_coeff);
3752
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3753
            }
3754
            w= (w+1)>>1;
3755
            h= (h+1)>>1;
3756
        }
3757
    }
3758

    
3759
    return 0;
3760
}
3761

    
3762
static int qscale2qlog(int qscale){
3763
    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3764
           + 61*QROOT/8; //<64 >60
3765
}
3766

    
3767
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3768
{
3769
    /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3770
     * FIXME we know exact mv bits at this point,
3771
     * but ratecontrol isn't set up to include them. */
3772
    uint32_t coef_sum= 0;
3773
    int level, orientation, delta_qlog;
3774

    
3775
    for(level=0; level<s->spatial_decomposition_count; level++){
3776
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3777
            SubBand *b= &s->plane[0].band[level][orientation];
3778
            IDWTELEM *buf= b->ibuf;
3779
            const int w= b->width;
3780
            const int h= b->height;
3781
            const int stride= b->stride;
3782
            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3783
            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3784
            const int qdiv= (1<<16)/qmul;
3785
            int x, y;
3786
            //FIXME this is ugly
3787
            for(y=0; y<h; y++)
3788
                for(x=0; x<w; x++)
3789
                    buf[x+y*stride]= b->buf[x+y*stride];
3790
            if(orientation==0)
3791
                decorrelate(s, b, buf, stride, 1, 0);
3792
            for(y=0; y<h; y++)
3793
                for(x=0; x<w; x++)
3794
                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3795
        }
3796
    }
3797

    
3798
    /* ugly, ratecontrol just takes a sqrt again */
3799
    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3800
    assert(coef_sum < INT_MAX);
3801

    
3802
    if(pict->pict_type == I_TYPE){
3803
        s->m.current_picture.mb_var_sum= coef_sum;
3804
        s->m.current_picture.mc_mb_var_sum= 0;
3805
    }else{
3806
        s->m.current_picture.mc_mb_var_sum= coef_sum;
3807
        s->m.current_picture.mb_var_sum= 0;
3808
    }
3809

    
3810
    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3811
    if (pict->quality < 0)
3812
        return INT_MIN;
3813
    s->lambda= pict->quality * 3/2;
3814
    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3815
    s->qlog+= delta_qlog;
3816
    return delta_qlog;
3817
}
3818

    
3819
static void calculate_visual_weight(SnowContext *s, Plane *p){
3820
    int width = p->width;
3821
    int height= p->height;
3822
    int level, orientation, x, y;
3823

    
3824
    for(level=0; level<s->spatial_decomposition_count; level++){
3825
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3826
            SubBand *b= &p->band[level][orientation];
3827
            IDWTELEM *ibuf= b->ibuf;
3828
            int64_t error=0;
3829

    
3830
            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3831
            ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3832
            ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3833
            for(y=0; y<height; y++){
3834
                for(x=0; x<width; x++){
3835
                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3836
                    error += d*d;
3837
                }
3838
            }
3839

    
3840
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3841
        }
3842
    }
3843
}
3844

    
3845
#define QUANTIZE2 0
3846

    
3847
#if QUANTIZE2==1
3848
#define Q2_STEP 8
3849

    
3850
static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3851
    SubBand *b= &p->band[level][orientation];
3852
    int x, y;
3853
    int xo=0;
3854
    int yo=0;
3855
    int step= 1 << (s->spatial_decomposition_count - level);
3856

    
3857
    if(orientation&1)
3858
        xo= step>>1;
3859
    if(orientation&2)
3860
        yo= step>>1;
3861

    
3862
    //FIXME bias for nonzero ?
3863
    //FIXME optimize
3864
    memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3865
    for(y=0; y<p->height; y++){
3866
        for(x=0; x<p->width; x++){
3867
            int sx= (x-xo + step/2) / step / Q2_STEP;
3868
            int sy= (y-yo + step/2) / step / Q2_STEP;
3869
            int v= r0[x + y*p->width] - r1[x + y*p->width];
3870
            assert(sx>=0 && sy>=0 && sx < score_stride);
3871
            v= ((v+8)>>4)<<4;
3872
            score[sx + sy*score_stride] += v*v;
3873
            assert(score[sx + sy*score_stride] >= 0);
3874
        }
3875
    }
3876
}
3877

    
3878
static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3879
    int level, orientation;
3880

    
3881
    for(level=0; level<s->spatial_decomposition_count; level++){
3882
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3883
            SubBand *b= &p->band[level][orientation];
3884
            IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3885

    
3886
            dequantize(s, b, dst, b->stride);
3887
        }
3888
    }
3889
}
3890

    
3891
static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3892
    int level, orientation, ys, xs, x, y, pass;
3893
    IDWTELEM best_dequant[height * stride];
3894
    IDWTELEM idwt2_buffer[height * stride];
3895
    const int score_stride= (width + 10)/Q2_STEP;
3896
    int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3897
    int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3898
    int threshold= (s->m.lambda * s->m.lambda) >> 6;
3899

    
3900
    //FIXME pass the copy cleanly ?
3901

    
3902
//    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3903
    ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3904

    
3905
    for(level=0; level<s->spatial_decomposition_count; level++){
3906
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3907
            SubBand *b= &p->band[level][orientation];
3908
            IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3909
             DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
3910
            assert(src == b->buf); // code does not depend on this but it is true currently
3911

    
3912
            quantize(s, b, dst, src, b->stride, s->qbias);
3913
        }
3914
    }
3915
    for(pass=0; pass<1; pass++){
3916
        if(s->qbias == 0) //keyframe
3917
            continue;
3918
        for(level=0; level<s->spatial_decomposition_count; level++){
3919
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3920
                SubBand *b= &p->band[level][orientation];
3921
                IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3922
                IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3923

    
3924
                for(ys= 0; ys<Q2_STEP; ys++){
3925
                    for(xs= 0; xs<Q2_STEP; xs++){
3926
                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3927
                        dequantize_all(s, p, idwt2_buffer, width, height);
3928
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3929
                        find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3930
                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3931
                        for(y=ys; y<b->height; y+= Q2_STEP){
3932
                            for(x=xs; x<b->width; x+= Q2_STEP){
3933
                                if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3934
                                if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3935
                                //FIXME try more than just --
3936
                            }
3937
                        }
3938
                        dequantize_all(s, p, idwt2_buffer, width, height);
3939
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3940
                        find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3941
                        for(y=ys; y<b->height; y+= Q2_STEP){
3942
                            for(x=xs; x<b->width; x+= Q2_STEP){
3943
                                int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3944
                                if(score[score_idx] <= best_score[score_idx] + threshold){
3945
                                    best_score[score_idx]= score[score_idx];
3946
                                    if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3947
                                    if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3948
                                    //FIXME copy instead
3949
                                }
3950
                            }
3951
                        }
3952
                    }
3953
                }
3954
            }
3955
        }
3956
    }
3957
    memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3958
}
3959

    
3960
#endif /* QUANTIZE2==1 */
3961

    
3962
static int encode_init(AVCodecContext *avctx)
3963
{
3964
    SnowContext *s = avctx->priv_data;
3965
    int plane_index;
3966

    
3967
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3968
        av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3969
               "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3970
        return -1;
3971
    }
3972

    
3973
    if(avctx->prediction_method == DWT_97
3974
       && (avctx->flags & CODEC_FLAG_QSCALE)
3975
       && avctx->global_quality == 0){
3976
        av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3977
        return -1;
3978
    }
3979

    
3980
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3981

    
3982
    s->chroma_h_shift= 1; //FIXME XXX
3983
    s->chroma_v_shift= 1;
3984

    
3985
    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3986
    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3987

    
3988
    for(plane_index=0; plane_index<3; plane_index++){
3989
        s->plane[plane_index].diag_mc= 1;
3990
        s->plane[plane_index].htaps= 6;
3991
        s->plane[plane_index].hcoeff[0]=  40;
3992
        s->plane[plane_index].hcoeff[1]= -10;
3993
        s->plane[plane_index].hcoeff[2]=   2;
3994
        s->plane[plane_index].fast_mc= 1;
3995
    }
3996

    
3997
    common_init(avctx);
3998