Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ a6f2c0d6

History | View | Annotate | Download (180 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

    
21
#include "avcodec.h"
22
#include "dsputil.h"
23
#include "snow.h"
24

    
25
#include "rangecoder.h"
26

    
27
#include "mpegvideo.h"
28

    
29
#undef NDEBUG
30
#include <assert.h>
31

    
32
static const int8_t quant3[256]={
33
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
49
};
50
static const int8_t quant3b[256]={
51
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
};
68
static const int8_t quant3bA[256]={
69
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
};
86
static const int8_t quant5[256]={
87
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
103
};
104
static const int8_t quant7[256]={
105
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
121
};
122
static const int8_t quant9[256]={
123
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
139
};
140
static const int8_t quant11[256]={
141
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
157
};
158
static const int8_t quant13[256]={
159
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
175
};
176

    
177
#if 0 //64*cubic
178
static const uint8_t obmc32[1024]={
179
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
180
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
181
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
182
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
183
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
184
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
185
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
186
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
187
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
188
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
189
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
190
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
191
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
192
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
193
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
194
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
195
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
196
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
197
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
198
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
199
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
200
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
201
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
202
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
203
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
204
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
205
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
206
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
207
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
208
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
209
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
210
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
211
//error:0.000022
212
};
213
static const uint8_t obmc16[256]={
214
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
215
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
216
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
217
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
218
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
219
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
220
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
221
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
222
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
223
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
224
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
225
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
226
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
227
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
228
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
229
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
230
//error:0.000033
231
};
232
#elif 1 // 64*linear
233
static const uint8_t obmc32[1024]={
234
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
235
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
236
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
237
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
238
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
239
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
240
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
241
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
242
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
243
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
244
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
245
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
246
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
247
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
248
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
249
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
250
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
251
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
252
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
253
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
254
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
255
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
256
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
257
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
258
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
259
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
260
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
261
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
262
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
263
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
264
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
265
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
266
 //error:0.000020
267
};
268
static const uint8_t obmc16[256]={
269
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
270
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
271
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
272
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
273
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
274
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
281
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
282
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
283
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
284
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
285
//error:0.000015
286
};
287
#else //64*cos
288
static const uint8_t obmc32[1024]={
289
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
290
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
291
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
292
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
293
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
294
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
295
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
296
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
297
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
298
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
299
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
300
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
301
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
302
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
303
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
304
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
305
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
306
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
307
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
308
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
309
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
310
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
311
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
312
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
313
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
314
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
315
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
316
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
317
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
318
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
319
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
320
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
321
//error:0.000022
322
};
323
static const uint8_t obmc16[256]={
324
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
325
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
326
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
327
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
328
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
329
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
330
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
331
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
332
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
333
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
334
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
335
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
336
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
337
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
338
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
339
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
340
//error:0.000022
341
};
342
#endif /* 0 */
343

    
344
//linear *64
345
static const uint8_t obmc8[64]={
346
  4, 12, 20, 28, 28, 20, 12,  4,
347
 12, 36, 60, 84, 84, 60, 36, 12,
348
 20, 60,100,140,140,100, 60, 20,
349
 28, 84,140,196,196,140, 84, 28,
350
 28, 84,140,196,196,140, 84, 28,
351
 20, 60,100,140,140,100, 60, 20,
352
 12, 36, 60, 84, 84, 60, 36, 12,
353
  4, 12, 20, 28, 28, 20, 12,  4,
354
//error:0.000000
355
};
356

    
357
//linear *64
358
static const uint8_t obmc4[16]={
359
 16, 48, 48, 16,
360
 48,144,144, 48,
361
 48,144,144, 48,
362
 16, 48, 48, 16,
363
//error:0.000000
364
};
365

    
366
static const uint8_t * const obmc_tab[4]={
367
    obmc32, obmc16, obmc8, obmc4
368
};
369

    
370
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
371

    
372
typedef struct BlockNode{
373
    int16_t mx;
374
    int16_t my;
375
    uint8_t ref;
376
    uint8_t color[3];
377
    uint8_t type;
378
//#define TYPE_SPLIT    1
379
#define BLOCK_INTRA   1
380
#define BLOCK_OPT     2
381
//#define TYPE_NOCOLOR  4
382
    uint8_t level; //FIXME merge into type?
383
}BlockNode;
384

    
385
static const BlockNode null_block= { //FIXME add border maybe
386
    .color= {128,128,128},
387
    .mx= 0,
388
    .my= 0,
389
    .ref= 0,
390
    .type= 0,
391
    .level= 0,
392
};
393

    
394
#define LOG2_MB_SIZE 4
395
#define MB_SIZE (1<<LOG2_MB_SIZE)
396
#define ENCODER_EXTRA_BITS 4
397
#define HTAPS_MAX 8
398

    
399
typedef struct x_and_coeff{
400
    int16_t x;
401
    uint16_t coeff;
402
} x_and_coeff;
403

    
404
typedef struct SubBand{
405
    int level;
406
    int stride;
407
    int width;
408
    int height;
409
    int qlog;        ///< log(qscale)/log[2^(1/6)]
410
    DWTELEM *buf;
411
    IDWTELEM *ibuf;
412
    int buf_x_offset;
413
    int buf_y_offset;
414
    int stride_line; ///< Stride measured in lines, not pixels.
415
    x_and_coeff * x_coeff;
416
    struct SubBand *parent;
417
    uint8_t state[/*7*2*/ 7 + 512][32];
418
}SubBand;
419

    
420
typedef struct Plane{
421
    int width;
422
    int height;
423
    SubBand band[MAX_DECOMPOSITIONS][4];
424

    
425
    int htaps;
426
    int8_t hcoeff[HTAPS_MAX/2];
427
    int diag_mc;
428
    int fast_mc;
429

    
430
    int last_htaps;
431
    int8_t last_hcoeff[HTAPS_MAX/2];
432
    int last_diag_mc;
433
}Plane;
434

    
435
typedef struct SnowContext{
436
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
437

    
438
    AVCodecContext *avctx;
439
    RangeCoder c;
440
    DSPContext dsp;
441
    AVFrame new_picture;
442
    AVFrame input_picture;              ///< new_picture with the internal linesizes
443
    AVFrame current_picture;
444
    AVFrame last_picture[MAX_REF_FRAMES];
445
    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446
    AVFrame mconly_picture;
447
//     uint8_t q_context[16];
448
    uint8_t header_state[32];
449
    uint8_t block_state[128 + 32*128];
450
    int keyframe;
451
    int always_reset;
452
    int version;
453
    int spatial_decomposition_type;
454
    int last_spatial_decomposition_type;
455
    int temporal_decomposition_type;
456
    int spatial_decomposition_count;
457
    int last_spatial_decomposition_count;
458
    int temporal_decomposition_count;
459
    int max_ref_frames;
460
    int ref_frames;
461
    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
462
    uint32_t *ref_scores[MAX_REF_FRAMES];
463
    DWTELEM *spatial_dwt_buffer;
464
    IDWTELEM *spatial_idwt_buffer;
465
    int colorspace_type;
466
    int chroma_h_shift;
467
    int chroma_v_shift;
468
    int spatial_scalability;
469
    int qlog;
470
    int last_qlog;
471
    int lambda;
472
    int lambda2;
473
    int pass1_rc;
474
    int mv_scale;
475
    int last_mv_scale;
476
    int qbias;
477
    int last_qbias;
478
#define QBIAS_SHIFT 3
479
    int b_width;
480
    int b_height;
481
    int block_max_depth;
482
    int last_block_max_depth;
483
    Plane plane[MAX_PLANES];
484
    BlockNode *block;
485
#define ME_CACHE_SIZE 1024
486
    int me_cache[ME_CACHE_SIZE];
487
    int me_cache_generation;
488
    slice_buffer sb;
489

    
490
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
491

    
492
    uint8_t *scratchbuf;
493
}SnowContext;
494

    
495
typedef struct {
496
    IDWTELEM *b0;
497
    IDWTELEM *b1;
498
    IDWTELEM *b2;
499
    IDWTELEM *b3;
500
    int y;
501
} dwt_compose_t;
502

    
503
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
504
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
505

    
506
static void iterative_me(SnowContext *s);
507

    
508
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
509
{
510
    int i;
511

    
512
    buf->base_buffer = base_buffer;
513
    buf->line_count = line_count;
514
    buf->line_width = line_width;
515
    buf->data_count = max_allocated_lines;
516
    buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
517
    buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
518

    
519
    for(i = 0; i < max_allocated_lines; i++){
520
        buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
521
    }
522

    
523
    buf->data_stack_top = max_allocated_lines - 1;
524
}
525

    
526
static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
527
{
528
    int offset;
529
    IDWTELEM * buffer;
530

    
531
    assert(buf->data_stack_top >= 0);
532
//  assert(!buf->line[line]);
533
    if (buf->line[line])
534
        return buf->line[line];
535

    
536
    offset = buf->line_width * line;
537
    buffer = buf->data_stack[buf->data_stack_top];
538
    buf->data_stack_top--;
539
    buf->line[line] = buffer;
540

    
541
    return buffer;
542
}
543

    
544
static void slice_buffer_release(slice_buffer * buf, int line)
545
{
546
    int offset;
547
    IDWTELEM * buffer;
548

    
549
    assert(line >= 0 && line < buf->line_count);
550
    assert(buf->line[line]);
551

    
552
    offset = buf->line_width * line;
553
    buffer = buf->line[line];
554
    buf->data_stack_top++;
555
    buf->data_stack[buf->data_stack_top] = buffer;
556
    buf->line[line] = NULL;
557
}
558

    
559
static void slice_buffer_flush(slice_buffer * buf)
560
{
561
    int i;
562
    for(i = 0; i < buf->line_count; i++){
563
        if (buf->line[i])
564
            slice_buffer_release(buf, i);
565
    }
566
}
567

    
568
static void slice_buffer_destroy(slice_buffer * buf)
569
{
570
    int i;
571
    slice_buffer_flush(buf);
572

    
573
    for(i = buf->data_count - 1; i >= 0; i--){
574
        av_freep(&buf->data_stack[i]);
575
    }
576
    av_freep(&buf->data_stack);
577
    av_freep(&buf->line);
578
}
579

    
580
#ifdef __sgi
581
// Avoid a name clash on SGI IRIX
582
#undef qexp
583
#endif
584
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
585
static uint8_t qexp[QROOT];
586

    
587
static inline int mirror(int v, int m){
588
    while((unsigned)v > (unsigned)m){
589
        v=-v;
590
        if(v<0) v+= 2*m;
591
    }
592
    return v;
593
}
594

    
595
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
596
    int i;
597

    
598
    if(v){
599
        const int a= FFABS(v);
600
        const int e= av_log2(a);
601
#if 1
602
        const int el= FFMIN(e, 10);
603
        put_rac(c, state+0, 0);
604

    
605
        for(i=0; i<el; i++){
606
            put_rac(c, state+1+i, 1);  //1..10
607
        }
608
        for(; i<e; i++){
609
            put_rac(c, state+1+9, 1);  //1..10
610
        }
611
        put_rac(c, state+1+FFMIN(i,9), 0);
612

    
613
        for(i=e-1; i>=el; i--){
614
            put_rac(c, state+22+9, (a>>i)&1); //22..31
615
        }
616
        for(; i>=0; i--){
617
            put_rac(c, state+22+i, (a>>i)&1); //22..31
618
        }
619

    
620
        if(is_signed)
621
            put_rac(c, state+11 + el, v < 0); //11..21
622
#else
623

    
624
        put_rac(c, state+0, 0);
625
        if(e<=9){
626
            for(i=0; i<e; i++){
627
                put_rac(c, state+1+i, 1);  //1..10
628
            }
629
            put_rac(c, state+1+i, 0);
630

    
631
            for(i=e-1; i>=0; i--){
632
                put_rac(c, state+22+i, (a>>i)&1); //22..31
633
            }
634

    
635
            if(is_signed)
636
                put_rac(c, state+11 + e, v < 0); //11..21
637
        }else{
638
            for(i=0; i<e; i++){
639
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
640
            }
641
            put_rac(c, state+1+FFMIN(i,9), 0);
642

    
643
            for(i=e-1; i>=0; i--){
644
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
645
            }
646

    
647
            if(is_signed)
648
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
649
        }
650
#endif /* 1 */
651
    }else{
652
        put_rac(c, state+0, 1);
653
    }
654
}
655

    
656
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
657
    if(get_rac(c, state+0))
658
        return 0;
659
    else{
660
        int i, e, a;
661
        e= 0;
662
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
663
            e++;
664
        }
665

    
666
        a= 1;
667
        for(i=e-1; i>=0; i--){
668
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
669
        }
670

    
671
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
672
            return -a;
673
        else
674
            return a;
675
    }
676
}
677

    
678
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
679
    int i;
680
    int r= log2>=0 ? 1<<log2 : 1;
681

    
682
    assert(v>=0);
683
    assert(log2>=-4);
684

    
685
    while(v >= r){
686
        put_rac(c, state+4+log2, 1);
687
        v -= r;
688
        log2++;
689
        if(log2>0) r+=r;
690
    }
691
    put_rac(c, state+4+log2, 0);
692

    
693
    for(i=log2-1; i>=0; i--){
694
        put_rac(c, state+31-i, (v>>i)&1);
695
    }
696
}
697

    
698
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
699
    int i;
700
    int r= log2>=0 ? 1<<log2 : 1;
701
    int v=0;
702

    
703
    assert(log2>=-4);
704

    
705
    while(get_rac(c, state+4+log2)){
706
        v+= r;
707
        log2++;
708
        if(log2>0) r+=r;
709
    }
710

    
711
    for(i=log2-1; i>=0; i--){
712
        v+= get_rac(c, state+31-i)<<i;
713
    }
714

    
715
    return v;
716
}
717

    
718
static av_always_inline void
719
lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
720
     int dst_step, int src_step, int ref_step,
721
     int width, int mul, int add, int shift,
722
     int highpass, int inverse){
723
    const int mirror_left= !highpass;
724
    const int mirror_right= (width&1) ^ highpass;
725
    const int w= (width>>1) - 1 + (highpass & width);
726
    int i;
727

    
728
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
729
    if(mirror_left){
730
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
731
        dst += dst_step;
732
        src += src_step;
733
    }
734

    
735
    for(i=0; i<w; i++){
736
        dst[i*dst_step] =
737
            LIFT(src[i*src_step],
738
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
739
                 inverse);
740
    }
741

    
742
    if(mirror_right){
743
        dst[w*dst_step] =
744
            LIFT(src[w*src_step],
745
                 ((mul*2*ref[w*ref_step]+add)>>shift),
746
                 inverse);
747
    }
748
}
749

    
750
static av_always_inline void
751
inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
752
         int dst_step, int src_step, int ref_step,
753
         int width, int mul, int add, int shift,
754
         int highpass, int inverse){
755
    const int mirror_left= !highpass;
756
    const int mirror_right= (width&1) ^ highpass;
757
    const int w= (width>>1) - 1 + (highpass & width);
758
    int i;
759

    
760
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
761
    if(mirror_left){
762
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
763
        dst += dst_step;
764
        src += src_step;
765
    }
766

    
767
    for(i=0; i<w; i++){
768
        dst[i*dst_step] =
769
            LIFT(src[i*src_step],
770
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
771
                 inverse);
772
    }
773

    
774
    if(mirror_right){
775
        dst[w*dst_step] =
776
            LIFT(src[w*src_step],
777
                 ((mul*2*ref[w*ref_step]+add)>>shift),
778
                 inverse);
779
    }
780
}
781

    
782
#ifndef liftS
783
static av_always_inline void
784
liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
785
      int dst_step, int src_step, int ref_step,
786
      int width, int mul, int add, int shift,
787
      int highpass, int inverse){
788
    const int mirror_left= !highpass;
789
    const int mirror_right= (width&1) ^ highpass;
790
    const int w= (width>>1) - 1 + (highpass & width);
791
    int i;
792

    
793
    assert(shift == 4);
794
#define LIFTS(src, ref, inv) \
795
        ((inv) ? \
796
            (src) + (((ref) + 4*(src))>>shift): \
797
            -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
798
    if(mirror_left){
799
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
800
        dst += dst_step;
801
        src += src_step;
802
    }
803

    
804
    for(i=0; i<w; i++){
805
        dst[i*dst_step] =
806
            LIFTS(src[i*src_step],
807
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
808
                  inverse);
809
    }
810

    
811
    if(mirror_right){
812
        dst[w*dst_step] =
813
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
814
    }
815
}
816
static av_always_inline void
817
inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
818
          int dst_step, int src_step, int ref_step,
819
          int width, int mul, int add, int shift,
820
          int highpass, int inverse){
821
    const int mirror_left= !highpass;
822
    const int mirror_right= (width&1) ^ highpass;
823
    const int w= (width>>1) - 1 + (highpass & width);
824
    int i;
825

    
826
    assert(shift == 4);
827
#define LIFTS(src, ref, inv) \
828
    ((inv) ? \
829
        (src) + (((ref) + 4*(src))>>shift): \
830
        -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
831
    if(mirror_left){
832
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
833
        dst += dst_step;
834
        src += src_step;
835
    }
836

    
837
    for(i=0; i<w; i++){
838
        dst[i*dst_step] =
839
            LIFTS(src[i*src_step],
840
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
841
                  inverse);
842
    }
843

    
844
    if(mirror_right){
845
        dst[w*dst_step] =
846
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
847
    }
848
}
849
#endif /* ! liftS */
850

    
851
static void horizontal_decompose53i(DWTELEM *b, int width){
852
    DWTELEM temp[width];
853
    const int width2= width>>1;
854
    int x;
855
    const int w2= (width+1)>>1;
856

    
857
    for(x=0; x<width2; x++){
858
        temp[x   ]= b[2*x    ];
859
        temp[x+w2]= b[2*x + 1];
860
    }
861
    if(width&1)
862
        temp[x   ]= b[2*x    ];
863
#if 0
864
    {
865
    int A1,A2,A3,A4;
866
    A2= temp[1       ];
867
    A4= temp[0       ];
868
    A1= temp[0+width2];
869
    A1 -= (A2 + A4)>>1;
870
    A4 += (A1 + 1)>>1;
871
    b[0+width2] = A1;
872
    b[0       ] = A4;
873
    for(x=1; x+1<width2; x+=2){
874
        A3= temp[x+width2];
875
        A4= temp[x+1     ];
876
        A3 -= (A2 + A4)>>1;
877
        A2 += (A1 + A3 + 2)>>2;
878
        b[x+width2] = A3;
879
        b[x       ] = A2;
880

881
        A1= temp[x+1+width2];
882
        A2= temp[x+2       ];
883
        A1 -= (A2 + A4)>>1;
884
        A4 += (A1 + A3 + 2)>>2;
885
        b[x+1+width2] = A1;
886
        b[x+1       ] = A4;
887
    }
888
    A3= temp[width-1];
889
    A3 -= A2;
890
    A2 += (A1 + A3 + 2)>>2;
891
    b[width -1] = A3;
892
    b[width2-1] = A2;
893
    }
894
#else
895
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
896
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
897
#endif /* 0 */
898
}
899

    
900
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
901
    int i;
902

    
903
    for(i=0; i<width; i++){
904
        b1[i] -= (b0[i] + b2[i])>>1;
905
    }
906
}
907

    
908
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
909
    int i;
910

    
911
    for(i=0; i<width; i++){
912
        b1[i] += (b0[i] + b2[i] + 2)>>2;
913
    }
914
}
915

    
916
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
917
    int y;
918
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
919
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
920

    
921
    for(y=-2; y<height; y+=2){
922
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
923
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
924

    
925
        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
926
        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
927

    
928
        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
929
        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
930

    
931
        b0=b2;
932
        b1=b3;
933
    }
934
}
935

    
936
static void horizontal_decompose97i(DWTELEM *b, int width){
937
    DWTELEM temp[width];
938
    const int w2= (width+1)>>1;
939

    
940
    lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
941
    liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
942
    lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
943
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
944
}
945

    
946

    
947
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
948
    int i;
949

    
950
    for(i=0; i<width; i++){
951
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
952
    }
953
}
954

    
955
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
956
    int i;
957

    
958
    for(i=0; i<width; i++){
959
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
960
    }
961
}
962

    
963
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
964
    int i;
965

    
966
    for(i=0; i<width; i++){
967
#ifdef liftS
968
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
969
#else
970
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
971
#endif
972
    }
973
}
974

    
975
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
976
    int i;
977

    
978
    for(i=0; i<width; i++){
979
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
980
    }
981
}
982

    
983
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
984
    int y;
985
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
986
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
987
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
988
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
989

    
990
    for(y=-4; y<height; y+=2){
991
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
992
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
993

    
994
        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
995
        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
996

    
997
        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
998
        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
999
        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1000
        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1001

    
1002
        b0=b2;
1003
        b1=b3;
1004
        b2=b4;
1005
        b3=b5;
1006
    }
1007
}
1008

    
1009
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1010
    int level;
1011

    
1012
    for(level=0; level<decomposition_count; level++){
1013
        switch(type){
1014
        case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1015
        case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1016
        }
1017
    }
1018
}
1019

    
1020
static void horizontal_compose53i(IDWTELEM *b, int width){
1021
    IDWTELEM temp[width];
1022
    const int width2= width>>1;
1023
    const int w2= (width+1)>>1;
1024
    int x;
1025

    
1026
#if 0
1027
    int A1,A2,A3,A4;
1028
    A2= temp[1       ];
1029
    A4= temp[0       ];
1030
    A1= temp[0+width2];
1031
    A1 -= (A2 + A4)>>1;
1032
    A4 += (A1 + 1)>>1;
1033
    b[0+width2] = A1;
1034
    b[0       ] = A4;
1035
    for(x=1; x+1<width2; x+=2){
1036
        A3= temp[x+width2];
1037
        A4= temp[x+1     ];
1038
        A3 -= (A2 + A4)>>1;
1039
        A2 += (A1 + A3 + 2)>>2;
1040
        b[x+width2] = A3;
1041
        b[x       ] = A2;
1042

1043
        A1= temp[x+1+width2];
1044
        A2= temp[x+2       ];
1045
        A1 -= (A2 + A4)>>1;
1046
        A4 += (A1 + A3 + 2)>>2;
1047
        b[x+1+width2] = A1;
1048
        b[x+1       ] = A4;
1049
    }
1050
    A3= temp[width-1];
1051
    A3 -= A2;
1052
    A2 += (A1 + A3 + 2)>>2;
1053
    b[width -1] = A3;
1054
    b[width2-1] = A2;
1055
#else
1056
    inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1057
    inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1058
#endif /* 0 */
1059
    for(x=0; x<width2; x++){
1060
        b[2*x    ]= temp[x   ];
1061
        b[2*x + 1]= temp[x+w2];
1062
    }
1063
    if(width&1)
1064
        b[2*x    ]= temp[x   ];
1065
}
1066

    
1067
static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1068
    int i;
1069

    
1070
    for(i=0; i<width; i++){
1071
        b1[i] += (b0[i] + b2[i])>>1;
1072
    }
1073
}
1074

    
1075
static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1076
    int i;
1077

    
1078
    for(i=0; i<width; i++){
1079
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1080
    }
1081
}
1082

    
1083
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1084
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1085
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1086
    cs->y = -1;
1087
}
1088

    
1089
static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1090
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1091
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1092
    cs->y = -1;
1093
}
1094

    
1095
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1096
    int y= cs->y;
1097

    
1098
    IDWTELEM *b0= cs->b0;
1099
    IDWTELEM *b1= cs->b1;
1100
    IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1101
    IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1102

    
1103
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1104
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1105

    
1106
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1107
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1108

    
1109
    cs->b0 = b2;
1110
    cs->b1 = b3;
1111
    cs->y += 2;
1112
}
1113

    
1114
static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1115
    int y= cs->y;
1116
    IDWTELEM *b0= cs->b0;
1117
    IDWTELEM *b1= cs->b1;
1118
    IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1119
    IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1120

    
1121
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1122
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1123

    
1124
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1125
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1126

    
1127
    cs->b0 = b2;
1128
    cs->b1 = b3;
1129
    cs->y += 2;
1130
}
1131

    
1132
static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1133
    dwt_compose_t cs;
1134
    spatial_compose53i_init(&cs, buffer, height, stride);
1135
    while(cs.y <= height)
1136
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1137
}
1138

    
1139

    
1140
void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1141
    IDWTELEM temp[width];
1142
    const int w2= (width+1)>>1;
1143

    
1144
    inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1145
    inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1146
    inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
1147
    inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
1148
}
1149

    
1150
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1151
    int i;
1152

    
1153
    for(i=0; i<width; i++){
1154
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1155
    }
1156
}
1157

    
1158
static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1159
    int i;
1160

    
1161
    for(i=0; i<width; i++){
1162
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1163
    }
1164
}
1165

    
1166
static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1167
    int i;
1168

    
1169
    for(i=0; i<width; i++){
1170
#ifdef liftS
1171
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1172
#else
1173
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1174
#endif
1175
    }
1176
}
1177

    
1178
static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1179
    int i;
1180

    
1181
    for(i=0; i<width; i++){
1182
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1183
    }
1184
}
1185

    
1186
void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1187
    int i;
1188

    
1189
    for(i=0; i<width; i++){
1190
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1191
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1192
#ifdef liftS
1193
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1194
#else
1195
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1196
#endif
1197
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1198
    }
1199
}
1200

    
1201
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1202
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1203
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1204
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1205
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1206
    cs->y = -3;
1207
}
1208

    
1209
static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1210
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1211
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1212
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1213
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1214
    cs->y = -3;
1215
}
1216

    
1217
static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1218
    int y = cs->y;
1219

    
1220
    IDWTELEM *b0= cs->b0;
1221
    IDWTELEM *b1= cs->b1;
1222
    IDWTELEM *b2= cs->b2;
1223
    IDWTELEM *b3= cs->b3;
1224
    IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1225
    IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1226

    
1227
    if(y>0 && y+4<height){
1228
        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1229
    }else{
1230
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1231
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1232
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1233
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1234
    }
1235

    
1236
        if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1237
        if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1238

    
1239
    cs->b0=b2;
1240
    cs->b1=b3;
1241
    cs->b2=b4;
1242
    cs->b3=b5;
1243
    cs->y += 2;
1244
}
1245

    
1246
static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1247
    int y = cs->y;
1248
    IDWTELEM *b0= cs->b0;
1249
    IDWTELEM *b1= cs->b1;
1250
    IDWTELEM *b2= cs->b2;
1251
    IDWTELEM *b3= cs->b3;
1252
    IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1253
    IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1254

    
1255
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1256
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1257
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1258
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1259

    
1260
        if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1261
        if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1262

    
1263
    cs->b0=b2;
1264
    cs->b1=b3;
1265
    cs->b2=b4;
1266
    cs->b3=b5;
1267
    cs->y += 2;
1268
}
1269

    
1270
static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1271
    dwt_compose_t cs;
1272
    spatial_compose97i_init(&cs, buffer, height, stride);
1273
    while(cs.y <= height)
1274
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1275
}
1276

    
1277
static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1278
    int level;
1279
    for(level=decomposition_count-1; level>=0; level--){
1280
        switch(type){
1281
        case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1282
        case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1283
        }
1284
    }
1285
}
1286

    
1287
static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1288
    int level;
1289
    for(level=decomposition_count-1; level>=0; level--){
1290
        switch(type){
1291
        case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1292
        case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1293
        }
1294
    }
1295
}
1296

    
1297
static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1298
    const int support = type==1 ? 3 : 5;
1299
    int level;
1300
    if(type==2) return;
1301

    
1302
    for(level=decomposition_count-1; level>=0; level--){
1303
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1304
            switch(type){
1305
            case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1306
                break;
1307
            case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1308
                break;
1309
            }
1310
        }
1311
    }
1312
}
1313

    
1314
static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1315
    const int support = type==1 ? 3 : 5;
1316
    int level;
1317
    if(type==2) return;
1318

    
1319
    for(level=decomposition_count-1; level>=0; level--){
1320
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1321
            switch(type){
1322
            case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1323
                break;
1324
            case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1325
                break;
1326
            }
1327
        }
1328
    }
1329
}
1330

    
1331
static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1332
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1333
        int y;
1334
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1335
        for(y=0; y<height; y+=4)
1336
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1337
}
1338

    
1339
static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1340
    const int w= b->width;
1341
    const int h= b->height;
1342
    int x, y;
1343

    
1344
    if(1){
1345
        int run=0;
1346
        int runs[w*h];
1347
        int run_index=0;
1348
        int max_index;
1349

    
1350
        for(y=0; y<h; y++){
1351
            for(x=0; x<w; x++){
1352
                int v, p=0;
1353
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1354
                v= src[x + y*stride];
1355

    
1356
                if(y){
1357
                    t= src[x + (y-1)*stride];
1358
                    if(x){
1359
                        lt= src[x - 1 + (y-1)*stride];
1360
                    }
1361
                    if(x + 1 < w){
1362
                        rt= src[x + 1 + (y-1)*stride];
1363
                    }
1364
                }
1365
                if(x){
1366
                    l= src[x - 1 + y*stride];
1367
                    /*if(x > 1){
1368
                        if(orientation==1) ll= src[y + (x-2)*stride];
1369
                        else               ll= src[x - 2 + y*stride];
1370
                    }*/
1371
                }
1372
                if(parent){
1373
                    int px= x>>1;
1374
                    int py= y>>1;
1375
                    if(px<b->parent->width && py<b->parent->height)
1376
                        p= parent[px + py*2*stride];
1377
                }
1378
                if(!(/*ll|*/l|lt|t|rt|p)){
1379
                    if(v){
1380
                        runs[run_index++]= run;
1381
                        run=0;
1382
                    }else{
1383
                        run++;
1384
                    }
1385
                }
1386
            }
1387
        }
1388
        max_index= run_index;
1389
        runs[run_index++]= run;
1390
        run_index=0;
1391
        run= runs[run_index++];
1392

    
1393
        put_symbol2(&s->c, b->state[30], max_index, 0);
1394
        if(run_index <= max_index)
1395
            put_symbol2(&s->c, b->state[1], run, 3);
1396

    
1397
        for(y=0; y<h; y++){
1398
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1399
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1400
                return -1;
1401
            }
1402
            for(x=0; x<w; x++){
1403
                int v, p=0;
1404
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1405
                v= src[x + y*stride];
1406

    
1407
                if(y){
1408
                    t= src[x + (y-1)*stride];
1409
                    if(x){
1410
                        lt= src[x - 1 + (y-1)*stride];
1411
                    }
1412
                    if(x + 1 < w){
1413
                        rt= src[x + 1 + (y-1)*stride];
1414
                    }
1415
                }
1416
                if(x){
1417
                    l= src[x - 1 + y*stride];
1418
                    /*if(x > 1){
1419
                        if(orientation==1) ll= src[y + (x-2)*stride];
1420
                        else               ll= src[x - 2 + y*stride];
1421
                    }*/
1422
                }
1423
                if(parent){
1424
                    int px= x>>1;
1425
                    int py= y>>1;
1426
                    if(px<b->parent->width && py<b->parent->height)
1427
                        p= parent[px + py*2*stride];
1428
                }
1429
                if(/*ll|*/l|lt|t|rt|p){
1430
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1431

    
1432
                    put_rac(&s->c, &b->state[0][context], !!v);
1433
                }else{
1434
                    if(!run){
1435
                        run= runs[run_index++];
1436

    
1437
                        if(run_index <= max_index)
1438
                            put_symbol2(&s->c, b->state[1], run, 3);
1439
                        assert(v);
1440
                    }else{
1441
                        run--;
1442
                        assert(!v);
1443
                    }
1444
                }
1445
                if(v){
1446
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1447
                    int l2= 2*FFABS(l) + (l<0);
1448
                    int t2= 2*FFABS(t) + (t<0);
1449

    
1450
                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1451
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1452
                }
1453
            }
1454
        }
1455
    }
1456
    return 0;
1457
}
1458

    
1459
static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1460
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1461
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1462
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1463
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1464
}
1465

    
1466
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1467
    const int w= b->width;
1468
    const int h= b->height;
1469
    int x,y;
1470

    
1471
    if(1){
1472
        int run, runs;
1473
        x_and_coeff *xc= b->x_coeff;
1474
        x_and_coeff *prev_xc= NULL;
1475
        x_and_coeff *prev2_xc= xc;
1476
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1477
        x_and_coeff *prev_parent_xc= parent_xc;
1478

    
1479
        runs= get_symbol2(&s->c, b->state[30], 0);
1480
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1481
        else           run= INT_MAX;
1482

    
1483
        for(y=0; y<h; y++){
1484
            int v=0;
1485
            int lt=0, t=0, rt=0;
1486

    
1487
            if(y && prev_xc->x == 0){
1488
                rt= prev_xc->coeff;
1489
            }
1490
            for(x=0; x<w; x++){
1491
                int p=0;
1492
                const int l= v;
1493

    
1494
                lt= t; t= rt;
1495

    
1496
                if(y){
1497
                    if(prev_xc->x <= x)
1498
                        prev_xc++;
1499
                    if(prev_xc->x == x + 1)
1500
                        rt= prev_xc->coeff;
1501
                    else
1502
                        rt=0;
1503
                }
1504
                if(parent_xc){
1505
                    if(x>>1 > parent_xc->x){
1506
                        parent_xc++;
1507
                    }
1508
                    if(x>>1 == parent_xc->x){
1509
                        p= parent_xc->coeff;
1510
                    }
1511
                }
1512
                if(/*ll|*/l|lt|t|rt|p){
1513
                    int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1514

    
1515
                    v=get_rac(&s->c, &b->state[0][context]);
1516
                    if(v){
1517
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1518
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1519

    
1520
                        xc->x=x;
1521
                        (xc++)->coeff= v;
1522
                    }
1523
                }else{
1524
                    if(!run){
1525
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1526
                        else           run= INT_MAX;
1527
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1528
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1529

    
1530
                        xc->x=x;
1531
                        (xc++)->coeff= v;
1532
                    }else{
1533
                        int max_run;
1534
                        run--;
1535
                        v=0;
1536

    
1537
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1538
                        else  max_run= FFMIN(run, w-x-1);
1539
                        if(parent_xc)
1540
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1541
                        x+= max_run;
1542
                        run-= max_run;
1543
                    }
1544
                }
1545
            }
1546
            (xc++)->x= w+1; //end marker
1547
            prev_xc= prev2_xc;
1548
            prev2_xc= xc;
1549

    
1550
            if(parent_xc){
1551
                if(y&1){
1552
                    while(parent_xc->x != parent->width+1)
1553
                        parent_xc++;
1554
                    parent_xc++;
1555
                    prev_parent_xc= parent_xc;
1556
                }else{
1557
                    parent_xc= prev_parent_xc;
1558
                }
1559
            }
1560
        }
1561

    
1562
        (xc++)->x= w+1; //end marker
1563
    }
1564
}
1565

    
1566
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1567
    const int w= b->width;
1568
    int y;
1569
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1570
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1571
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1572
    int new_index = 0;
1573

    
1574
    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1575
        qadd= 0;
1576
        qmul= 1<<QEXPSHIFT;
1577
    }
1578

    
1579
    /* If we are on the second or later slice, restore our index. */
1580
    if (start_y != 0)
1581
        new_index = save_state[0];
1582

    
1583

    
1584
    for(y=start_y; y<h; y++){
1585
        int x = 0;
1586
        int v;
1587
        IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1588
        memset(line, 0, b->width*sizeof(IDWTELEM));
1589
        v = b->x_coeff[new_index].coeff;
1590
        x = b->x_coeff[new_index++].x;
1591
        while(x < w){
1592
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1593
            register int u= -(v&1);
1594
            line[x] = (t^u) - u;
1595

    
1596
            v = b->x_coeff[new_index].coeff;
1597
            x = b->x_coeff[new_index++].x;
1598
        }
1599
    }
1600

    
1601
    /* Save our variables for the next slice. */
1602
    save_state[0] = new_index;
1603

    
1604
    return;
1605
}
1606

    
1607
static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1608
    int plane_index, level, orientation;
1609

    
1610
    for(plane_index=0; plane_index<3; plane_index++){
1611
        for(level=0; level<MAX_DECOMPOSITIONS; level++){
1612
            for(orientation=level ? 1:0; orientation<4; orientation++){
1613
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1614
            }
1615
        }
1616
    }
1617
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1618
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1619
}
1620

    
1621
static int alloc_blocks(SnowContext *s){
1622
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1623
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1624

    
1625
    s->b_width = w;
1626
    s->b_height= h;
1627

    
1628
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1629
    return 0;
1630
}
1631

    
1632
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1633
    uint8_t *bytestream= d->bytestream;
1634
    uint8_t *bytestream_start= d->bytestream_start;
1635
    *d= *s;
1636
    d->bytestream= bytestream;
1637
    d->bytestream_start= bytestream_start;
1638
}
1639

    
1640
//near copy & paste from dsputil, FIXME
1641
static int pix_sum(uint8_t * pix, int line_size, int w)
1642
{
1643
    int s, i, j;
1644

    
1645
    s = 0;
1646
    for (i = 0; i < w; i++) {
1647
        for (j = 0; j < w; j++) {
1648
            s += pix[0];
1649
            pix ++;
1650
        }
1651
        pix += line_size - w;
1652
    }
1653
    return s;
1654
}
1655

    
1656
//near copy & paste from dsputil, FIXME
1657
static int pix_norm1(uint8_t * pix, int line_size, int w)
1658
{
1659
    int s, i, j;
1660
    uint32_t *sq = ff_squareTbl + 256;
1661

    
1662
    s = 0;
1663
    for (i = 0; i < w; i++) {
1664
        for (j = 0; j < w; j ++) {
1665
            s += sq[pix[0]];
1666
            pix ++;
1667
        }
1668
        pix += line_size - w;
1669
    }
1670
    return s;
1671
}
1672

    
1673
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1674
    const int w= s->b_width << s->block_max_depth;
1675
    const int rem_depth= s->block_max_depth - level;
1676
    const int index= (x + y*w) << rem_depth;
1677
    const int block_w= 1<<rem_depth;
1678
    BlockNode block;
1679
    int i,j;
1680

    
1681
    block.color[0]= l;
1682
    block.color[1]= cb;
1683
    block.color[2]= cr;
1684
    block.mx= mx;
1685
    block.my= my;
1686
    block.ref= ref;
1687
    block.type= type;
1688
    block.level= level;
1689

    
1690
    for(j=0; j<block_w; j++){
1691
        for(i=0; i<block_w; i++){
1692
            s->block[index + i + j*w]= block;
1693
        }
1694
    }
1695
}
1696

    
1697
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1698
    const int offset[3]= {
1699
          y*c->  stride + x,
1700
        ((y*c->uvstride + x)>>1),
1701
        ((y*c->uvstride + x)>>1),
1702
    };
1703
    int i;
1704
    for(i=0; i<3; i++){
1705
        c->src[0][i]= src [i];
1706
        c->ref[0][i]= ref [i] + offset[i];
1707
    }
1708
    assert(!ref_index);
1709
}
1710

    
1711
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1712
                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1713
    if(s->ref_frames == 1){
1714
        *mx = mid_pred(left->mx, top->mx, tr->mx);
1715
        *my = mid_pred(left->my, top->my, tr->my);
1716
    }else{
1717
        const int *scale = scale_mv_ref[ref];
1718
        *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1719
                       (top ->mx * scale[top ->ref] + 128) >>8,
1720
                       (tr  ->mx * scale[tr  ->ref] + 128) >>8);
1721
        *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1722
                       (top ->my * scale[top ->ref] + 128) >>8,
1723
                       (tr  ->my * scale[tr  ->ref] + 128) >>8);
1724
    }
1725
}
1726

    
1727
//FIXME copy&paste
1728
#define P_LEFT P[1]
1729
#define P_TOP P[2]
1730
#define P_TOPRIGHT P[3]
1731
#define P_MEDIAN P[4]
1732
#define P_MV1 P[9]
1733
#define FLAG_QPEL   1 //must be 1
1734

    
1735
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1736
    uint8_t p_buffer[1024];
1737
    uint8_t i_buffer[1024];
1738
    uint8_t p_state[sizeof(s->block_state)];
1739
    uint8_t i_state[sizeof(s->block_state)];
1740
    RangeCoder pc, ic;
1741
    uint8_t *pbbak= s->c.bytestream;
1742
    uint8_t *pbbak_start= s->c.bytestream_start;
1743
    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1744
    const int w= s->b_width  << s->block_max_depth;
1745
    const int h= s->b_height << s->block_max_depth;
1746
    const int rem_depth= s->block_max_depth - level;
1747
    const int index= (x + y*w) << rem_depth;
1748
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1749
    int trx= (x+1)<<rem_depth;
1750
    int try= (y+1)<<rem_depth;
1751
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1752
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1753
    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1754
    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1755
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1756
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1757
    int pl = left->color[0];
1758
    int pcb= left->color[1];
1759
    int pcr= left->color[2];
1760
    int pmx, pmy;
1761
    int mx=0, my=0;
1762
    int l,cr,cb;
1763
    const int stride= s->current_picture.linesize[0];
1764
    const int uvstride= s->current_picture.linesize[1];
1765
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
1766
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1767
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1768
    int P[10][2];
1769
    int16_t last_mv[3][2];
1770
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1771
    const int shift= 1+qpel;
1772
    MotionEstContext *c= &s->m.me;
1773
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1774
    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1775
    int my_context= av_log2(2*FFABS(left->my - top->my));
1776
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1777
    int ref, best_ref, ref_score, ref_mx, ref_my;
1778

    
1779
    assert(sizeof(s->block_state) >= 256);
1780
    if(s->keyframe){
1781
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1782
        return 0;
1783
    }
1784

    
1785
//    clip predictors / edge ?
1786

    
1787
    P_LEFT[0]= left->mx;
1788
    P_LEFT[1]= left->my;
1789
    P_TOP [0]= top->mx;
1790
    P_TOP [1]= top->my;
1791
    P_TOPRIGHT[0]= tr->mx;
1792
    P_TOPRIGHT[1]= tr->my;
1793

    
1794
    last_mv[0][0]= s->block[index].mx;
1795
    last_mv[0][1]= s->block[index].my;
1796
    last_mv[1][0]= right->mx;
1797
    last_mv[1][1]= right->my;
1798
    last_mv[2][0]= bottom->mx;
1799
    last_mv[2][1]= bottom->my;
1800

    
1801
    s->m.mb_stride=2;
1802
    s->m.mb_x=
1803
    s->m.mb_y= 0;
1804
    c->skip= 0;
1805

    
1806
    assert(c->  stride ==   stride);
1807
    assert(c->uvstride == uvstride);
1808

    
1809
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1810
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1811
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1812
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1813

    
1814
    c->xmin = - x*block_w - 16+2;
1815
    c->ymin = - y*block_w - 16+2;
1816
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1817
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1818

    
1819
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1820
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
1821
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1822
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1823
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1824
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1825
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1826

    
1827
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1828
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1829

    
1830
    if (!y) {
1831
        c->pred_x= P_LEFT[0];
1832
        c->pred_y= P_LEFT[1];
1833
    } else {
1834
        c->pred_x = P_MEDIAN[0];
1835
        c->pred_y = P_MEDIAN[1];
1836
    }
1837

    
1838
    score= INT_MAX;
1839
    best_ref= 0;
1840
    for(ref=0; ref<s->ref_frames; ref++){
1841
        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1842

    
1843
        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1844
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1845

    
1846
        assert(ref_mx >= c->xmin);
1847
        assert(ref_mx <= c->xmax);
1848
        assert(ref_my >= c->ymin);
1849
        assert(ref_my <= c->ymax);
1850

    
1851
        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1852
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1853
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1854
        if(s->ref_mvs[ref]){
1855
            s->ref_mvs[ref][index][0]= ref_mx;
1856
            s->ref_mvs[ref][index][1]= ref_my;
1857
            s->ref_scores[ref][index]= ref_score;
1858
        }
1859
        if(score > ref_score){
1860
            score= ref_score;
1861
            best_ref= ref;
1862
            mx= ref_mx;
1863
            my= ref_my;
1864
        }
1865
    }
1866
    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1867

    
1868
  //  subpel search
1869
    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1870
    pc= s->c;
1871
    pc.bytestream_start=
1872
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1873
    memcpy(p_state, s->block_state, sizeof(s->block_state));
1874

    
1875
    if(level!=s->block_max_depth)
1876
        put_rac(&pc, &p_state[4 + s_context], 1);
1877
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1878
    if(s->ref_frames > 1)
1879
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1880
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1881
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1882
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1883
    p_len= pc.bytestream - pc.bytestream_start;
1884
    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1885

    
1886
    block_s= block_w*block_w;
1887
    sum = pix_sum(current_data[0], stride, block_w);
1888
    l= (sum + block_s/2)/block_s;
1889
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1890

    
1891
    block_s= block_w*block_w>>2;
1892
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
1893
    cb= (sum + block_s/2)/block_s;
1894
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1895
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
1896
    cr= (sum + block_s/2)/block_s;
1897
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1898

    
1899
    ic= s->c;
1900
    ic.bytestream_start=
1901
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1902
    memcpy(i_state, s->block_state, sizeof(s->block_state));
1903
    if(level!=s->block_max_depth)
1904
        put_rac(&ic, &i_state[4 + s_context], 1);
1905
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1906
    put_symbol(&ic, &i_state[32],  l-pl , 1);
1907
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
1908
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
1909
    i_len= ic.bytestream - ic.bytestream_start;
1910
    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1911

    
1912
//    assert(score==256*256*256*64-1);
1913
    assert(iscore < 255*255*256 + s->lambda2*10);
1914
    assert(iscore >= 0);
1915
    assert(l>=0 && l<=255);
1916
    assert(pl>=0 && pl<=255);
1917

    
1918
    if(level==0){
1919
        int varc= iscore >> 8;
1920
        int vard= score >> 8;
1921
        if (vard <= 64 || vard < varc)
1922
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1923
        else
1924
            c->scene_change_score+= s->m.qscale;
1925
    }
1926

    
1927
    if(level!=s->block_max_depth){
1928
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
1929
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1930
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1931
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1932
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1933
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1934

    
1935
        if(score2 < score && score2 < iscore)
1936
            return score2;
1937
    }
1938

    
1939
    if(iscore < score){
1940
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1941
        memcpy(pbbak, i_buffer, i_len);
1942
        s->c= ic;
1943
        s->c.bytestream_start= pbbak_start;
1944
        s->c.bytestream= pbbak + i_len;
1945
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1946
        memcpy(s->block_state, i_state, sizeof(s->block_state));
1947
        return iscore;
1948
    }else{
1949
        memcpy(pbbak, p_buffer, p_len);
1950
        s->c= pc;
1951
        s->c.bytestream_start= pbbak_start;
1952
        s->c.bytestream= pbbak + p_len;
1953
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1954
        memcpy(s->block_state, p_state, sizeof(s->block_state));
1955
        return score;
1956
    }
1957
}
1958

    
1959
static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1960
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1961
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1962
    }else{
1963
        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1964
    }
1965
}
1966

    
1967
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1968
    const int w= s->b_width  << s->block_max_depth;
1969
    const int rem_depth= s->block_max_depth - level;
1970
    const int index= (x + y*w) << rem_depth;
1971
    int trx= (x+1)<<rem_depth;
1972
    BlockNode *b= &s->block[index];
1973
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1974
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1975
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1976
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1977
    int pl = left->color[0];
1978
    int pcb= left->color[1];
1979
    int pcr= left->color[2];
1980
    int pmx, pmy;
1981
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1982
    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1983
    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1984
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1985

    
1986
    if(s->keyframe){
1987
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1988
        return;
1989
    }
1990

    
1991
    if(level!=s->block_max_depth){
1992
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1993
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
1994
        }else{
1995
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
1996
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1997
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1998
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
1999
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2000
            return;
2001
        }
2002
    }
2003
    if(b->type & BLOCK_INTRA){
2004
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2005
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2006
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2007
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2008
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2009
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2010
    }else{
2011
        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2012
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2013
        if(s->ref_frames > 1)
2014
            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2015
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2016
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2017
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2018
    }
2019
}
2020

    
2021
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2022
    const int w= s->b_width << s->block_max_depth;
2023
    const int rem_depth= s->block_max_depth - level;
2024
    const int index= (x + y*w) << rem_depth;
2025
    int trx= (x+1)<<rem_depth;
2026
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2027
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2028
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2029
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2030
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2031

    
2032
    if(s->keyframe){
2033
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2034
        return;
2035
    }
2036

    
2037
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2038
        int type, mx, my;
2039
        int l = left->color[0];
2040
        int cb= left->color[1];
2041
        int cr= left->color[2];
2042
        int ref = 0;
2043
        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2044
        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2045
        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2046

    
2047
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2048

    
2049
        if(type){
2050
            pred_mv(s, &mx, &my, 0, left, top, tr);
2051
            l += get_symbol(&s->c, &s->block_state[32], 1);
2052
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2053
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2054
        }else{
2055
            if(s->ref_frames > 1)
2056
                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2057
            pred_mv(s, &mx, &my, ref, left, top, tr);
2058
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2059
            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2060
        }
2061
        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2062
    }else{
2063
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2064
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2065
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2066
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2067
    }
2068
}
2069

    
2070
static void encode_blocks(SnowContext *s, int search){
2071
    int x, y;
2072
    int w= s->b_width;
2073
    int h= s->b_height;
2074

    
2075
    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2076
        iterative_me(s);
2077

    
2078
    for(y=0; y<h; y++){
2079
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2080
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2081
            return;
2082
        }
2083
        for(x=0; x<w; x++){
2084
            if(s->avctx->me_method == ME_ITER || !search)
2085
                encode_q_branch2(s, 0, x, y);
2086
            else
2087
                encode_q_branch (s, 0, x, y);
2088
        }
2089
    }
2090
}
2091

    
2092
static void decode_blocks(SnowContext *s){
2093
    int x, y;
2094
    int w= s->b_width;
2095
    int h= s->b_height;
2096

    
2097
    for(y=0; y<h; y++){
2098
        for(x=0; x<w; x++){
2099
            decode_q_branch(s, 0, x, y);
2100
        }
2101
    }
2102
}
2103

    
2104
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2105
    static const uint8_t weight[64]={
2106
    8,7,6,5,4,3,2,1,
2107
    7,7,0,0,0,0,0,1,
2108
    6,0,6,0,0,0,2,0,
2109
    5,0,0,5,0,3,0,0,
2110
    4,0,0,0,4,0,0,0,
2111
    3,0,0,5,0,3,0,0,
2112
    2,0,6,0,0,0,2,0,
2113
    1,7,0,0,0,0,0,1,
2114
    };
2115

    
2116
    static const uint8_t brane[256]={
2117
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2118
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2119
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2120
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2121
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2122
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2123
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2124
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2125
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2126
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2127
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2128
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2129
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2130
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2131
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2132
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2133
    };
2134

    
2135
    static const uint8_t needs[16]={
2136
    0,1,0,0,
2137
    2,4,2,0,
2138
    0,1,0,0,
2139
    15
2140
    };
2141

    
2142
    int x, y, b, r, l;
2143
    int16_t tmpIt   [64*(32+HTAPS_MAX)];
2144
    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2145
    int16_t *tmpI= tmpIt;
2146
    uint8_t *tmp2= tmp2t[0];
2147
    const uint8_t *hpel[11];
2148
    assert(dx<16 && dy<16);
2149
    r= brane[dx + 16*dy]&15;
2150
    l= brane[dx + 16*dy]>>4;
2151

    
2152
    b= needs[l] | needs[r];
2153
    if(p && !p->diag_mc)
2154
        b= 15;
2155

    
2156
    if(b&5){
2157
        for(y=0; y < b_h+HTAPS_MAX-1; y++){
2158
            for(x=0; x < b_w; x++){
2159
                int a_1=src[x + HTAPS_MAX/2-4];
2160
                int a0= src[x + HTAPS_MAX/2-3];
2161
                int a1= src[x + HTAPS_MAX/2-2];
2162
                int a2= src[x + HTAPS_MAX/2-1];
2163
                int a3= src[x + HTAPS_MAX/2+0];
2164
                int a4= src[x + HTAPS_MAX/2+1];
2165
                int a5= src[x + HTAPS_MAX/2+2];
2166
                int a6= src[x + HTAPS_MAX/2+3];
2167
                int am=0;
2168
                if(!p || p->fast_mc){
2169
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2170
                    tmpI[x]= am;
2171
                    am= (am+16)>>5;
2172
                }else{
2173
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2174
                    tmpI[x]= am;
2175
                    am= (am+32)>>6;
2176
                }
2177

    
2178
                if(am&(~255)) am= ~(am>>31);
2179
                tmp2[x]= am;
2180
            }
2181
            tmpI+= 64;
2182
            tmp2+= stride;
2183
            src += stride;
2184
        }
2185
        src -= stride*y;
2186
    }
2187
    src += HTAPS_MAX/2 - 1;
2188
    tmp2= tmp2t[1];
2189

    
2190
    if(b&2){
2191
        for(y=0; y < b_h; y++){
2192
            for(x=0; x < b_w+1; x++){
2193
                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2194
                int a0= src[x + (HTAPS_MAX/2-3)*stride];
2195
                int a1= src[x + (HTAPS_MAX/2-2)*stride];
2196
                int a2= src[x + (HTAPS_MAX/2-1)*stride];
2197
                int a3= src[x + (HTAPS_MAX/2+0)*stride];
2198
                int a4= src[x + (HTAPS_MAX/2+1)*stride];
2199
                int a5= src[x + (HTAPS_MAX/2+2)*stride];
2200
                int a6= src[x + (HTAPS_MAX/2+3)*stride];
2201
                int am=0;
2202
                if(!p || p->fast_mc)
2203
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2204
                else
2205
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2206

    
2207
                if(am&(~255)) am= ~(am>>31);
2208
                tmp2[x]= am;
2209
            }
2210
            src += stride;
2211
            tmp2+= stride;
2212
        }
2213
        src -= stride*y;
2214
    }
2215
    src += stride*(HTAPS_MAX/2 - 1);
2216
    tmp2= tmp2t[2];
2217
    tmpI= tmpIt;
2218
    if(b&4){
2219
        for(y=0; y < b_h; y++){
2220
            for(x=0; x < b_w; x++){
2221
                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2222
                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2223
                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2224
                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2225
                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2226
                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2227
                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2228
                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2229
                int am=0;
2230
                if(!p || p->fast_mc)
2231
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2232
                else
2233
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2234
                if(am&(~255)) am= ~(am>>31);
2235
                tmp2[x]= am;
2236
            }
2237
            tmpI+= 64;
2238
            tmp2+= stride;
2239
        }
2240
    }
2241

    
2242
    hpel[ 0]= src;
2243
    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2244
    hpel[ 2]= src + 1;
2245

    
2246
    hpel[ 4]= tmp2t[1];
2247
    hpel[ 5]= tmp2t[2];
2248
    hpel[ 6]= tmp2t[1] + 1;
2249

    
2250
    hpel[ 8]= src + stride;
2251
    hpel[ 9]= hpel[1] + stride;
2252
    hpel[10]= hpel[8] + 1;
2253

    
2254
    if(b==15){
2255
        const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
2256
        const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2257
        const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2258
        const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2259
        dx&=7;
2260
        dy&=7;
2261
        for(y=0; y < b_h; y++){
2262
            for(x=0; x < b_w; x++){
2263
                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2264
                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
2265
            }
2266
            src1+=stride;
2267
            src2+=stride;
2268
            src3+=stride;
2269
            src4+=stride;
2270
            dst +=stride;
2271
        }
2272
    }else{
2273
        const uint8_t *src1= hpel[l];
2274
        const uint8_t *src2= hpel[r];
2275
        int a= weight[((dx&7) + (8*(dy&7)))];
2276
        int b= 8-a;
2277
        for(y=0; y < b_h; y++){
2278
            for(x=0; x < b_w; x++){
2279
                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2280
            }
2281
            src1+=stride;
2282
            src2+=stride;
2283
            dst +=stride;
2284
        }
2285
    }
2286
}
2287

    
2288
#define mca(dx,dy,b_w)\
2289
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2290
    uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2291
    assert(h==b_w);\
2292
    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2293
}
2294

    
2295
mca( 0, 0,16)
2296
mca( 8, 0,16)
2297
mca( 0, 8,16)
2298
mca( 8, 8,16)
2299
mca( 0, 0,8)
2300
mca( 8, 0,8)
2301
mca( 0, 8,8)
2302
mca( 8, 8,8)
2303

    
2304
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2305
    if(block->type & BLOCK_INTRA){
2306
        int x, y;
2307
        const int color = block->color[plane_index];
2308
        const int color4= color*0x01010101;
2309
        if(b_w==32){
2310
            for(y=0; y < b_h; y++){
2311
                *(uint32_t*)&dst[0 + y*stride]= color4;
2312
                *(uint32_t*)&dst[4 + y*stride]= color4;
2313
                *(uint32_t*)&dst[8 + y*stride]= color4;
2314
                *(uint32_t*)&dst[12+ y*stride]= color4;
2315
                *(uint32_t*)&dst[16+ y*stride]= color4;
2316
                *(uint32_t*)&dst[20+ y*stride]= color4;
2317
                *(uint32_t*)&dst[24+ y*stride]= color4;
2318
                *(uint32_t*)&dst[28+ y*stride]= color4;
2319
            }
2320
        }else if(b_w==16){
2321
            for(y=0; y < b_h; y++){
2322
                *(uint32_t*)&dst[0 + y*stride]= color4;
2323
                *(uint32_t*)&dst[4 + y*stride]= color4;
2324
                *(uint32_t*)&dst[8 + y*stride]= color4;
2325
                *(uint32_t*)&dst[12+ y*stride]= color4;
2326
            }
2327
        }else if(b_w==8){
2328
            for(y=0; y < b_h; y++){
2329
                *(uint32_t*)&dst[0 + y*stride]= color4;
2330
                *(uint32_t*)&dst[4 + y*stride]= color4;
2331
            }
2332
        }else if(b_w==4){
2333
            for(y=0; y < b_h; y++){
2334
                *(uint32_t*)&dst[0 + y*stride]= color4;
2335
            }
2336
        }else{
2337
            for(y=0; y < b_h; y++){
2338
                for(x=0; x < b_w; x++){
2339
                    dst[x + y*stride]= color;
2340
                }
2341
            }
2342
        }
2343
    }else{
2344
        uint8_t *src= s->last_picture[block->ref].data[plane_index];
2345
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2346
        int mx= block->mx*scale;
2347
        int my= block->my*scale;
2348
        const int dx= mx&15;
2349
        const int dy= my&15;
2350
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2351
        sx += (mx>>4) - (HTAPS_MAX/2-1);
2352
        sy += (my>>4) - (HTAPS_MAX/2-1);
2353
        src += sx + sy*stride;
2354
        if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2355
           || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2356
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2357
            src= tmp + MB_SIZE;
2358
        }
2359
//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2360
//        assert(!(b_w&(b_w-1)));
2361
        assert(b_w>1 && b_h>1);
2362
        assert((tab_index>=0 && tab_index<4) || b_w==32);
2363
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2364
            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2365
        else if(b_w==32){
2366
            int y;
2367
            for(y=0; y<b_h; y+=16){
2368
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2369
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2370
            }
2371
        }else if(b_w==b_h)
2372
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2373
        else if(b_w==2*b_h){
2374
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
2375
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2376
        }else{
2377
            assert(2*b_w==b_h);
2378
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
2379
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2380
        }
2381
    }
2382
}
2383

    
2384
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2385
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2386
    int y, x;
2387
    IDWTELEM * dst;
2388
    for(y=0; y<b_h; y++){
2389
        //FIXME ugly misuse of obmc_stride
2390
        const uint8_t *obmc1= obmc + y*obmc_stride;
2391
        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2392
        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2393
        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2394
        dst = slice_buffer_get_line(sb, src_y + y);
2395
        for(x=0; x<b_w; x++){
2396
            int v=   obmc1[x] * block[3][x + y*src_stride]
2397
                    +obmc2[x] * block[2][x + y*src_stride]
2398
                    +obmc3[x] * block[1][x + y*src_stride]
2399
                    +obmc4[x] * block[0][x + y*src_stride];
2400

    
2401
            v <<= 8 - LOG2_OBMC_MAX;
2402
            if(FRAC_BITS != 8){
2403
                v >>= 8 - FRAC_BITS;
2404
            }
2405
            if(add){
2406
                v += dst[x + src_x];
2407
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2408
                if(v&(~255)) v= ~(v>>31);
2409
                dst8[x + y*src_stride] = v;
2410
            }else{
2411
                dst[x + src_x] -= v;
2412
            }
2413
        }
2414
    }
2415
}
2416

    
2417
//FIXME name cleanup (b_w, block_w, b_width stuff)
2418
static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2419
    const int b_width = s->b_width  << s->block_max_depth;
2420
    const int b_height= s->b_height << s->block_max_depth;
2421
    const int b_stride= b_width;
2422
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2423
    BlockNode *rt= lt+1;
2424
    BlockNode *lb= lt+b_stride;
2425
    BlockNode *rb= lb+1;
2426
    uint8_t *block[4];
2427
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2428
    uint8_t *tmp = s->scratchbuf;
2429
    uint8_t *ptmp;
2430
    int x,y;
2431

    
2432
    if(b_x<0){
2433
        lt= rt;
2434
        lb= rb;
2435
    }else if(b_x + 1 >= b_width){
2436
        rt= lt;
2437
        rb= lb;
2438
    }
2439
    if(b_y<0){
2440
        lt= lb;
2441
        rt= rb;
2442
    }else if(b_y + 1 >= b_height){
2443
        lb= lt;
2444
        rb= rt;
2445
    }
2446

    
2447
    if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2448
        obmc -= src_x;
2449
        b_w += src_x;
2450
        if(!sliced && !offset_dst)
2451
            dst -= src_x;
2452
        src_x=0;
2453
    }else if(src_x + b_w > w){
2454
        b_w = w - src_x;
2455
    }
2456
    if(src_y<0){
2457
        obmc -= src_y*obmc_stride;
2458
        b_h += src_y;
2459
        if(!sliced && !offset_dst)
2460
            dst -= src_y*dst_stride;
2461
        src_y=0;
2462
    }else if(src_y + b_h> h){
2463
        b_h = h - src_y;
2464
    }
2465

    
2466
    if(b_w<=0 || b_h<=0) return;
2467

    
2468
    assert(src_stride > 2*MB_SIZE + 5);
2469

    
2470
    if(!sliced && offset_dst)
2471
        dst += src_x + src_y*dst_stride;
2472
    dst8+= src_x + src_y*src_stride;
2473
//    src += src_x + src_y*src_stride;
2474

    
2475
    ptmp= tmp + 3*tmp_step;
2476
    block[0]= ptmp;
2477
    ptmp+=tmp_step;
2478
    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2479

    
2480
    if(same_block(lt, rt)){
2481
        block[1]= block[0];
2482
    }else{
2483
        block[1]= ptmp;
2484
        ptmp+=tmp_step;
2485
        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2486
    }
2487

    
2488
    if(same_block(lt, lb)){
2489
        block[2]= block[0];
2490
    }else if(same_block(rt, lb)){
2491
        block[2]= block[1];
2492
    }else{
2493
        block[2]= ptmp;
2494
        ptmp+=tmp_step;
2495
        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2496
    }
2497

    
2498
    if(same_block(lt, rb) ){
2499
        block[3]= block[0];
2500
    }else if(same_block(rt, rb)){
2501
        block[3]= block[1];
2502
    }else if(same_block(lb, rb)){
2503
        block[3]= block[2];
2504
    }else{
2505
        block[3]= ptmp;
2506
        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2507
    }
2508
#if 0
2509
    for(y=0; y<b_h; y++){
2510
        for(x=0; x<b_w; x++){
2511
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2512
            if(add) dst[x + y*dst_stride] += v;
2513
            else    dst[x + y*dst_stride] -= v;
2514
        }
2515
    }
2516
    for(y=0; y<b_h; y++){
2517
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2518
        for(x=0; x<b_w; x++){
2519
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2520
            if(add) dst[x + y*dst_stride] += v;
2521
            else    dst[x + y*dst_stride] -= v;
2522
        }
2523
    }
2524
    for(y=0; y<b_h; y++){
2525
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2526
        for(x=0; x<b_w; x++){
2527
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2528
            if(add) dst[x + y*dst_stride] += v;
2529
            else    dst[x + y*dst_stride] -= v;
2530
        }
2531
    }
2532
    for(y=0; y<b_h; y++){
2533
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2534
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2535
        for(x=0; x<b_w; x++){
2536
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2537
            if(add) dst[x + y*dst_stride] += v;
2538
            else    dst[x + y*dst_stride] -= v;
2539
        }
2540
    }
2541
#else
2542
    if(sliced){
2543
        s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2544
    }else{
2545
        for(y=0; y<b_h; y++){
2546
            //FIXME ugly misuse of obmc_stride
2547
            const uint8_t *obmc1= obmc + y*obmc_stride;
2548
            const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2549
            const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2550
            const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2551
            for(x=0; x<b_w; x++){
2552
                int v=   obmc1[x] * block[3][x + y*src_stride]
2553
                        +obmc2[x] * block[2][x + y*src_stride]
2554
                        +obmc3[x] * block[1][x + y*src_stride]
2555
                        +obmc4[x] * block[0][x + y*src_stride];
2556

    
2557
                v <<= 8 - LOG2_OBMC_MAX;
2558
                if(FRAC_BITS != 8){
2559
                    v >>= 8 - FRAC_BITS;
2560
                }
2561
                if(add){
2562
                    v += dst[x + y*dst_stride];
2563
                    v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2564
                    if(v&(~255)) v= ~(v>>31);
2565
                    dst8[x + y*src_stride] = v;
2566
                }else{
2567
                    dst[x + y*dst_stride] -= v;
2568
                }
2569
            }
2570
        }
2571
    }
2572
#endif /* 0 */
2573
}
2574

    
2575
static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2576
    Plane *p= &s->plane[plane_index];
2577
    const int mb_w= s->b_width  << s->block_max_depth;
2578
    const int mb_h= s->b_height << s->block_max_depth;
2579
    int x, y, mb_x;
2580
    int block_size = MB_SIZE >> s->block_max_depth;
2581
    int block_w    = plane_index ? block_size/2 : block_size;
2582
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2583
    int obmc_stride= plane_index ? block_size : 2*block_size;
2584
    int ref_stride= s->current_picture.linesize[plane_index];
2585
    uint8_t *dst8= s->current_picture.data[plane_index];
2586
    int w= p->width;
2587
    int h= p->height;
2588

    
2589
    if(s->keyframe || (s->avctx->debug&512)){
2590
        if(mb_y==mb_h)
2591
            return;
2592

    
2593
        if(add){
2594
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2595
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2596
                IDWTELEM * line = sb->line[y];
2597
                for(x=0; x<w; x++){
2598
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2599
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2600
                    v >>= FRAC_BITS;
2601
                    if(v&(~255)) v= ~(v>>31);
2602
                    dst8[x + y*ref_stride]= v;
2603
                }
2604
            }
2605
        }else{
2606
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2607
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2608
                IDWTELEM * line = sb->line[y];
2609
                for(x=0; x<w; x++){
2610
                    line[x] -= 128 << FRAC_BITS;
2611
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2612
                }
2613
            }
2614
        }
2615

    
2616
        return;
2617
    }
2618

    
2619
    for(mb_x=0; mb_x<=mb_w; mb_x++){
2620
        add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2621
                   block_w*mb_x - block_w/2,
2622
                   block_w*mb_y - block_w/2,
2623
                   block_w, block_w,
2624
                   w, h,
2625
                   w, ref_stride, obmc_stride,
2626
                   mb_x - 1, mb_y - 1,
2627
                   add, 0, plane_index);
2628
    }
2629
}
2630

    
2631
static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2632
    Plane *p= &s->plane[plane_index];
2633
    const int mb_w= s->b_width  << s->block_max_depth;
2634
    const int mb_h= s->b_height << s->block_max_depth;
2635
    int x, y, mb_x;
2636
    int block_size = MB_SIZE >> s->block_max_depth;
2637
    int block_w    = plane_index ? block_size/2 : block_size;
2638
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2639
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2640
    int ref_stride= s->current_picture.linesize[plane_index];
2641
    uint8_t *dst8= s->current_picture.data[plane_index];
2642
    int w= p->width;
2643
    int h= p->height;
2644

    
2645
    if(s->keyframe || (s->avctx->debug&512)){
2646
        if(mb_y==mb_h)
2647
            return;
2648

    
2649
        if(add){
2650
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2651
                for(x=0; x<w; x++){
2652
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2653
                    v >>= FRAC_BITS;
2654
                    if(v&(~255)) v= ~(v>>31);
2655
                    dst8[x + y*ref_stride]= v;
2656
                }
2657
            }
2658
        }else{
2659
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2660
                for(x=0; x<w; x++){
2661
                    buf[x + y*w]-= 128<<FRAC_BITS;
2662
                }
2663
            }
2664
        }
2665

    
2666
        return;
2667
    }
2668

    
2669
    for(mb_x=0; mb_x<=mb_w; mb_x++){
2670
        add_yblock(s, 0, NULL, buf, dst8, obmc,
2671
                   block_w*mb_x - block_w/2,
2672
                   block_w*mb_y - block_w/2,
2673
                   block_w, block_w,
2674
                   w, h,
2675
                   w, ref_stride, obmc_stride,
2676
                   mb_x - 1, mb_y - 1,
2677
                   add, 1, plane_index);
2678
    }
2679
}
2680

    
2681
static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2682
    const int mb_h= s->b_height << s->block_max_depth;
2683
    int mb_y;
2684
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2685
        predict_slice(s, buf, plane_index, add, mb_y);
2686
}
2687

    
2688
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2689
    int i, x2, y2;
2690
    Plane *p= &s->plane[plane_index];
2691
    const int block_size = MB_SIZE >> s->block_max_depth;
2692
    const int block_w    = plane_index ? block_size/2 : block_size;
2693
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2694
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2695
    const int ref_stride= s->current_picture.linesize[plane_index];
2696
    uint8_t *src= s-> input_picture.data[plane_index];
2697
    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2698
    const int b_stride = s->b_width << s->block_max_depth;
2699
    const int w= p->width;
2700
    const int h= p->height;
2701
    int index= mb_x + mb_y*b_stride;
2702
    BlockNode *b= &s->block[index];
2703
    BlockNode backup= *b;
2704
    int ab=0;
2705
    int aa=0;
2706

    
2707
    b->type|= BLOCK_INTRA;
2708
    b->color[plane_index]= 0;
2709
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2710

    
2711
    for(i=0; i<4; i++){
2712
        int mb_x2= mb_x + (i &1) - 1;
2713
        int mb_y2= mb_y + (i>>1) - 1;
2714
        int x= block_w*mb_x2 + block_w/2;
2715
        int y= block_w*mb_y2 + block_w/2;
2716

    
2717
        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2718
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2719

    
2720
        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2721
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2722
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2723
                int obmc_v= obmc[index];
2724
                int d;
2725
                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2726
                if(x<0) obmc_v += obmc[index + block_w];
2727
                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2728
                if(x+block_w>w) obmc_v += obmc[index - block_w];
2729
                //FIXME precalculate this or simplify it somehow else
2730

    
2731
                d = -dst[index] + (1<<(FRAC_BITS-1));
2732
                dst[index] = d;
2733
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2734
                aa += obmc_v * obmc_v; //FIXME precalculate this
2735
            }
2736
        }
2737
    }
2738
    *b= backup;
2739

    
2740
    return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2741
}
2742

    
2743
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2744
    const int b_stride = s->b_width << s->block_max_depth;
2745
    const int b_height = s->b_height<< s->block_max_depth;
2746
    int index= x + y*b_stride;
2747
    const BlockNode *b     = &s->block[index];
2748
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2749
    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2750
    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2751
    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2752
    int dmx, dmy;
2753
//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2754
//  int my_context= av_log2(2*FFABS(left->my - top->my));
2755

    
2756
    if(x<0 || x>=b_stride || y>=b_height)
2757
        return 0;
2758
/*
2759
1            0      0
2760
01X          1-2    1
2761
001XX        3-6    2-3
2762
0001XXX      7-14   4-7
2763
00001XXXX   15-30   8-15
2764
*/
2765
//FIXME try accurate rate
2766
//FIXME intra and inter predictors if surrounding blocks are not the same type
2767
    if(b->type & BLOCK_INTRA){
2768
        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2769
                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
2770
                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
2771
    }else{
2772
        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2773
        dmx-= b->mx;
2774
        dmy-= b->my;
2775
        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2776
                    + av_log2(2*FFABS(dmy))
2777
                    + av_log2(2*b->ref));
2778
    }
2779
}
2780

    
2781
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2782
    Plane *p= &s->plane[plane_index];
2783
    const int block_size = MB_SIZE >> s->block_max_depth;
2784
    const int block_w    = plane_index ? block_size/2 : block_size;
2785
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2786
    const int ref_stride= s->current_picture.linesize[plane_index];
2787
    uint8_t *dst= s->current_picture.data[plane_index];
2788
    uint8_t *src= s->  input_picture.data[plane_index];
2789
    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2790
    uint8_t *cur = s->scratchbuf;
2791
    uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2792
    const int b_stride = s->b_width << s->block_max_depth;
2793
    const int b_height = s->b_height<< s->block_max_depth;
2794
    const int w= p->width;
2795
    const int h= p->height;
2796
    int distortion;
2797
    int rate= 0;
2798
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2799
    int sx= block_w*mb_x - block_w/2;
2800
    int sy= block_w*mb_y - block_w/2;
2801
    int x0= FFMAX(0,-sx);
2802
    int y0= FFMAX(0,-sy);
2803
    int x1= FFMIN(block_w*2, w-sx);
2804
    int y1= FFMIN(block_w*2, h-sy);
2805
    int i,x,y;
2806

    
2807
    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2808

    
2809
    for(y=y0; y<y1; y++){
2810
        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2811
        const IDWTELEM *pred1 = pred + y*obmc_stride;
2812
        uint8_t *cur1 = cur + y*ref_stride;
2813
        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2814
        for(x=x0; x<x1; x++){
2815
#if FRAC_BITS >= LOG2_OBMC_MAX
2816
            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2817
#else
2818
            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2819
#endif
2820
            v = (v + pred1[x]) >> FRAC_BITS;
2821
            if(v&(~255)) v= ~(v>>31);
2822
            dst1[x] = v;
2823
        }
2824
    }
2825

    
2826
    /* copy the regions where obmc[] = (uint8_t)256 */
2827
    if(LOG2_OBMC_MAX == 8
2828
        && (mb_x == 0 || mb_x == b_stride-1)
2829
        && (mb_y == 0 || mb_y == b_height-1)){
2830
        if(mb_x == 0)
2831
            x1 = block_w;
2832
        else
2833
            x0 = block_w;
2834
        if(mb_y == 0)
2835
            y1 = block_w;
2836
        else
2837
            y0 = block_w;
2838
        for(y=y0; y<y1; y++)
2839
            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2840
    }
2841

    
2842
    if(block_w==16){
2843
        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2844
        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2845
        /* FIXME cmps overlap but do not cover the wavelet's whole support.
2846
         * So improving the score of one block is not strictly guaranteed
2847
         * to improve the score of the whole frame, thus iterative motion
2848
         * estimation does not always converge. */
2849
        if(s->avctx->me_cmp == FF_CMP_W97)
2850
            distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2851
        else if(s->avctx->me_cmp == FF_CMP_W53)
2852
            distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2853
        else{
2854
            distortion = 0;
2855
            for(i=0; i<4; i++){
2856
                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2857
                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2858
            }
2859
        }
2860
    }else{
2861
        assert(block_w==8);
2862
        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2863
    }
2864

    
2865
    if(plane_index==0){
2866
        for(i=0; i<4; i++){
2867
/* ..RRr
2868
 * .RXx.
2869
 * rxx..
2870
 */
2871
            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2872
        }
2873
        if(mb_x == b_stride-2)
2874
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2875
    }
2876
    return distortion + rate*penalty_factor;
2877
}
2878

    
2879
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2880
    int i, y2;
2881
    Plane *p= &s->plane[plane_index];
2882
    const int block_size = MB_SIZE >> s->block_max_depth;
2883
    const int block_w    = plane_index ? block_size/2 : block_size;
2884
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2885
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2886
    const int ref_stride= s->current_picture.linesize[plane_index];
2887
    uint8_t *dst= s->current_picture.data[plane_index];
2888
    uint8_t *src= s-> input_picture.data[plane_index];
2889
    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2890
    // const has only been removed from zero_dst to suppress a warning
2891
    static IDWTELEM zero_dst[4096]; //FIXME
2892
    const int b_stride = s->b_width << s->block_max_depth;
2893
    const int w= p->width;
2894
    const int h= p->height;
2895
    int distortion= 0;
2896
    int rate= 0;
2897
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2898

    
2899
    for(i=0; i<9; i++){
2900
        int mb_x2= mb_x + (i%3) - 1;
2901
        int mb_y2= mb_y + (i/3) - 1;
2902
        int x= block_w*mb_x2 + block_w/2;
2903
        int y= block_w*mb_y2 + block_w/2;
2904

    
2905
        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2906
                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2907

    
2908
        //FIXME find a cleaner/simpler way to skip the outside stuff
2909
        for(y2= y; y2<0; y2++)
2910
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2911
        for(y2= h; y2<y+block_w; y2++)
2912
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2913
        if(x<0){
2914
            for(y2= y; y2<y+block_w; y2++)
2915
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2916
        }
2917
        if(x+block_w > w){
2918
            for(y2= y; y2<y+block_w; y2++)
2919
                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2920
        }
2921

    
2922
        assert(block_w== 8 || block_w==16);
2923
        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2924
    }
2925

    
2926
    if(plane_index==0){
2927
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2928
        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2929

    
2930
/* ..RRRr
2931
 * .RXXx.
2932
 * .RXXx.
2933
 * rxxx.
2934
 */
2935
        if(merged)
2936
            rate = get_block_bits(s, mb_x, mb_y, 2);
2937
        for(i=merged?4:0; i<9; i++){
2938
            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2939
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2940
        }
2941
    }
2942
    return distortion + rate*penalty_factor;
2943
}
2944

    
2945
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2946
    const int b_stride= s->b_width << s->block_max_depth;
2947
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2948
    BlockNode backup= *block;
2949
    int rd, index, value;
2950

    
2951
    assert(mb_x>=0 && mb_y>=0);
2952
    assert(mb_x<b_stride);
2953

    
2954
    if(intra){
2955
        block->color[0] = p[0];
2956
        block->color[1] = p[1];
2957
        block->color[2] = p[2];
2958
        block->type |= BLOCK_INTRA;
2959
    }else{
2960
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2961
        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2962
        if(s->me_cache[index] == value)
2963
            return 0;
2964
        s->me_cache[index]= value;
2965

    
2966
        block->mx= p[0];
2967
        block->my= p[1];
2968
        block->type &= ~BLOCK_INTRA;
2969
    }
2970

    
2971
    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2972

    
2973
//FIXME chroma
2974
    if(rd < *best_rd){
2975
        *best_rd= rd;
2976
        return 1;
2977
    }else{
2978
        *block= backup;
2979
        return 0;
2980
    }
2981
}
2982

    
2983
/* special case for int[2] args we discard afterwards,
2984
 * fixes compilation problem with gcc 2.95 */
2985
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2986
    int p[2] = {p0, p1};
2987
    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2988
}
2989

    
2990
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2991
    const int b_stride= s->b_width << s->block_max_depth;
2992
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2993
    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2994
    int rd, index, value;
2995

    
2996
    assert(mb_x>=0 && mb_y>=0);
2997
    assert(mb_x<b_stride);
2998
    assert(((mb_x|mb_y)&1) == 0);
2999

    
3000
    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3001
    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3002
    if(s->me_cache[index] == value)
3003
        return 0;
3004
    s->me_cache[index]= value;
3005

    
3006
    block->mx= p0;
3007
    block->my= p1;
3008
    block->ref= ref;
3009
    block->type &= ~BLOCK_INTRA;
3010
    block[1]= block[b_stride]= block[b_stride+1]= *block;
3011

    
3012
    rd= get_4block_rd(s, mb_x, mb_y, 0);
3013

    
3014
//FIXME chroma
3015
    if(rd < *best_rd){
3016
        *best_rd= rd;
3017
        return 1;
3018
    }else{
3019
        block[0]= backup[0];
3020
        block[1]= backup[1];
3021
        block[b_stride]= backup[2];
3022
        block[b_stride+1]= backup[3];
3023
        return 0;
3024
    }
3025
}
3026

    
3027
static void iterative_me(SnowContext *s){
3028
    int pass, mb_x, mb_y;
3029
    const int b_width = s->b_width  << s->block_max_depth;
3030
    const int b_height= s->b_height << s->block_max_depth;
3031
    const int b_stride= b_width;
3032
    int color[3];
3033

    
3034
    {
3035
        RangeCoder r = s->c;
3036
        uint8_t state[sizeof(s->block_state)];
3037
        memcpy(state, s->block_state, sizeof(s->block_state));
3038
        for(mb_y= 0; mb_y<s->b_height; mb_y++)
3039
            for(mb_x= 0; mb_x<s->b_width; mb_x++)
3040
                encode_q_branch(s, 0, mb_x, mb_y);
3041
        s->c = r;
3042
        memcpy(s->block_state, state, sizeof(s->block_state));
3043
    }
3044

    
3045
    for(pass=0; pass<25; pass++){
3046
        int change= 0;
3047

    
3048
        for(mb_y= 0; mb_y<b_height; mb_y++){
3049
            for(mb_x= 0; mb_x<b_width; mb_x++){
3050
                int dia_change, i, j, ref;
3051
                int best_rd= INT_MAX, ref_rd;
3052
                BlockNode backup, ref_b;
3053
                const int index= mb_x + mb_y * b_stride;
3054
                BlockNode *block= &s->block[index];
3055
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3056
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3057
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3058
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3059
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3060
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3061
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3062
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3063
                const int b_w= (MB_SIZE >> s->block_max_depth);
3064
                uint8_t obmc_edged[b_w*2][b_w*2];
3065

    
3066
                if(pass && (block->type & BLOCK_OPT))
3067
                    continue;
3068
                block->type |= BLOCK_OPT;
3069

    
3070
                backup= *block;
3071

    
3072
                if(!s->me_cache_generation)
3073
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3074
                s->me_cache_generation += 1<<22;
3075

    
3076
                //FIXME precalculate
3077
                {
3078
                    int x, y;
3079
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3080
                    if(mb_x==0)
3081
                        for(y=0; y<b_w*2; y++)
3082
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3083
                    if(mb_x==b_stride-1)
3084
                        for(y=0; y<b_w*2; y++)
3085
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3086
                    if(mb_y==0){
3087
                        for(x=0; x<b_w*2; x++)
3088
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3089
                        for(y=1; y<b_w; y++)
3090
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3091
                    }
3092
                    if(mb_y==b_height-1){
3093
                        for(x=0; x<b_w*2; x++)
3094
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3095
                        for(y=b_w; y<b_w*2-1; y++)
3096
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3097
                    }
3098
                }
3099

    
3100
                //skip stuff outside the picture
3101
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3102
                    uint8_t *src= s->  input_picture.data[0];
3103
                    uint8_t *dst= s->current_picture.data[0];
3104
                    const int stride= s->current_picture.linesize[0];
3105
                    const int block_w= MB_SIZE >> s->block_max_depth;
3106
                    const int sx= block_w*mb_x - block_w/2;
3107
                    const int sy= block_w*mb_y - block_w/2;
3108
                    const int w= s->plane[0].width;
3109
                    const int h= s->plane[0].height;
3110
                    int y;
3111

    
3112
                    for(y=sy; y<0; y++)
3113
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3114
                    for(y=h; y<sy+block_w*2; y++)
3115
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3116
                    if(sx<0){
3117
                        for(y=sy; y<sy+block_w*2; y++)
3118
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3119
                    }
3120
                    if(sx+block_w*2 > w){
3121
                        for(y=sy; y<sy+block_w*2; y++)
3122
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3123
                    }
3124
                }
3125

    
3126
                // intra(black) = neighbors' contribution to the current block
3127
                for(i=0; i<3; i++)
3128
                    color[i]= get_dc(s, mb_x, mb_y, i);
3129

    
3130
                // get previous score (cannot be cached due to OBMC)
3131
                if(pass > 0 && (block->type&BLOCK_INTRA)){
3132
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
3133
                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3134
                }else
3135
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3136

    
3137
                ref_b= *block;
3138
                ref_rd= best_rd;
3139
                for(ref=0; ref < s->ref_frames; ref++){
3140
                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3141
                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3142
                        continue;
3143
                    block->ref= ref;
3144
                    best_rd= INT_MAX;
3145

    
3146
                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3147
                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3148
                    if(tb)
3149
                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3150
                    if(lb)
3151
                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3152
                    if(rb)
3153
                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3154
                    if(bb)
3155
                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3156

    
3157
                    /* fullpel ME */
3158
                    //FIXME avoid subpel interpolation / round to nearest integer
3159
                    do{
3160
                        dia_change=0;
3161
                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3162
                            for(j=0; j<i; j++){
3163
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3164
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3165
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3166
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3167
                            }
3168
                        }
3169
                    }while(dia_change);
3170
                    /* subpel ME */
3171
                    do{
3172
                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3173
                        dia_change=0;
3174
                        for(i=0; i<8; i++)
3175
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3176
                    }while(dia_change);
3177
                    //FIXME or try the standard 2 pass qpel or similar
3178

    
3179
                    mvr[0][0]= block->mx;
3180
                    mvr[0][1]= block->my;
3181
                    if(ref_rd > best_rd){
3182
                        ref_rd= best_rd;
3183
                        ref_b= *block;
3184
                    }
3185
                }
3186
                best_rd= ref_rd;
3187
                *block= ref_b;
3188
#if 1
3189
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3190
                //FIXME RD style color selection
3191
#endif
3192
                if(!same_block(block, &backup)){
3193
                    if(tb ) tb ->type &= ~BLOCK_OPT;
3194
                    if(lb ) lb ->type &= ~BLOCK_OPT;
3195
                    if(rb ) rb ->type &= ~BLOCK_OPT;
3196
                    if(bb ) bb ->type &= ~BLOCK_OPT;
3197
                    if(tlb) tlb->type &= ~BLOCK_OPT;
3198
                    if(trb) trb->type &= ~BLOCK_OPT;
3199
                    if(blb) blb->type &= ~BLOCK_OPT;
3200
                    if(brb) brb->type &= ~BLOCK_OPT;
3201
                    change ++;
3202
                }
3203
            }
3204
        }
3205
        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3206
        if(!change)
3207
            break;
3208
    }
3209

    
3210
    if(s->block_max_depth == 1){
3211
        int change= 0;
3212
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
3213
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
3214
                int i;
3215
                int best_rd, init_rd;
3216
                const int index= mb_x + mb_y * b_stride;
3217
                BlockNode *b[4];
3218

    
3219
                b[0]= &s->block[index];
3220
                b[1]= b[0]+1;
3221
                b[2]= b[0]+b_stride;
3222
                b[3]= b[2]+1;
3223
                if(same_block(b[0], b[1]) &&
3224
                   same_block(b[0], b[2]) &&
3225
                   same_block(b[0], b[3]))
3226
                    continue;
3227

    
3228
                if(!s->me_cache_generation)
3229
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3230
                s->me_cache_generation += 1<<22;
3231

    
3232
                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3233

    
3234
                //FIXME more multiref search?
3235
                check_4block_inter(s, mb_x, mb_y,
3236
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3237
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3238

    
3239
                for(i=0; i<4; i++)
3240
                    if(!(b[i]->type&BLOCK_INTRA))
3241
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3242

    
3243
                if(init_rd != best_rd)
3244
                    change++;
3245
            }
3246
        }
3247
        av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3248
    }
3249
}
3250

    
3251
static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3252
    const int w= b->width;
3253
    const int h= b->height;
3254
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3255
    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3256
    int x,y, thres1, thres2;
3257

    
3258
    if(s->qlog == LOSSLESS_QLOG){
3259
        for(y=0; y<h; y++)
3260
            for(x=0; x<w; x++)
3261
                dst[x + y*stride]= src[x + y*stride];
3262
        return;
3263
    }
3264

    
3265
    bias= bias ? 0 : (3*qmul)>>3;
3266
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3267
    thres2= 2*thres1;
3268

    
3269
    if(!bias){
3270
        for(y=0; y<h; y++){
3271
            for(x=0; x<w; x++){
3272
                int i= src[x + y*stride];
3273

    
3274
                if((unsigned)(i+thres1) > thres2){
3275
                    if(i>=0){
3276
                        i<<= QEXPSHIFT;
3277
                        i/= qmul; //FIXME optimize
3278
                        dst[x + y*stride]=  i;
3279
                    }else{
3280
                        i= -i;
3281
                        i<<= QEXPSHIFT;
3282
                        i/= qmul; //FIXME optimize
3283
                        dst[x + y*stride]= -i;
3284
                    }
3285
                }else
3286
                    dst[x + y*stride]= 0;
3287
            }
3288
        }
3289
    }else{
3290
        for(y=0; y<h; y++){
3291
            for(x=0; x<w; x++){
3292
                int i= src[x + y*stride];
3293

    
3294
                if((unsigned)(i+thres1) > thres2){
3295
                    if(i>=0){
3296
                        i<<= QEXPSHIFT;
3297
                        i= (i + bias) / qmul; //FIXME optimize
3298
                        dst[x + y*stride]=  i;
3299
                    }else{
3300
                        i= -i;
3301
                        i<<= QEXPSHIFT;
3302
                        i= (i + bias) / qmul; //FIXME optimize
3303
                        dst[x + y*stride]= -i;
3304
                    }
3305
                }else
3306
                    dst[x + y*stride]= 0;
3307
            }
3308
        }
3309
    }
3310
}
3311

    
3312
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3313
    const int w= b->width;
3314
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3315
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3316
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3317
    int x,y;
3318

    
3319
    if(s->qlog == LOSSLESS_QLOG) return;
3320

    
3321
    for(y=start_y; y<end_y; y++){
3322
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3323
        IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3324
        for(x=0; x<w; x++){
3325
            int i= line[x];
3326
            if(i<0){
3327
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3328
            }else if(i>0){
3329
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3330
            }
3331
        }
3332
    }
3333
}
3334

    
3335
static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3336
    const int w= b->width;
3337
    const int h= b->height;
3338
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3339
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3340
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3341
    int x,y;
3342

    
3343
    if(s->qlog == LOSSLESS_QLOG) return;
3344

    
3345
    for(y=0; y<h; y++){
3346
        for(x=0; x<w; x++){
3347
            int i= src[x + y*stride];
3348
            if(i<0){
3349
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3350
            }else if(i>0){
3351
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3352
            }
3353
        }
3354
    }
3355
}
3356

    
3357
static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3358
    const int w= b->width;
3359
    const int h= b->height;
3360
    int x,y;
3361

    
3362
    for(y=h-1; y>=0; y--){
3363
        for(x=w-1; x>=0; x--){
3364
            int i= x + y*stride;
3365

    
3366
            if(x){
3367
                if(use_median){
3368
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3369
                    else  src[i] -= src[i - 1];
3370
                }else{
3371
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3372
                    else  src[i] -= src[i - 1];
3373
                }
3374
            }else{
3375
                if(y) src[i] -= src[i - stride];
3376
            }
3377
        }
3378
    }
3379
}
3380

    
3381
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3382
    const int w= b->width;
3383
    int x,y;
3384

    
3385
    IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3386
    IDWTELEM * prev;
3387

    
3388
    if (start_y != 0)
3389
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3390

    
3391
    for(y=start_y; y<end_y; y++){
3392
        prev = line;
3393
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3394
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3395
        for(x=0; x<w; x++){
3396
            if(x){
3397
                if(use_median){
3398
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3399
                    else  line[x] += line[x - 1];
3400
                }else{
3401
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3402
                    else  line[x] += line[x - 1];
3403
                }
3404
            }else{
3405
                if(y) line[x] += prev[x];
3406
            }
3407
        }
3408
    }
3409
}
3410

    
3411
static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3412
    const int w= b->width;
3413
    const int h= b->height;
3414
    int x,y;
3415

    
3416
    for(y=0; y<h; y++){
3417
        for(x=0; x<w; x++){
3418
            int i= x + y*stride;
3419

    
3420
            if(x){
3421
                if(use_median){
3422
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3423
                    else  src[i] += src[i - 1];
3424
                }else{
3425
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3426
                    else  src[i] += src[i - 1];
3427
                }
3428
            }else{
3429
                if(y) src[i] += src[i - stride];
3430
            }
3431
        }
3432
    }
3433
}
3434

    
3435
static void encode_qlogs(SnowContext *s){
3436
    int plane_index, level, orientation;
3437

    
3438
    for(plane_index=0; plane_index<2; plane_index++){
3439
        for(level=0; level<s->spatial_decomposition_count; level++){
3440
            for(orientation=level ? 1:0; orientation<4; orientation++){
3441
                if(orientation==2) continue;
3442
                put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3443
            }
3444
        }
3445
    }
3446
}
3447

    
3448
static void encode_header(SnowContext *s){
3449
    int plane_index, i;
3450
    uint8_t kstate[32];
3451

    
3452
    memset(kstate, MID_STATE, sizeof(kstate));
3453

    
3454
    put_rac(&s->c, kstate, s->keyframe);
3455
    if(s->keyframe || s->always_reset){
3456
        reset_contexts(s);
3457
        s->last_spatial_decomposition_type=
3458
        s->last_qlog=
3459
        s->last_qbias=
3460
        s->last_mv_scale=
3461
        s->last_block_max_depth= 0;
3462
        for(plane_index=0; plane_index<2; plane_index++){
3463
            Plane *p= &s->plane[plane_index];
3464
            p->last_htaps=0;
3465
            p->last_diag_mc=0;
3466
            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3467
        }
3468
    }
3469
    if(s->keyframe){
3470
        put_symbol(&s->c, s->header_state, s->version, 0);
3471
        put_rac(&s->c, s->header_state, s->always_reset);
3472
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3473
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3474
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3475
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3476
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3477
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3478
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3479
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3480
        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3481

    
3482
        encode_qlogs(s);
3483
    }
3484

    
3485
    if(!s->keyframe){
3486
        int update_mc=0;
3487
        for(plane_index=0; plane_index<2; plane_index++){
3488
            Plane *p= &s->plane[plane_index];
3489
            update_mc |= p->last_htaps   != p->htaps;
3490
            update_mc |= p->last_diag_mc != p->diag_mc;
3491
            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3492
        }
3493
        put_rac(&s->c, s->header_state, update_mc);
3494
        if(update_mc){
3495
            for(plane_index=0; plane_index<2; plane_index++){
3496
                Plane *p= &s->plane[plane_index];
3497
                put_rac(&s->c, s->header_state, p->diag_mc);
3498
                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3499
                for(i= p->htaps/2; i; i--)
3500
                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3501
            }
3502
        }
3503
        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3504
            put_rac(&s->c, s->header_state, 1);
3505
            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3506
            encode_qlogs(s);
3507
        }else
3508
            put_rac(&s->c, s->header_state, 0);
3509
    }
3510

    
3511
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3512
    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3513
    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3514
    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3515
    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3516

    
3517
}
3518

    
3519
static void update_last_header_values(SnowContext *s){
3520
    int plane_index;
3521

    
3522
    if(!s->keyframe){
3523
        for(plane_index=0; plane_index<2; plane_index++){
3524
            Plane *p= &s->plane[plane_index];
3525
            p->last_diag_mc= p->diag_mc;
3526
            p->last_htaps  = p->htaps;
3527
            memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3528
        }
3529
    }
3530

    
3531
    s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
3532
    s->last_qlog                        = s->qlog;
3533
    s->last_qbias                       = s->qbias;
3534
    s->last_mv_scale                    = s->mv_scale;
3535
    s->last_block_max_depth             = s->block_max_depth;
3536
    s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3537
}
3538

    
3539
static void decode_qlogs(SnowContext *s){
3540
    int plane_index, level, orientation;
3541

    
3542
    for(plane_index=0; plane_index<3; plane_index++){
3543
        for(level=0; level<s->spatial_decomposition_count; level++){
3544
            for(orientation=level ? 1:0; orientation<4; orientation++){
3545
                int q;
3546
                if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3547
                else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3548
                else                    q= get_symbol(&s->c, s->header_state, 1);
3549
                s->plane[plane_index].band[level][orientation].qlog= q;
3550
            }
3551
        }
3552
    }
3553
}
3554

    
3555
static int decode_header(SnowContext *s){
3556
    int plane_index;
3557
    uint8_t kstate[32];
3558

    
3559
    memset(kstate, MID_STATE, sizeof(kstate));
3560

    
3561
    s->keyframe= get_rac(&s->c, kstate);
3562
    if(s->keyframe || s->always_reset){
3563
        reset_contexts(s);
3564
        s->spatial_decomposition_type=
3565
        s->qlog=
3566
        s->qbias=
3567
        s->mv_scale=
3568
        s->block_max_depth= 0;
3569
    }
3570
    if(s->keyframe){
3571
        s->version= get_symbol(&s->c, s->header_state, 0);
3572
        if(s->version>0){
3573
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3574
            return -1;
3575
        }
3576
        s->always_reset= get_rac(&s->c, s->header_state);
3577
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3578
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3579
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3580
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3581
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3582
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3583
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3584
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3585
        s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3586

    
3587
        decode_qlogs(s);
3588
    }
3589

    
3590
    if(!s->keyframe){
3591
        if(get_rac(&s->c, s->header_state)){
3592
            for(plane_index=0; plane_index<2; plane_index++){
3593
                int htaps, i, sum=0;
3594
                Plane *p= &s->plane[plane_index];
3595
                p->diag_mc= get_rac(&s->c, s->header_state);
3596
                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3597
                if((unsigned)htaps > HTAPS_MAX || htaps==0)
3598
                    return -1;
3599
                p->htaps= htaps;
3600
                for(i= htaps/2; i; i--){
3601
                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3602
                    sum += p->hcoeff[i];
3603
                }
3604
                p->hcoeff[0]= 32-sum;
3605
            }
3606
            s->plane[2].diag_mc= s->plane[1].diag_mc;
3607
            s->plane[2].htaps  = s->plane[1].htaps;
3608
            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3609
        }
3610
        if(get_rac(&s->c, s->header_state)){
3611
            s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3612
            decode_qlogs(s);
3613
        }
3614
    }
3615

    
3616
    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3617
    if(s->spatial_decomposition_type > 1){
3618
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3619
        return -1;
3620
    }
3621

    
3622
    s->qlog           += get_symbol(&s->c, s->header_state, 1);
3623
    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
3624
    s->qbias          += get_symbol(&s->c, s->header_state, 1);
3625
    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3626
    if(s->block_max_depth > 1 || s->block_max_depth < 0){
3627
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3628
        s->block_max_depth= 0;
3629
        return -1;
3630
    }
3631

    
3632
    return 0;
3633
}
3634

    
3635
static void init_qexp(void){
3636
    int i;
3637
    double v=128;
3638

    
3639
    for(i=0; i<QROOT; i++){
3640
        qexp[i]= lrintf(v);
3641
        v *= pow(2, 1.0 / QROOT);
3642
    }
3643
}
3644

    
3645
static av_cold int common_init(AVCodecContext *avctx){
3646
    SnowContext *s = avctx->priv_data;
3647
    int width, height;
3648
    int i, j;
3649

    
3650
    s->avctx= avctx;
3651

    
3652
    dsputil_init(&s->dsp, avctx);
3653

    
3654
#define mcf(dx,dy)\
3655
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3656
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3657
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3658
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3659
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3660
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3661

    
3662
    mcf( 0, 0)
3663
    mcf( 4, 0)
3664
    mcf( 8, 0)
3665
    mcf(12, 0)
3666
    mcf( 0, 4)
3667
    mcf( 4, 4)
3668
    mcf( 8, 4)
3669
    mcf(12, 4)
3670
    mcf( 0, 8)
3671
    mcf( 4, 8)
3672
    mcf( 8, 8)
3673
    mcf(12, 8)
3674
    mcf( 0,12)
3675
    mcf( 4,12)
3676
    mcf( 8,12)
3677
    mcf(12,12)
3678

    
3679
#define mcfh(dx,dy)\
3680
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3681
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3682
        mc_block_hpel ## dx ## dy ## 16;\
3683
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3684
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3685
        mc_block_hpel ## dx ## dy ## 8;
3686

    
3687
    mcfh(0, 0)
3688
    mcfh(8, 0)
3689
    mcfh(0, 8)
3690
    mcfh(8, 8)
3691

    
3692
    if(!qexp[0])
3693
        init_qexp();
3694

    
3695
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3696

    
3697
    width= s->avctx->width;
3698
    height= s->avctx->height;
3699

    
3700
    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3701
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3702

    
3703
    for(i=0; i<MAX_REF_FRAMES; i++)
3704
        for(j=0; j<MAX_REF_FRAMES; j++)
3705
            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3706

    
3707
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3708
    s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
3709

    
3710
    return 0;
3711
}
3712

    
3713
static int common_init_after_header(AVCodecContext *avctx){
3714
    SnowContext *s = avctx->priv_data;
3715
    int plane_index, level, orientation;
3716

    
3717
    for(plane_index=0; plane_index<3; plane_index++){
3718
        int w= s->avctx->width;
3719
        int h= s->avctx->height;
3720

    
3721
        if(plane_index){
3722
            w>>= s->chroma_h_shift;
3723
            h>>= s->chroma_v_shift;
3724
        }
3725
        s->plane[plane_index].width = w;
3726
        s->plane[plane_index].height= h;
3727

    
3728
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3729
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3730
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3731

    
3732
                b->buf= s->spatial_dwt_buffer;
3733
                b->level= level;
3734
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3735
                b->width = (w + !(orientation&1))>>1;
3736
                b->height= (h + !(orientation>1))>>1;
3737

    
3738
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3739
                b->buf_x_offset = 0;
3740
                b->buf_y_offset = 0;
3741

    
3742
                if(orientation&1){
3743
                    b->buf += (w+1)>>1;
3744
                    b->buf_x_offset = (w+1)>>1;
3745
                }
3746
                if(orientation>1){
3747
                    b->buf += b->stride>>1;
3748
                    b->buf_y_offset = b->stride_line >> 1;
3749
                }
3750
                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3751

    
3752
                if(level)
3753
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3754
                //FIXME avoid this realloc
3755
                av_freep(&b->x_coeff);
3756
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3757
            }
3758
            w= (w+1)>>1;
3759
            h= (h+1)>>1;
3760
        }
3761
    }
3762

    
3763
    return 0;
3764
}
3765

    
3766
static int qscale2qlog(int qscale){
3767
    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3768
           + 61*QROOT/8; //<64 >60
3769
}
3770

    
3771
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3772
{
3773
    /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3774
     * FIXME we know exact mv bits at this point,
3775
     * but ratecontrol isn't set up to include them. */
3776
    uint32_t coef_sum= 0;
3777
    int level, orientation, delta_qlog;
3778

    
3779
    for(level=0; level<s->spatial_decomposition_count; level++){
3780
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3781
            SubBand *b= &s->plane[0].band[level][orientation];
3782
            IDWTELEM *buf= b->ibuf;
3783
            const int w= b->width;
3784
            const int h= b->height;
3785
            const int stride= b->stride;
3786
            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3787
            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3788
            const int qdiv= (1<<16)/qmul;
3789
            int x, y;
3790
            //FIXME this is ugly
3791
            for(y=0; y<h; y++)
3792
                for(x=0; x<w; x++)
3793
                    buf[x+y*stride]= b->buf[x+y*stride];
3794
            if(orientation==0)
3795
                decorrelate(s, b, buf, stride, 1, 0);
3796
            for(y=0; y<h; y++)
3797
                for(x=0; x<w; x++)
3798
                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3799
        }
3800
    }
3801

    
3802
    /* ugly, ratecontrol just takes a sqrt again */
3803
    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3804
    assert(coef_sum < INT_MAX);
3805

    
3806
    if(pict->pict_type == FF_I_TYPE){
3807
        s->m.current_picture.mb_var_sum= coef_sum;
3808
        s->m.current_picture.mc_mb_var_sum= 0;
3809
    }else{
3810
        s->m.current_picture.mc_mb_var_sum= coef_sum;
3811
        s->m.current_picture.mb_var_sum= 0;
3812
    }
3813

    
3814
    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3815
    if (pict->quality < 0)
3816
        return INT_MIN;
3817
    s->lambda= pict->quality * 3/2;
3818
    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3819
    s->qlog+= delta_qlog;
3820
    return delta_qlog;
3821
}
3822

    
3823
static void calculate_visual_weight(SnowContext *s, Plane *p){
3824
    int width = p->width;
3825
    int height= p->height;
3826
    int level, orientation, x, y;
3827

    
3828
    for(level=0; level<s->spatial_decomposition_count; level++){
3829
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3830
            SubBand *b= &p->band[level][orientation];
3831
            IDWTELEM *ibuf= b->ibuf;
3832
            int64_t error=0;
3833

    
3834
            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3835
            ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3836
            ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3837
            for(y=0; y<height; y++){
3838
                for(x=0; x<width; x++){
3839
                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3840
                    error += d*d;
3841
                }
3842
            }
3843

    
3844
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3845
        }
3846
    }
3847
}
3848

    
3849
#define QUANTIZE2 0
3850

    
3851
#if QUANTIZE2==1
3852
#define Q2_STEP 8
3853

    
3854
static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3855
    SubBand *b= &p->band[level][orientation];
3856
    int x, y;
3857
    int xo=0;
3858
    int yo=0;
3859
    int step= 1 << (s->spatial_decomposition_count - level);
3860

    
3861
    if(orientation&1)
3862
        xo= step>>1;
3863
    if(orientation&2)
3864
        yo= step>>1;
3865

    
3866
    //FIXME bias for nonzero ?
3867
    //FIXME optimize
3868
    memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3869
    for(y=0; y<p->height; y++){
3870
        for(x=0; x<p->width; x++){
3871
            int sx= (x-xo + step/2) / step / Q2_STEP;
3872
            int sy= (y-yo + step/2) / step / Q2_STEP;
3873
            int v= r0[x + y*p->width] - r1[x + y*p->width];
3874
            assert(sx>=0 && sy>=0 && sx < score_stride);
3875
            v= ((v+8)>>4)<<4;
3876
            score[sx + sy*score_stride] += v*v;
3877
            assert(score[sx + sy*score_stride] >= 0);
3878
        }
3879
    }
3880
}
3881

    
3882
static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3883
    int level, orientation;
3884

    
3885
    for(level=0; level<s->spatial_decomposition_count; level++){
3886
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3887
            SubBand *b= &p->band[level][orientation];
3888
            IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3889

    
3890
            dequantize(s, b, dst, b->stride);
3891
        }
3892
    }
3893
}
3894

    
3895
static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3896
    int level, orientation, ys, xs, x, y, pass;
3897
    IDWTELEM best_dequant[height * stride];
3898
    IDWTELEM idwt2_buffer[height * stride];
3899
    const int score_stride= (width + 10)/Q2_STEP;
3900
    int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3901
    int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3902
    int threshold= (s->m.lambda * s->m.lambda) >> 6;
3903

    
3904
    //FIXME pass the copy cleanly ?
3905

    
3906
//    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3907
    ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3908

    
3909
    for(level=0; level<s->spatial_decomposition_count; level++){
3910
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3911
            SubBand *b= &p->band[level][orientation];
3912
            IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3913
             DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
3914
            assert(src == b->buf); // code does not depend on this but it is true currently
3915

    
3916
            quantize(s, b, dst, src, b->stride, s->qbias);
3917
        }
3918
    }
3919
    for(pass=0; pass<1; pass++){
3920
        if(s->qbias == 0) //keyframe
3921
            continue;
3922
        for(level=0; level<s->spatial_decomposition_count; level++){
3923
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3924
                SubBand *b= &p->band[level][orientation];
3925
                IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3926
                IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3927

    
3928
                for(ys= 0; ys<Q2_STEP; ys++){
3929
                    for(xs= 0; xs<Q2_STEP; xs++){
3930
                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3931
                        dequantize_all(s, p, idwt2_buffer, width, height);
3932
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3933
                        find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3934
                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3935
                        for(y=ys; y<b->height; y+= Q2_STEP){
3936
                            for(x=xs; x<b->width; x+= Q2_STEP){
3937
                                if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3938
                                if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3939
                                //FIXME try more than just --
3940
                            }
3941
                        }
3942
                        dequantize_all(s, p, idwt2_buffer, width, height);
3943
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3944
                        find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3945
                        for(y=ys; y<b->height; y+= Q2_STEP){
3946
                            for(x=xs; x<b->width; x+= Q2_STEP){
3947
                                int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3948
                                if(score[score_idx] <= best_score[score_idx] + threshold){
3949
                                    best_score[score_idx]= score[score_idx];
3950
                                    if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3951
                                    if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3952
                                    //FIXME copy instead
3953
                                }
3954
                            }
3955
                        }
3956
                    }
3957
                }
3958
            }
3959
        }
3960
    }
3961
    memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3962
}
3963

    
3964
#endif /* QUANTIZE2==1 */
3965

    
3966
static av_cold int encode_init(AVCodecContext *avctx)
3967
{
3968
    SnowContext *s = avctx->priv_data;
3969
    int plane_index;
3970

    
3971
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3972
        av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3973
               "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3974
        return -1;
3975
    }
3976

    
3977
    if(avctx->prediction_method == DWT_97
3978
       && (avctx->flags & CODEC_FLAG_QSCALE)
3979
       && avctx->global_quality == 0){
3980
        av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3981
        return -1;
3982
    }
3983

    
3984
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3985

    
3986
    s->chroma_h_shift= 1; //FIXME XXX
3987
    s->chroma_v_shift= 1;
3988

    
3989
    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3990
    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3991

    
3992
    for(plane_index=0; plane_index<3; plane_index++){
3993
        s->plane[plane_index].diag_mc= 1;
3994
        s->plane[plane_index].htaps= 6;
3995
        s->plane[plane_index].hcoeff[0]=  40;
3996
        s->plane[plane_index].hcoeff[1]= -10;
3997
        s->plane[plane_index].hcoeff[2]=   2;
3998