Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 199436b9

History | View | Annotate | Download (180 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

    
21
#include "avcodec.h"
22
#include "dsputil.h"
23
#include "snow.h"
24

    
25
#include "rangecoder.h"
26
#include "mathops.h"
27

    
28
#include "mpegvideo.h"
29

    
30
#undef NDEBUG
31
#include <assert.h>
32

    
33
static const int8_t quant3[256]={
34
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50
};
51
static const int8_t quant3b[256]={
52
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
};
69
static const int8_t quant3bA[256]={
70
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
};
87
static const int8_t quant5[256]={
88
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104
};
105
static const int8_t quant7[256]={
106
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122
};
123
static const int8_t quant9[256]={
124
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140
};
141
static const int8_t quant11[256]={
142
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158
};
159
static const int8_t quant13[256]={
160
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
176
};
177

    
178
#if 0 //64*cubic
179
static const uint8_t obmc32[1024]={
180
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
181
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
182
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
183
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
184
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
185
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
186
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
187
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
188
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
189
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
190
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
191
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
192
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
193
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
194
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
195
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
196
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
197
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
198
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
199
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
200
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
201
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
202
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
203
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
204
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
205
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
206
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
207
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
208
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
209
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
210
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
211
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
212
//error:0.000022
213
};
214
static const uint8_t obmc16[256]={
215
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
216
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
217
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
218
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
219
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
220
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
221
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
222
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
223
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
224
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
225
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
226
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
227
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
228
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
229
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
230
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
231
//error:0.000033
232
};
233
#elif 1 // 64*linear
234
static const uint8_t obmc32[1024]={
235
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
236
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
237
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
238
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
239
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
240
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
241
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
242
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
243
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
244
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
245
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
246
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
247
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
248
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
249
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
250
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
251
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
252
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
253
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
254
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
255
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
256
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
257
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
258
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
259
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
260
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
261
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
262
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
263
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
264
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
265
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
266
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
267
 //error:0.000020
268
};
269
static const uint8_t obmc16[256]={
270
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
271
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
272
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
273
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
274
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
275
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
282
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
283
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
284
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
285
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
286
//error:0.000015
287
};
288
#else //64*cos
289
static const uint8_t obmc32[1024]={
290
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
291
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
292
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
293
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
294
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
295
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
296
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
297
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
298
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
299
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
300
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
301
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
302
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
303
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
304
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
305
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
306
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
307
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
308
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
309
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
310
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
311
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
312
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
313
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
314
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
315
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
316
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
317
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
318
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
319
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
320
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
321
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
322
//error:0.000022
323
};
324
static const uint8_t obmc16[256]={
325
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
326
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
327
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
328
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
329
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
330
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
331
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
332
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
333
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
334
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
335
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
336
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
337
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
338
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
339
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
340
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
341
//error:0.000022
342
};
343
#endif /* 0 */
344

    
345
//linear *64
346
static const uint8_t obmc8[64]={
347
  4, 12, 20, 28, 28, 20, 12,  4,
348
 12, 36, 60, 84, 84, 60, 36, 12,
349
 20, 60,100,140,140,100, 60, 20,
350
 28, 84,140,196,196,140, 84, 28,
351
 28, 84,140,196,196,140, 84, 28,
352
 20, 60,100,140,140,100, 60, 20,
353
 12, 36, 60, 84, 84, 60, 36, 12,
354
  4, 12, 20, 28, 28, 20, 12,  4,
355
//error:0.000000
356
};
357

    
358
//linear *64
359
static const uint8_t obmc4[16]={
360
 16, 48, 48, 16,
361
 48,144,144, 48,
362
 48,144,144, 48,
363
 16, 48, 48, 16,
364
//error:0.000000
365
};
366

    
367
static const uint8_t * const obmc_tab[4]={
368
    obmc32, obmc16, obmc8, obmc4
369
};
370

    
371
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372

    
373
typedef struct BlockNode{
374
    int16_t mx;
375
    int16_t my;
376
    uint8_t ref;
377
    uint8_t color[3];
378
    uint8_t type;
379
//#define TYPE_SPLIT    1
380
#define BLOCK_INTRA   1
381
#define BLOCK_OPT     2
382
//#define TYPE_NOCOLOR  4
383
    uint8_t level; //FIXME merge into type?
384
}BlockNode;
385

    
386
static const BlockNode null_block= { //FIXME add border maybe
387
    .color= {128,128,128},
388
    .mx= 0,
389
    .my= 0,
390
    .ref= 0,
391
    .type= 0,
392
    .level= 0,
393
};
394

    
395
#define LOG2_MB_SIZE 4
396
#define MB_SIZE (1<<LOG2_MB_SIZE)
397
#define ENCODER_EXTRA_BITS 4
398
#define HTAPS_MAX 8
399

    
400
typedef struct x_and_coeff{
401
    int16_t x;
402
    uint16_t coeff;
403
} x_and_coeff;
404

    
405
typedef struct SubBand{
406
    int level;
407
    int stride;
408
    int width;
409
    int height;
410
    int qlog;        ///< log(qscale)/log[2^(1/6)]
411
    DWTELEM *buf;
412
    IDWTELEM *ibuf;
413
    int buf_x_offset;
414
    int buf_y_offset;
415
    int stride_line; ///< Stride measured in lines, not pixels.
416
    x_and_coeff * x_coeff;
417
    struct SubBand *parent;
418
    uint8_t state[/*7*2*/ 7 + 512][32];
419
}SubBand;
420

    
421
typedef struct Plane{
422
    int width;
423
    int height;
424
    SubBand band[MAX_DECOMPOSITIONS][4];
425

    
426
    int htaps;
427
    int8_t hcoeff[HTAPS_MAX/2];
428
    int diag_mc;
429
    int fast_mc;
430

    
431
    int last_htaps;
432
    int8_t last_hcoeff[HTAPS_MAX/2];
433
    int last_diag_mc;
434
}Plane;
435

    
436
typedef struct SnowContext{
437
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
438

    
439
    AVCodecContext *avctx;
440
    RangeCoder c;
441
    DSPContext dsp;
442
    AVFrame new_picture;
443
    AVFrame input_picture;              ///< new_picture with the internal linesizes
444
    AVFrame current_picture;
445
    AVFrame last_picture[MAX_REF_FRAMES];
446
    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
447
    AVFrame mconly_picture;
448
//     uint8_t q_context[16];
449
    uint8_t header_state[32];
450
    uint8_t block_state[128 + 32*128];
451
    int keyframe;
452
    int always_reset;
453
    int version;
454
    int spatial_decomposition_type;
455
    int last_spatial_decomposition_type;
456
    int temporal_decomposition_type;
457
    int spatial_decomposition_count;
458
    int last_spatial_decomposition_count;
459
    int temporal_decomposition_count;
460
    int max_ref_frames;
461
    int ref_frames;
462
    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
463
    uint32_t *ref_scores[MAX_REF_FRAMES];
464
    DWTELEM *spatial_dwt_buffer;
465
    IDWTELEM *spatial_idwt_buffer;
466
    int colorspace_type;
467
    int chroma_h_shift;
468
    int chroma_v_shift;
469
    int spatial_scalability;
470
    int qlog;
471
    int last_qlog;
472
    int lambda;
473
    int lambda2;
474
    int pass1_rc;
475
    int mv_scale;
476
    int last_mv_scale;
477
    int qbias;
478
    int last_qbias;
479
#define QBIAS_SHIFT 3
480
    int b_width;
481
    int b_height;
482
    int block_max_depth;
483
    int last_block_max_depth;
484
    Plane plane[MAX_PLANES];
485
    BlockNode *block;
486
#define ME_CACHE_SIZE 1024
487
    int me_cache[ME_CACHE_SIZE];
488
    int me_cache_generation;
489
    slice_buffer sb;
490

    
491
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
492

    
493
    uint8_t *scratchbuf;
494
}SnowContext;
495

    
496
typedef struct {
497
    IDWTELEM *b0;
498
    IDWTELEM *b1;
499
    IDWTELEM *b2;
500
    IDWTELEM *b3;
501
    int y;
502
} DWTCompose;
503

    
504
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
506

    
507
static void iterative_me(SnowContext *s);
508

    
509
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
510
{
511
    int i;
512

    
513
    buf->base_buffer = base_buffer;
514
    buf->line_count = line_count;
515
    buf->line_width = line_width;
516
    buf->data_count = max_allocated_lines;
517
    buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
518
    buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
519

    
520
    for(i = 0; i < max_allocated_lines; i++){
521
        buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
522
    }
523

    
524
    buf->data_stack_top = max_allocated_lines - 1;
525
}
526

    
527
static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
528
{
529
    int offset;
530
    IDWTELEM * buffer;
531

    
532
    assert(buf->data_stack_top >= 0);
533
//  assert(!buf->line[line]);
534
    if (buf->line[line])
535
        return buf->line[line];
536

    
537
    offset = buf->line_width * line;
538
    buffer = buf->data_stack[buf->data_stack_top];
539
    buf->data_stack_top--;
540
    buf->line[line] = buffer;
541

    
542
    return buffer;
543
}
544

    
545
static void slice_buffer_release(slice_buffer * buf, int line)
546
{
547
    int offset;
548
    IDWTELEM * buffer;
549

    
550
    assert(line >= 0 && line < buf->line_count);
551
    assert(buf->line[line]);
552

    
553
    offset = buf->line_width * line;
554
    buffer = buf->line[line];
555
    buf->data_stack_top++;
556
    buf->data_stack[buf->data_stack_top] = buffer;
557
    buf->line[line] = NULL;
558
}
559

    
560
static void slice_buffer_flush(slice_buffer * buf)
561
{
562
    int i;
563
    for(i = 0; i < buf->line_count; i++){
564
        if (buf->line[i])
565
            slice_buffer_release(buf, i);
566
    }
567
}
568

    
569
static void slice_buffer_destroy(slice_buffer * buf)
570
{
571
    int i;
572
    slice_buffer_flush(buf);
573

    
574
    for(i = buf->data_count - 1; i >= 0; i--){
575
        av_freep(&buf->data_stack[i]);
576
    }
577
    av_freep(&buf->data_stack);
578
    av_freep(&buf->line);
579
}
580

    
581
#ifdef __sgi
582
// Avoid a name clash on SGI IRIX
583
#undef qexp
584
#endif
585
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
586
static uint8_t qexp[QROOT];
587

    
588
static inline int mirror(int v, int m){
589
    while((unsigned)v > (unsigned)m){
590
        v=-v;
591
        if(v<0) v+= 2*m;
592
    }
593
    return v;
594
}
595

    
596
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
597
    int i;
598

    
599
    if(v){
600
        const int a= FFABS(v);
601
        const int e= av_log2(a);
602
#if 1
603
        const int el= FFMIN(e, 10);
604
        put_rac(c, state+0, 0);
605

    
606
        for(i=0; i<el; i++){
607
            put_rac(c, state+1+i, 1);  //1..10
608
        }
609
        for(; i<e; i++){
610
            put_rac(c, state+1+9, 1);  //1..10
611
        }
612
        put_rac(c, state+1+FFMIN(i,9), 0);
613

    
614
        for(i=e-1; i>=el; i--){
615
            put_rac(c, state+22+9, (a>>i)&1); //22..31
616
        }
617
        for(; i>=0; i--){
618
            put_rac(c, state+22+i, (a>>i)&1); //22..31
619
        }
620

    
621
        if(is_signed)
622
            put_rac(c, state+11 + el, v < 0); //11..21
623
#else
624

    
625
        put_rac(c, state+0, 0);
626
        if(e<=9){
627
            for(i=0; i<e; i++){
628
                put_rac(c, state+1+i, 1);  //1..10
629
            }
630
            put_rac(c, state+1+i, 0);
631

    
632
            for(i=e-1; i>=0; i--){
633
                put_rac(c, state+22+i, (a>>i)&1); //22..31
634
            }
635

    
636
            if(is_signed)
637
                put_rac(c, state+11 + e, v < 0); //11..21
638
        }else{
639
            for(i=0; i<e; i++){
640
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
641
            }
642
            put_rac(c, state+1+FFMIN(i,9), 0);
643

    
644
            for(i=e-1; i>=0; i--){
645
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
646
            }
647

    
648
            if(is_signed)
649
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
650
        }
651
#endif /* 1 */
652
    }else{
653
        put_rac(c, state+0, 1);
654
    }
655
}
656

    
657
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
658
    if(get_rac(c, state+0))
659
        return 0;
660
    else{
661
        int i, e, a;
662
        e= 0;
663
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
664
            e++;
665
        }
666

    
667
        a= 1;
668
        for(i=e-1; i>=0; i--){
669
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
670
        }
671

    
672
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
673
            return -a;
674
        else
675
            return a;
676
    }
677
}
678

    
679
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
680
    int i;
681
    int r= log2>=0 ? 1<<log2 : 1;
682

    
683
    assert(v>=0);
684
    assert(log2>=-4);
685

    
686
    while(v >= r){
687
        put_rac(c, state+4+log2, 1);
688
        v -= r;
689
        log2++;
690
        if(log2>0) r+=r;
691
    }
692
    put_rac(c, state+4+log2, 0);
693

    
694
    for(i=log2-1; i>=0; i--){
695
        put_rac(c, state+31-i, (v>>i)&1);
696
    }
697
}
698

    
699
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
700
    int i;
701
    int r= log2>=0 ? 1<<log2 : 1;
702
    int v=0;
703

    
704
    assert(log2>=-4);
705

    
706
    while(get_rac(c, state+4+log2)){
707
        v+= r;
708
        log2++;
709
        if(log2>0) r+=r;
710
    }
711

    
712
    for(i=log2-1; i>=0; i--){
713
        v+= get_rac(c, state+31-i)<<i;
714
    }
715

    
716
    return v;
717
}
718

    
719
static av_always_inline void
720
lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
721
     int dst_step, int src_step, int ref_step,
722
     int width, int mul, int add, int shift,
723
     int highpass, int inverse){
724
    const int mirror_left= !highpass;
725
    const int mirror_right= (width&1) ^ highpass;
726
    const int w= (width>>1) - 1 + (highpass & width);
727
    int i;
728

    
729
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
730
    if(mirror_left){
731
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
732
        dst += dst_step;
733
        src += src_step;
734
    }
735

    
736
    for(i=0; i<w; i++){
737
        dst[i*dst_step] =
738
            LIFT(src[i*src_step],
739
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
740
                 inverse);
741
    }
742

    
743
    if(mirror_right){
744
        dst[w*dst_step] =
745
            LIFT(src[w*src_step],
746
                 ((mul*2*ref[w*ref_step]+add)>>shift),
747
                 inverse);
748
    }
749
}
750

    
751
static av_always_inline void
752
inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
753
         int dst_step, int src_step, int ref_step,
754
         int width, int mul, int add, int shift,
755
         int highpass, int inverse){
756
    const int mirror_left= !highpass;
757
    const int mirror_right= (width&1) ^ highpass;
758
    const int w= (width>>1) - 1 + (highpass & width);
759
    int i;
760

    
761
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
762
    if(mirror_left){
763
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
764
        dst += dst_step;
765
        src += src_step;
766
    }
767

    
768
    for(i=0; i<w; i++){
769
        dst[i*dst_step] =
770
            LIFT(src[i*src_step],
771
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
772
                 inverse);
773
    }
774

    
775
    if(mirror_right){
776
        dst[w*dst_step] =
777
            LIFT(src[w*src_step],
778
                 ((mul*2*ref[w*ref_step]+add)>>shift),
779
                 inverse);
780
    }
781
}
782

    
783
#ifndef liftS
784
static av_always_inline void
785
liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
786
      int dst_step, int src_step, int ref_step,
787
      int width, int mul, int add, int shift,
788
      int highpass, int inverse){
789
    const int mirror_left= !highpass;
790
    const int mirror_right= (width&1) ^ highpass;
791
    const int w= (width>>1) - 1 + (highpass & width);
792
    int i;
793

    
794
    assert(shift == 4);
795
#define LIFTS(src, ref, inv) \
796
        ((inv) ? \
797
            (src) + (((ref) + 4*(src))>>shift): \
798
            -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
799
    if(mirror_left){
800
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
801
        dst += dst_step;
802
        src += src_step;
803
    }
804

    
805
    for(i=0; i<w; i++){
806
        dst[i*dst_step] =
807
            LIFTS(src[i*src_step],
808
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
809
                  inverse);
810
    }
811

    
812
    if(mirror_right){
813
        dst[w*dst_step] =
814
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
815
    }
816
}
817
static av_always_inline void
818
inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
819
          int dst_step, int src_step, int ref_step,
820
          int width, int mul, int add, int shift,
821
          int highpass, int inverse){
822
    const int mirror_left= !highpass;
823
    const int mirror_right= (width&1) ^ highpass;
824
    const int w= (width>>1) - 1 + (highpass & width);
825
    int i;
826

    
827
    assert(shift == 4);
828
#define LIFTS(src, ref, inv) \
829
    ((inv) ? \
830
        (src) + (((ref) + 4*(src))>>shift): \
831
        -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
832
    if(mirror_left){
833
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
834
        dst += dst_step;
835
        src += src_step;
836
    }
837

    
838
    for(i=0; i<w; i++){
839
        dst[i*dst_step] =
840
            LIFTS(src[i*src_step],
841
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
842
                  inverse);
843
    }
844

    
845
    if(mirror_right){
846
        dst[w*dst_step] =
847
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
848
    }
849
}
850
#endif /* ! liftS */
851

    
852
static void horizontal_decompose53i(DWTELEM *b, int width){
853
    DWTELEM temp[width];
854
    const int width2= width>>1;
855
    int x;
856
    const int w2= (width+1)>>1;
857

    
858
    for(x=0; x<width2; x++){
859
        temp[x   ]= b[2*x    ];
860
        temp[x+w2]= b[2*x + 1];
861
    }
862
    if(width&1)
863
        temp[x   ]= b[2*x    ];
864
#if 0
865
    {
866
    int A1,A2,A3,A4;
867
    A2= temp[1       ];
868
    A4= temp[0       ];
869
    A1= temp[0+width2];
870
    A1 -= (A2 + A4)>>1;
871
    A4 += (A1 + 1)>>1;
872
    b[0+width2] = A1;
873
    b[0       ] = A4;
874
    for(x=1; x+1<width2; x+=2){
875
        A3= temp[x+width2];
876
        A4= temp[x+1     ];
877
        A3 -= (A2 + A4)>>1;
878
        A2 += (A1 + A3 + 2)>>2;
879
        b[x+width2] = A3;
880
        b[x       ] = A2;
881

882
        A1= temp[x+1+width2];
883
        A2= temp[x+2       ];
884
        A1 -= (A2 + A4)>>1;
885
        A4 += (A1 + A3 + 2)>>2;
886
        b[x+1+width2] = A1;
887
        b[x+1       ] = A4;
888
    }
889
    A3= temp[width-1];
890
    A3 -= A2;
891
    A2 += (A1 + A3 + 2)>>2;
892
    b[width -1] = A3;
893
    b[width2-1] = A2;
894
    }
895
#else
896
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
897
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
898
#endif /* 0 */
899
}
900

    
901
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
902
    int i;
903

    
904
    for(i=0; i<width; i++){
905
        b1[i] -= (b0[i] + b2[i])>>1;
906
    }
907
}
908

    
909
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
910
    int i;
911

    
912
    for(i=0; i<width; i++){
913
        b1[i] += (b0[i] + b2[i] + 2)>>2;
914
    }
915
}
916

    
917
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
918
    int y;
919
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
920
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
921

    
922
    for(y=-2; y<height; y+=2){
923
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
924
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
925

    
926
        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
927
        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
928

    
929
        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
930
        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
931

    
932
        b0=b2;
933
        b1=b3;
934
    }
935
}
936

    
937
static void horizontal_decompose97i(DWTELEM *b, int width){
938
    DWTELEM temp[width];
939
    const int w2= (width+1)>>1;
940

    
941
    lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
942
    liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
943
    lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
944
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
945
}
946

    
947

    
948
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
949
    int i;
950

    
951
    for(i=0; i<width; i++){
952
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
953
    }
954
}
955

    
956
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
957
    int i;
958

    
959
    for(i=0; i<width; i++){
960
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
961
    }
962
}
963

    
964
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
965
    int i;
966

    
967
    for(i=0; i<width; i++){
968
#ifdef liftS
969
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
970
#else
971
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
972
#endif
973
    }
974
}
975

    
976
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
977
    int i;
978

    
979
    for(i=0; i<width; i++){
980
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
981
    }
982
}
983

    
984
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
985
    int y;
986
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
987
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
988
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
989
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
990

    
991
    for(y=-4; y<height; y+=2){
992
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
993
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
994

    
995
        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
996
        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
997

    
998
        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
999
        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1000
        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1001
        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1002

    
1003
        b0=b2;
1004
        b1=b3;
1005
        b2=b4;
1006
        b3=b5;
1007
    }
1008
}
1009

    
1010
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1011
    int level;
1012

    
1013
    for(level=0; level<decomposition_count; level++){
1014
        switch(type){
1015
        case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1016
        case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1017
        }
1018
    }
1019
}
1020

    
1021
static void horizontal_compose53i(IDWTELEM *b, int width){
1022
    IDWTELEM temp[width];
1023
    const int width2= width>>1;
1024
    const int w2= (width+1)>>1;
1025
    int x;
1026

    
1027
#if 0
1028
    int A1,A2,A3,A4;
1029
    A2= temp[1       ];
1030
    A4= temp[0       ];
1031
    A1= temp[0+width2];
1032
    A1 -= (A2 + A4)>>1;
1033
    A4 += (A1 + 1)>>1;
1034
    b[0+width2] = A1;
1035
    b[0       ] = A4;
1036
    for(x=1; x+1<width2; x+=2){
1037
        A3= temp[x+width2];
1038
        A4= temp[x+1     ];
1039
        A3 -= (A2 + A4)>>1;
1040
        A2 += (A1 + A3 + 2)>>2;
1041
        b[x+width2] = A3;
1042
        b[x       ] = A2;
1043

1044
        A1= temp[x+1+width2];
1045
        A2= temp[x+2       ];
1046
        A1 -= (A2 + A4)>>1;
1047
        A4 += (A1 + A3 + 2)>>2;
1048
        b[x+1+width2] = A1;
1049
        b[x+1       ] = A4;
1050
    }
1051
    A3= temp[width-1];
1052
    A3 -= A2;
1053
    A2 += (A1 + A3 + 2)>>2;
1054
    b[width -1] = A3;
1055
    b[width2-1] = A2;
1056
#else
1057
    inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1058
    inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1059
#endif /* 0 */
1060
    for(x=0; x<width2; x++){
1061
        b[2*x    ]= temp[x   ];
1062
        b[2*x + 1]= temp[x+w2];
1063
    }
1064
    if(width&1)
1065
        b[2*x    ]= temp[x   ];
1066
}
1067

    
1068
static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1069
    int i;
1070

    
1071
    for(i=0; i<width; i++){
1072
        b1[i] += (b0[i] + b2[i])>>1;
1073
    }
1074
}
1075

    
1076
static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1077
    int i;
1078

    
1079
    for(i=0; i<width; i++){
1080
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1081
    }
1082
}
1083

    
1084
static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1085
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1086
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1087
    cs->y = -1;
1088
}
1089

    
1090
static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1091
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1092
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1093
    cs->y = -1;
1094
}
1095

    
1096
static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1097
    int y= cs->y;
1098

    
1099
    IDWTELEM *b0= cs->b0;
1100
    IDWTELEM *b1= cs->b1;
1101
    IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1102
    IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1103

    
1104
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1105
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1106

    
1107
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1108
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1109

    
1110
    cs->b0 = b2;
1111
    cs->b1 = b3;
1112
    cs->y += 2;
1113
}
1114

    
1115
static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1116
    int y= cs->y;
1117
    IDWTELEM *b0= cs->b0;
1118
    IDWTELEM *b1= cs->b1;
1119
    IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1120
    IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1121

    
1122
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1123
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1124

    
1125
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1126
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1127

    
1128
    cs->b0 = b2;
1129
    cs->b1 = b3;
1130
    cs->y += 2;
1131
}
1132

    
1133
static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1134
    DWTCompose cs;
1135
    spatial_compose53i_init(&cs, buffer, height, stride);
1136
    while(cs.y <= height)
1137
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1138
}
1139

    
1140

    
1141
void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1142
    IDWTELEM temp[width];
1143
    const int w2= (width+1)>>1;
1144

    
1145
    inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1146
    inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1147
    inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
1148
    inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
1149
}
1150

    
1151
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1152
    int i;
1153

    
1154
    for(i=0; i<width; i++){
1155
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1156
    }
1157
}
1158

    
1159
static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1160
    int i;
1161

    
1162
    for(i=0; i<width; i++){
1163
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1164
    }
1165
}
1166

    
1167
static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1168
    int i;
1169

    
1170
    for(i=0; i<width; i++){
1171
#ifdef liftS
1172
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1173
#else
1174
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1175
#endif
1176
    }
1177
}
1178

    
1179
static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1180
    int i;
1181

    
1182
    for(i=0; i<width; i++){
1183
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1184
    }
1185
}
1186

    
1187
void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1188
    int i;
1189

    
1190
    for(i=0; i<width; i++){
1191
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1192
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1193
#ifdef liftS
1194
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1195
#else
1196
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1197
#endif
1198
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1199
    }
1200
}
1201

    
1202
static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1203
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1204
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1205
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1206
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1207
    cs->y = -3;
1208
}
1209

    
1210
static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1211
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1212
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1213
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1214
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1215
    cs->y = -3;
1216
}
1217

    
1218
static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1219
    int y = cs->y;
1220

    
1221
    IDWTELEM *b0= cs->b0;
1222
    IDWTELEM *b1= cs->b1;
1223
    IDWTELEM *b2= cs->b2;
1224
    IDWTELEM *b3= cs->b3;
1225
    IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1226
    IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1227

    
1228
    if(y>0 && y+4<height){
1229
        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1230
    }else{
1231
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1232
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1233
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1234
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1235
    }
1236

    
1237
        if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1238
        if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1239

    
1240
    cs->b0=b2;
1241
    cs->b1=b3;
1242
    cs->b2=b4;
1243
    cs->b3=b5;
1244
    cs->y += 2;
1245
}
1246

    
1247
static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1248
    int y = cs->y;
1249
    IDWTELEM *b0= cs->b0;
1250
    IDWTELEM *b1= cs->b1;
1251
    IDWTELEM *b2= cs->b2;
1252
    IDWTELEM *b3= cs->b3;
1253
    IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1254
    IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1255

    
1256
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1257
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1258
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1259
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1260

    
1261
        if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1262
        if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1263

    
1264
    cs->b0=b2;
1265
    cs->b1=b3;
1266
    cs->b2=b4;
1267
    cs->b3=b5;
1268
    cs->y += 2;
1269
}
1270

    
1271
static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1272
    DWTCompose cs;
1273
    spatial_compose97i_init(&cs, buffer, height, stride);
1274
    while(cs.y <= height)
1275
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1276
}
1277

    
1278
static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1279
    int level;
1280
    for(level=decomposition_count-1; level>=0; level--){
1281
        switch(type){
1282
        case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1283
        case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1284
        }
1285
    }
1286
}
1287

    
1288
static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1289
    int level;
1290
    for(level=decomposition_count-1; level>=0; level--){
1291
        switch(type){
1292
        case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1293
        case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1294
        }
1295
    }
1296
}
1297

    
1298
static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1299
    const int support = type==1 ? 3 : 5;
1300
    int level;
1301
    if(type==2) return;
1302

    
1303
    for(level=decomposition_count-1; level>=0; level--){
1304
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1305
            switch(type){
1306
            case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1307
                break;
1308
            case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1309
                break;
1310
            }
1311
        }
1312
    }
1313
}
1314

    
1315
static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1316
    const int support = type==1 ? 3 : 5;
1317
    int level;
1318
    if(type==2) return;
1319

    
1320
    for(level=decomposition_count-1; level>=0; level--){
1321
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1322
            switch(type){
1323
            case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1324
                break;
1325
            case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1326
                break;
1327
            }
1328
        }
1329
    }
1330
}
1331

    
1332
static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1333
        DWTCompose cs[MAX_DECOMPOSITIONS];
1334
        int y;
1335
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1336
        for(y=0; y<height; y+=4)
1337
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1338
}
1339

    
1340
static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1341
    const int w= b->width;
1342
    const int h= b->height;
1343
    int x, y;
1344

    
1345
    if(1){
1346
        int run=0;
1347
        int runs[w*h];
1348
        int run_index=0;
1349
        int max_index;
1350

    
1351
        for(y=0; y<h; y++){
1352
            for(x=0; x<w; x++){
1353
                int v, p=0;
1354
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1355
                v= src[x + y*stride];
1356

    
1357
                if(y){
1358
                    t= src[x + (y-1)*stride];
1359
                    if(x){
1360
                        lt= src[x - 1 + (y-1)*stride];
1361
                    }
1362
                    if(x + 1 < w){
1363
                        rt= src[x + 1 + (y-1)*stride];
1364
                    }
1365
                }
1366
                if(x){
1367
                    l= src[x - 1 + y*stride];
1368
                    /*if(x > 1){
1369
                        if(orientation==1) ll= src[y + (x-2)*stride];
1370
                        else               ll= src[x - 2 + y*stride];
1371
                    }*/
1372
                }
1373
                if(parent){
1374
                    int px= x>>1;
1375
                    int py= y>>1;
1376
                    if(px<b->parent->width && py<b->parent->height)
1377
                        p= parent[px + py*2*stride];
1378
                }
1379
                if(!(/*ll|*/l|lt|t|rt|p)){
1380
                    if(v){
1381
                        runs[run_index++]= run;
1382
                        run=0;
1383
                    }else{
1384
                        run++;
1385
                    }
1386
                }
1387
            }
1388
        }
1389
        max_index= run_index;
1390
        runs[run_index++]= run;
1391
        run_index=0;
1392
        run= runs[run_index++];
1393

    
1394
        put_symbol2(&s->c, b->state[30], max_index, 0);
1395
        if(run_index <= max_index)
1396
            put_symbol2(&s->c, b->state[1], run, 3);
1397

    
1398
        for(y=0; y<h; y++){
1399
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1400
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1401
                return -1;
1402
            }
1403
            for(x=0; x<w; x++){
1404
                int v, p=0;
1405
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1406
                v= src[x + y*stride];
1407

    
1408
                if(y){
1409
                    t= src[x + (y-1)*stride];
1410
                    if(x){
1411
                        lt= src[x - 1 + (y-1)*stride];
1412
                    }
1413
                    if(x + 1 < w){
1414
                        rt= src[x + 1 + (y-1)*stride];
1415
                    }
1416
                }
1417
                if(x){
1418
                    l= src[x - 1 + y*stride];
1419
                    /*if(x > 1){
1420
                        if(orientation==1) ll= src[y + (x-2)*stride];
1421
                        else               ll= src[x - 2 + y*stride];
1422
                    }*/
1423
                }
1424
                if(parent){
1425
                    int px= x>>1;
1426
                    int py= y>>1;
1427
                    if(px<b->parent->width && py<b->parent->height)
1428
                        p= parent[px + py*2*stride];
1429
                }
1430
                if(/*ll|*/l|lt|t|rt|p){
1431
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1432

    
1433
                    put_rac(&s->c, &b->state[0][context], !!v);
1434
                }else{
1435
                    if(!run){
1436
                        run= runs[run_index++];
1437

    
1438
                        if(run_index <= max_index)
1439
                            put_symbol2(&s->c, b->state[1], run, 3);
1440
                        assert(v);
1441
                    }else{
1442
                        run--;
1443
                        assert(!v);
1444
                    }
1445
                }
1446
                if(v){
1447
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1448
                    int l2= 2*FFABS(l) + (l<0);
1449
                    int t2= 2*FFABS(t) + (t<0);
1450

    
1451
                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1452
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1453
                }
1454
            }
1455
        }
1456
    }
1457
    return 0;
1458
}
1459

    
1460
static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1461
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1462
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1463
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1464
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1465
}
1466

    
1467
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1468
    const int w= b->width;
1469
    const int h= b->height;
1470
    int x,y;
1471

    
1472
    if(1){
1473
        int run, runs;
1474
        x_and_coeff *xc= b->x_coeff;
1475
        x_and_coeff *prev_xc= NULL;
1476
        x_and_coeff *prev2_xc= xc;
1477
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1478
        x_and_coeff *prev_parent_xc= parent_xc;
1479

    
1480
        runs= get_symbol2(&s->c, b->state[30], 0);
1481
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1482
        else           run= INT_MAX;
1483

    
1484
        for(y=0; y<h; y++){
1485
            int v=0;
1486
            int lt=0, t=0, rt=0;
1487

    
1488
            if(y && prev_xc->x == 0){
1489
                rt= prev_xc->coeff;
1490
            }
1491
            for(x=0; x<w; x++){
1492
                int p=0;
1493
                const int l= v;
1494

    
1495
                lt= t; t= rt;
1496

    
1497
                if(y){
1498
                    if(prev_xc->x <= x)
1499
                        prev_xc++;
1500
                    if(prev_xc->x == x + 1)
1501
                        rt= prev_xc->coeff;
1502
                    else
1503
                        rt=0;
1504
                }
1505
                if(parent_xc){
1506
                    if(x>>1 > parent_xc->x){
1507
                        parent_xc++;
1508
                    }
1509
                    if(x>>1 == parent_xc->x){
1510
                        p= parent_xc->coeff;
1511
                    }
1512
                }
1513
                if(/*ll|*/l|lt|t|rt|p){
1514
                    int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1515

    
1516
                    v=get_rac(&s->c, &b->state[0][context]);
1517
                    if(v){
1518
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1519
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1520

    
1521
                        xc->x=x;
1522
                        (xc++)->coeff= v;
1523
                    }
1524
                }else{
1525
                    if(!run){
1526
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1527
                        else           run= INT_MAX;
1528
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1529
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1530

    
1531
                        xc->x=x;
1532
                        (xc++)->coeff= v;
1533
                    }else{
1534
                        int max_run;
1535
                        run--;
1536
                        v=0;
1537

    
1538
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1539
                        else  max_run= FFMIN(run, w-x-1);
1540
                        if(parent_xc)
1541
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1542
                        x+= max_run;
1543
                        run-= max_run;
1544
                    }
1545
                }
1546
            }
1547
            (xc++)->x= w+1; //end marker
1548
            prev_xc= prev2_xc;
1549
            prev2_xc= xc;
1550

    
1551
            if(parent_xc){
1552
                if(y&1){
1553
                    while(parent_xc->x != parent->width+1)
1554
                        parent_xc++;
1555
                    parent_xc++;
1556
                    prev_parent_xc= parent_xc;
1557
                }else{
1558
                    parent_xc= prev_parent_xc;
1559
                }
1560
            }
1561
        }
1562

    
1563
        (xc++)->x= w+1; //end marker
1564
    }
1565
}
1566

    
1567
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1568
    const int w= b->width;
1569
    int y;
1570
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1571
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1572
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1573
    int new_index = 0;
1574

    
1575
    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1576
        qadd= 0;
1577
        qmul= 1<<QEXPSHIFT;
1578
    }
1579

    
1580
    /* If we are on the second or later slice, restore our index. */
1581
    if (start_y != 0)
1582
        new_index = save_state[0];
1583

    
1584

    
1585
    for(y=start_y; y<h; y++){
1586
        int x = 0;
1587
        int v;
1588
        IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1589
        memset(line, 0, b->width*sizeof(IDWTELEM));
1590
        v = b->x_coeff[new_index].coeff;
1591
        x = b->x_coeff[new_index++].x;
1592
        while(x < w){
1593
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1594
            register int u= -(v&1);
1595
            line[x] = (t^u) - u;
1596

    
1597
            v = b->x_coeff[new_index].coeff;
1598
            x = b->x_coeff[new_index++].x;
1599
        }
1600
    }
1601

    
1602
    /* Save our variables for the next slice. */
1603
    save_state[0] = new_index;
1604

    
1605
    return;
1606
}
1607

    
1608
static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1609
    int plane_index, level, orientation;
1610

    
1611
    for(plane_index=0; plane_index<3; plane_index++){
1612
        for(level=0; level<MAX_DECOMPOSITIONS; level++){
1613
            for(orientation=level ? 1:0; orientation<4; orientation++){
1614
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1615
            }
1616
        }
1617
    }
1618
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1619
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1620
}
1621

    
1622
static int alloc_blocks(SnowContext *s){
1623
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1624
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1625

    
1626
    s->b_width = w;
1627
    s->b_height= h;
1628

    
1629
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1630
    return 0;
1631
}
1632

    
1633
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1634
    uint8_t *bytestream= d->bytestream;
1635
    uint8_t *bytestream_start= d->bytestream_start;
1636
    *d= *s;
1637
    d->bytestream= bytestream;
1638
    d->bytestream_start= bytestream_start;
1639
}
1640

    
1641
//near copy & paste from dsputil, FIXME
1642
static int pix_sum(uint8_t * pix, int line_size, int w)
1643
{
1644
    int s, i, j;
1645

    
1646
    s = 0;
1647
    for (i = 0; i < w; i++) {
1648
        for (j = 0; j < w; j++) {
1649
            s += pix[0];
1650
            pix ++;
1651
        }
1652
        pix += line_size - w;
1653
    }
1654
    return s;
1655
}
1656

    
1657
//near copy & paste from dsputil, FIXME
1658
static int pix_norm1(uint8_t * pix, int line_size, int w)
1659
{
1660
    int s, i, j;
1661
    uint32_t *sq = ff_squareTbl + 256;
1662

    
1663
    s = 0;
1664
    for (i = 0; i < w; i++) {
1665
        for (j = 0; j < w; j ++) {
1666
            s += sq[pix[0]];
1667
            pix ++;
1668
        }
1669
        pix += line_size - w;
1670
    }
1671
    return s;
1672
}
1673

    
1674
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1675
    const int w= s->b_width << s->block_max_depth;
1676
    const int rem_depth= s->block_max_depth - level;
1677
    const int index= (x + y*w) << rem_depth;
1678
    const int block_w= 1<<rem_depth;
1679
    BlockNode block;
1680
    int i,j;
1681

    
1682
    block.color[0]= l;
1683
    block.color[1]= cb;
1684
    block.color[2]= cr;
1685
    block.mx= mx;
1686
    block.my= my;
1687
    block.ref= ref;
1688
    block.type= type;
1689
    block.level= level;
1690

    
1691
    for(j=0; j<block_w; j++){
1692
        for(i=0; i<block_w; i++){
1693
            s->block[index + i + j*w]= block;
1694
        }
1695
    }
1696
}
1697

    
1698
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1699
    const int offset[3]= {
1700
          y*c->  stride + x,
1701
        ((y*c->uvstride + x)>>1),
1702
        ((y*c->uvstride + x)>>1),
1703
    };
1704
    int i;
1705
    for(i=0; i<3; i++){
1706
        c->src[0][i]= src [i];
1707
        c->ref[0][i]= ref [i] + offset[i];
1708
    }
1709
    assert(!ref_index);
1710
}
1711

    
1712
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1713
                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1714
    if(s->ref_frames == 1){
1715
        *mx = mid_pred(left->mx, top->mx, tr->mx);
1716
        *my = mid_pred(left->my, top->my, tr->my);
1717
    }else{
1718
        const int *scale = scale_mv_ref[ref];
1719
        *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1720
                       (top ->mx * scale[top ->ref] + 128) >>8,
1721
                       (tr  ->mx * scale[tr  ->ref] + 128) >>8);
1722
        *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1723
                       (top ->my * scale[top ->ref] + 128) >>8,
1724
                       (tr  ->my * scale[tr  ->ref] + 128) >>8);
1725
    }
1726
}
1727

    
1728
//FIXME copy&paste
1729
#define P_LEFT P[1]
1730
#define P_TOP P[2]
1731
#define P_TOPRIGHT P[3]
1732
#define P_MEDIAN P[4]
1733
#define P_MV1 P[9]
1734
#define FLAG_QPEL   1 //must be 1
1735

    
1736
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1737
    uint8_t p_buffer[1024];
1738
    uint8_t i_buffer[1024];
1739
    uint8_t p_state[sizeof(s->block_state)];
1740
    uint8_t i_state[sizeof(s->block_state)];
1741
    RangeCoder pc, ic;
1742
    uint8_t *pbbak= s->c.bytestream;
1743
    uint8_t *pbbak_start= s->c.bytestream_start;
1744
    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1745
    const int w= s->b_width  << s->block_max_depth;
1746
    const int h= s->b_height << s->block_max_depth;
1747
    const int rem_depth= s->block_max_depth - level;
1748
    const int index= (x + y*w) << rem_depth;
1749
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1750
    int trx= (x+1)<<rem_depth;
1751
    int try= (y+1)<<rem_depth;
1752
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1753
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1754
    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1755
    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1756
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1757
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1758
    int pl = left->color[0];
1759
    int pcb= left->color[1];
1760
    int pcr= left->color[2];
1761
    int pmx, pmy;
1762
    int mx=0, my=0;
1763
    int l,cr,cb;
1764
    const int stride= s->current_picture.linesize[0];
1765
    const int uvstride= s->current_picture.linesize[1];
1766
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
1767
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1768
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1769
    int P[10][2];
1770
    int16_t last_mv[3][2];
1771
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1772
    const int shift= 1+qpel;
1773
    MotionEstContext *c= &s->m.me;
1774
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1775
    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1776
    int my_context= av_log2(2*FFABS(left->my - top->my));
1777
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1778
    int ref, best_ref, ref_score, ref_mx, ref_my;
1779

    
1780
    assert(sizeof(s->block_state) >= 256);
1781
    if(s->keyframe){
1782
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1783
        return 0;
1784
    }
1785

    
1786
//    clip predictors / edge ?
1787

    
1788
    P_LEFT[0]= left->mx;
1789
    P_LEFT[1]= left->my;
1790
    P_TOP [0]= top->mx;
1791
    P_TOP [1]= top->my;
1792
    P_TOPRIGHT[0]= tr->mx;
1793
    P_TOPRIGHT[1]= tr->my;
1794

    
1795
    last_mv[0][0]= s->block[index].mx;
1796
    last_mv[0][1]= s->block[index].my;
1797
    last_mv[1][0]= right->mx;
1798
    last_mv[1][1]= right->my;
1799
    last_mv[2][0]= bottom->mx;
1800
    last_mv[2][1]= bottom->my;
1801

    
1802
    s->m.mb_stride=2;
1803
    s->m.mb_x=
1804
    s->m.mb_y= 0;
1805
    c->skip= 0;
1806

    
1807
    assert(c->  stride ==   stride);
1808
    assert(c->uvstride == uvstride);
1809

    
1810
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1811
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1812
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1813
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1814

    
1815
    c->xmin = - x*block_w - 16+2;
1816
    c->ymin = - y*block_w - 16+2;
1817
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1818
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1819

    
1820
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1821
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
1822
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1823
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1824
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1825
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1826
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1827

    
1828
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1829
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1830

    
1831
    if (!y) {
1832
        c->pred_x= P_LEFT[0];
1833
        c->pred_y= P_LEFT[1];
1834
    } else {
1835
        c->pred_x = P_MEDIAN[0];
1836
        c->pred_y = P_MEDIAN[1];
1837
    }
1838

    
1839
    score= INT_MAX;
1840
    best_ref= 0;
1841
    for(ref=0; ref<s->ref_frames; ref++){
1842
        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1843

    
1844
        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1845
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1846

    
1847
        assert(ref_mx >= c->xmin);
1848
        assert(ref_mx <= c->xmax);
1849
        assert(ref_my >= c->ymin);
1850
        assert(ref_my <= c->ymax);
1851

    
1852
        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1853
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1854
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1855
        if(s->ref_mvs[ref]){
1856
            s->ref_mvs[ref][index][0]= ref_mx;
1857
            s->ref_mvs[ref][index][1]= ref_my;
1858
            s->ref_scores[ref][index]= ref_score;
1859
        }
1860
        if(score > ref_score){
1861
            score= ref_score;
1862
            best_ref= ref;
1863
            mx= ref_mx;
1864
            my= ref_my;
1865
        }
1866
    }
1867
    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1868

    
1869
  //  subpel search
1870
    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1871
    pc= s->c;
1872
    pc.bytestream_start=
1873
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1874
    memcpy(p_state, s->block_state, sizeof(s->block_state));
1875

    
1876
    if(level!=s->block_max_depth)
1877
        put_rac(&pc, &p_state[4 + s_context], 1);
1878
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1879
    if(s->ref_frames > 1)
1880
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1881
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1882
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1883
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1884
    p_len= pc.bytestream - pc.bytestream_start;
1885
    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1886

    
1887
    block_s= block_w*block_w;
1888
    sum = pix_sum(current_data[0], stride, block_w);
1889
    l= (sum + block_s/2)/block_s;
1890
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1891

    
1892
    block_s= block_w*block_w>>2;
1893
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
1894
    cb= (sum + block_s/2)/block_s;
1895
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1896
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
1897
    cr= (sum + block_s/2)/block_s;
1898
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1899

    
1900
    ic= s->c;
1901
    ic.bytestream_start=
1902
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1903
    memcpy(i_state, s->block_state, sizeof(s->block_state));
1904
    if(level!=s->block_max_depth)
1905
        put_rac(&ic, &i_state[4 + s_context], 1);
1906
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1907
    put_symbol(&ic, &i_state[32],  l-pl , 1);
1908
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
1909
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
1910
    i_len= ic.bytestream - ic.bytestream_start;
1911
    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1912

    
1913
//    assert(score==256*256*256*64-1);
1914
    assert(iscore < 255*255*256 + s->lambda2*10);
1915
    assert(iscore >= 0);
1916
    assert(l>=0 && l<=255);
1917
    assert(pl>=0 && pl<=255);
1918

    
1919
    if(level==0){
1920
        int varc= iscore >> 8;
1921
        int vard= score >> 8;
1922
        if (vard <= 64 || vard < varc)
1923
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1924
        else
1925
            c->scene_change_score+= s->m.qscale;
1926
    }
1927

    
1928
    if(level!=s->block_max_depth){
1929
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
1930
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1931
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1932
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1933
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1934
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1935

    
1936
        if(score2 < score && score2 < iscore)
1937
            return score2;
1938
    }
1939

    
1940
    if(iscore < score){
1941
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1942
        memcpy(pbbak, i_buffer, i_len);
1943
        s->c= ic;
1944
        s->c.bytestream_start= pbbak_start;
1945
        s->c.bytestream= pbbak + i_len;
1946
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1947
        memcpy(s->block_state, i_state, sizeof(s->block_state));
1948
        return iscore;
1949
    }else{
1950
        memcpy(pbbak, p_buffer, p_len);
1951
        s->c= pc;
1952
        s->c.bytestream_start= pbbak_start;
1953
        s->c.bytestream= pbbak + p_len;
1954
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1955
        memcpy(s->block_state, p_state, sizeof(s->block_state));
1956
        return score;
1957
    }
1958
}
1959

    
1960
static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1961
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1962
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1963
    }else{
1964
        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1965
    }
1966
}
1967

    
1968
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1969
    const int w= s->b_width  << s->block_max_depth;
1970
    const int rem_depth= s->block_max_depth - level;
1971
    const int index= (x + y*w) << rem_depth;
1972
    int trx= (x+1)<<rem_depth;
1973
    BlockNode *b= &s->block[index];
1974
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1975
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1976
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1977
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1978
    int pl = left->color[0];
1979
    int pcb= left->color[1];
1980
    int pcr= left->color[2];
1981
    int pmx, pmy;
1982
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1983
    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1984
    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1985
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1986

    
1987
    if(s->keyframe){
1988
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1989
        return;
1990
    }
1991

    
1992
    if(level!=s->block_max_depth){
1993
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1994
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
1995
        }else{
1996
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
1997
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1998
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1999
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2000
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2001
            return;
2002
        }
2003
    }
2004
    if(b->type & BLOCK_INTRA){
2005
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2006
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2007
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2008
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2009
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2010
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2011
    }else{
2012
        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2013
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2014
        if(s->ref_frames > 1)
2015
            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2016
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2017
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2018
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2019
    }
2020
}
2021

    
2022
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2023
    const int w= s->b_width << s->block_max_depth;
2024
    const int rem_depth= s->block_max_depth - level;
2025
    const int index= (x + y*w) << rem_depth;
2026
    int trx= (x+1)<<rem_depth;
2027
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2028
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2029
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2030
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2031
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2032

    
2033
    if(s->keyframe){
2034
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2035
        return;
2036
    }
2037

    
2038
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2039
        int type, mx, my;
2040
        int l = left->color[0];
2041
        int cb= left->color[1];
2042
        int cr= left->color[2];
2043
        int ref = 0;
2044
        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2045
        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2046
        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2047

    
2048
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2049

    
2050
        if(type){
2051
            pred_mv(s, &mx, &my, 0, left, top, tr);
2052
            l += get_symbol(&s->c, &s->block_state[32], 1);
2053
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2054
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2055
        }else{
2056
            if(s->ref_frames > 1)
2057
                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2058
            pred_mv(s, &mx, &my, ref, left, top, tr);
2059
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2060
            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2061
        }
2062
        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2063
    }else{
2064
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2065
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2066
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2067
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2068
    }
2069
}
2070

    
2071
static void encode_blocks(SnowContext *s, int search){
2072
    int x, y;
2073
    int w= s->b_width;
2074
    int h= s->b_height;
2075

    
2076
    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2077
        iterative_me(s);
2078

    
2079
    for(y=0; y<h; y++){
2080
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2081
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2082
            return;
2083
        }
2084
        for(x=0; x<w; x++){
2085
            if(s->avctx->me_method == ME_ITER || !search)
2086
                encode_q_branch2(s, 0, x, y);
2087
            else
2088
                encode_q_branch (s, 0, x, y);
2089
        }
2090
    }
2091
}
2092

    
2093
static void decode_blocks(SnowContext *s){
2094
    int x, y;
2095
    int w= s->b_width;
2096
    int h= s->b_height;
2097

    
2098
    for(y=0; y<h; y++){
2099
        for(x=0; x<w; x++){
2100
            decode_q_branch(s, 0, x, y);
2101
        }
2102
    }
2103
}
2104

    
2105
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2106
    static const uint8_t weight[64]={
2107
    8,7,6,5,4,3,2,1,
2108
    7,7,0,0,0,0,0,1,
2109
    6,0,6,0,0,0,2,0,
2110
    5,0,0,5,0,3,0,0,
2111
    4,0,0,0,4,0,0,0,
2112
    3,0,0,5,0,3,0,0,
2113
    2,0,6,0,0,0,2,0,
2114
    1,7,0,0,0,0,0,1,
2115
    };
2116

    
2117
    static const uint8_t brane[256]={
2118
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2119
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2120
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2121
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2122
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2123
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2124
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2125
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2126
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2127
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2128
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2129
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2130
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2131
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2132
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2133
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2134
    };
2135

    
2136
    static const uint8_t needs[16]={
2137
    0,1,0,0,
2138
    2,4,2,0,
2139
    0,1,0,0,
2140
    15
2141
    };
2142

    
2143
    int x, y, b, r, l;
2144
    int16_t tmpIt   [64*(32+HTAPS_MAX)];
2145
    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2146
    int16_t *tmpI= tmpIt;
2147
    uint8_t *tmp2= tmp2t[0];
2148
    const uint8_t *hpel[11];
2149
    assert(dx<16 && dy<16);
2150
    r= brane[dx + 16*dy]&15;
2151
    l= brane[dx + 16*dy]>>4;
2152

    
2153
    b= needs[l] | needs[r];
2154
    if(p && !p->diag_mc)
2155
        b= 15;
2156

    
2157
    if(b&5){
2158
        for(y=0; y < b_h+HTAPS_MAX-1; y++){
2159
            for(x=0; x < b_w; x++){
2160
                int a_1=src[x + HTAPS_MAX/2-4];
2161
                int a0= src[x + HTAPS_MAX/2-3];
2162
                int a1= src[x + HTAPS_MAX/2-2];
2163
                int a2= src[x + HTAPS_MAX/2-1];
2164
                int a3= src[x + HTAPS_MAX/2+0];
2165
                int a4= src[x + HTAPS_MAX/2+1];
2166
                int a5= src[x + HTAPS_MAX/2+2];
2167
                int a6= src[x + HTAPS_MAX/2+3];
2168
                int am=0;
2169
                if(!p || p->fast_mc){
2170
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2171
                    tmpI[x]= am;
2172
                    am= (am+16)>>5;
2173
                }else{
2174
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2175
                    tmpI[x]= am;
2176
                    am= (am+32)>>6;
2177
                }
2178

    
2179
                if(am&(~255)) am= ~(am>>31);
2180
                tmp2[x]= am;
2181
            }
2182
            tmpI+= 64;
2183
            tmp2+= stride;
2184
            src += stride;
2185
        }
2186
        src -= stride*y;
2187
    }
2188
    src += HTAPS_MAX/2 - 1;
2189
    tmp2= tmp2t[1];
2190

    
2191
    if(b&2){
2192
        for(y=0; y < b_h; y++){
2193
            for(x=0; x < b_w+1; x++){
2194
                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2195
                int a0= src[x + (HTAPS_MAX/2-3)*stride];
2196
                int a1= src[x + (HTAPS_MAX/2-2)*stride];
2197
                int a2= src[x + (HTAPS_MAX/2-1)*stride];
2198
                int a3= src[x + (HTAPS_MAX/2+0)*stride];
2199
                int a4= src[x + (HTAPS_MAX/2+1)*stride];
2200
                int a5= src[x + (HTAPS_MAX/2+2)*stride];
2201
                int a6= src[x + (HTAPS_MAX/2+3)*stride];
2202
                int am=0;
2203
                if(!p || p->fast_mc)
2204
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2205
                else
2206
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2207

    
2208
                if(am&(~255)) am= ~(am>>31);
2209
                tmp2[x]= am;
2210
            }
2211
            src += stride;
2212
            tmp2+= stride;
2213
        }
2214
        src -= stride*y;
2215
    }
2216
    src += stride*(HTAPS_MAX/2 - 1);
2217
    tmp2= tmp2t[2];
2218
    tmpI= tmpIt;
2219
    if(b&4){
2220
        for(y=0; y < b_h; y++){
2221
            for(x=0; x < b_w; x++){
2222
                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2223
                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2224
                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2225
                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2226
                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2227
                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2228
                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2229
                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2230
                int am=0;
2231
                if(!p || p->fast_mc)
2232
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2233
                else
2234
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2235
                if(am&(~255)) am= ~(am>>31);
2236
                tmp2[x]= am;
2237
            }
2238
            tmpI+= 64;
2239
            tmp2+= stride;
2240
        }
2241
    }
2242

    
2243
    hpel[ 0]= src;
2244
    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2245
    hpel[ 2]= src + 1;
2246

    
2247
    hpel[ 4]= tmp2t[1];
2248
    hpel[ 5]= tmp2t[2];
2249
    hpel[ 6]= tmp2t[1] + 1;
2250

    
2251
    hpel[ 8]= src + stride;
2252
    hpel[ 9]= hpel[1] + stride;
2253
    hpel[10]= hpel[8] + 1;
2254

    
2255
    if(b==15){
2256
        const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
2257
        const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2258
        const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2259
        const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2260
        dx&=7;
2261
        dy&=7;
2262
        for(y=0; y < b_h; y++){
2263
            for(x=0; x < b_w; x++){
2264
                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2265
                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
2266
            }
2267
            src1+=stride;
2268
            src2+=stride;
2269
            src3+=stride;
2270
            src4+=stride;
2271
            dst +=stride;
2272
        }
2273
    }else{
2274
        const uint8_t *src1= hpel[l];
2275
        const uint8_t *src2= hpel[r];
2276
        int a= weight[((dx&7) + (8*(dy&7)))];
2277
        int b= 8-a;
2278
        for(y=0; y < b_h; y++){
2279
            for(x=0; x < b_w; x++){
2280
                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2281
            }
2282
            src1+=stride;
2283
            src2+=stride;
2284
            dst +=stride;
2285
        }
2286
    }
2287
}
2288

    
2289
#define mca(dx,dy,b_w)\
2290
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2291
    uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2292
    assert(h==b_w);\
2293
    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2294
}
2295

    
2296
mca( 0, 0,16)
2297
mca( 8, 0,16)
2298
mca( 0, 8,16)
2299
mca( 8, 8,16)
2300
mca( 0, 0,8)
2301
mca( 8, 0,8)
2302
mca( 0, 8,8)
2303
mca( 8, 8,8)
2304

    
2305
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2306
    if(block->type & BLOCK_INTRA){
2307
        int x, y;
2308
        const int color = block->color[plane_index];
2309
        const int color4= color*0x01010101;
2310
        if(b_w==32){
2311
            for(y=0; y < b_h; y++){
2312
                *(uint32_t*)&dst[0 + y*stride]= color4;
2313
                *(uint32_t*)&dst[4 + y*stride]= color4;
2314
                *(uint32_t*)&dst[8 + y*stride]= color4;
2315
                *(uint32_t*)&dst[12+ y*stride]= color4;
2316
                *(uint32_t*)&dst[16+ y*stride]= color4;
2317
                *(uint32_t*)&dst[20+ y*stride]= color4;
2318
                *(uint32_t*)&dst[24+ y*stride]= color4;
2319
                *(uint32_t*)&dst[28+ y*stride]= color4;
2320
            }
2321
        }else if(b_w==16){
2322
            for(y=0; y < b_h; y++){
2323
                *(uint32_t*)&dst[0 + y*stride]= color4;
2324
                *(uint32_t*)&dst[4 + y*stride]= color4;
2325
                *(uint32_t*)&dst[8 + y*stride]= color4;
2326
                *(uint32_t*)&dst[12+ y*stride]= color4;
2327
            }
2328
        }else if(b_w==8){
2329
            for(y=0; y < b_h; y++){
2330
                *(uint32_t*)&dst[0 + y*stride]= color4;
2331
                *(uint32_t*)&dst[4 + y*stride]= color4;
2332
            }
2333
        }else if(b_w==4){
2334
            for(y=0; y < b_h; y++){
2335
                *(uint32_t*)&dst[0 + y*stride]= color4;
2336
            }
2337
        }else{
2338
            for(y=0; y < b_h; y++){
2339
                for(x=0; x < b_w; x++){
2340
                    dst[x + y*stride]= color;
2341
                }
2342
            }
2343
        }
2344
    }else{
2345
        uint8_t *src= s->last_picture[block->ref].data[plane_index];
2346
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2347
        int mx= block->mx*scale;
2348
        int my= block->my*scale;
2349
        const int dx= mx&15;
2350
        const int dy= my&15;
2351
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2352
        sx += (mx>>4) - (HTAPS_MAX/2-1);
2353
        sy += (my>>4) - (HTAPS_MAX/2-1);
2354
        src += sx + sy*stride;
2355
        if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2356
           || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2357
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2358
            src= tmp + MB_SIZE;
2359
        }
2360
//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2361
//        assert(!(b_w&(b_w-1)));
2362
        assert(b_w>1 && b_h>1);
2363
        assert((tab_index>=0 && tab_index<4) || b_w==32);
2364
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2365
            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2366
        else if(b_w==32){
2367
            int y;
2368
            for(y=0; y<b_h; y+=16){
2369
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2370
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2371
            }
2372
        }else if(b_w==b_h)
2373
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2374
        else if(b_w==2*b_h){
2375
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
2376
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2377
        }else{
2378
            assert(2*b_w==b_h);
2379
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
2380
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2381
        }
2382
    }
2383
}
2384

    
2385
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2386
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2387
    int y, x;
2388
    IDWTELEM * dst;
2389
    for(y=0; y<b_h; y++){
2390
        //FIXME ugly misuse of obmc_stride
2391
        const uint8_t *obmc1= obmc + y*obmc_stride;
2392
        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2393
        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2394
        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2395
        dst = slice_buffer_get_line(sb, src_y + y);
2396
        for(x=0; x<b_w; x++){
2397
            int v=   obmc1[x] * block[3][x + y*src_stride]
2398
                    +obmc2[x] * block[2][x + y*src_stride]
2399
                    +obmc3[x] * block[1][x + y*src_stride]
2400
                    +obmc4[x] * block[0][x + y*src_stride];
2401

    
2402
            v <<= 8 - LOG2_OBMC_MAX;
2403
            if(FRAC_BITS != 8){
2404
                v >>= 8 - FRAC_BITS;
2405
            }
2406
            if(add){
2407
                v += dst[x + src_x];
2408
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2409
                if(v&(~255)) v= ~(v>>31);
2410
                dst8[x + y*src_stride] = v;
2411
            }else{
2412
                dst[x + src_x] -= v;
2413
            }
2414
        }
2415
    }
2416
}
2417

    
2418
//FIXME name cleanup (b_w, block_w, b_width stuff)
2419
static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2420
    const int b_width = s->b_width  << s->block_max_depth;
2421
    const int b_height= s->b_height << s->block_max_depth;
2422
    const int b_stride= b_width;
2423
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2424
    BlockNode *rt= lt+1;
2425
    BlockNode *lb= lt+b_stride;
2426
    BlockNode *rb= lb+1;
2427
    uint8_t *block[4];
2428
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2429
    uint8_t *tmp = s->scratchbuf;
2430
    uint8_t *ptmp;
2431
    int x,y;
2432

    
2433
    if(b_x<0){
2434
        lt= rt;
2435
        lb= rb;
2436
    }else if(b_x + 1 >= b_width){
2437
        rt= lt;
2438
        rb= lb;
2439
    }
2440
    if(b_y<0){
2441
        lt= lb;
2442
        rt= rb;
2443
    }else if(b_y + 1 >= b_height){
2444
        lb= lt;
2445
        rb= rt;
2446
    }
2447

    
2448
    if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2449
        obmc -= src_x;
2450
        b_w += src_x;
2451
        if(!sliced && !offset_dst)
2452
            dst -= src_x;
2453
        src_x=0;
2454
    }else if(src_x + b_w > w){
2455
        b_w = w - src_x;
2456
    }
2457
    if(src_y<0){
2458
        obmc -= src_y*obmc_stride;
2459
        b_h += src_y;
2460
        if(!sliced && !offset_dst)
2461
            dst -= src_y*dst_stride;
2462
        src_y=0;
2463
    }else if(src_y + b_h> h){
2464
        b_h = h - src_y;
2465
    }
2466

    
2467
    if(b_w<=0 || b_h<=0) return;
2468

    
2469
    assert(src_stride > 2*MB_SIZE + 5);
2470

    
2471
    if(!sliced && offset_dst)
2472
        dst += src_x + src_y*dst_stride;
2473
    dst8+= src_x + src_y*src_stride;
2474
//    src += src_x + src_y*src_stride;
2475

    
2476
    ptmp= tmp + 3*tmp_step;
2477
    block[0]= ptmp;
2478
    ptmp+=tmp_step;
2479
    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2480

    
2481
    if(same_block(lt, rt)){
2482
        block[1]= block[0];
2483
    }else{
2484
        block[1]= ptmp;
2485
        ptmp+=tmp_step;
2486
        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2487
    }
2488

    
2489
    if(same_block(lt, lb)){
2490
        block[2]= block[0];
2491
    }else if(same_block(rt, lb)){
2492
        block[2]= block[1];
2493
    }else{
2494
        block[2]= ptmp;
2495
        ptmp+=tmp_step;
2496
        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2497
    }
2498

    
2499
    if(same_block(lt, rb) ){
2500
        block[3]= block[0];
2501
    }else if(same_block(rt, rb)){
2502
        block[3]= block[1];
2503
    }else if(same_block(lb, rb)){
2504
        block[3]= block[2];
2505
    }else{
2506
        block[3]= ptmp;
2507
        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2508
    }
2509
#if 0
2510
    for(y=0; y<b_h; y++){
2511
        for(x=0; x<b_w; x++){
2512
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2513
            if(add) dst[x + y*dst_stride] += v;
2514
            else    dst[x + y*dst_stride] -= v;
2515
        }
2516
    }
2517
    for(y=0; y<b_h; y++){
2518
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2519
        for(x=0; x<b_w; x++){
2520
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2521
            if(add) dst[x + y*dst_stride] += v;
2522
            else    dst[x + y*dst_stride] -= v;
2523
        }
2524
    }
2525
    for(y=0; y<b_h; y++){
2526
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2527
        for(x=0; x<b_w; x++){
2528
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2529
            if(add) dst[x + y*dst_stride] += v;
2530
            else    dst[x + y*dst_stride] -= v;
2531
        }
2532
    }
2533
    for(y=0; y<b_h; y++){
2534
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2535
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2536
        for(x=0; x<b_w; x++){
2537
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2538
            if(add) dst[x + y*dst_stride] += v;
2539
            else    dst[x + y*dst_stride] -= v;
2540
        }
2541
    }
2542
#else
2543
    if(sliced){
2544
        s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2545
    }else{
2546
        for(y=0; y<b_h; y++){
2547
            //FIXME ugly misuse of obmc_stride
2548
            const uint8_t *obmc1= obmc + y*obmc_stride;
2549
            const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2550
            const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2551
            const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2552
            for(x=0; x<b_w; x++){
2553
                int v=   obmc1[x] * block[3][x + y*src_stride]
2554
                        +obmc2[x] * block[2][x + y*src_stride]
2555
                        +obmc3[x] * block[1][x + y*src_stride]
2556
                        +obmc4[x] * block[0][x + y*src_stride];
2557

    
2558
                v <<= 8 - LOG2_OBMC_MAX;
2559
                if(FRAC_BITS != 8){
2560
                    v >>= 8 - FRAC_BITS;
2561
                }
2562
                if(add){
2563
                    v += dst[x + y*dst_stride];
2564
                    v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2565
                    if(v&(~255)) v= ~(v>>31);
2566
                    dst8[x + y*src_stride] = v;
2567
                }else{
2568
                    dst[x + y*dst_stride] -= v;
2569
                }
2570
            }
2571
        }
2572
    }
2573
#endif /* 0 */
2574
}
2575

    
2576
static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2577
    Plane *p= &s->plane[plane_index];
2578
    const int mb_w= s->b_width  << s->block_max_depth;
2579
    const int mb_h= s->b_height << s->block_max_depth;
2580
    int x, y, mb_x;
2581
    int block_size = MB_SIZE >> s->block_max_depth;
2582
    int block_w    = plane_index ? block_size/2 : block_size;
2583
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2584
    int obmc_stride= plane_index ? block_size : 2*block_size;
2585
    int ref_stride= s->current_picture.linesize[plane_index];
2586
    uint8_t *dst8= s->current_picture.data[plane_index];
2587
    int w= p->width;
2588
    int h= p->height;
2589

    
2590
    if(s->keyframe || (s->avctx->debug&512)){
2591
        if(mb_y==mb_h)
2592
            return;
2593

    
2594
        if(add){
2595
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2596
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2597
                IDWTELEM * line = sb->line[y];
2598
                for(x=0; x<w; x++){
2599
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2600
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2601
                    v >>= FRAC_BITS;
2602
                    if(v&(~255)) v= ~(v>>31);
2603
                    dst8[x + y*ref_stride]= v;
2604
                }
2605
            }
2606
        }else{
2607
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2608
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2609
                IDWTELEM * line = sb->line[y];
2610
                for(x=0; x<w; x++){
2611
                    line[x] -= 128 << FRAC_BITS;
2612
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2613
                }
2614
            }
2615
        }
2616

    
2617
        return;
2618
    }
2619

    
2620
    for(mb_x=0; mb_x<=mb_w; mb_x++){
2621
        add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2622
                   block_w*mb_x - block_w/2,
2623
                   block_w*mb_y - block_w/2,
2624
                   block_w, block_w,
2625
                   w, h,
2626
                   w, ref_stride, obmc_stride,
2627
                   mb_x - 1, mb_y - 1,
2628
                   add, 0, plane_index);
2629
    }
2630
}
2631

    
2632
static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2633
    Plane *p= &s->plane[plane_index];
2634
    const int mb_w= s->b_width  << s->block_max_depth;
2635
    const int mb_h= s->b_height << s->block_max_depth;
2636
    int x, y, mb_x;
2637
    int block_size = MB_SIZE >> s->block_max_depth;
2638
    int block_w    = plane_index ? block_size/2 : block_size;
2639
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2640
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2641
    int ref_stride= s->current_picture.linesize[plane_index];
2642
    uint8_t *dst8= s->current_picture.data[plane_index];
2643
    int w= p->width;
2644
    int h= p->height;
2645

    
2646
    if(s->keyframe || (s->avctx->debug&512)){
2647
        if(mb_y==mb_h)
2648
            return;
2649

    
2650
        if(add){
2651
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2652
                for(x=0; x<w; x++){
2653
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2654
                    v >>= FRAC_BITS;
2655
                    if(v&(~255)) v= ~(v>>31);
2656
                    dst8[x + y*ref_stride]= v;
2657
                }
2658
            }
2659
        }else{
2660
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2661
                for(x=0; x<w; x++){
2662
                    buf[x + y*w]-= 128<<FRAC_BITS;
2663
                }
2664
            }
2665
        }
2666

    
2667
        return;
2668
    }
2669

    
2670
    for(mb_x=0; mb_x<=mb_w; mb_x++){
2671
        add_yblock(s, 0, NULL, buf, dst8, obmc,
2672
                   block_w*mb_x - block_w/2,
2673
                   block_w*mb_y - block_w/2,
2674
                   block_w, block_w,
2675
                   w, h,
2676
                   w, ref_stride, obmc_stride,
2677
                   mb_x - 1, mb_y - 1,
2678
                   add, 1, plane_index);
2679
    }
2680
}
2681

    
2682
static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2683
    const int mb_h= s->b_height << s->block_max_depth;
2684
    int mb_y;
2685
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2686
        predict_slice(s, buf, plane_index, add, mb_y);
2687
}
2688

    
2689
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2690
    int i, x2, y2;
2691
    Plane *p= &s->plane[plane_index];
2692
    const int block_size = MB_SIZE >> s->block_max_depth;
2693
    const int block_w    = plane_index ? block_size/2 : block_size;
2694
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2695
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2696
    const int ref_stride= s->current_picture.linesize[plane_index];
2697
    uint8_t *src= s-> input_picture.data[plane_index];
2698
    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2699
    const int b_stride = s->b_width << s->block_max_depth;
2700
    const int w= p->width;
2701
    const int h= p->height;
2702
    int index= mb_x + mb_y*b_stride;
2703
    BlockNode *b= &s->block[index];
2704
    BlockNode backup= *b;
2705
    int ab=0;
2706
    int aa=0;
2707

    
2708
    b->type|= BLOCK_INTRA;
2709
    b->color[plane_index]= 0;
2710
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2711

    
2712
    for(i=0; i<4; i++){
2713
        int mb_x2= mb_x + (i &1) - 1;
2714
        int mb_y2= mb_y + (i>>1) - 1;
2715
        int x= block_w*mb_x2 + block_w/2;
2716
        int y= block_w*mb_y2 + block_w/2;
2717

    
2718
        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2719
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2720

    
2721
        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2722
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2723
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2724
                int obmc_v= obmc[index];
2725
                int d;
2726
                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2727
                if(x<0) obmc_v += obmc[index + block_w];
2728
                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2729
                if(x+block_w>w) obmc_v += obmc[index - block_w];
2730
                //FIXME precalculate this or simplify it somehow else
2731

    
2732
                d = -dst[index] + (1<<(FRAC_BITS-1));
2733
                dst[index] = d;
2734
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2735
                aa += obmc_v * obmc_v; //FIXME precalculate this
2736
            }
2737
        }
2738
    }
2739
    *b= backup;
2740

    
2741
    return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2742
}
2743

    
2744
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2745
    const int b_stride = s->b_width << s->block_max_depth;
2746
    const int b_height = s->b_height<< s->block_max_depth;
2747
    int index= x + y*b_stride;
2748
    const BlockNode *b     = &s->block[index];
2749
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2750
    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2751
    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2752
    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2753
    int dmx, dmy;
2754
//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2755
//  int my_context= av_log2(2*FFABS(left->my - top->my));
2756

    
2757
    if(x<0 || x>=b_stride || y>=b_height)
2758
        return 0;
2759
/*
2760
1            0      0
2761
01X          1-2    1
2762
001XX        3-6    2-3
2763
0001XXX      7-14   4-7
2764
00001XXXX   15-30   8-15
2765
*/
2766
//FIXME try accurate rate
2767
//FIXME intra and inter predictors if surrounding blocks are not the same type
2768
    if(b->type & BLOCK_INTRA){
2769
        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2770
                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
2771
                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
2772
    }else{
2773
        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2774
        dmx-= b->mx;
2775
        dmy-= b->my;
2776
        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2777
                    + av_log2(2*FFABS(dmy))
2778
                    + av_log2(2*b->ref));
2779
    }
2780
}
2781

    
2782
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2783
    Plane *p= &s->plane[plane_index];
2784
    const int block_size = MB_SIZE >> s->block_max_depth;
2785
    const int block_w    = plane_index ? block_size/2 : block_size;
2786
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2787
    const int ref_stride= s->current_picture.linesize[plane_index];
2788
    uint8_t *dst= s->current_picture.data[plane_index];
2789
    uint8_t *src= s->  input_picture.data[plane_index];
2790
    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2791
    uint8_t *cur = s->scratchbuf;
2792
    uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2793
    const int b_stride = s->b_width << s->block_max_depth;
2794
    const int b_height = s->b_height<< s->block_max_depth;
2795
    const int w= p->width;
2796
    const int h= p->height;
2797
    int distortion;
2798
    int rate= 0;
2799
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2800
    int sx= block_w*mb_x - block_w/2;
2801
    int sy= block_w*mb_y - block_w/2;
2802
    int x0= FFMAX(0,-sx);
2803
    int y0= FFMAX(0,-sy);
2804
    int x1= FFMIN(block_w*2, w-sx);
2805
    int y1= FFMIN(block_w*2, h-sy);
2806
    int i,x,y;
2807

    
2808
    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2809

    
2810
    for(y=y0; y<y1; y++){
2811
        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2812
        const IDWTELEM *pred1 = pred + y*obmc_stride;
2813
        uint8_t *cur1 = cur + y*ref_stride;
2814
        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2815
        for(x=x0; x<x1; x++){
2816
#if FRAC_BITS >= LOG2_OBMC_MAX
2817
            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2818
#else
2819
            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2820
#endif
2821
            v = (v + pred1[x]) >> FRAC_BITS;
2822
            if(v&(~255)) v= ~(v>>31);
2823
            dst1[x] = v;
2824
        }
2825
    }
2826

    
2827
    /* copy the regions where obmc[] = (uint8_t)256 */
2828
    if(LOG2_OBMC_MAX == 8
2829
        && (mb_x == 0 || mb_x == b_stride-1)
2830
        && (mb_y == 0 || mb_y == b_height-1)){
2831
        if(mb_x == 0)
2832
            x1 = block_w;
2833
        else
2834
            x0 = block_w;
2835
        if(mb_y == 0)
2836
            y1 = block_w;
2837
        else
2838
            y0 = block_w;
2839
        for(y=y0; y<y1; y++)
2840
            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2841
    }
2842

    
2843
    if(block_w==16){
2844
        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2845
        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2846
        /* FIXME cmps overlap but do not cover the wavelet's whole support.
2847
         * So improving the score of one block is not strictly guaranteed
2848
         * to improve the score of the whole frame, thus iterative motion
2849
         * estimation does not always converge. */
2850
        if(s->avctx->me_cmp == FF_CMP_W97)
2851
            distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2852
        else if(s->avctx->me_cmp == FF_CMP_W53)
2853
            distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2854
        else{
2855
            distortion = 0;
2856
            for(i=0; i<4; i++){
2857
                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2858
                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2859
            }
2860
        }
2861
    }else{
2862
        assert(block_w==8);
2863
        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2864
    }
2865

    
2866
    if(plane_index==0){
2867
        for(i=0; i<4; i++){
2868
/* ..RRr
2869
 * .RXx.
2870
 * rxx..
2871
 */
2872
            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2873
        }
2874
        if(mb_x == b_stride-2)
2875
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2876
    }
2877
    return distortion + rate*penalty_factor;
2878
}
2879

    
2880
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2881
    int i, y2;
2882
    Plane *p= &s->plane[plane_index];
2883
    const int block_size = MB_SIZE >> s->block_max_depth;
2884
    const int block_w    = plane_index ? block_size/2 : block_size;
2885
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2886
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2887
    const int ref_stride= s->current_picture.linesize[plane_index];
2888
    uint8_t *dst= s->current_picture.data[plane_index];
2889
    uint8_t *src= s-> input_picture.data[plane_index];
2890
    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2891
    // const has only been removed from zero_dst to suppress a warning
2892
    static IDWTELEM zero_dst[4096]; //FIXME
2893
    const int b_stride = s->b_width << s->block_max_depth;
2894
    const int w= p->width;
2895
    const int h= p->height;
2896
    int distortion= 0;
2897
    int rate= 0;
2898
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2899

    
2900
    for(i=0; i<9; i++){
2901
        int mb_x2= mb_x + (i%3) - 1;
2902
        int mb_y2= mb_y + (i/3) - 1;
2903
        int x= block_w*mb_x2 + block_w/2;
2904
        int y= block_w*mb_y2 + block_w/2;
2905

    
2906
        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2907
                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2908

    
2909
        //FIXME find a cleaner/simpler way to skip the outside stuff
2910
        for(y2= y; y2<0; y2++)
2911
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2912
        for(y2= h; y2<y+block_w; y2++)
2913
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2914
        if(x<0){
2915
            for(y2= y; y2<y+block_w; y2++)
2916
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2917
        }
2918
        if(x+block_w > w){
2919
            for(y2= y; y2<y+block_w; y2++)
2920
                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2921
        }
2922

    
2923
        assert(block_w== 8 || block_w==16);
2924
        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2925
    }
2926

    
2927
    if(plane_index==0){
2928
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2929
        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2930

    
2931
/* ..RRRr
2932
 * .RXXx.
2933
 * .RXXx.
2934
 * rxxx.
2935
 */
2936
        if(merged)
2937
            rate = get_block_bits(s, mb_x, mb_y, 2);
2938
        for(i=merged?4:0; i<9; i++){
2939
            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2940
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2941
        }
2942
    }
2943
    return distortion + rate*penalty_factor;
2944
}
2945

    
2946
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2947
    const int b_stride= s->b_width << s->block_max_depth;
2948
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2949
    BlockNode backup= *block;
2950
    int rd, index, value;
2951

    
2952
    assert(mb_x>=0 && mb_y>=0);
2953
    assert(mb_x<b_stride);
2954

    
2955
    if(intra){
2956
        block->color[0] = p[0];
2957
        block->color[1] = p[1];
2958
        block->color[2] = p[2];
2959
        block->type |= BLOCK_INTRA;
2960
    }else{
2961
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2962
        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2963
        if(s->me_cache[index] == value)
2964
            return 0;
2965
        s->me_cache[index]= value;
2966

    
2967
        block->mx= p[0];
2968
        block->my= p[1];
2969
        block->type &= ~BLOCK_INTRA;
2970
    }
2971

    
2972
    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2973

    
2974
//FIXME chroma
2975
    if(rd < *best_rd){
2976
        *best_rd= rd;
2977
        return 1;
2978
    }else{
2979
        *block= backup;
2980
        return 0;
2981
    }
2982
}
2983

    
2984
/* special case for int[2] args we discard afterwards,
2985
 * fixes compilation problem with gcc 2.95 */
2986
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2987
    int p[2] = {p0, p1};
2988
    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2989
}
2990

    
2991
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2992
    const int b_stride= s->b_width << s->block_max_depth;
2993
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2994
    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2995
    int rd, index, value;
2996

    
2997
    assert(mb_x>=0 && mb_y>=0);
2998
    assert(mb_x<b_stride);
2999
    assert(((mb_x|mb_y)&1) == 0);
3000

    
3001
    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3002
    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3003
    if(s->me_cache[index] == value)
3004
        return 0;
3005
    s->me_cache[index]= value;
3006

    
3007
    block->mx= p0;
3008
    block->my= p1;
3009
    block->ref= ref;
3010
    block->type &= ~BLOCK_INTRA;
3011
    block[1]= block[b_stride]= block[b_stride+1]= *block;
3012

    
3013
    rd= get_4block_rd(s, mb_x, mb_y, 0);
3014

    
3015
//FIXME chroma
3016
    if(rd < *best_rd){
3017
        *best_rd= rd;
3018
        return 1;
3019
    }else{
3020
        block[0]= backup[0];
3021
        block[1]= backup[1];
3022
        block[b_stride]= backup[2];
3023
        block[b_stride+1]= backup[3];
3024
        return 0;
3025
    }
3026
}
3027

    
3028
static void iterative_me(SnowContext *s){
3029
    int pass, mb_x, mb_y;
3030
    const int b_width = s->b_width  << s->block_max_depth;
3031
    const int b_height= s->b_height << s->block_max_depth;
3032
    const int b_stride= b_width;
3033
    int color[3];
3034

    
3035
    {
3036
        RangeCoder r = s->c;
3037
        uint8_t state[sizeof(s->block_state)];
3038
        memcpy(state, s->block_state, sizeof(s->block_state));
3039
        for(mb_y= 0; mb_y<s->b_height; mb_y++)
3040
            for(mb_x= 0; mb_x<s->b_width; mb_x++)
3041
                encode_q_branch(s, 0, mb_x, mb_y);
3042
        s->c = r;
3043
        memcpy(s->block_state, state, sizeof(s->block_state));
3044
    }
3045

    
3046
    for(pass=0; pass<25; pass++){
3047
        int change= 0;
3048

    
3049
        for(mb_y= 0; mb_y<b_height; mb_y++){
3050
            for(mb_x= 0; mb_x<b_width; mb_x++){
3051
                int dia_change, i, j, ref;
3052
                int best_rd= INT_MAX, ref_rd;
3053
                BlockNode backup, ref_b;
3054
                const int index= mb_x + mb_y * b_stride;
3055
                BlockNode *block= &s->block[index];
3056
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3057
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3058
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3059
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3060
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3061
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3062
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3063
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3064
                const int b_w= (MB_SIZE >> s->block_max_depth);
3065
                uint8_t obmc_edged[b_w*2][b_w*2];
3066

    
3067
                if(pass && (block->type & BLOCK_OPT))
3068
                    continue;
3069
                block->type |= BLOCK_OPT;
3070

    
3071
                backup= *block;
3072

    
3073
                if(!s->me_cache_generation)
3074
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3075
                s->me_cache_generation += 1<<22;
3076

    
3077
                //FIXME precalculate
3078
                {
3079
                    int x, y;
3080
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3081
                    if(mb_x==0)
3082
                        for(y=0; y<b_w*2; y++)
3083
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3084
                    if(mb_x==b_stride-1)
3085
                        for(y=0; y<b_w*2; y++)
3086
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3087
                    if(mb_y==0){
3088
                        for(x=0; x<b_w*2; x++)
3089
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3090
                        for(y=1; y<b_w; y++)
3091
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3092
                    }
3093
                    if(mb_y==b_height-1){
3094
                        for(x=0; x<b_w*2; x++)
3095
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3096
                        for(y=b_w; y<b_w*2-1; y++)
3097
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3098
                    }
3099
                }
3100

    
3101
                //skip stuff outside the picture
3102
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3103
                    uint8_t *src= s->  input_picture.data[0];
3104
                    uint8_t *dst= s->current_picture.data[0];
3105
                    const int stride= s->current_picture.linesize[0];
3106
                    const int block_w= MB_SIZE >> s->block_max_depth;
3107
                    const int sx= block_w*mb_x - block_w/2;
3108
                    const int sy= block_w*mb_y - block_w/2;
3109
                    const int w= s->plane[0].width;
3110
                    const int h= s->plane[0].height;
3111
                    int y;
3112

    
3113
                    for(y=sy; y<0; y++)
3114
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3115
                    for(y=h; y<sy+block_w*2; y++)
3116
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3117
                    if(sx<0){
3118
                        for(y=sy; y<sy+block_w*2; y++)
3119
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3120
                    }
3121
                    if(sx+block_w*2 > w){
3122
                        for(y=sy; y<sy+block_w*2; y++)
3123
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3124
                    }
3125
                }
3126

    
3127
                // intra(black) = neighbors' contribution to the current block
3128
                for(i=0; i<3; i++)
3129
                    color[i]= get_dc(s, mb_x, mb_y, i);
3130

    
3131
                // get previous score (cannot be cached due to OBMC)
3132
                if(pass > 0 && (block->type&BLOCK_INTRA)){
3133
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
3134
                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3135
                }else
3136
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3137

    
3138
                ref_b= *block;
3139
                ref_rd= best_rd;
3140
                for(ref=0; ref < s->ref_frames; ref++){
3141
                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3142
                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3143
                        continue;
3144
                    block->ref= ref;
3145
                    best_rd= INT_MAX;
3146

    
3147
                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3148
                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3149
                    if(tb)
3150
                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3151
                    if(lb)
3152
                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3153
                    if(rb)
3154
                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3155
                    if(bb)
3156
                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3157

    
3158
                    /* fullpel ME */
3159
                    //FIXME avoid subpel interpolation / round to nearest integer
3160
                    do{
3161
                        dia_change=0;
3162
                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3163
                            for(j=0; j<i; j++){
3164
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3165
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3166
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3167
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3168
                            }
3169
                        }
3170
                    }while(dia_change);
3171
                    /* subpel ME */
3172
                    do{
3173
                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3174
                        dia_change=0;
3175
                        for(i=0; i<8; i++)
3176
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3177
                    }while(dia_change);
3178
                    //FIXME or try the standard 2 pass qpel or similar
3179

    
3180
                    mvr[0][0]= block->mx;
3181
                    mvr[0][1]= block->my;
3182
                    if(ref_rd > best_rd){
3183
                        ref_rd= best_rd;
3184
                        ref_b= *block;
3185
                    }
3186
                }
3187
                best_rd= ref_rd;
3188
                *block= ref_b;
3189
#if 1
3190
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3191
                //FIXME RD style color selection
3192
#endif
3193
                if(!same_block(block, &backup)){
3194
                    if(tb ) tb ->type &= ~BLOCK_OPT;
3195
                    if(lb ) lb ->type &= ~BLOCK_OPT;
3196
                    if(rb ) rb ->type &= ~BLOCK_OPT;
3197
                    if(bb ) bb ->type &= ~BLOCK_OPT;
3198
                    if(tlb) tlb->type &= ~BLOCK_OPT;
3199
                    if(trb) trb->type &= ~BLOCK_OPT;
3200
                    if(blb) blb->type &= ~BLOCK_OPT;
3201
                    if(brb) brb->type &= ~BLOCK_OPT;
3202
                    change ++;
3203
                }
3204
            }
3205
        }
3206
        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3207
        if(!change)
3208
            break;
3209
    }
3210

    
3211
    if(s->block_max_depth == 1){
3212
        int change= 0;
3213
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
3214
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
3215
                int i;
3216
                int best_rd, init_rd;
3217
                const int index= mb_x + mb_y * b_stride;
3218
                BlockNode *b[4];
3219

    
3220
                b[0]= &s->block[index];
3221
                b[1]= b[0]+1;
3222
                b[2]= b[0]+b_stride;
3223
                b[3]= b[2]+1;
3224
                if(same_block(b[0], b[1]) &&
3225
                   same_block(b[0], b[2]) &&
3226
                   same_block(b[0], b[3]))
3227
                    continue;
3228

    
3229
                if(!s->me_cache_generation)
3230
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3231
                s->me_cache_generation += 1<<22;
3232

    
3233
                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3234

    
3235
                //FIXME more multiref search?
3236
                check_4block_inter(s, mb_x, mb_y,
3237
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3238
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3239

    
3240
                for(i=0; i<4; i++)
3241
                    if(!(b[i]->type&BLOCK_INTRA))
3242
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3243

    
3244
                if(init_rd != best_rd)
3245
                    change++;
3246
            }
3247
        }
3248
        av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3249
    }
3250
}
3251

    
3252
static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3253
    const int w= b->width;
3254
    const int h= b->height;
3255
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3256
    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3257
    int x,y, thres1, thres2;
3258

    
3259
    if(s->qlog == LOSSLESS_QLOG){
3260
        for(y=0; y<h; y++)
3261
            for(x=0; x<w; x++)
3262
                dst[x + y*stride]= src[x + y*stride];
3263
        return;
3264
    }
3265

    
3266
    bias= bias ? 0 : (3*qmul)>>3;
3267
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3268
    thres2= 2*thres1;
3269

    
3270
    if(!bias){
3271
        for(y=0; y<h; y++){
3272
            for(x=0; x<w; x++){
3273
                int i= src[x + y*stride];
3274

    
3275
                if((unsigned)(i+thres1) > thres2){
3276
                    if(i>=0){
3277
                        i<<= QEXPSHIFT;
3278
                        i/= qmul; //FIXME optimize
3279
                        dst[x + y*stride]=  i;
3280
                    }else{
3281
                        i= -i;
3282
                        i<<= QEXPSHIFT;
3283
                        i/= qmul; //FIXME optimize
3284
                        dst[x + y*stride]= -i;
3285
                    }
3286
                }else
3287
                    dst[x + y*stride]= 0;
3288
            }
3289
        }
3290
    }else{
3291
        for(y=0; y<h; y++){
3292
            for(x=0; x<w; x++){
3293
                int i= src[x + y*stride];
3294

    
3295
                if((unsigned)(i+thres1) > thres2){
3296
                    if(i>=0){
3297
                        i<<= QEXPSHIFT;
3298
                        i= (i + bias) / qmul; //FIXME optimize
3299
                        dst[x + y*stride]=  i;
3300
                    }else{
3301
                        i= -i;
3302
                        i<<= QEXPSHIFT;
3303
                        i= (i + bias) / qmul; //FIXME optimize
3304
                        dst[x + y*stride]= -i;
3305
                    }
3306
                }else
3307
                    dst[x + y*stride]= 0;
3308
            }
3309
        }
3310
    }
3311
}
3312

    
3313
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3314
    const int w= b->width;
3315
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3316
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3317
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3318
    int x,y;
3319

    
3320
    if(s->qlog == LOSSLESS_QLOG) return;
3321

    
3322
    for(y=start_y; y<end_y; y++){
3323
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3324
        IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3325
        for(x=0; x<w; x++){
3326
            int i= line[x];
3327
            if(i<0){
3328
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3329
            }else if(i>0){
3330
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3331
            }
3332
        }
3333
    }
3334
}
3335

    
3336
static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3337
    const int w= b->width;
3338
    const int h= b->height;
3339
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3340
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3341
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3342
    int x,y;
3343

    
3344
    if(s->qlog == LOSSLESS_QLOG) return;
3345

    
3346
    for(y=0; y<h; y++){
3347
        for(x=0; x<w; x++){
3348
            int i= src[x + y*stride];
3349
            if(i<0){
3350
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3351
            }else if(i>0){
3352
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3353
            }
3354
        }
3355
    }
3356
}
3357

    
3358
static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3359
    const int w= b->width;
3360
    const int h= b->height;
3361
    int x,y;
3362

    
3363
    for(y=h-1; y>=0; y--){
3364
        for(x=w-1; x>=0; x--){
3365
            int i= x + y*stride;
3366

    
3367
            if(x){
3368
                if(use_median){
3369
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3370
                    else  src[i] -= src[i - 1];
3371
                }else{
3372
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3373
                    else  src[i] -= src[i - 1];
3374
                }
3375
            }else{
3376
                if(y) src[i] -= src[i - stride];
3377
            }
3378
        }
3379
    }
3380
}
3381

    
3382
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3383
    const int w= b->width;
3384
    int x,y;
3385

    
3386
    IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3387
    IDWTELEM * prev;
3388

    
3389
    if (start_y != 0)
3390
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3391

    
3392
    for(y=start_y; y<end_y; y++){
3393
        prev = line;
3394
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3395
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3396
        for(x=0; x<w; x++){
3397
            if(x){
3398
                if(use_median){
3399
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3400
                    else  line[x] += line[x - 1];
3401
                }else{
3402
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3403
                    else  line[x] += line[x - 1];
3404
                }
3405
            }else{
3406
                if(y) line[x] += prev[x];
3407
            }
3408
        }
3409
    }
3410
}
3411

    
3412
static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3413
    const int w= b->width;
3414
    const int h= b->height;
3415
    int x,y;
3416

    
3417
    for(y=0; y<h; y++){
3418
        for(x=0; x<w; x++){
3419
            int i= x + y*stride;
3420

    
3421
            if(x){
3422
                if(use_median){
3423
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3424
                    else  src[i] += src[i - 1];
3425
                }else{
3426
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3427
                    else  src[i] += src[i - 1];
3428
                }
3429
            }else{
3430
                if(y) src[i] += src[i - stride];
3431
            }
3432
        }
3433
    }
3434
}
3435

    
3436
static void encode_qlogs(SnowContext *s){
3437
    int plane_index, level, orientation;
3438

    
3439
    for(plane_index=0; plane_index<2; plane_index++){
3440
        for(level=0; level<s->spatial_decomposition_count; level++){
3441
            for(orientation=level ? 1:0; orientation<4; orientation++){
3442
                if(orientation==2) continue;
3443
                put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3444
            }
3445
        }
3446
    }
3447
}
3448

    
3449
static void encode_header(SnowContext *s){
3450
    int plane_index, i;
3451
    uint8_t kstate[32];
3452

    
3453
    memset(kstate, MID_STATE, sizeof(kstate));
3454

    
3455
    put_rac(&s->c, kstate, s->keyframe);
3456
    if(s->keyframe || s->always_reset){
3457
        reset_contexts(s);
3458
        s->last_spatial_decomposition_type=
3459
        s->last_qlog=
3460
        s->last_qbias=
3461
        s->last_mv_scale=
3462
        s->last_block_max_depth= 0;
3463
        for(plane_index=0; plane_index<2; plane_index++){
3464
            Plane *p= &s->plane[plane_index];
3465
            p->last_htaps=0;
3466
            p->last_diag_mc=0;
3467
            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3468
        }
3469
    }
3470
    if(s->keyframe){
3471
        put_symbol(&s->c, s->header_state, s->version, 0);
3472
        put_rac(&s->c, s->header_state, s->always_reset);
3473
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3474
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3475
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3476
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3477
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3478
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3479
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3480
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3481
        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3482

    
3483
        encode_qlogs(s);
3484
    }
3485

    
3486
    if(!s->keyframe){
3487
        int update_mc=0;
3488
        for(plane_index=0; plane_index<2; plane_index++){
3489
            Plane *p= &s->plane[plane_index];
3490
            update_mc |= p->last_htaps   != p->htaps;
3491
            update_mc |= p->last_diag_mc != p->diag_mc;
3492
            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3493
        }
3494
        put_rac(&s->c, s->header_state, update_mc);
3495
        if(update_mc){
3496
            for(plane_index=0; plane_index<2; plane_index++){
3497
                Plane *p= &s->plane[plane_index];
3498
                put_rac(&s->c, s->header_state, p->diag_mc);
3499
                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3500
                for(i= p->htaps/2; i; i--)
3501
                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3502
            }
3503
        }
3504
        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3505
            put_rac(&s->c, s->header_state, 1);
3506
            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3507
            encode_qlogs(s);
3508
        }else
3509
            put_rac(&s->c, s->header_state, 0);
3510
    }
3511

    
3512
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3513
    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3514
    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3515
    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3516
    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3517

    
3518
}
3519

    
3520
static void update_last_header_values(SnowContext *s){
3521
    int plane_index;
3522

    
3523
    if(!s->keyframe){
3524
        for(plane_index=0; plane_index<2; plane_index++){
3525
            Plane *p= &s->plane[plane_index];
3526
            p->last_diag_mc= p->diag_mc;
3527
            p->last_htaps  = p->htaps;
3528
            memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3529
        }
3530
    }
3531

    
3532
    s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
3533
    s->last_qlog                        = s->qlog;
3534
    s->last_qbias                       = s->qbias;
3535
    s->last_mv_scale                    = s->mv_scale;
3536
    s->last_block_max_depth             = s->block_max_depth;
3537
    s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3538
}
3539

    
3540
static void decode_qlogs(SnowContext *s){
3541
    int plane_index, level, orientation;
3542

    
3543
    for(plane_index=0; plane_index<3; plane_index++){
3544
        for(level=0; level<s->spatial_decomposition_count; level++){
3545
            for(orientation=level ? 1:0; orientation<4; orientation++){
3546
                int q;
3547
                if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3548
                else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3549
                else                    q= get_symbol(&s->c, s->header_state, 1);
3550
                s->plane[plane_index].band[level][orientation].qlog= q;
3551
            }
3552
        }
3553
    }
3554
}
3555

    
3556
static int decode_header(SnowContext *s){
3557
    int plane_index;
3558
    uint8_t kstate[32];
3559

    
3560
    memset(kstate, MID_STATE, sizeof(kstate));
3561

    
3562
    s->keyframe= get_rac(&s->c, kstate);
3563
    if(s->keyframe || s->always_reset){
3564
        reset_contexts(s);
3565
        s->spatial_decomposition_type=
3566
        s->qlog=
3567
        s->qbias=
3568
        s->mv_scale=
3569
        s->block_max_depth= 0;
3570
    }
3571
    if(s->keyframe){
3572
        s->version= get_symbol(&s->c, s->header_state, 0);
3573
        if(s->version>0){
3574
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3575
            return -1;
3576
        }
3577
        s->always_reset= get_rac(&s->c, s->header_state);
3578
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3579
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3580
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3581
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3582
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3583
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3584
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3585
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3586
        s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3587

    
3588
        decode_qlogs(s);
3589
    }
3590

    
3591
    if(!s->keyframe){
3592
        if(get_rac(&s->c, s->header_state)){
3593
            for(plane_index=0; plane_index<2; plane_index++){
3594
                int htaps, i, sum=0;
3595
                Plane *p= &s->plane[plane_index];
3596
                p->diag_mc= get_rac(&s->c, s->header_state);
3597
                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3598
                if((unsigned)htaps > HTAPS_MAX || htaps==0)
3599
                    return -1;
3600
                p->htaps= htaps;
3601
                for(i= htaps/2; i; i--){
3602
                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3603
                    sum += p->hcoeff[i];
3604
                }
3605
                p->hcoeff[0]= 32-sum;
3606
            }
3607
            s->plane[2].diag_mc= s->plane[1].diag_mc;
3608
            s->plane[2].htaps  = s->plane[1].htaps;
3609
            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3610
        }
3611
        if(get_rac(&s->c, s->header_state)){
3612
            s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3613
            decode_qlogs(s);
3614
        }
3615
    }
3616

    
3617
    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3618
    if(s->spatial_decomposition_type > 1){
3619
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3620
        return -1;
3621
    }
3622

    
3623
    s->qlog           += get_symbol(&s->c, s->header_state, 1);
3624
    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
3625
    s->qbias          += get_symbol(&s->c, s->header_state, 1);
3626
    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3627
    if(s->block_max_depth > 1 || s->block_max_depth < 0){
3628
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3629
        s->block_max_depth= 0;
3630
        return -1;
3631
    }
3632

    
3633
    return 0;
3634
}
3635

    
3636
static void init_qexp(void){
3637
    int i;
3638
    double v=128;
3639

    
3640
    for(i=0; i<QROOT; i++){
3641
        qexp[i]= lrintf(v);
3642
        v *= pow(2, 1.0 / QROOT);
3643
    }
3644
}
3645

    
3646
static av_cold int common_init(AVCodecContext *avctx){
3647
    SnowContext *s = avctx->priv_data;
3648
    int width, height;
3649
    int i, j;
3650

    
3651
    s->avctx= avctx;
3652

    
3653
    dsputil_init(&s->dsp, avctx);
3654

    
3655
#define mcf(dx,dy)\
3656
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3657
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3658
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3659
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3660
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3661
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3662

    
3663
    mcf( 0, 0)
3664
    mcf( 4, 0)
3665
    mcf( 8, 0)
3666
    mcf(12, 0)
3667
    mcf( 0, 4)
3668
    mcf( 4, 4)
3669
    mcf( 8, 4)
3670
    mcf(12, 4)
3671
    mcf( 0, 8)
3672
    mcf( 4, 8)
3673
    mcf( 8, 8)
3674
    mcf(12, 8)
3675
    mcf( 0,12)
3676
    mcf( 4,12)
3677
    mcf( 8,12)
3678
    mcf(12,12)
3679

    
3680
#define mcfh(dx,dy)\
3681
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3682
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3683
        mc_block_hpel ## dx ## dy ## 16;\
3684
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3685
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3686
        mc_block_hpel ## dx ## dy ## 8;
3687

    
3688
    mcfh(0, 0)
3689
    mcfh(8, 0)
3690
    mcfh(0, 8)
3691
    mcfh(8, 8)
3692

    
3693
    if(!qexp[0])
3694
        init_qexp();
3695

    
3696
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3697

    
3698
    width= s->avctx->width;
3699
    height= s->avctx->height;
3700

    
3701
    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3702
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3703

    
3704
    for(i=0; i<MAX_REF_FRAMES; i++)
3705
        for(j=0; j<MAX_REF_FRAMES; j++)
3706
            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3707

    
3708
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3709
    s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
3710

    
3711
    return 0;
3712
}
3713

    
3714
static int common_init_after_header(AVCodecContext *avctx){
3715
    SnowContext *s = avctx->priv_data;
3716
    int plane_index, level, orientation;
3717

    
3718
    for(plane_index=0; plane_index<3; plane_index++){
3719
        int w= s->avctx->width;
3720
        int h= s->avctx->height;
3721

    
3722
        if(plane_index){
3723
            w>>= s->chroma_h_shift;
3724
            h>>= s->chroma_v_shift;
3725
        }
3726
        s->plane[plane_index].width = w;
3727
        s->plane[plane_index].height= h;
3728

    
3729
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3730
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3731
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3732

    
3733
                b->buf= s->spatial_dwt_buffer;
3734
                b->level= level;
3735
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3736
                b->width = (w + !(orientation&1))>>1;
3737
                b->height= (h + !(orientation>1))>>1;
3738

    
3739
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3740
                b->buf_x_offset = 0;
3741
                b->buf_y_offset = 0;
3742

    
3743
                if(orientation&1){
3744
                    b->buf += (w+1)>>1;
3745
                    b->buf_x_offset = (w+1)>>1;
3746
                }
3747
                if(orientation>1){
3748
                    b->buf += b->stride>>1;
3749
                    b->buf_y_offset = b->stride_line >> 1;
3750
                }
3751
                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3752

    
3753
                if(level)
3754
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3755
                //FIXME avoid this realloc
3756
                av_freep(&b->x_coeff);
3757
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3758
            }
3759
            w= (w+1)>>1;
3760
            h= (h+1)>>1;
3761
        }
3762
    }
3763

    
3764
    return 0;
3765
}
3766

    
3767
static int qscale2qlog(int qscale){
3768
    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3769
           + 61*QROOT/8; //<64 >60
3770
}
3771

    
3772
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3773
{
3774
    /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3775
     * FIXME we know exact mv bits at this point,
3776
     * but ratecontrol isn't set up to include them. */
3777
    uint32_t coef_sum= 0;
3778
    int level, orientation, delta_qlog;
3779

    
3780
    for(level=0; level<s->spatial_decomposition_count; level++){
3781
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3782
            SubBand *b= &s->plane[0].band[level][orientation];
3783
            IDWTELEM *buf= b->ibuf;
3784
            const int w= b->width;
3785
            const int h= b->height;
3786
            const int stride= b->stride;
3787
            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3788
            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3789
            const int qdiv= (1<<16)/qmul;
3790
            int x, y;
3791
            //FIXME this is ugly
3792
            for(y=0; y<h; y++)
3793
                for(x=0; x<w; x++)
3794
                    buf[x+y*stride]= b->buf[x+y*stride];
3795
            if(orientation==0)
3796
                decorrelate(s, b, buf, stride, 1, 0);
3797
            for(y=0; y<h; y++)
3798
                for(x=0; x<w; x++)
3799
                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3800
        }
3801
    }
3802

    
3803
    /* ugly, ratecontrol just takes a sqrt again */
3804
    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3805
    assert(coef_sum < INT_MAX);
3806

    
3807
    if(pict->pict_type == FF_I_TYPE){
3808
        s->m.current_picture.mb_var_sum= coef_sum;
3809
        s->m.current_picture.mc_mb_var_sum= 0;
3810
    }else{
3811
        s->m.current_picture.mc_mb_var_sum= coef_sum;
3812
        s->m.current_picture.mb_var_sum= 0;
3813
    }
3814

    
3815
    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3816
    if (pict->quality < 0)
3817
        return INT_MIN;
3818
    s->lambda= pict->quality * 3/2;
3819
    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3820
    s->qlog+= delta_qlog;
3821
    return delta_qlog;
3822
}
3823

    
3824
static void calculate_visual_weight(SnowContext *s, Plane *p){
3825
    int width = p->width;
3826
    int height= p->height;
3827
    int level, orientation, x, y;
3828

    
3829
    for(level=0; level<s->spatial_decomposition_count; level++){
3830
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3831
            SubBand *b= &p->band[level][orientation];
3832
            IDWTELEM *ibuf= b->ibuf;
3833
            int64_t error=0;
3834

    
3835
            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3836
            ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3837
            ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3838
            for(y=0; y<height; y++){
3839
                for(x=0; x<width; x++){
3840
                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3841
                    error += d*d;
3842
                }
3843
            }
3844

    
3845
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3846
        }
3847
    }
3848
}
3849

    
3850
#define QUANTIZE2 0
3851

    
3852
#if QUANTIZE2==1
3853
#define Q2_STEP 8
3854

    
3855
static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3856
    SubBand *b= &p->band[level][orientation];
3857
    int x, y;
3858
    int xo=0;
3859
    int yo=0;
3860
    int step= 1 << (s->spatial_decomposition_count - level);
3861

    
3862
    if(orientation&1)
3863
        xo= step>>1;
3864
    if(orientation&2)
3865
        yo= step>>1;
3866

    
3867
    //FIXME bias for nonzero ?
3868
    //FIXME optimize
3869
    memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3870
    for(y=0; y<p->height; y++){
3871
        for(x=0; x<p->width; x++){
3872
            int sx= (x-xo + step/2) / step / Q2_STEP;
3873
            int sy= (y-yo + step/2) / step / Q2_STEP;
3874
            int v= r0[x + y*p->width] - r1[x + y*p->width];
3875
            assert(sx>=0 && sy>=0 && sx < score_stride);
3876
            v= ((v+8)>>4)<<4;
3877
            score[sx + sy*score_stride] += v*v;
3878
            assert(score[sx + sy*score_stride] >= 0);
3879
        }
3880
    }
3881
}
3882

    
3883
static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3884
    int level, orientation;
3885

    
3886
    for(level=0; level<s->spatial_decomposition_count; level++){
3887
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3888
            SubBand *b= &p->band[level][orientation];
3889
            IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3890

    
3891
            dequantize(s, b, dst, b->stride);
3892
        }
3893
    }
3894
}
3895

    
3896
static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3897
    int level, orientation, ys, xs, x, y, pass;
3898
    IDWTELEM best_dequant[height * stride];
3899
    IDWTELEM idwt2_buffer[height * stride];
3900
    const int score_stride= (width + 10)/Q2_STEP;
3901
    int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3902
    int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3903
    int threshold= (s->m.lambda * s->m.lambda) >> 6;
3904

    
3905
    //FIXME pass the copy cleanly ?
3906

    
3907
//    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3908
    ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3909

    
3910
    for(level=0; level<s->spatial_decomposition_count; level++){
3911
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3912
            SubBand *b= &p->band[level][orientation];
3913
            IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3914
             DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
3915
            assert(src == b->buf); // code does not depend on this but it is true currently
3916

    
3917
            quantize(s, b, dst, src, b->stride, s->qbias);
3918
        }
3919
    }
3920
    for(pass=0; pass<1; pass++){
3921
        if(s->qbias == 0) //keyframe
3922
            continue;
3923
        for(level=0; level<s->spatial_decomposition_count; level++){
3924
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3925
                SubBand *b= &p->band[level][orientation];
3926
                IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3927
                IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3928

    
3929
                for(ys= 0; ys<Q2_STEP; ys++){
3930
                    for(xs= 0; xs<Q2_STEP; xs++){
3931
                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3932
                        dequantize_all(s, p, idwt2_buffer, width, height);
3933
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3934
                        find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3935
                        memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3936
                        for(y=ys; y<b->height; y+= Q2_STEP){
3937
                            for(x=xs; x<b->width; x+= Q2_STEP){
3938
                                if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3939
                                if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3940
                                //FIXME try more than just --
3941
                            }
3942
                        }
3943
                        dequantize_all(s, p, idwt2_buffer, width, height);
3944
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3945
                        find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3946
                        for(y=ys; y<b->height; y+= Q2_STEP){
3947
                            for(x=xs; x<b->width; x+= Q2_STEP){
3948
                                int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3949
                                if(score[score_idx] <= best_score[score_idx] + threshold){
3950
                                    best_score[score_idx]= score[score_idx];
3951
                                    if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3952
                                    if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3953
                                    //FIXME copy instead
3954
                                }
3955
                            }
3956
                        }
3957
                    }
3958
                }
3959
            }
3960
        }
3961
    }
3962
    memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3963
}
3964

    
3965
#endif /* QUANTIZE2==1 */
3966

    
3967
static av_cold int encode_init(AVCodecContext *avctx)
3968
{
3969
    SnowContext *s = avctx->priv_data;
3970
    int plane_index;
3971

    
3972
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3973
        av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3974
               "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3975
        return -1;
3976
    }
3977

    
3978
    if(avctx->prediction_method == DWT_97
3979
       && (avctx->flags & CODEC_FLAG_QSCALE)
3980
       && avctx->global_quality == 0){
3981
        av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3982
        return -1;
3983
    }
3984

    
3985
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3986

    
3987
    s->chroma_h_shift= 1; //FIXME XXX
3988
    s->chroma_v_shift= 1;
3989

    
3990
    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3991
    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3992

    
3993
    for(plane_index=0; plane_index<3; plane_index++){
3994
        s->plane[plane_index].diag_mc= 1;
3995
        s->plane[plane_index].htaps= 6;
3996
        s->plane[plane_index].hcoeff[0]=  40;
3997
        s->plane[plane_index].hcoeff[1]= -10;
3998
        s->plane[plane_index].