Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 2b2b7f41

History | View | Annotate | Download (180 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

    
21
#include "avcodec.h"
22
#include "dsputil.h"
23
#include "snow.h"
24

    
25
#include "rangecoder.h"
26
#include "mathops.h"
27

    
28
#include "mpegvideo.h"
29

    
30
#undef NDEBUG
31
#include <assert.h>
32

    
33
static const int8_t quant3[256]={
34
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50
};
51
static const int8_t quant3b[256]={
52
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
};
69
static const int8_t quant3bA[256]={
70
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
};
87
static const int8_t quant5[256]={
88
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104
};
105
static const int8_t quant7[256]={
106
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122
};
123
static const int8_t quant9[256]={
124
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140
};
141
static const int8_t quant11[256]={
142
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158
};
159
static const int8_t quant13[256]={
160
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
176
};
177

    
178
#if 0 //64*cubic
179
static const uint8_t obmc32[1024]={
180
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
181
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
182
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
183
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
184
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
185
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
186
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
187
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
188
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
189
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
190
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
191
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
192
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
193
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
194
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
195
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
196
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
197
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
198
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
199
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
200
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
201
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
202
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
203
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
204
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
205
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
206
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
207
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
208
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
209
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
210
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
211
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
212
//error:0.000022
213
};
214
static const uint8_t obmc16[256]={
215
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
216
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
217
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
218
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
219
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
220
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
221
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
222
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
223
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
224
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
225
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
226
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
227
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
228
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
229
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
230
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
231
//error:0.000033
232
};
233
#elif 1 // 64*linear
234
static const uint8_t obmc32[1024]={
235
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
236
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
237
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
238
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
239
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
240
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
241
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
242
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
243
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
244
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
245
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
246
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
247
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
248
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
249
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
250
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
251
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
252
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
253
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
254
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
255
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
256
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
257
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
258
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
259
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
260
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
261
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
262
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
263
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
264
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
265
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
266
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
267
 //error:0.000020
268
};
269
static const uint8_t obmc16[256]={
270
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
271
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
272
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
273
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
274
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
275
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
282
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
283
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
284
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
285
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
286
//error:0.000015
287
};
288
#else //64*cos
289
static const uint8_t obmc32[1024]={
290
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
291
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
292
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
293
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
294
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
295
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
296
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
297
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
298
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
299
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
300
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
301
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
302
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
303
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
304
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
305
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
306
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
307
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
308
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
309
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
310
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
311
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
312
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
313
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
314
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
315
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
316
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
317
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
318
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
319
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
320
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
321
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
322
//error:0.000022
323
};
324
static const uint8_t obmc16[256]={
325
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
326
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
327
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
328
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
329
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
330
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
331
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
332
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
333
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
334
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
335
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
336
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
337
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
338
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
339
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
340
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
341
//error:0.000022
342
};
343
#endif /* 0 */
344

    
345
//linear *64
346
static const uint8_t obmc8[64]={
347
  4, 12, 20, 28, 28, 20, 12,  4,
348
 12, 36, 60, 84, 84, 60, 36, 12,
349
 20, 60,100,140,140,100, 60, 20,
350
 28, 84,140,196,196,140, 84, 28,
351
 28, 84,140,196,196,140, 84, 28,
352
 20, 60,100,140,140,100, 60, 20,
353
 12, 36, 60, 84, 84, 60, 36, 12,
354
  4, 12, 20, 28, 28, 20, 12,  4,
355
//error:0.000000
356
};
357

    
358
//linear *64
359
static const uint8_t obmc4[16]={
360
 16, 48, 48, 16,
361
 48,144,144, 48,
362
 48,144,144, 48,
363
 16, 48, 48, 16,
364
//error:0.000000
365
};
366

    
367
static const uint8_t * const obmc_tab[4]={
368
    obmc32, obmc16, obmc8, obmc4
369
};
370

    
371
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372

    
373
/* Bit flags stored in BlockNode.type.
 * Two further flags exist only as disabled historical definitions:
 * TYPE_SPLIT (1) and TYPE_NOCOLOR (4). */
#define BLOCK_INTRA   1
#define BLOCK_OPT     2

/**
 * Per-block record.
 *
 * NOTE(review): field meanings below are inferred from the names and from
 * the null_block initializer; confirm against the code that fills them in.
 */
typedef struct BlockNode{
    int16_t mx;        ///< presumably horizontal motion vector component
    int16_t my;        ///< presumably vertical motion vector component
    uint8_t ref;       ///< presumably reference frame index
    uint8_t color[3];  ///< three 8-bit values, one per colour component (assumed)
    uint8_t type;      ///< combination of the BLOCK_* flags
    uint8_t level;     //FIXME merge into type?
}BlockNode;
385

    
386
static const BlockNode null_block= { //FIXME add border maybe
387
    .color= {128,128,128},
388
    .mx= 0,
389
    .my= 0,
390
    .ref= 0,
391
    .type= 0,
392
    .level= 0,
393
};
394

    
395
#define LOG2_MB_SIZE 4
396
#define MB_SIZE (1<<LOG2_MB_SIZE)
397
#define ENCODER_EXTRA_BITS 4
398
#define HTAPS_MAX 8
399

    
400
/**
 * Pairs a signed 16-bit x value with an unsigned 16-bit coefficient.
 * SubBand keeps a pointer to an array of these (x_coeff).
 */
typedef struct x_and_coeff{
    int16_t  x;      ///< signed 16-bit position
    uint16_t coeff;  ///< unsigned 16-bit coefficient value
} x_and_coeff;
404

    
405
typedef struct SubBand{
406
    int level;
407
    int stride;
408
    int width;
409
    int height;
410
    int qlog;        ///< log(qscale)/log[2^(1/6)]
411
    DWTELEM *buf;
412
    IDWTELEM *ibuf;
413
    int buf_x_offset;
414
    int buf_y_offset;
415
    int stride_line; ///< Stride measured in lines, not pixels.
416
    x_and_coeff * x_coeff;
417
    struct SubBand *parent;
418
    uint8_t state[/*7*2*/ 7 + 512][32];
419
}SubBand;
420

    
421
typedef struct Plane{
422
    int width;
423
    int height;
424
    SubBand band[MAX_DECOMPOSITIONS][4];
425

    
426
    int htaps;
427
    int8_t hcoeff[HTAPS_MAX/2];
428
    int diag_mc;
429
    int fast_mc;
430

    
431
    int last_htaps;
432
    int8_t last_hcoeff[HTAPS_MAX/2];
433
    int last_diag_mc;
434
}Plane;
435

    
436
typedef struct SnowContext{
437
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
438

    
439
    AVCodecContext *avctx;
440
    RangeCoder c;
441
    DSPContext dsp;
442
    AVFrame new_picture;
443
    AVFrame input_picture;              ///< new_picture with the internal linesizes
444
    AVFrame current_picture;
445
    AVFrame last_picture[MAX_REF_FRAMES];
446
    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
447
    AVFrame mconly_picture;
448
//     uint8_t q_context[16];
449
    uint8_t header_state[32];
450
    uint8_t block_state[128 + 32*128];
451
    int keyframe;
452
    int always_reset;
453
    int version;
454
    int spatial_decomposition_type;
455
    int last_spatial_decomposition_type;
456
    int temporal_decomposition_type;
457
    int spatial_decomposition_count;
458
    int last_spatial_decomposition_count;
459
    int temporal_decomposition_count;
460
    int max_ref_frames;
461
    int ref_frames;
462
    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
463
    uint32_t *ref_scores[MAX_REF_FRAMES];
464
    DWTELEM *spatial_dwt_buffer;
465
    IDWTELEM *spatial_idwt_buffer;
466
    int colorspace_type;
467
    int chroma_h_shift;
468
    int chroma_v_shift;
469
    int spatial_scalability;
470
    int qlog;
471
    int last_qlog;
472
    int lambda;
473
    int lambda2;
474
    int pass1_rc;
475
    int mv_scale;
476
    int last_mv_scale;
477
    int qbias;
478
    int last_qbias;
479
#define QBIAS_SHIFT 3
480
    int b_width;
481
    int b_height;
482
    int block_max_depth;
483
    int last_block_max_depth;
484
    Plane plane[MAX_PLANES];
485
    BlockNode *block;
486
#define ME_CACHE_SIZE 1024
487
    int me_cache[ME_CACHE_SIZE];
488
    int me_cache_generation;
489
    slice_buffer sb;
490

    
491
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
492

    
493
    uint8_t *scratchbuf;
494
}SnowContext;
495

    
496
typedef struct {
497
    IDWTELEM *b0;
498
    IDWTELEM *b1;
499
    IDWTELEM *b2;
500
    IDWTELEM *b3;
501
    int y;
502
} DWTCompose;
503

    
504
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
506

    
507
static void iterative_me(SnowContext *s);
508

    
509
/**
 * Set up a slice buffer.
 *
 * Allocates a zeroed table of line_count line pointers (buf->line) and a
 * pool of max_allocated_lines scratch lines (buf->data_stack), each
 * line_width IDWTELEMs long. All pool entries start out free:
 * data_stack_top is the index of the last one.
 *
 * The function returns void and therefore cannot report failure. If any
 * allocation fails, everything allocated so far is released and the buffer
 * is left empty and consistent (line == data_stack == NULL,
 * line_count == data_count == 0, data_stack_top == -1) instead of holding
 * NULL pointers that would be dereferenced later.
 *
 * @param buf                 slice buffer to initialise
 * @param line_count          number of entries in the line table
 * @param max_allocated_lines number of scratch lines in the pool
 * @param line_width          length of each scratch line, in IDWTELEMs
 * @param base_buffer         stored verbatim in buf->base_buffer
 */
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
{
    int i;

    buf->base_buffer = base_buffer;
    buf->line_count = line_count;
    buf->line_width = line_width;
    buf->data_count = max_allocated_lines;
    buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
    buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);

    /* A NULL result only counts as a failure for a non-empty request. */
    if ((line_count && !buf->line) || (max_allocated_lines && !buf->data_stack))
        goto fail;

    for(i = 0; i < max_allocated_lines; i++){
        buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
        if (line_width && !buf->data_stack[i])
            goto fail_lines;
    }

    buf->data_stack_top = max_allocated_lines - 1;
    return;

fail_lines:
    /* Entries [0, i) were allocated successfully; release them. */
    while (i > 0)
        av_freep(&buf->data_stack[--i]);
fail:
    av_freep(&buf->data_stack);
    av_freep(&buf->line);
    buf->line_count = 0;
    buf->data_count = 0;
    buf->data_stack_top = -1;
}
526

    
527
/**
 * Return the scratch buffer bound to a line, binding one if necessary.
 *
 * If the line already has a buffer, that buffer is returned unchanged.
 * Otherwise the top entry of the free pool is popped, recorded in
 * buf->line[line] and returned.
 *
 * The pool-exhaustion assert is only checked when a buffer actually has to
 * be popped, so asking again for a line that is already loaded is valid even
 * when no free buffers remain.
 *
 * @param buf  slice buffer
 * @param line line index, must be in [0, buf->line_count)
 * @return the buffer now associated with the line
 */
static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
{
    IDWTELEM * buffer;

    /* Same index check as slice_buffer_release(). */
    assert(line >= 0 && line < buf->line_count);

    if (buf->line[line])
        return buf->line[line];

    /* A fresh buffer is needed: the free pool must not be empty. */
    assert(buf->data_stack_top >= 0);

    buffer = buf->data_stack[buf->data_stack_top];
    buf->data_stack_top--;
    buf->line[line] = buffer;

    return buffer;
}
542

    
543
/**
 * Detach the buffer bound to a line and push it back onto the free pool.
 *
 * Asserts that the index is within [0, buf->line_count) and that the line
 * currently has a buffer.
 *
 * @param buf  slice buffer
 * @param line index of the line to release
 */
static void slice_buffer_release(slice_buffer * buf, int line)
{
    IDWTELEM * released;

    assert(line >= 0 && line < buf->line_count);
    assert(buf->line[line]);

    released = buf->line[line];
    buf->line[line] = NULL;

    /* Push onto the free stack. */
    buf->data_stack_top += 1;
    buf->data_stack[buf->data_stack_top] = released;
}
555

    
556
/**
 * Release every line that currently has a buffer bound to it, returning
 * all of those buffers to the free pool.
 *
 * @param buf slice buffer
 */
static void slice_buffer_flush(slice_buffer * buf)
{
    int line;

    for (line = 0; line < buf->line_count; line++) {
        if (!buf->line[line])
            continue;           /* nothing bound to this line */
        slice_buffer_release(buf, line);
    }
}
564

    
565
/**
 * Free all memory owned by a slice buffer.
 *
 * Bound lines are first flushed back to the pool, then the pool entries are
 * freed from the last index down to the first, followed by the pool array
 * and the line table. av_freep() leaves each freed pointer set to NULL.
 *
 * @param buf slice buffer to tear down
 */
static void slice_buffer_destroy(slice_buffer * buf)
{
    int remaining;

    slice_buffer_flush(buf);

    /* Walk the pool from its last entry down to entry 0. */
    for (remaining = buf->data_count; remaining > 0; remaining--)
        av_freep(&buf->data_stack[remaining - 1]);

    av_freep(&buf->data_stack);
    av_freep(&buf->line);
}
576

    
577
#ifdef __sgi
578
// Avoid a name clash on SGI IRIX
579
#undef qexp
580
#endif
581
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
582
static uint8_t qexp[QROOT];
583

    
584
/**
 * Folds an index back into [0, m] by reflecting it at 0 and at m.
 *
 * @param v index, may lie outside [0, m] on either side
 * @param m highest valid index
 * @return the reflected index; for example with m == 9: -3 -> 3, 10 -> 8
 */
static inline int mirror(int v, int m){
    /* With m == 0 the only valid index is 0. Without this guard a non-zero v
       would just flip sign on every pass (adding 2*m changes nothing) and the
       loop below would never end. */
    if(m == 0)
        return 0;

    /* the unsigned compare treats negative v as "too large" as well */
    while((unsigned)v > (unsigned)m){
        v=-v;              /* reflect at 0 */
        if(v<0) v+= 2*m;   /* was above m: reflect at m instead */
    }
    return v;
}
591

    
592
/**
 * Writes an integer to the range coder.
 *
 * Layout of the bits written, with the state byte used for each:
 *   - state[0]:       1 if v == 0 (nothing else follows), else 0
 *   - state[1..10]:   nbits = av_log2(|v|) in unary; positions past 9 all
 *                     share state[10]
 *   - state[22..31]:  the nbits low bits of |v|, most significant first;
 *                     bit positions past 9 all share state[31]
 *   - state[11..21]:  sign (1 for negative), only if is_signed; the state is
 *                     selected by min(nbits, 10)
 *
 * @param c         range coder to write to
 * @param state     context states, indices 0..31 are used
 * @param v         value to write
 * @param is_signed non-zero to write a sign bit
 */
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
    int bit, mag, nbits;

    if(!v){
        put_rac(c, state+0, 1);
        return;
    }

    mag  = FFABS(v);
    nbits= av_log2(mag);

    put_rac(c, state+0, 0);

    /* unary exponent followed by its terminating 0 */
    for(bit=0; bit<nbits; bit++)
        put_rac(c, state+1+FFMIN(bit,9), 1);   //1..10
    put_rac(c, state+1+FFMIN(nbits,9), 0);

    /* mantissa below the leading one */
    for(bit=nbits-1; bit>=0; bit--)
        put_rac(c, state+22+FFMIN(bit,9), (mag>>bit)&1); //22..31

    if(is_signed)
        put_rac(c, state+11+FFMIN(nbits,10), v < 0); //11..21
}
652

    
653
/**
 * Reads an integer from the range coder, using the same state layout that
 * put_symbol() writes: zero flag in state[0], unary bit count in
 * state[1..10], mantissa bits in state[22..31], sign in state[11..21].
 *
 * @param c         range coder to read from
 * @param state     context states, indices 0..31 are used
 * @param is_signed non-zero if a sign bit follows the magnitude
 * @return the decoded value
 *
 * NOTE(review): the unary count has no upper bound here, so a long run of
 * one-bits makes the magnitude accumulation overflow.
 */
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
    int nbits= 0;
    int mag  = 1;
    int bit;

    if(get_rac(c, state+0))
        return 0;

    while(get_rac(c, state+1 + FFMIN(nbits,9))) //1..10
        nbits++;

    /* rebuild the magnitude under its implicit leading one */
    for(bit=nbits-1; bit>=0; bit--)
        mag= 2*mag + get_rac(c, state+22 + FFMIN(bit,9)); //22..31

    /* the sign bit is only read when the caller asked for signed values */
    if(is_signed && get_rac(c, state+11 + FFMIN(nbits,10))) //11..21
        return -mag;
    return mag;
}
672

    
673
/**
 * Writes a non-negative integer with an adaptive escape code.
 *
 * Starting from an interval of size 1<<log2 (size 1 while log2 is below 0),
 * a 1 is written and the interval size subtracted for as long as v does not
 * fit; log2 goes up by one per escape and the interval doubles once log2 is
 * positive. A 0 then ends the escapes and the remainder is written as log2
 * plain bits, most significant first.
 *
 * @param c     range coder to write to
 * @param state context states; escapes use state[4+log2], remainder bit i
 *              uses state[31-i]
 * @param v     value to write, asserted to be >= 0
 * @param log2  starting exponent, asserted to be >= -4
 */
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
    int bit;
    int step= log2>=0 ? 1<<log2 : 1;

    assert(v>=0);
    assert(log2>=-4);

    while(v >= step){
        put_rac(c, state+4+log2, 1);
        v -= step;
        if(++log2 > 0)
            step += step;
    }
    put_rac(c, state+4+log2, 0);

    bit= log2;
    while(bit-- > 0)
        put_rac(c, state+31-bit, (v>>bit)&1);
}
692

    
693
/**
 * Reads a non-negative integer written by put_symbol2().
 *
 * Every 1 read from state[4+log2] adds the current interval size and raises
 * log2 (doubling the interval once log2 is positive); after the terminating
 * 0, log2 plain bits are read from state[31-i] and added in.
 *
 * @param c     range coder to read from
 * @param state context states
 * @param log2  starting exponent, asserted to be >= -4
 * @return the decoded value
 */
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
    int bit;
    int step = log2>=0 ? 1<<log2 : 1;
    int value= 0;

    assert(log2>=-4);

    while(get_rac(c, state+4+log2)){
        value += step;
        if(++log2 > 0)
            step += step;
    }

    bit= log2;
    while(bit-- > 0)
        value += get_rac(c, state+31-bit)<<bit;

    return value;
}
712

    
713
static av_always_inline void
714
lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
715
     int dst_step, int src_step, int ref_step,
716
     int width, int mul, int add, int shift,
717
     int highpass, int inverse){
718
    const int mirror_left= !highpass;
719
    const int mirror_right= (width&1) ^ highpass;
720
    const int w= (width>>1) - 1 + (highpass & width);
721
    int i;
722

    
723
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
724
    if(mirror_left){
725
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
726
        dst += dst_step;
727
        src += src_step;
728
    }
729

    
730
    for(i=0; i<w; i++){
731
        dst[i*dst_step] =
732
            LIFT(src[i*src_step],
733
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
734
                 inverse);
735
    }
736

    
737
    if(mirror_right){
738
        dst[w*dst_step] =
739
            LIFT(src[w*src_step],
740
                 ((mul*2*ref[w*ref_step]+add)>>shift),
741
                 inverse);
742
    }
743
}
744

    
745
static av_always_inline void
746
inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
747
         int dst_step, int src_step, int ref_step,
748
         int width, int mul, int add, int shift,
749
         int highpass, int inverse){
750
    const int mirror_left= !highpass;
751
    const int mirror_right= (width&1) ^ highpass;
752
    const int w= (width>>1) - 1 + (highpass & width);
753
    int i;
754

    
755
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
756
    if(mirror_left){
757
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
758
        dst += dst_step;
759
        src += src_step;
760
    }
761

    
762
    for(i=0; i<w; i++){
763
        dst[i*dst_step] =
764
            LIFT(src[i*src_step],
765
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
766
                 inverse);
767
    }
768

    
769
    if(mirror_right){
770
        dst[w*dst_step] =
771
            LIFT(src[w*src_step],
772
                 ((mul*2*ref[w*ref_step]+add)>>shift),
773
                 inverse);
774
    }
775
}
776

    
777
#ifndef liftS
778
static av_always_inline void
779
liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
780
      int dst_step, int src_step, int ref_step,
781
      int width, int mul, int add, int shift,
782
      int highpass, int inverse){
783
    const int mirror_left= !highpass;
784
    const int mirror_right= (width&1) ^ highpass;
785
    const int w= (width>>1) - 1 + (highpass & width);
786
    int i;
787

    
788
    assert(shift == 4);
789
#define LIFTS(src, ref, inv) \
790
        ((inv) ? \
791
            (src) + (((ref) + 4*(src))>>shift): \
792
            -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
793
    if(mirror_left){
794
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
795
        dst += dst_step;
796
        src += src_step;
797
    }
798

    
799
    for(i=0; i<w; i++){
800
        dst[i*dst_step] =
801
            LIFTS(src[i*src_step],
802
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
803
                  inverse);
804
    }
805

    
806
    if(mirror_right){
807
        dst[w*dst_step] =
808
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
809
    }
810
}
811
static av_always_inline void
812
inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
813
          int dst_step, int src_step, int ref_step,
814
          int width, int mul, int add, int shift,
815
          int highpass, int inverse){
816
    const int mirror_left= !highpass;
817
    const int mirror_right= (width&1) ^ highpass;
818
    const int w= (width>>1) - 1 + (highpass & width);
819
    int i;
820

    
821
    assert(shift == 4);
822
#define LIFTS(src, ref, inv) \
823
    ((inv) ? \
824
        (src) + (((ref) + 4*(src))>>shift): \
825
        -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
826
    if(mirror_left){
827
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
828
        dst += dst_step;
829
        src += src_step;
830
    }
831

    
832
    for(i=0; i<w; i++){
833
        dst[i*dst_step] =
834
            LIFTS(src[i*src_step],
835
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
836
                  inverse);
837
    }
838

    
839
    if(mirror_right){
840
        dst[w*dst_step] =
841
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
842
    }
843
}
844
#endif /* ! liftS */
845

    
846
/**
 * In-place horizontal 5/3 decomposition of one row.
 *
 * The row is first split into temp[]: even-indexed samples go to the front,
 * odd-indexed samples start at offset w2 = (width+1)/2. Two lift() passes
 * then write the result back into b, second half first.
 *
 * @param b     row of width samples, overwritten with the result
 * @param width number of samples; also the size of the stack temporary
 */
static void horizontal_decompose53i(DWTELEM *b, int width){
    DWTELEM temp[width];
    const int pairs= width>>1;
    const int w2   = (width+1)>>1;
    int k;

    for(k=0; k<pairs; k++){
        temp[k   ]= b[2*k    ];
        temp[k+w2]= b[2*k + 1];
    }
    /* odd width: one more even-indexed sample is left over */
    if(width&1)
        temp[pairs]= b[2*pairs];

    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
}
894

    
895
/**
 * Per column: b1 -= (b0 + b2) >> 1.
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col<width){
        b1[col]= b1[col] - ((b0[col] + b2[col])>>1);
        col++;
    }
}
902

    
903
/**
 * Per column: b1 += (b0 + b2 + 2) >> 2.
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col<width){
        b1[col]= b1[col] + ((b0[col] + b2[col] + 2)>>2);
        col++;
    }
}
910

    
911
/**
 * One level of in-place 2D 5/3 decomposition.
 *
 * Walks the rows two at a time starting two rows above the image. Row
 * addresses outside the image are folded back in with mirror(); each step is
 * skipped when the row it would modify lies outside [0, height).
 *
 * @param buffer top-left sample
 * @param width  samples per row
 * @param height number of rows
 * @param stride distance between rows, in elements
 */
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
    const unsigned rows= height;
    DWTELEM *prev2= buffer + mirror(-2-1, height-1)*stride;
    DWTELEM *prev1= buffer + mirror(-2  , height-1)*stride;
    int y= -2;

    while(y<height){
        DWTELEM *cur = buffer + mirror(y+1, height-1)*stride;
        DWTELEM *next= buffer + mirror(y+2, height-1)*stride;

        /* the unsigned compares are false for negative row numbers too */
        if((unsigned)(y+1)<rows) horizontal_decompose53i(cur , width);
        if((unsigned)(y+2)<rows) horizontal_decompose53i(next, width);

        if((unsigned)(y+1)<rows) vertical_decompose53iH0(prev1, cur  , next, width);
        if((unsigned)(y+0)<rows) vertical_decompose53iL0(prev2, prev1, cur , width);

        prev2= cur;
        prev1= next;
        y+= 2;
    }
}
930

    
931
/**
 * In-place horizontal 9/7 decomposition of one row.
 *
 * Four lifting passes using the W_A*, W_B*, W_C* and W_D* constants in that
 * order. The first two read b with stride 2 and write into temp[]; the last
 * two write back into b, with the second half starting at (width+1)/2.
 *
 * @param b     row of width samples, overwritten with the result
 * @param width number of samples; also the size of the stack temporary
 */
static void horizontal_decompose97i(DWTELEM *b, int width){
    DWTELEM temp[width];
    const int half= (width+1)>>1;
    DWTELEM * const temp_hi= temp + half;
    DWTELEM * const b_hi   = b    + half;

    lift (temp_hi, b + 1  , b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
    liftS(temp   , b      , temp_hi, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
    lift (b_hi   , temp_hi, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
    lift (b      , temp   , b_hi   , 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
}
940

    
941

    
942
/**
 * Per column: b1 -= (W_AM*(b0 + b2) + W_AO) >> W_AS.
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col<width){
        b1[col]= b1[col] - ((W_AM*(b0[col] + b2[col])+W_AO)>>W_AS);
        col++;
    }
}
949

    
950
/**
 * Per column: b1 += (W_CM*(b0 + b2) + W_CO) >> W_CS.
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col<width){
        b1[col]= b1[col] + ((W_CM*(b0[col] + b2[col])+W_CO)>>W_CS);
        col++;
    }
}
957

    
958
/**
 * Per-column update of b1 from b0 and b2, in one of two forms:
 *   liftS defined as a macro: b1 -= (W_BM*(b0 + b2) + W_BO) >> W_BS
 *   otherwise:                b1 = (64*b1 - 4*(b0 + b2) + 5*W_BO + (5<<27)) / 80 - (1<<23)
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col<width){
#ifdef liftS
        b1[col]= b1[col] - ((W_BM*(b0[col] + b2[col])+W_BO)>>W_BS);
#else
        b1[col]= (16*4*b1[col] - 4*(b0[col] + b2[col]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
#endif
        col++;
    }
}
969

    
970
/**
 * Per column: b1 += (W_DM*(b0 + b2) + W_DO) >> W_DS.
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
    int col= 0;

    while(col<width){
        b1[col]= b1[col] + ((W_DM*(b0[col] + b2[col])+W_DO)>>W_DS);
        col++;
    }
}
977

    
978
/**
 * One level of in-place 2D 9/7 decomposition.
 *
 * Walks the rows two at a time starting four rows above the image, keeping a
 * window of six row pointers. Row addresses outside the image are folded
 * back in with mirror(); each step is skipped when the row it would modify
 * lies outside [0, height).
 *
 * @param buffer top-left sample
 * @param width  samples per row
 * @param height number of rows
 * @param stride distance between rows, in elements
 */
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
    const unsigned rows= height;
    DWTELEM *r0= buffer + mirror(-4-1, height-1)*stride;
    DWTELEM *r1= buffer + mirror(-4  , height-1)*stride;
    DWTELEM *r2= buffer + mirror(-4+1, height-1)*stride;
    DWTELEM *r3= buffer + mirror(-4+2, height-1)*stride;
    int y= -4;

    while(y<height){
        DWTELEM *r4= buffer + mirror(y+3, height-1)*stride;
        DWTELEM *r5= buffer + mirror(y+4, height-1)*stride;

        /* the unsigned compares are false for negative row numbers too */
        if((unsigned)(y+3)<rows) horizontal_decompose97i(r4, width);
        if((unsigned)(y+4)<rows) horizontal_decompose97i(r5, width);

        if((unsigned)(y+3)<rows) vertical_decompose97iH0(r3, r4, r5, width);
        if((unsigned)(y+2)<rows) vertical_decompose97iL0(r2, r3, r4, width);
        if((unsigned)(y+1)<rows) vertical_decompose97iH1(r1, r2, r3, width);
        if((unsigned)(y+0)<rows) vertical_decompose97iL1(r0, r1, r2, width);

        /* slide the window down by two rows */
        r0= r2;
        r1= r3;
        r2= r4;
        r3= r5;
        y+= 2;
    }
}
1003

    
1004
/**
 * Runs decomposition_count levels of forward 2D transform in place.
 *
 * Level n works on width>>n by height>>n samples with a row distance of
 * stride<<n. A type other than DWT_97 or DWT_53 does nothing.
 *
 * @param buffer              top-left sample
 * @param width,height        size at level 0
 * @param stride              row distance at level 0, in elements
 * @param type                DWT_97 or DWT_53
 * @param decomposition_count number of levels
 */
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
    int level= 0;

    while(level<decomposition_count){
        if(type==DWT_97)
            spatial_decompose97i(buffer, width>>level, height>>level, stride<<level);
        else if(type==DWT_53)
            spatial_decompose53i(buffer, width>>level, height>>level, stride<<level);
        level++;
    }
}
1014

    
1015
/**
 * In-place horizontal 5/3 composition of one row.
 *
 * Two inv_lift() passes build both halves in temp[] (second half at offset
 * w2 = (width+1)/2); the halves are then interleaved back into b, first half
 * to even indices and second half to odd indices.
 *
 * @param b     row of width samples, overwritten with the result
 * @param width number of samples; also the size of the stack temporary
 */
static void horizontal_compose53i(IDWTELEM *b, int width){
    IDWTELEM temp[width];
    const int pairs= width>>1;
    const int w2   = (width+1)>>1;
    int k;

    inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
    inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);

    for(k=0; k<pairs; k++){
        b[2*k    ]= temp[k   ];
        b[2*k + 1]= temp[k+w2];
    }
    /* odd width: the first half has one more sample */
    if(width&1)
        b[2*pairs]= temp[pairs];
}
1061

    
1062
/**
 * Per column: b1 += (b0 + b2) >> 1.
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int col= 0;

    while(col<width){
        b1[col]= b1[col] + ((b0[col] + b2[col])>>1);
        col++;
    }
}
1069

    
1070
/**
 * Per column: b1 -= (b0 + b2 + 2) >> 2.
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int col= 0;

    while(col<width){
        b1[col]= b1[col] - ((b0[col] + b2[col] + 2)>>2);
        col++;
    }
}
1077

    
1078
/**
 * Prepares the 5/3 composition state for a slice buffer: the row counter
 * starts at -1 and b0/b1 are the mirrored lines for rows -2 and -1.
 *
 * @param cs          state to initialise
 * @param sb          slice buffer the lines are fetched from
 * @param height      number of rows at this level
 * @param stride_line line-number step between rows at this level
 */
static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
    const int last= height-1;

    cs->y = -1;
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, last) * stride_line);
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , last) * stride_line);
}
1083

    
1084
/**
 * Prepares the 5/3 composition state for a flat buffer: the row counter
 * starts at -1 and b0/b1 point at the mirrored rows -2 and -1.
 *
 * @param cs     state to initialise
 * @param buffer top-left sample
 * @param height number of rows at this level
 * @param stride row distance at this level, in elements
 */
static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
    const int last= height-1;

    cs->y = -1;
    cs->b0 = buffer + mirror(-1-1, last)*stride;
    cs->b1 = buffer + mirror(-1  , last)*stride;
}
1089

    
1090
/**
 * Advances the slice-buffer 5/3 composition by two rows.
 *
 * Fetches the lines for rows y+1 and y+2 (mirrored), runs the two vertical
 * steps and then the horizontal composition of the two oldest rows; each
 * step is skipped when its row number is outside [0, height). Finally the
 * window moves down: b0/b1 become the two new lines and y grows by 2.
 *
 * @param cs          composition state, updated
 * @param sb          slice buffer the lines are fetched from
 * @param width       samples per row at this level
 * @param height      number of rows at this level
 * @param stride_line line-number step between rows at this level
 */
static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
    const int y= cs->y;
    const unsigned rows= height;

    IDWTELEM *line0= cs->b0;
    IDWTELEM *line1= cs->b1;
    IDWTELEM *line2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
    IDWTELEM *line3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);

    if((unsigned)(y+1)<rows) vertical_compose53iL0(line1, line2, line3, width);
    if((unsigned)(y+0)<rows) vertical_compose53iH0(line0, line1, line2, width);

    if((unsigned)(y-1)<rows) horizontal_compose53i(line0, width);
    if((unsigned)(y+0)<rows) horizontal_compose53i(line1, width);

    cs->b0 = line2;
    cs->b1 = line3;
    cs->y  = y + 2;
}
1108

    
1109
/**
 * Advances the flat-buffer 5/3 composition by two rows.
 *
 * Locates rows y+1 and y+2 (mirrored), runs the two vertical steps and then
 * the horizontal composition of the two oldest rows; each step is skipped
 * when its row number is outside [0, height). Finally the window moves down:
 * b0/b1 become the two new rows and y grows by 2.
 *
 * @param cs     composition state, updated
 * @param buffer top-left sample
 * @param width  samples per row at this level
 * @param height number of rows at this level
 * @param stride row distance at this level, in elements
 */
static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
    const int y= cs->y;
    const unsigned rows= height;
    IDWTELEM *row0= cs->b0;
    IDWTELEM *row1= cs->b1;
    IDWTELEM *row2= buffer + mirror(y+1, height-1)*stride;
    IDWTELEM *row3= buffer + mirror(y+2, height-1)*stride;

    if((unsigned)(y+1)<rows) vertical_compose53iL0(row1, row2, row3, width);
    if((unsigned)(y+0)<rows) vertical_compose53iH0(row0, row1, row2, width);

    if((unsigned)(y-1)<rows) horizontal_compose53i(row0, width);
    if((unsigned)(y+0)<rows) horizontal_compose53i(row1, width);

    cs->b0 = row2;
    cs->b1 = row3;
    cs->y  = y + 2;
}
1126

    
1127
/**
 * Whole-image 5/3 composition of one level: initialises a local state and
 * steps it two rows at a time until its row counter exceeds height.
 *
 * @param buffer top-left sample
 * @param width  samples per row
 * @param height number of rows
 * @param stride row distance, in elements
 */
static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
    DWTCompose state;

    for(spatial_compose53i_init(&state, buffer, height, stride); state.y <= height; )
        spatial_compose53i_dy(&state, buffer, width, height, stride);
}
1133

    
1134

    
1135
/**
 * In-place horizontal 9/7 composition of one row.
 *
 * Four lifting passes using the W_D*, W_C*, W_B* and W_A* constants in that
 * order. The first two read the two halves of b (second half at
 * (width+1)/2) into temp[]; the last two write back into b with stride 2,
 * starting at b[0] and b[1] respectively.
 *
 * @param b     row of width samples, overwritten with the result
 * @param width number of samples; also the size of the stack temporary
 */
void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
    IDWTELEM temp[width];
    const int half= (width+1)>>1;
    IDWTELEM * const temp_hi= temp + half;
    IDWTELEM * const b_hi   = b    + half;

    inv_lift (temp   , b      , b_hi   , 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
    inv_lift (temp_hi, b_hi   , temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
    inv_liftS(b      , temp   , temp_hi, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
    inv_lift (b + 1  , temp_hi, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
}
1144

    
1145
/**
 * Per column: b1 += (W_AM*(b0 + b2) + W_AO) >> W_AS.
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int col= 0;

    while(col<width){
        b1[col]= b1[col] + ((W_AM*(b0[col] + b2[col])+W_AO)>>W_AS);
        col++;
    }
}
1152

    
1153
/**
 * Per column: b1 -= (W_CM*(b0 + b2) + W_CO) >> W_CS.
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int col= 0;

    while(col<width){
        b1[col]= b1[col] - ((W_CM*(b0[col] + b2[col])+W_CO)>>W_CS);
        col++;
    }
}
1160

    
1161
/**
 * Per-column update of b1 from b0 and b2, in one of two forms:
 *   liftS defined as a macro: b1 += (W_BM*(b0 + b2) + W_BO) >> W_BS
 *   otherwise:                b1 += (W_BM*(b0 + b2) + 4*b1 + W_BO) >> W_BS
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int col= 0;

    while(col<width){
#ifdef liftS
        b1[col]= b1[col] + ((W_BM*(b0[col] + b2[col])+W_BO)>>W_BS);
#else
        b1[col]= b1[col] + ((W_BM*(b0[col] + b2[col])+4*b1[col]+W_BO)>>W_BS);
#endif
        col++;
    }
}
1172

    
1173
/**
 * Per column: b1 -= (W_DM*(b0 + b2) + W_DO) >> W_DS.
 *
 * @param b0,b2 rows that are only read
 * @param b1    row updated in place
 * @param width number of columns
 */
static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
    int col= 0;

    while(col<width){
        b1[col]= b1[col] - ((W_DM*(b0[col] + b2[col])+W_DO)>>W_DS);
        col++;
    }
}
1180

    
1181
/**
 * All four vertical 9/7 composition steps for one column at a time.
 *
 * Per column, in this order (each step reads the result of the previous):
 *   b4 -= (W_DM*(b3 + b5) + W_DO) >> W_DS
 *   b3 -= (W_CM*(b2 + b4) + W_CO) >> W_CS
 *   b2 += the W_B* step (form depends on whether liftS is a macro)
 *   b1 += (W_AM*(b0 + b2) + W_AO) >> W_AS
 *
 * @param b0,b5       outermost rows, only read
 * @param b1,b2,b3,b4 rows updated in place
 * @param width       number of columns
 */
void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
    int col= 0;

    while(col<width){
        b4[col]= b4[col] - ((W_DM*(b3[col] + b5[col])+W_DO)>>W_DS);
        b3[col]= b3[col] - ((W_CM*(b2[col] + b4[col])+W_CO)>>W_CS);
#ifdef liftS
        b2[col]= b2[col] + ((W_BM*(b1[col] + b3[col])+W_BO)>>W_BS);
#else
        b2[col]= b2[col] + ((W_BM*(b1[col] + b3[col])+4*b2[col]+W_BO)>>W_BS);
#endif
        b1[col]= b1[col] + ((W_AM*(b0[col] + b2[col])+W_AO)>>W_AS);
        col++;
    }
}
1195

    
1196
/**
 * Prepares the 9/7 composition state for a slice buffer: the row counter
 * starts at -3 and b0..b3 are the mirrored lines for rows -4 to -1.
 *
 * @param cs          state to initialise
 * @param sb          slice buffer the lines are fetched from
 * @param height      number of rows at this level
 * @param stride_line line-number step between rows at this level
 */
static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
    const int last= height-1;

    cs->y = -3;
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, last) * stride_line);
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , last) * stride_line);
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, last) * stride_line);
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, last) * stride_line);
}
1203

    
1204
/**
 * Prepares the 9/7 composition state for a flat buffer: the row counter
 * starts at -3 and b0..b3 point at the mirrored rows -4 to -1.
 *
 * @param cs     state to initialise
 * @param buffer top-left sample
 * @param height number of rows at this level
 * @param stride row distance at this level, in elements
 */
static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
    const int last= height-1;

    cs->y = -3;
    cs->b0 = buffer + mirror(-3-1, last)*stride;
    cs->b1 = buffer + mirror(-3  , last)*stride;
    cs->b2 = buffer + mirror(-3+1, last)*stride;
    cs->b3 = buffer + mirror(-3+2, last)*stride;
}
1211

    
1212
/**
 * Advances the slice-buffer 9/7 composition by two rows.
 *
 * Fetches the lines for rows y+3 and y+4 (mirrored). Near the top or bottom
 * (y <= 0 or y+4 >= height) the four vertical steps run one by one, each
 * skipped when its row number is outside [0, height); otherwise the combined
 * dsp->vertical_compose97i is called on all six lines. The two oldest rows
 * are then composed horizontally through dsp->horizontal_compose97i, and the
 * window moves down by two rows.
 *
 * @param dsp         provides vertical_compose97i and horizontal_compose97i
 * @param cs          composition state, updated
 * @param sb          slice buffer the lines are fetched from
 * @param width       samples per row at this level
 * @param height      number of rows at this level
 * @param stride_line line-number step between rows at this level
 */
static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
    const int y = cs->y;
    const unsigned rows= height;

    IDWTELEM *line0= cs->b0;
    IDWTELEM *line1= cs->b1;
    IDWTELEM *line2= cs->b2;
    IDWTELEM *line3= cs->b3;
    IDWTELEM *line4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
    IDWTELEM *line5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);

    if(y<=0 || y+4>=height){
        if((unsigned)(y+3)<rows) vertical_compose97iL1(line3, line4, line5, width);
        if((unsigned)(y+2)<rows) vertical_compose97iH1(line2, line3, line4, width);
        if((unsigned)(y+1)<rows) vertical_compose97iL0(line1, line2, line3, width);
        if((unsigned)(y+0)<rows) vertical_compose97iH0(line0, line1, line2, width);
    }else{
        dsp->vertical_compose97i(line0, line1, line2, line3, line4, line5, width);
    }

    if((unsigned)(y-1)<rows) dsp->horizontal_compose97i(line0, width);
    if((unsigned)(y+0)<rows) dsp->horizontal_compose97i(line1, width);

    cs->b0= line2;
    cs->b1= line3;
    cs->b2= line4;
    cs->b3= line5;
    cs->y = y + 2;
}
1240

    
1241
/**
 * Advances the flat-buffer 9/7 composition by two rows.
 *
 * Locates rows y+3 and y+4 (mirrored), runs the four vertical steps and then
 * the horizontal composition of the two oldest rows; each step is skipped
 * when its row number is outside [0, height). Finally the window moves down
 * by two rows.
 *
 * @param cs     composition state, updated
 * @param buffer top-left sample
 * @param width  samples per row at this level
 * @param height number of rows at this level
 * @param stride row distance at this level, in elements
 */
static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
    const int y = cs->y;
    const unsigned rows= height;
    IDWTELEM *row0= cs->b0;
    IDWTELEM *row1= cs->b1;
    IDWTELEM *row2= cs->b2;
    IDWTELEM *row3= cs->b3;
    IDWTELEM *row4= buffer + mirror(y+3, height-1)*stride;
    IDWTELEM *row5= buffer + mirror(y+4, height-1)*stride;

    if((unsigned)(y+3)<rows) vertical_compose97iL1(row3, row4, row5, width);
    if((unsigned)(y+2)<rows) vertical_compose97iH1(row2, row3, row4, width);
    if((unsigned)(y+1)<rows) vertical_compose97iL0(row1, row2, row3, width);
    if((unsigned)(y+0)<rows) vertical_compose97iH0(row0, row1, row2, width);

    if((unsigned)(y-1)<rows) ff_snow_horizontal_compose97i(row0, width);
    if((unsigned)(y+0)<rows) ff_snow_horizontal_compose97i(row1, width);

    cs->b0= row2;
    cs->b1= row3;
    cs->b2= row4;
    cs->b3= row5;
    cs->y = y + 2;
}
1264

    
1265
/**
 * Whole-image 9/7 composition of one level: initialises a local state and
 * steps it two rows at a time until its row counter exceeds height.
 *
 * @param buffer top-left sample
 * @param width  samples per row
 * @param height number of rows
 * @param stride row distance, in elements
 */
static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
    DWTCompose state;

    for(spatial_compose97i_init(&state, buffer, height, stride); state.y <= height; )
        spatial_compose97i_dy(&state, buffer, width, height, stride);
}
1271

    
1272
/**
 * Initialises one slice-buffer composition state per level, from the highest
 * level down to level 0. Level n uses height>>n and stride_line<<n. A type
 * other than DWT_97 or DWT_53 leaves the states untouched.
 *
 * @param cs                  array of at least decomposition_count states
 * @param sb                  slice buffer
 * @param width               not used by this function
 * @param height              number of rows at level 0
 * @param stride_line         line-number step at level 0
 * @param type                DWT_97 or DWT_53
 * @param decomposition_count number of levels
 */
static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
    int level= decomposition_count;

    while(level-- > 0){
        if(type==DWT_97)
            spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level);
        else if(type==DWT_53)
            spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level);
    }
}
1281

    
1282
/**
 * Initialises one flat-buffer composition state per level, from the highest
 * level down to level 0. Level n uses height>>n and stride<<n. A type other
 * than DWT_97 or DWT_53 leaves the states untouched.
 *
 * @param cs                  array of at least decomposition_count states
 * @param buffer              top-left sample
 * @param width               not used by this function
 * @param height              number of rows at level 0
 * @param stride              row distance at level 0, in elements
 * @param type                DWT_97 or DWT_53
 * @param decomposition_count number of levels
 */
static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
    int level= decomposition_count;

    while(level-- > 0){
        if(type==DWT_97)
            spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level);
        else if(type==DWT_53)
            spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level);
    }
}
1291

    
1292
/**
 * Advances the flat-buffer inverse transform far enough to cover row y.
 *
 * From the highest level down, each level's state is stepped while its row
 * counter is at most min((y>>level) + support, height>>level).
 *
 * @param cs                  per-level states set up by ff_spatial_idwt_init()
 * @param buffer              top-left sample
 * @param width,height        size at level 0
 * @param stride              row distance at level 0, in elements
 * @param type                DWT_97 or DWT_53; any other value does nothing
 * @param decomposition_count number of levels
 * @param y                   level-0 row up to which output is needed
 */
static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
    /* NOTE(review): presumably 1 is DWT_53 and 3/5 are the filter supports
       in rows -- confirm against the DWT_* definitions. */
    const int support = type==1 ? 3 : 5;
    int level;
    if(type==2) return;
    /* For any other type neither case below would run, so cs[level].y could
       never advance and the while loop would spin forever. */
    if(type!=DWT_97 && type!=DWT_53) return;

    for(level=decomposition_count-1; level>=0; level--){
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
            switch(type){
            case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
                break;
            case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
                break;
            }
        }
    }
}
1308

    
1309
/**
 * Advances the slice-buffer inverse transform far enough to cover row y.
 *
 * From the highest level down, each level's state is stepped while its row
 * counter is at most min((y>>level) + support, height>>level).
 *
 * @param dsp                 passed on to the 9/7 step
 * @param cs                  per-level states set up by
 *                            ff_spatial_idwt_buffered_init()
 * @param slice_buf           slice buffer holding the lines
 * @param width,height        size at level 0
 * @param stride_line         line-number step at level 0
 * @param type                DWT_97 or DWT_53; any other value does nothing
 * @param decomposition_count number of levels
 * @param y                   level-0 row up to which output is needed
 */
static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
    /* NOTE(review): presumably 1 is DWT_53 and 3/5 are the filter supports
       in rows -- confirm against the DWT_* definitions. */
    const int support = type==1 ? 3 : 5;
    int level;
    if(type==2) return;
    /* For any other type neither case below would run, so cs[level].y could
       never advance and the while loop would spin forever. */
    if(type!=DWT_97 && type!=DWT_53) return;

    for(level=decomposition_count-1; level>=0; level--){
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
            switch(type){
            case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
                break;
            case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
                break;
            }
        }
    }
}
1325

    
1326
/**
 * Full in-place inverse transform: initialises the per-level states, then
 * requests the slices for rows 0, 4, 8, ... below height.
 *
 * @param buffer              top-left sample
 * @param width,height        size at level 0
 * @param stride              row distance at level 0, in elements
 * @param type                transform type, passed through unchanged
 * @param decomposition_count number of levels; the local state array holds
 *                            MAX_DECOMPOSITIONS entries
 */
static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
    DWTCompose cs[MAX_DECOMPOSITIONS];
    int y= 0;

    ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
    while(y<height){
        ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
        y+= 4;
    }
}
1333

    
1334
/**
 * Collects the neighbours of coefficient (x,y) that form its coding context:
 * left, top-left, top and top-right within the band, plus the coefficient at
 * (x>>1, y>>1) in the parent band. Neighbours that lie outside the band, and
 * the parent value when parent is NULL or the position is outside
 * b->parent, are reported as 0.
 */
static inline void c0run_neighbours(SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int w, int x, int y,
                                    int *l, int *lt, int *t, int *rt, int *p){
    *l= *lt= *t= *rt= *p= 0;

    if(y){
        *t= src[x + (y-1)*stride];
        if(x)
            *lt= src[x - 1 + (y-1)*stride];
        if(x + 1 < w)
            *rt= src[x + 1 + (y-1)*stride];
    }
    if(x)
        *l= src[x - 1 + y*stride];
    if(parent){
        int px= x>>1;
        int py= y>>1;
        if(px<b->parent->width && py<b->parent->height)
            *p= parent[px + py*2*stride];
    }
}

/**
 * Encodes one subband with the range coder.
 *
 * Coefficients whose neighbours are all zero are run-length coded: a first
 * pass collects, for each non-zero coefficient found in such a position, the
 * number of zero coefficients in such positions since the previous one,
 * followed by one trailing run. The second pass writes the run count, the
 * runs, a significance bit for every coefficient that has a non-zero
 * neighbour, and magnitude and sign for every non-zero coefficient.
 *
 * @param s           encoder context; s->c is the range coder written to
 * @param b           subband (size, states, parent band)
 * @param src         coefficients of this band
 * @param parent      coefficients of the parent band, or NULL
 * @param stride      row distance of src in elements (the parent is indexed
 *                    with 2*stride)
 * @param orientation unused here (only referenced by a disabled second-left
 *                    neighbour experiment that has been removed)
 * @return 0 on success, -1 if fewer than w*40 bytes of output space remain
 *         at the start of a row
 */
static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
    const int w= b->width;
    const int h= b->height;
    /* One run per zero-context non-zero coefficient plus the trailing run:
       for w*h <= 1 that can be w*h + 1 entries, hence the extra slot. */
    int runs[w*h + 1];
    int run=0;
    int run_index=0;
    int max_index;
    int x, y;

    /* pass 1: gather the zero runs */
    for(y=0; y<h; y++){
        for(x=0; x<w; x++){
            int l, lt, t, rt, p;
            const int v= src[x + y*stride];

            c0run_neighbours(b, src, parent, stride, w, x, y, &l, &lt, &t, &rt, &p);

            if(l|lt|t|rt|p)
                continue;
            if(v){
                runs[run_index++]= run;
                run=0;
            }else{
                run++;
            }
        }
    }
    max_index= run_index;
    runs[run_index++]= run;
    run_index=0;
    run= runs[run_index++];

    put_symbol2(&s->c, b->state[30], max_index, 0);
    if(run_index <= max_index)
        put_symbol2(&s->c, b->state[1], run, 3);

    /* pass 2: write the coefficients */
    for(y=0; y<h; y++){
        if(s->c.bytestream_end - s->c.bytestream < w*40){
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
            return -1;
        }
        for(x=0; x<w; x++){
            int l, lt, t, rt, p;
            const int v= src[x + y*stride];

            c0run_neighbours(b, src, parent, stride, w, x, y, &l, &lt, &t, &rt, &p);

            if(l|lt|t|rt|p){
                int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));

                put_rac(&s->c, &b->state[0][context], !!v);
            }else{
                if(!run){
                    /* the current run is used up, so this one is non-zero */
                    run= runs[run_index++];

                    if(run_index <= max_index)
                        put_symbol2(&s->c, b->state[1], run, 3);
                    assert(v);
                }else{
                    run--;
                    assert(!v);
                }
            }
            if(v){
                int context= av_log2(3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
                int l2= 2*FFABS(l) + (l<0);
                int t2= 2*FFABS(t) + (t<0);

                put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
                put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
            }
        }
    }
    return 0;
}
1453

    
1454
/**
 * Encodes one subband. Only the c0run coder is wired in; the qtree, z0run
 * and dzr variants named in the original comments are not called.
 *
 * @return whatever encode_subband_c0run() returns
 */
static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
    const int ret= encode_subband_c0run(s, b, src, parent, stride, orientation);

    return ret;
}
1460

    
1461
/**
 * Decode the coefficients of one subband from the range coder into the
 * sparse list b->x_coeff.
 *
 * Each row is written as a sequence of (x, coeff) entries for its nonzero
 * coefficients followed by an end marker whose x is w+1; one additional end
 * marker is written after the last row. A stored coeff is
 * 2*(decoded symbol + 1) plus one bit read with get_rac() in bit 0.
 *
 * @param s           codec context; s->c is the range coder that is read
 * @param b           subband being decoded (b->state and b->x_coeff are written)
 * @param parent      subband whose x_coeff list supplies the parent context,
 *                    or NULL if there is none
 * @param orientation not referenced by this function
 */
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
    const int w= b->width;
    const int h= b->height;
    int x,y;
    int run, runs;
    x_and_coeff *xc= b->x_coeff;            // next entry to write
    x_and_coeff *prev_xc= NULL;             // read cursor in the previously decoded row
    x_and_coeff *prev2_xc= xc;              // first entry of the row being written
    x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL; // read cursor in the parent list
    x_and_coeff *prev_parent_xc= parent_xc; // first entry of the current parent row

    // number of coded runs, then the first run length (INT_MAX once exhausted)
    runs= get_symbol2(&s->c, b->state[30], 0);
    if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
    else           run= INT_MAX;

    for(y=0; y<h; y++){
        int v=0;
        int lt=0, t=0, rt=0;

        // prime the top-right neighbour for x==0 from the previous row
        if(y && prev_xc->x == 0){
            rt= prev_xc->coeff;
        }
        for(x=0; x<w; x++){
            int p=0;
            const int l= v; // left neighbour is the value decoded last

            lt= t; t= rt;

            if(y){
                // advance in the previous row and fetch the entry at x+1, if any
                if(prev_xc->x <= x)
                    prev_xc++;
                if(prev_xc->x == x + 1)
                    rt= prev_xc->coeff;
                else
                    rt=0;
            }
            if(parent_xc){
                // the parent list is indexed at half the horizontal position
                if(x>>1 > parent_xc->x){
                    parent_xc++;
                }
                if(x>>1 == parent_xc->x){
                    p= parent_xc->coeff;
                }
            }
            if(l|lt|t|rt|p){
                // some neighbour is nonzero: significance is coded with a context
                int context= av_log2(3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));

                v=get_rac(&s->c, &b->state[0][context]);
                if(v){
                    v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);

                    xc->x=x;
                    (xc++)->coeff= v;
                }
            }else{
                // all neighbours are zero: positions are covered by run lengths
                if(!run){
                    // run exhausted: fetch the next run and decode a nonzero value here
                    if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
                    else           run= INT_MAX;
                    v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
                    v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);

                    xc->x=x;
                    (xc++)->coeff= v;
                }else{
                    int max_run;
                    run--;
                    v=0;

                    // skip ahead, bounded by the remaining run, the next entry of
                    // the previous row (or the row end) and the next parent entry
                    if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
                    else  max_run= FFMIN(run, w-x-1);
                    if(parent_xc)
                        max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
                    x+= max_run;
                    run-= max_run;
                }
            }
        }
        (xc++)->x= w+1; //end marker
        prev_xc= prev2_xc;
        prev2_xc= xc;

        if(parent_xc){
            if(y&1){
                // after an odd row: move past the parent row's end marker
                while(parent_xc->x != parent->width+1)
                    parent_xc++;
                parent_xc++;
                prev_parent_xc= parent_xc;
            }else{
                // after an even row: rewind, the same parent row is used again
                parent_xc= prev_parent_xc;
            }
        }
    }

    (xc++)->x= w+1; //end marker
}

/**
 * Dequantize the rows [start_y, h) of a subband from its sparse coefficient
 * list (b->x_coeff) into lines of the slice buffer.
 *
 * Each line is cleared first; then every list entry with x < b->width is
 * written as a signed value: (coeff>>1) is scaled by qmul, offset by qadd,
 * shifted down by QEXPSHIFT, and negated when bit 0 of coeff is set.
 *
 * @param s          codec context (qlog, qbias, spatial_idwt_buffer are read)
 * @param b          subband to reconstruct
 * @param sb         slice buffer the lines are fetched from
 * @param start_y    first row to produce
 * @param h          one past the last row to produce
 * @param save_state save_state[0] holds the position in b->x_coeff between
 *                   calls; it is read when start_y != 0 and always written
 */
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
    const int w= b->width;
    int y;
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
    int new_index = 0;

    // no rescaling for this buffer / for lossless mode
    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
        qadd= 0;
        qmul= 1<<QEXPSHIFT;
    }

    /* If we are on the second or later slice, restore our index. */
    if (start_y != 0)
        new_index = save_state[0];

    for(y=start_y; y<h; y++){
        IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
        int v, x;

        memset(line, 0, b->width*sizeof(IDWTELEM));
        v = b->x_coeff[new_index].coeff;
        x = b->x_coeff[new_index++].x;
        // entries with x >= w terminate the row (the end marker is consumed)
        while(x < w){
            const int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
            const int u= -(v&1);  // 0 or all ones, taken from bit 0
            line[x] = (t^u) - u;  // t if u==0, -t otherwise

            v = b->x_coeff[new_index].coeff;
            x = b->x_coeff[new_index++].x;
        }
    }

    /* Save our variables for the next slice. */
    save_state[0] = new_index;
}

/**
 * Reset all adaptive range coder states to MID_STATE: the per-subband
 * states of the three planes as well as the header and block states.
 */
static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
    int plane_index, level, orientation;

    for(plane_index=0; plane_index<3; plane_index++){
        for(level=0; level<MAX_DECOMPOSITIONS; level++){
            // orientation 0 is only visited at level 0
            for(orientation=level ? 1:0; orientation<4; orientation++){
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE,
                       sizeof(s->plane[plane_index].band[level][orientation].state));
            }
        }
    }
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
}

/**
 * (Re)allocate the zero-initialized block array for the current picture size.
 *
 * s->b_width and s->b_height are set to the picture dimensions divided by
 * the macroblock size, rounded up; the array holds b_width*b_height
 * entries times 4^block_max_depth.
 *
 * @return 0 on success, -1 if the allocation failed (s->block is NULL then)
 */
static int alloc_blocks(SnowContext *s){
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);

    s->b_width = w;
    s->b_height= h;

    av_free(s->block);
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
    if(!s->block)
        return -1; // report the failure instead of always claiming success

    return 0;
}

/**
 * Copy the complete range coder state from s to d while leaving d's own
 * bytestream and bytestream_start pointers untouched.
 */
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
    uint8_t * const dst_pos  = d->bytestream;
    uint8_t * const dst_start= d->bytestream_start;

    *d= *s;
    d->bytestream      = dst_pos;
    d->bytestream_start= dst_start;
}

//near copy & paste from dsputil, FIXME
/**
 * Sum of all samples of a w x w block.
 *
 * @param pix       top-left sample of the block
 * @param line_size distance in bytes between the starts of two rows
 * @param w         width and height of the block
 * @return the sum of the w*w samples (0 if w <= 0)
 */
static int pix_sum(const uint8_t * pix, int line_size, int w)
{
    int s = 0;
    int i, j;

    for (i = 0; i < w; i++) {
        for (j = 0; j < w; j++)
            s += pix[j];
        pix += line_size;
    }
    return s;
}

//near copy & paste from dsputil, FIXME
1653
static int pix_norm1(uint8_t * pix, int line_size, int w)
1654
{
1655
    int s, i, j;
1656
    uint32_t *sq = ff_squareTbl + 256;
1657

    
1658
    s = 0;
1659
    for (i = 0; i < w; i++) {
1660
        for (j = 0; j < w; j ++) {
1661
            s += sq[pix[0]];
1662
            pix ++;
1663
        }
1664
        pix += line_size - w;
1665
    }
1666
    return s;
1667
}
1668

    
1669
/**
 * Store one block description into every finest-level cell of s->block
 * that is covered by the block (x, y) of the given tree level.
 *
 * @param level      tree level of the block; a block covers
 *                   2^(block_max_depth-level) cells in each direction
 * @param x, y       block position in units of blocks of that level
 * @param l, cb, cr  values stored in color[0..2]
 * @param mx, my     motion vector stored in the block
 * @param ref        reference frame index stored in the block
 * @param type       block type flags (e.g. BLOCK_INTRA)
 */
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
    const int w= s->b_width << s->block_max_depth; // row length of s->block
    const int rem_depth= s->block_max_depth - level;
    const int index= (x + y*w) << rem_depth;
    const int block_w= 1<<rem_depth;
    BlockNode block;
    int i,j;

    block.color[0]= l;
    block.color[1]= cb;
    block.color[2]= cr;
    block.mx= mx;
    block.my= my;
    block.ref= ref;
    block.type= type;
    block.level= level;

    for(j=0; j<block_w; j++){
        BlockNode *row= &s->block[index + j*w];
        for(i=0; i<block_w; i++)
            row[i]= block;
    }
}

/**
 * Set up the source and reference plane pointers of a motion estimation
 * context for the block at pixel position (x, y).
 *
 * The src pointers are stored as given; the ref pointers are advanced by
 * the block offset (for the two chroma planes the offset is computed with
 * uvstride and halved). ref2 is not used. Only slot 0 of c->src / c->ref
 * is written, and ref_index is asserted to be 0.
 */
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
    const int offset[3]= {
          y*c->  stride + x,
        ((y*c->uvstride + x)>>1),
        ((y*c->uvstride + x)>>1),
    };
    int i;

    for(i=0; i<3; i++){
        c->src[0][i]= src [i];
        c->ref[0][i]= ref [i] + offset[i];
    }
    assert(!ref_index);
}

/**
 * Predict a motion vector as the component-wise median of the left, top
 * and top-right neighbours.
 *
 * With a single reference frame the neighbour vectors are used directly.
 * Otherwise each neighbour vector is first multiplied by
 * scale_mv_ref[ref][neighbour->ref] and rounded down from 8 fractional bits.
 *
 * @param[out] mx, my predicted vector
 * @param ref         reference frame index the prediction is made for
 */
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
    if(s->ref_frames == 1){
        *mx = mid_pred(left->mx, top->mx, tr->mx);
        *my = mid_pred(left->my, top->my, tr->my);
    }else{
        const int *scale = scale_mv_ref[ref];
        const int scale_l= scale[left->ref];
        const int scale_t= scale[top ->ref];
        const int scale_r= scale[tr  ->ref];

        *mx = mid_pred((left->mx * scale_l + 128) >>8,
                       (top ->mx * scale_t + 128) >>8,
                       (tr  ->mx * scale_r + 128) >>8);
        *my = mid_pred((left->my * scale_l + 128) >>8,
                       (top ->my * scale_t + 128) >>8,
                       (tr  ->my * scale_r + 128) >>8);
    }
}

//FIXME copy&paste
/* Named rows of the motion vector predictor table P[10][2] */
#define P_LEFT P[1]
#define P_TOP P[2]
#define P_TOPRIGHT P[3]
#define P_MEDIAN P[4]
#define P_MV1 P[9]
#define FLAG_QPEL   1 //must be 1

/**
 * Choose and code the representation of one block of the block tree.
 *
 * Three candidates are evaluated:
 *  - inter: motion search over all reference frames, coded into a scratch
 *    range coder (pc / p_buffer / p_state);
 *  - intra: the block is replaced by its mean colour, coded into a second
 *    scratch range coder (ic / i_buffer / i_state);
 *  - split: if level is not the maximum depth, the four sub-blocks are coded
 *    recursively straight into s->c.
 * If the split is cheaper than both others its output is kept. Otherwise
 * the bytes of the cheaper of intra/inter are copied to the position s->c
 * had on entry, and s->c, s->block_state and s->block are set accordingly.
 *
 * On keyframes nothing is coded: the block is stored as intra with the
 * left neighbour's colours and 0 is returned.
 *
 * @param level tree level of the block (0 is the largest block size)
 * @param x, y  block position in units of blocks of that level
 * @return the score of the chosen candidate
 */
static int encode_q_branch(SnowContext *s, int level, int x, int y){
    uint8_t p_buffer[1024];
    uint8_t i_buffer[1024];
    uint8_t p_state[sizeof(s->block_state)];
    uint8_t i_state[sizeof(s->block_state)];
    RangeCoder pc, ic;
    uint8_t *pbbak= s->c.bytestream;             // output position on entry
    uint8_t *pbbak_start= s->c.bytestream_start;
    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
    const int w= s->b_width  << s->block_max_depth;
    const int h= s->b_height << s->block_max_depth;
    const int rem_depth= s->block_max_depth - level;
    const int index= (x + y*w) << rem_depth;
    const int block_w= 1<<(LOG2_MB_SIZE - level);
    int trx= (x+1)<<rem_depth;
    int try= (y+1)<<rem_depth;
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
    int pl = left->color[0];
    int pcb= left->color[1];
    int pcr= left->color[2];
    int pmx, pmy;
    int mx=0, my=0;
    int l,cr,cb;
    const int stride= s->current_picture.linesize[0];
    const int uvstride= s->current_picture.linesize[1];
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
    int P[10][2];
    int16_t last_mv[3][2];
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
    const int shift= 1+qpel;
    MotionEstContext *c= &s->m.me;
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
    int my_context= av_log2(2*FFABS(left->my - top->my));
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
    int ref, best_ref, ref_score, ref_mx, ref_my;

    assert(sizeof(s->block_state) >= 256);
    if(s->keyframe){
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
        return 0;
    }

//    clip predictors / edge ?

    P_LEFT[0]= left->mx;
    P_LEFT[1]= left->my;
    P_TOP [0]= top->mx;
    P_TOP [1]= top->my;
    P_TOPRIGHT[0]= tr->mx;
    P_TOPRIGHT[1]= tr->my;

    last_mv[0][0]= s->block[index].mx;
    last_mv[0][1]= s->block[index].my;
    last_mv[1][0]= right->mx;
    last_mv[1][1]= right->my;
    last_mv[2][0]= bottom->mx;
    last_mv[2][1]= bottom->my;

    s->m.mb_stride=2;
    s->m.mb_x=
    s->m.mb_y= 0;
    c->skip= 0;

    assert(c->  stride ==   stride);
    assert(c->uvstride == uvstride);

    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;

    // search window relative to the block position
    c->xmin = - x*block_w - 16+3;
    c->ymin = - y*block_w - 16+3;
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;

    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);

    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);

    if (!y) {
        c->pred_x= P_LEFT[0];
        c->pred_y= P_LEFT[1];
    } else {
        c->pred_x = P_MEDIAN[0];
        c->pred_y = P_MEDIAN[1];
    }

    // motion search in every reference frame, keep the best one
    score= INT_MAX;
    best_ref= 0;
    for(ref=0; ref<s->ref_frames; ref++){
        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);

        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);

        assert(ref_mx >= c->xmin);
        assert(ref_mx <= c->xmax);
        assert(ref_my >= c->ymin);
        assert(ref_my <= c->ymax);

        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
        if(s->ref_mvs[ref]){
            s->ref_mvs[ref][index][0]= ref_mx;
            s->ref_mvs[ref][index][1]= ref_my;
            s->ref_scores[ref][index]= ref_score;
        }
        if(score > ref_score){
            score= ref_score;
            best_ref= ref;
            mx= ref_mx;
            my= ref_my;
        }
    }
    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2

    // inter candidate: code it into the scratch coder pc and add its bit cost
    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
    pc= s->c;
    pc.bytestream_start=
    pc.bytestream= p_buffer; //FIXME end/start? and at the other one too
    memcpy(p_state, s->block_state, sizeof(s->block_state));

    if(level!=s->block_max_depth)
        put_rac(&pc, &p_state[4 + s_context], 1);
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
    if(s->ref_frames > 1)
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
    p_len= pc.bytestream - pc.bytestream_start;
    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;

    // intra candidate: rounded mean of each plane; distortion from luma only
    block_s= block_w*block_w;
    sum = pix_sum(current_data[0], stride, block_w);
    l= (sum + block_s/2)/block_s;
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;

    block_s= block_w*block_w>>2;
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
    cb= (sum + block_s/2)/block_s;
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
    cr= (sum + block_s/2)/block_s;
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;

    ic= s->c;
    ic.bytestream_start=
    ic.bytestream= i_buffer; //FIXME end/start? and at the other one too
    memcpy(i_state, s->block_state, sizeof(s->block_state));
    if(level!=s->block_max_depth)
        put_rac(&ic, &i_state[4 + s_context], 1);
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
    put_symbol(&ic, &i_state[32],  l-pl , 1);
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
    i_len= ic.bytestream - ic.bytestream_start;
    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;

//    assert(score==256*256*256*64-1);
    assert(iscore < 255*255*256 + s->lambda2*10);
    assert(iscore >= 0);
    assert(l>=0 && l<=255);
    assert(pl>=0 && pl<=255);

    // only top-level blocks contribute to the scene change score
    if(level==0){
        int varc= iscore >> 8;
        int vard= score >> 8;
        if (vard <= 64 || vard < varc)
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
        else
            c->scene_change_score+= s->m.qscale;
    }

    // split candidate: coded directly into s->c, kept only if it wins
    if(level!=s->block_max_depth){
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead

        if(score2 < score && score2 < iscore)
            return score2;
    }

    // not split: replace whatever was written since entry by the winner
    if(iscore < score){
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
        memcpy(pbbak, i_buffer, i_len);
        s->c= ic;
        s->c.bytestream_start= pbbak_start;
        s->c.bytestream= pbbak + i_len;
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
        memcpy(s->block_state, i_state, sizeof(s->block_state));
        return iscore;
    }else{
        memcpy(pbbak, p_buffer, p_len);
        s->c= pc;
        s->c.bytestream_start= pbbak_start;
        s->c.bytestream= pbbak + p_len;
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
        memcpy(s->block_state, p_state, sizeof(s->block_state));
        return score;
    }
}

/**
 * Test whether two blocks carry the same coded information.
 *
 * Two intra blocks are equal when their three colours match; in every
 * other case the motion vector, the reference index and the intra flag
 * must all match.
 *
 * @return nonzero if the blocks are equivalent, 0 otherwise
 */
static av_always_inline int same_block(BlockNode *a, BlockNode *b){
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
    }else{
        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
    }
}

/**
 * Code the block tree from the block data already present in s->block,
 * without doing any search.
 *
 * Below the maximum depth a block is coded as a leaf only if its three
 * neighbouring cells (right, below, below-right) equal it according to
 * same_block(); otherwise a split flag is written and the four sub-blocks
 * are coded recursively. A leaf is written as intra (colour differences to
 * the left neighbour) or inter (reference index if there are several
 * reference frames, then motion vector differences to the prediction), and
 * stored again with set_blocks().
 *
 * On keyframes nothing is coded: the block is stored as intra with the
 * left neighbour's colours.
 *
 * @param level tree level of the block
 * @param x, y  block position in units of blocks of that level
 */
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
    const int w= s->b_width  << s->block_max_depth;
    const int rem_depth= s->block_max_depth - level;
    const int index= (x + y*w) << rem_depth;
    int trx= (x+1)<<rem_depth;
    BlockNode *b= &s->block[index];
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
    int pl = left->color[0];
    int pcb= left->color[1];
    int pcr= left->color[2];
    int pmx, pmy;
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;

    if(s->keyframe){
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
        return;
    }

    if(level!=s->block_max_depth){
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
            put_rac(&s->c, &s->block_state[4 + s_context], 1); // leaf
        }else{
            put_rac(&s->c, &s->block_state[4 + s_context], 0); // split
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
            return;
        }
    }
    if(b->type & BLOCK_INTRA){
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
    }else{
        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
        if(s->ref_frames > 1)
            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
    }
}

/**
 * Decode one block of the block tree from the range coder and store it
 * with set_blocks().
 *
 * On keyframes nothing is read: the block is stored as intra with the
 * values of null_block. Otherwise, unless level is the maximum depth, a
 * flag selects between a leaf and a split into four recursively decoded
 * sub-blocks. A leaf is either intra (colour differences added to the left
 * neighbour's colours) or inter (reference index if there are several
 * reference frames, then motion vector differences added to the
 * prediction).
 *
 * A decoded reference index outside [0, s->ref_frames) is reported with
 * av_log() and replaced by 0, so that damaged input cannot be used as an
 * out-of-range index afterwards.
 *
 * @param level tree level of the block
 * @param x, y  block position in units of blocks of that level
 */
static void decode_q_branch(SnowContext *s, int level, int x, int y){
    const int w= s->b_width << s->block_max_depth;
    const int rem_depth= s->block_max_depth - level;
    const int index= (x + y*w) << rem_depth;
    int trx= (x+1)<<rem_depth;
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;

    if(s->keyframe){
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
        return;
    }

    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
        int type, mx, my;
        int l = left->color[0];
        int cb= left->color[1];
        int cr= left->color[2];
        int ref = 0;
        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
        int mx_context= av_log2(2*FFABS(left->mx - top->mx));
        int my_context= av_log2(2*FFABS(left->my - top->my));

        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;

        if(type){
            pred_mv(s, &mx, &my, 0, left, top, tr);
            l += get_symbol(&s->c, &s->block_state[32], 1);
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
        }else{
            if(s->ref_frames > 1){
                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
                // the value comes from the bitstream: never trust it as an index
                if(ref < 0 || ref >= s->ref_frames){
                    av_log(s->avctx, AV_LOG_ERROR, "invalid reference frame index %d\n", ref);
                    ref= 0;
                }
            }
            pred_mv(s, &mx, &my, ref, left, top, tr);
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
        }
        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
    }else{
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
    }
}

/**
 * Code the block tree of the whole picture, one top-level block at a time
 * in raster order.
 *
 * With ME_ITER, iterative_me() is run first (non-keyframes with search
 * enabled only) and the blocks are then written with encode_q_branch2().
 * The same writer is used when search is 0. Otherwise encode_q_branch()
 * searches and writes each block.
 *
 * Before each row the remaining output space is checked; if it is below
 * w*MB_SIZE*MB_SIZE*3 bytes an error is logged and coding stops.
 *
 * @param search nonzero to allow motion search, 0 to only write s->block
 */
static void encode_blocks(SnowContext *s, int search){
    int x, y;
    int w= s->b_width;
    int h= s->b_height;

    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
        iterative_me(s);

    for(y=0; y<h; y++){
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
            return;
        }
        for(x=0; x<w; x++){
            if(s->avctx->me_method == ME_ITER || !search)
                encode_q_branch2(s, 0, x, y);
            else
                encode_q_branch (s, 0, x, y);
        }
    }
}

/**
 * Decode the block tree of the whole picture by calling decode_q_branch()
 * for every top-level block in raster order.
 */
static void decode_blocks(SnowContext *s){
    const int w= s->b_width;
    const int h= s->b_height;
    int x, y;

    for(y=0; y<h; y++)
        for(x=0; x<w; x++)
            decode_q_branch(s, 0, x, y);
}

/**
 * Motion compensate one block with a fractional offset of (dx, dy)
 * sixteenths of a sample (both asserted to be below 16).
 *
 * Up to three filtered planes are produced from src with a symmetric
 * filter (fixed 6-tap 20,-5,1 if p is NULL or p->fast_mc is set, otherwise
 * the 8-tap filter given by p->hcoeff): horizontally filtered, vertically
 * filtered, and filtered in both directions. Together with src they form
 * a grid of half-sample planes, hpel[hx + 4*hy]. The output is then either
 * a weighted average of the two planes selected through the brane[] table,
 * or, if the table yields 0xcc or p->diag_mc is 0, a bilinear blend of the
 * four surrounding half-sample planes.
 *
 * @param p      plane parameters, or NULL for the fixed 6-tap filter
 * @param dst    destination block
 * @param src    source; points (HTAPS_MAX/2-1) samples left of and above
 *               the integer sample position of the block, see how the
 *               mc_block_hpel wrappers call this function
 * @param tmp    not referenced by this function
 * @param stride line size of dst and src, also used for the temporaries
 * @param b_w    block width
 * @param b_h    block height
 */
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
    // weight of the first plane (out of 8) for the two-plane average
    static const uint8_t weight[64]={
    8,7,6,5,4,3,2,1,
    7,7,0,0,0,0,0,1,
    6,0,6,0,0,0,2,0,
    5,0,0,5,0,3,0,0,
    4,0,0,0,4,0,0,0,
    3,0,0,5,0,3,0,0,
    2,0,6,0,0,0,2,0,
    1,7,0,0,0,0,0,1,
    };

    // per (dx,dy): high nibble = first hpel plane, low nibble = second one;
    // 0xcc selects index 12, for which needs[] is 15 (bilinear path)
    static const uint8_t brane[256]={
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
    };

    // filter passes required per hpel plane index:
    // 1 = horizontal, 2 = vertical, 4 = both, 15 = everything
    static const uint8_t needs[16]={
    0,1,0,0,
    2,4,2,0,
    0,1,0,0,
    15
    };

    int x, y, b, r, l;
    int16_t tmpIt   [64*(32+HTAPS_MAX)];     // unrounded horizontal pass, 64 per row
    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)]; // [0] horizontal, [1] vertical, [2] both
    int16_t *tmpI= tmpIt;
    uint8_t *tmp2= tmp2t[0];
    const uint8_t *hpel[11];
    assert(dx<16 && dy<16);
    r= brane[dx + 16*dy]&15;
    l= brane[dx + 16*dy]>>4;

    b= needs[l] | needs[r];
    if(p && !p->diag_mc)
        b= 15;

    // horizontal pass (also the input of the two-dimensional pass)
    if(b&5){
        for(y=0; y < b_h+HTAPS_MAX-1; y++){
            for(x=0; x < b_w; x++){
                int a_1=src[x + HTAPS_MAX/2-4];
                int a0= src[x + HTAPS_MAX/2-3];
                int a1= src[x + HTAPS_MAX/2-2];
                int a2= src[x + HTAPS_MAX/2-1];
                int a3= src[x + HTAPS_MAX/2+0];
                int a4= src[x + HTAPS_MAX/2+1];
                int a5= src[x + HTAPS_MAX/2+2];
                int a6= src[x + HTAPS_MAX/2+3];
                int am=0;
                if(!p || p->fast_mc){
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
                    tmpI[x]= am;
                    am= (am+16)>>5;
                }else{
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
                    tmpI[x]= am;
                    am= (am+32)>>6;
                }

                if(am&(~255)) am= ~(am>>31); // clip to 0..255 once stored as uint8_t
                tmp2[x]= am;
            }
            tmpI+= 64;
            tmp2+= stride;
            src += stride;
        }
        src -= stride*y;
    }
    src += HTAPS_MAX/2 - 1;
    tmp2= tmp2t[1];

    // vertical pass, one extra column for the plane shifted by one sample
    if(b&2){
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w+1; x++){
                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
                int a0= src[x + (HTAPS_MAX/2-3)*stride];
                int a1= src[x + (HTAPS_MAX/2-2)*stride];
                int a2= src[x + (HTAPS_MAX/2-1)*stride];
                int a3= src[x + (HTAPS_MAX/2+0)*stride];
                int a4= src[x + (HTAPS_MAX/2+1)*stride];
                int a5= src[x + (HTAPS_MAX/2+2)*stride];
                int a6= src[x + (HTAPS_MAX/2+3)*stride];
                int am=0;
                if(!p || p->fast_mc)
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
                else
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;

                if(am&(~255)) am= ~(am>>31);
                tmp2[x]= am;
            }
            src += stride;
            tmp2+= stride;
        }
        src -= stride*y;
    }
    src += stride*(HTAPS_MAX/2 - 1);
    tmp2= tmp2t[2];
    tmpI= tmpIt;
    // two-dimensional pass: vertical filter over the unrounded horizontal pass
    if(b&4){
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
                int am=0;
                if(!p || p->fast_mc)
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
                else
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
                if(am&(~255)) am= ~(am>>31);
                tmp2[x]= am;
            }
            tmpI+= 64;
            tmp2+= stride;
        }
    }

    // half-sample plane grid; entries 3 and 7 are never assigned
    hpel[ 0]= src;
    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
    hpel[ 2]= src + 1;

    hpel[ 4]= tmp2t[1];
    hpel[ 5]= tmp2t[2];
    hpel[ 6]= tmp2t[1] + 1;

    hpel[ 8]= src + stride;
    hpel[ 9]= hpel[1] + stride;
    hpel[10]= hpel[8] + 1;

    if(b==15){
        // bilinear blend of the four planes around (dx,dy), weights in 1/8
        const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
        const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
        const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
        const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
        dx&=7;
        dy&=7;
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
            }
            src1+=stride;
            src2+=stride;
            src3+=stride;
            src4+=stride;
            dst +=stride;
        }
    }else{
        // weighted average of the two planes chosen through brane[]
        const uint8_t *src1= hpel[l];
        const uint8_t *src2= hpel[r];
        const int w1= weight[((dx&7) + (8*(dy&7)))];
        const int w2= 8-w1;
        for(y=0; y < b_h; y++){
            for(x=0; x < b_w; x++){
                dst[x]= (w1*src1[x] + w2*src2[x] + 4)>>3;
            }
            src1+=stride;
            src2+=stride;
            dst +=stride;
        }
    }
}

#define mca(dx,dy,b_w)\
2285
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2286
    uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2287
    assert(h==b_w);\
2288
    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2289
}
2290

    
2291
mca( 0, 0,16)
2292
mca( 8, 0,16)
2293
mca( 0, 8,16)
2294
mca( 8, 8,16)
2295
mca( 0, 0,8)
2296
mca( 8, 0,8)
2297
mca( 0, 8,8)
2298
mca( 8, 8,8)
2299

    
2300
/**
 * Build the prediction of a single block into dst.
 *
 * Intra blocks (BLOCK_INTRA set in block->type) are filled with the constant
 * block->color[plane_index].  All other blocks are motion compensated from
 * s->last_picture[block->ref].data[plane_index], using block->mx / block->my
 * multiplied by s->mv_scale (doubled for plane 0).
 *
 * @param s           codec context
 * @param dst         destination, b_w x b_h samples, line size stride
 * @param tmp         scratch memory; the edge-emulated source is written at
 *                    tmp + MB_SIZE and tmp itself is handed to mc_block()
 * @param stride      line size used for dst and for the reference plane
 * @param sx          horizontal position of the block inside the plane
 * @param sy          vertical position of the block inside the plane
 * @param b_w         block width
 * @param b_h         block height
 * @param block       block whose parameters drive the prediction
 * @param plane_index index of the plane being predicted
 * @param w           plane width
 * @param h           plane height
 */
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
    if(block->type & BLOCK_INTRA){
        int x, y;
        const int color = block->color[plane_index];
        /* Replicate the colour into four bytes.  The product is formed in
         * unsigned arithmetic because color*0x01010101 does not fit a signed
         * int once color reaches 128. */
        const uint32_t color4= color*0x01010101U;
        if(b_w==32 || b_w==16 || b_w==8 || b_w==4){
            /* widths that are filled four bytes at a time */
            for(y=0; y < b_h; y++){
                for(x=0; x < b_w; x+=4)
                    *(uint32_t*)&dst[x + y*stride]= color4;
            }
        }else{
            for(y=0; y < b_h; y++){
                for(x=0; x < b_w; x++){
                    dst[x + y*stride]= color;
                }
            }
        }
    }else{
        uint8_t *src= s->last_picture[block->ref].data[plane_index];
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
        int mx= block->mx*scale;
        int my= block->my*scale;
        /* low 4 bits: sub-pel phase, remaining bits: full-pel displacement */
        const int dx= mx&15;
        const int dy= my&15;
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
        sx += (mx>>4) - (HTAPS_MAX/2-1);
        sy += (my>>4) - (HTAPS_MAX/2-1);
        src += sx + sy*stride;
        /* The limits are clamped to 0 before the unsigned comparison: on
         * planes smaller than the filter footprint the difference is
         * negative and would otherwise turn into a huge unsigned value,
         * skipping the edge emulation and reading outside the reference. */
        if(   (unsigned)sx >= FFMAX(w - b_w - (HTAPS_MAX-2), 0)
           || (unsigned)sy >= FFMAX(h - b_h - (HTAPS_MAX-2), 0)){
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
            src= tmp + MB_SIZE;
        }
        assert(b_w>1 && b_h>1);
        assert((tab_index>=0 && tab_index<4) || b_w==32);
        /* Generic path unless the phase is a multiple of 4, the block is
         * square or 2:1 with a power-of-two width, and fast_mc is enabled
         * for this plane; only then are the H.264 qpel routines used. */
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
        else if(b_w==32){
            int y;
            /* 32 wide: two 16x16 calls per 16 rows */
            for(y=0; y<b_h; y+=16){
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
            }
        }else if(b_w==b_h)
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
        else if(b_w==2*b_h){
            /* wide block: left and right square halves */
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
        }else{
            /* tall block: upper and lower square halves */
            assert(2*b_w==b_h);
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
        }
    }
}
2379

    
2380
/**
 * Blend four block predictions with the OBMC window and combine the result
 * with the slice buffer, one line at a time.
 *
 * @param obmc        OBMC window; its four quadrants weight block[3],
 *                    block[2], block[1] and block[0] respectively
 * @param obmc_stride line size of the window (also used to locate the
 *                    quadrants)
 * @param block       the four predictions, each with line size src_stride
 * @param b_w         width of the area to process
 * @param b_h         height of the area to process
 * @param src_x       horizontal offset inside each slice-buffer line
 * @param src_y       first slice-buffer line
 * @param src_stride  line size of the predictions and of dst8
 * @param sb          slice buffer holding the IDWTELEM lines
 * @param add         non-zero: add the line value, round, clip to 0..255 and
 *                    store in dst8; zero: subtract the prediction from the
 *                    slice-buffer line
 * @param dst8        8-bit output, written only when add is non-zero
 */
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
    const int half= obmc_stride>>1;
    int row, col;

    for(row=0; row<b_h; row++){
        const int offs= row*src_stride;
        //FIXME ugly misuse of obmc_stride
        const uint8_t *win0= obmc + row*obmc_stride;
        const uint8_t *win1= win0 + half;
        const uint8_t *win2= win0 + obmc_stride*half;
        const uint8_t *win3= win2 + half;
        const uint8_t *pred0= block[0] + offs;
        const uint8_t *pred1= block[1] + offs;
        const uint8_t *pred2= block[2] + offs;
        const uint8_t *pred3= block[3] + offs;
        IDWTELEM *line= slice_buffer_get_line(sb, src_y + row);

        for(col=0; col<b_w; col++){
            int v=   win0[col] * pred3[col]
                    +win1[col] * pred2[col]
                    +win2[col] * pred1[col]
                    +win3[col] * pred0[col];

            /* rescale from the window's precision to FRAC_BITS */
            v <<= 8 - LOG2_OBMC_MAX;
            if(FRAC_BITS != 8){
                v >>= 8 - FRAC_BITS;
            }

            if(!add){
                line[col + src_x] -= v;
                continue;
            }

            v += line[col + src_x];
            v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
            if(v&(~255)) v= ~(v>>31); /* clip to 0..255 */
            dst8[col + offs] = v;
        }
    }
}
2412

    
2413
//FIXME name cleanup (b_w, block_w, b_width stuff)
2414
static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2415
    const int b_width = s->b_width  << s->block_max_depth;
2416
    const int b_height= s->b_height << s->block_max_depth;
2417
    const int b_stride= b_width;
2418
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2419
    BlockNode *rt= lt+1;
2420
    BlockNode *lb= lt+b_stride;
2421
    BlockNode *rb= lb+1;
2422
    uint8_t *block[4];
2423
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2424
    uint8_t *tmp = s->scratchbuf;
2425
    uint8_t *ptmp;
2426
    int x,y;
2427

    
2428
    if(b_x<0){
2429
        lt= rt;
2430
        lb= rb;
2431
    }else if(b_x + 1 >= b_width){
2432
        rt= lt;
2433
        rb= lb;
2434
    }
2435
    if(b_y<0){
2436
        lt= lb;
2437
        rt= rb;
2438
    }else if(b_y + 1 >= b_height){
2439
        lb= lt;
2440
        rb= rt;
2441
    }
2442

    
2443
    if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2444
        obmc -= src_x;
2445
        b_w += src_x;
2446
        if(!sliced && !offset_dst)
2447
            dst -= src_x;
2448
        src_x=0;
2449
    }else if(src_x + b_w > w){
2450
        b_w = w - src_x;
2451
    }
2452
    if(src_y<0){
2453
        obmc -= src_y*obmc_stride;
2454
        b_h += src_y;
2455
        if(!sliced && !offset_dst)
2456
            dst -= src_y*dst_stride;
2457
        src_y=0;
2458
    }else if(src_y + b_h> h){
2459
        b_h = h - src_y;
2460
    }
2461

    
2462
    if(b_w<=0 || b_h<=0) return;
2463

    
2464
    assert(src_stride > 2*MB_SIZE + 5);
2465

    
2466
    if(!sliced && offset_dst)
2467
        dst += src_x + src_y*dst_stride;
2468
    dst8+= src_x + src_y*src_stride;
2469
//    src += src_x + src_y*src_stride;
2470

    
2471
    ptmp= tmp + 3*tmp_step;
2472
    block[0]= ptmp;
2473
    ptmp+=tmp_step;
2474
    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2475

    
2476
    if(same_block(lt, rt)){
2477
        block[1]= block[0];
2478
    }else{
2479
        block[1]= ptmp;
2480
        ptmp+=tmp_step;
2481
        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2482
    }
2483

    
2484
    if(same_block(lt, lb)){
2485
        block[2]= block[0];
2486
    }else if(same_block(rt, lb)){
2487
        block[2]= block[1];
2488
    }else{
2489
        block[2]= ptmp;
2490
        ptmp+=tmp_step;
2491
        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2492
    }
2493

    
2494
    if(same_block(lt, rb) ){
2495
        block[3]= block[0];
2496
    }else if(same_block(rt, rb)){
2497
        block[3]= block[1];
2498
    }else if(same_block(lb, rb)){
2499
        block[3]= block[2];
2500
    }else{
2501
        block[3]= ptmp;
2502
        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2503
    }
2504
#if 0
2505
    for(y=0; y<b_h; y++){
2506
        for(x=0; x<b_w; x++){
2507
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2508
            if(add) dst[x + y*dst_stride] += v;
2509
            else    dst[x + y*dst_stride] -= v;
2510
        }
2511
    }
2512
    for(y=0; y<b_h; y++){
2513
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2514
        for(x=0; x<b_w; x++){
2515
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2516
            if(add) dst[x + y*dst_stride] += v;
2517
            else    dst[x + y*dst_stride] -= v;
2518
        }
2519
    }
2520
    for(y=0; y<b_h; y++){
2521
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2522
        for(x=0; x<b_w; x++){
2523
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2524
            if(add) dst[x + y*dst_stride] += v;
2525
            else    dst[x + y*dst_stride] -= v;
2526
        }
2527
    }
2528
    for(y=0; y<b_h; y++){
2529
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2530
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2531
        for(x=0; x<b_w; x++){
2532
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2533
            if(add) dst[x + y*dst_stride] += v;
2534
            else    dst[x + y*dst_stride] -= v;
2535
        }
2536
    }
2537
#else
2538
    if(sliced){
2539
        s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2540
    }else{
2541
        for(y=0; y<b_h; y++){
2542
            //FIXME ugly misuse of obmc_stride
2543
            const uint8_t *obmc1= obmc + y*obmc_stride;
2544
            const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2545
            const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2546
            const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2547
            for(x=0; x<b_w; x++){
2548
                int v=   obmc1[x] * block[3][x + y*src_stride]
2549
                        +obmc2[x] * block[2][x + y*src_stride]
2550
                        +obmc3[x] * block[1][x + y*src_stride]
2551
                        +obmc4[x] * block[0][x + y*src_stride];
2552

    
2553
                v <<= 8 - LOG2_OBMC_MAX;
2554
                if(FRAC_BITS != 8){
2555
                    v >>= 8 - FRAC_BITS;
2556
                }
2557
                if(add){
2558
                    v += dst[x + y*dst_stride];
2559
                    v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2560
                    if(v&(~255)) v= ~(v>>31);
2561
                    dst8[x + y*src_stride] = v;
2562
                }else{
2563
                    dst[x + y*dst_stride] -= v;
2564
                }
2565
            }
2566
        }
2567
    }
2568
#endif /* 0 */
2569
}
2570

    
2571
/**
 * Apply the prediction for one row of blocks to a slice buffer.
 *
 * For keyframes (or when bit 512 of avctx->debug is set) no motion
 * prediction is done: the rows of block row mb_y are either converted to
 * 8-bit output with a +128 offset (add != 0) or have 128<<FRAC_BITS
 * subtracted (add == 0).  Otherwise add_yblock() is called in sliced mode
 * for every window position mb_x = 0..mb_w of the row.
 *
 * @param s           codec context
 * @param sb          slice buffer holding the lines to process
 * @param old_buffer  passed through to add_yblock() as its dst argument
 * @param plane_index index of the plane being processed
 * @param add         non-zero to reconstruct, zero to form the residual
 * @param mb_y        block row, 0..mb_h inclusive
 */
static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
    Plane *plane= &s->plane[plane_index];
    const int mb_w= s->b_width  << s->block_max_depth;
    const int mb_h= s->b_height << s->block_max_depth;
    const int is_chroma= plane_index != 0;
    const int block_size = MB_SIZE >> s->block_max_depth;
    const int block_w    = is_chroma ? block_size/2 : block_size;
    const uint8_t *obmc  = obmc_tab[s->block_max_depth + is_chroma];
    const int obmc_stride= is_chroma ? block_size : 2*block_size;
    const int ref_stride= s->current_picture.linesize[plane_index];
    uint8_t *dst8= s->current_picture.data[plane_index];
    const int w= plane->width;
    const int h= plane->height;
    int x, y, mb_x;

    if(s->keyframe || (s->avctx->debug&512)){
        const int y_end= FFMIN(h, block_w*(mb_y+1));
        const int bias = (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));

        if(mb_y==mb_h)
            return;

        for(y=block_w*mb_y; y<y_end; y++){
            IDWTELEM *line= sb->line[y];

            if(add){
                for(x=0; x<w; x++){
                    int v= (line[x] + bias) >> FRAC_BITS;
                    if(v&(~255)) v= ~(v>>31); /* clip to 0..255 */
                    dst8[x + y*ref_stride]= v;
                }
            }else{
                for(x=0; x<w; x++)
                    line[x] -= 128 << FRAC_BITS;
            }
        }
        return;
    }

    /* windows are centred on block corners, hence mb_w+1 positions and the
     * half-block shift */
    for(mb_x=0; mb_x<=mb_w; mb_x++){
        add_yblock(s, 1, sb, old_buffer, dst8, obmc,
                   block_w*mb_x - block_w/2,
                   block_w*mb_y - block_w/2,
                   block_w, block_w,
                   w, h,
                   w, ref_stride, obmc_stride,
                   mb_x - 1, mb_y - 1,
                   add, 0, plane_index);
    }
}
2626

    
2627
/**
 * Apply the prediction for one row of blocks to a plain coefficient buffer.
 *
 * For keyframes (or when bit 512 of avctx->debug is set) no motion
 * prediction is done: the rows of block row mb_y are either converted to
 * 8-bit output with a +128 offset (add != 0) or have 128<<FRAC_BITS
 * subtracted (add == 0).  Otherwise add_yblock() is called in non-sliced
 * mode for every window position mb_x = 0..mb_w of the row.
 *
 * @param s           codec context
 * @param buf         coefficient buffer with line size equal to the plane
 *                    width
 * @param plane_index index of the plane being processed
 * @param add         non-zero to reconstruct, zero to form the residual
 * @param mb_y        block row, 0..mb_h inclusive
 */
static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
    Plane *plane= &s->plane[plane_index];
    const int mb_w= s->b_width  << s->block_max_depth;
    const int mb_h= s->b_height << s->block_max_depth;
    const int is_chroma= plane_index != 0;
    const int block_size = MB_SIZE >> s->block_max_depth;
    const int block_w    = is_chroma ? block_size/2 : block_size;
    const uint8_t *obmc  = obmc_tab[s->block_max_depth + is_chroma];
    const int obmc_stride= is_chroma ? block_size : 2*block_size;
    const int ref_stride= s->current_picture.linesize[plane_index];
    uint8_t *dst8= s->current_picture.data[plane_index];
    const int w= plane->width;
    const int h= plane->height;
    int x, y, mb_x;

    if(s->keyframe || (s->avctx->debug&512)){
        const int y_end= FFMIN(h, block_w*(mb_y+1));
        const int bias = (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));

        if(mb_y==mb_h)
            return;

        for(y=block_w*mb_y; y<y_end; y++){
            IDWTELEM *row= buf + y*w;

            if(add){
                for(x=0; x<w; x++){
                    int v= (row[x] + bias) >> FRAC_BITS;
                    if(v&(~255)) v= ~(v>>31); /* clip to 0..255 */
                    dst8[x + y*ref_stride]= v;
                }
            }else{
                for(x=0; x<w; x++)
                    row[x]-= 128<<FRAC_BITS;
            }
        }
        return;
    }

    /* windows are centred on block corners, hence mb_w+1 positions and the
     * half-block shift */
    for(mb_x=0; mb_x<=mb_w; mb_x++){
        add_yblock(s, 0, NULL, buf, dst8, obmc,
                   block_w*mb_x - block_w/2,
                   block_w*mb_y - block_w/2,
                   block_w, block_w,
                   w, h,
                   w, ref_stride, obmc_stride,
                   mb_x - 1, mb_y - 1,
                   add, 1, plane_index);
    }
}
2676

    
2677
/**
 * Run predict_slice() over a whole plane.
 *
 * @param s           codec context
 * @param buf         coefficient buffer of the plane
 * @param plane_index index of the plane being processed
 * @param add         forwarded to predict_slice()
 */
static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
    const int last_row= s->b_height << s->block_max_depth;
    int row= 0;

    /* block rows 0..last_row, both ends included */
    while(row <= last_row){
        predict_slice(s, buf, plane_index, add, row);
        row++;
    }
}
2683

    
2684
/**
 * Estimate the intra colour of block (mb_x, mb_y) for one plane.
 *
 * The block is temporarily marked intra with colour 0, the prediction of the
 * four surrounding OBMC windows is accumulated into the scratchpad, and the
 * difference between the input picture and that prediction is correlated
 * with the block's own OBMC weights:
 *   ab = sum((input - prediction) * weight),  aa = sum(weight * weight).
 * The result is (ab * 2^LOG2_OBMC_MAX) / aa, rounded and clipped to 0..255.
 * The block is restored before returning.
 *
 * NOTE(review): aa is used as a divisor without a zero check; presumably at
 * least one pixel with non-zero weight is always covered -- confirm.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index index of the plane to evaluate
 * @return estimated colour in the range 0..255
 */
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
    int i, x2, y2;
    Plane *p= &s->plane[plane_index];
    const int block_size = MB_SIZE >> s->block_max_depth;
    const int block_w    = plane_index ? block_size/2 : block_size;
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
    const int obmc_stride= plane_index ? block_size : 2*block_size;
    const int ref_stride= s->current_picture.linesize[plane_index];
    uint8_t *src= s-> input_picture.data[plane_index];
    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
    const int b_stride = s->b_width << s->block_max_depth;
    const int w= p->width;
    const int h= p->height;
    int index= mb_x + mb_y*b_stride;
    BlockNode *b= &s->block[index];
    BlockNode backup= *b;
    int ab=0;
    int aa=0;

    /* make this block contribute nothing to the prediction */
    b->type|= BLOCK_INTRA;
    b->color[plane_index]= 0;
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));

    /* the four window positions that overlap the block */
    for(i=0; i<4; i++){
        int mb_x2= mb_x + (i &1) - 1;
        int mb_y2= mb_y + (i>>1) - 1;
        int x= block_w*mb_x2 + block_w/2;
        int y= block_w*mb_y2 + block_w/2;

        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);

        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
                /* position inside this block's OBMC window / scratchpad */
                int pos= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
                int obmc_v= obmc[pos];
                int d;
                /* at picture borders add the weight of the mirrored quadrant */
                if(y<0) obmc_v += obmc[pos + block_w*obmc_stride];
                if(x<0) obmc_v += obmc[pos + block_w];
                if(y+block_w>h) obmc_v += obmc[pos - block_w*obmc_stride];
                if(x+block_w>w) obmc_v += obmc[pos - block_w];
                //FIXME precalculate this or simplify it somehow else

                /* add_yblock() subtracted the prediction, so negate it back */
                d = -dst[pos] + (1<<(FRAC_BITS-1));
                dst[pos] = d;
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
                aa += obmc_v * obmc_v; //FIXME precalculate this
            }
        }
    }
    *b= backup;

    /* ab may be negative, so scale by multiplication: left-shifting a
     * negative value is undefined behaviour */
    return av_clip((ab * (1<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
}
2738

    
2739
/**
 * Approximate the cost of coding the block at (x, y).
 *
 * Intra blocks are costed from the colour differences to the left
 * neighbour; inter blocks from the difference between the motion vector and
 * its prediction (pred_mv() on the left, top and top-right neighbours) plus
 * the reference index.  Each term contributes av_log2(2*|value|).
 *
 * @param s codec context
 * @param x block column; positions with x < 0 or x >= b_stride cost 0
 * @param y block row; positions with y >= b_height cost 0
 * @param w block width in block units, used to locate the top-right
 *          neighbour
 * @return estimated cost, or 0 for positions outside the tested range
 */
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
    const int b_stride = s->b_width << s->block_max_depth;
    const int b_height = s->b_height<< s->block_max_depth;
    const BlockNode *b, *left, *top, *tl, *tr;
    int index;
    int dmx, dmy;

    /* Test the range first so that no pointer into s->block[] is formed
     * from an out-of-range position. */
    if(x<0 || x>=b_stride || y>=b_height)
        return 0;

    index= x + y*b_stride;
    b    = &s->block[index];
    left = x ? &s->block[index-1] : &null_block;
    top  = y ? &s->block[index-b_stride] : &null_block;
    tl   = y && x ? &s->block[index-b_stride-1] : left;
    tr   = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
/*
code        values  (presumably: values per sign) -- TODO confirm
1            0      0
01X          1-2    1
001XX        3-6    2-3
0001XXX      7-14   4-7
00001XXXX   15-30   8-15
*/
//FIXME try accurate rate
//FIXME intra and inter predictors if surrounding blocks are not the same type
    if(b->type & BLOCK_INTRA){
        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
    }else{
        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
        dmx-= b->mx;
        dmy-= b->my;
        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
                    + av_log2(2*FFABS(dmy))
                    + av_log2(2*b->ref));
    }
}
2776

    
2777
/**
 * Rate-distortion score of block (mb_x, mb_y) with its current parameters.
 *
 * The block is predicted over its whole 2*block_w window, weighted with
 * obmc_edged, added to the values found in the OBMC scratchpad and written
 * into the current picture.  The distortion between input and current
 * picture is then measured over the window, and for plane 0 the estimated
 * bits of the block and the blocks coded after it are added, scaled by the
 * penalty factor.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index index of the plane to evaluate
 * @param obmc_edged  OBMC weights for this block, line size obmc_stride
 *                    (presumably already adjusted for picture borders --
 *                    confirm against the caller)
 * @return distortion + rate * penalty_factor
 */
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
    Plane *plane= &s->plane[plane_index];
    const int block_size = MB_SIZE >> s->block_max_depth;
    const int block_w    = plane_index ? block_size/2 : block_size;
    const int obmc_stride= plane_index ? block_size : 2*block_size;
    const int ref_stride= s->current_picture.linesize[plane_index];
    uint8_t *dst= s->current_picture.data[plane_index];
    uint8_t *src= s->  input_picture.data[plane_index];
    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
    uint8_t *cur = s->scratchbuf;
    uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
    const int b_stride = s->b_width << s->block_max_depth;
    const int b_height = s->b_height<< s->block_max_depth;
    const int w= plane->width;
    const int h= plane->height;
    int distortion;
    int rate= 0;
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
    /* window origin and its part that lies inside the plane */
    const int sx= block_w*mb_x - block_w/2;
    const int sy= block_w*mb_y - block_w/2;
    int x0= FFMAX(0,-sx);
    int y0= FFMAX(0,-sy);
    int x1= FFMIN(block_w*2, w-sx);
    int y1= FFMIN(block_w*2, h-sy);
    int i,x,y;
    uint8_t *src_win, *dst_win;

    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);

    for(y=y0; y<y1; y++){
        const uint8_t *weight_row= obmc_edged + y*obmc_stride;
        const IDWTELEM *pred_row = pred + y*obmc_stride;
        const uint8_t *cur_row   = cur + y*ref_stride;
        uint8_t *out_row         = dst + sx + (sy+y)*ref_stride;
        for(x=x0; x<x1; x++){
#if FRAC_BITS >= LOG2_OBMC_MAX
            int v = (cur_row[x] * weight_row[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
#else
            int v = (cur_row[x] * weight_row[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
#endif
            v = (v + pred_row[x]) >> FRAC_BITS;
            if(v&(~255)) v= ~(v>>31); /* clip to 0..255 */
            out_row[x] = v;
        }
    }

    /* copy the regions where obmc[] = (uint8_t)256 */
    if(LOG2_OBMC_MAX == 8
        && (mb_x == 0 || mb_x == b_stride-1)
        && (mb_y == 0 || mb_y == b_height-1)){
        if(mb_x == 0)
            x1 = block_w;
        else
            x0 = block_w;
        if(mb_y == 0)
            y1 = block_w;
        else
            y0 = block_w;
        for(y=y0; y<y1; y++)
            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
    }

    src_win= src + sx + sy*ref_stride;
    dst_win= dst + sx + sy*ref_stride;

    if(block_w==16){
        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
        /* FIXME cmps overlap but do not cover the wavelet's whole support.
         * So improving the score of one block is not strictly guaranteed
         * to improve the score of the whole frame, thus iterative motion
         * estimation does not always converge. */
        if(s->avctx->me_cmp == FF_CMP_W97)
            distortion = w97_32_c(&s->m, src_win, dst_win, ref_stride, 32);
        else if(s->avctx->me_cmp == FF_CMP_W53)
            distortion = w53_32_c(&s->m, src_win, dst_win, ref_stride, 32);
        else{
            /* four 16x16 comparisons cover the 32x32 window */
            distortion = 0;
            for(i=0; i<4; i++){
                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
            }
        }
    }else{
        assert(block_w==8);
        distortion = s->dsp.me_cmp[0](&s->m, src_win, dst_win, ref_stride, block_w*2);
    }

    if(plane_index==0){
        for(i=0; i<4; i++){
/* ..RRr
 * .RXx.
 * rxx..
 */
            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
        }
        if(mb_x == b_stride-2)
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
    }
    return distortion + rate*penalty_factor;
}
2874

    
2875
/**
 * Rate-distortion score of the 2x2 block group whose top-left block is
 * (mb_x, mb_y).
 *
 * The 3x3 window positions around the group are reconstructed into the
 * current picture with add_yblock() (against an all-zero residual) and
 * compared with the input picture.  Parts of a window that fall outside the
 * plane are copied from the input so they do not add to the distortion.  For
 * plane 0 the estimated bits of the affected blocks are added, scaled by the
 * penalty factor.
 *
 * @param s           codec context
 * @param mb_x        column of the group's top-left block
 * @param mb_y        row of the group's top-left block
 * @param plane_index index of the plane to evaluate
 * @return distortion + rate * penalty_factor
 */
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
    int i, y2;
    int row, col;
    Plane *plane= &s->plane[plane_index];
    const int block_size = MB_SIZE >> s->block_max_depth;
    const int block_w    = plane_index ? block_size/2 : block_size;
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
    const int obmc_stride= plane_index ? block_size : 2*block_size;
    const int ref_stride= s->current_picture.linesize[plane_index];
    uint8_t *dst= s->current_picture.data[plane_index];
    uint8_t *src= s-> input_picture.data[plane_index];
    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
    // const has only been removed from zero_dst to suppress a warning
    static IDWTELEM zero_dst[4096]; //FIXME
    const int b_stride = s->b_width << s->block_max_depth;
    const int w= plane->width;
    const int h= plane->height;
    int distortion= 0;
    int rate= 0;
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);

    for(row=0; row<3; row++){
        for(col=0; col<3; col++){
            const int mb_x2= mb_x + col - 1;
            const int mb_y2= mb_y + row - 1;
            const int x= block_w*mb_x2 + block_w/2;
            const int y= block_w*mb_y2 + block_w/2;

            add_yblock(s, 0, NULL, zero_dst, dst, obmc,
                       x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);

            //FIXME find a cleaner/simpler way to skip the outside stuff
            /* rows above the plane */
            for(y2= y; y2<0; y2++)
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
            /* rows below the plane */
            for(y2= h; y2<y+block_w; y2++)
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
            /* columns left of the plane */
            if(x<0){
                for(y2= y; y2<y+block_w; y2++)
                    memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
            }
            /* columns right of the plane */
            if(x+block_w > w){
                for(y2= y; y2<y+block_w; y2++)
                    memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
            }

            assert(block_w== 8 || block_w==16);
            distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
        }
    }

    if(plane_index==0){
        /* offsets of the blocks whose cost is counted; the first four are
         * the group itself */
        static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
        const int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);

/* ..RRRr
 * .RXXx.
 * .RXXx.
 * rxxx.
 */
        /* a merged group is costed once, as a block of width 2 */
        if(merged)
            rate = get_block_bits(s, mb_x, mb_y, 2);
        for(i=merged?4:0; i<9; i++)
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
    }
    return distortion + rate*penalty_factor;
}
2940

    
2941
/**
 * Try a candidate parameter set on one block and keep it if it lowers the
 * rate-distortion score.
 *
 * For intra candidates p[0..2] are the three plane colours.  For inter
 * candidates p[0] / p[1] are the motion vector; a vector already recorded in
 * s->me_cache for the current generation is rejected without evaluation.
 * The score is taken from get_block_rd() on plane 0.
 *
 * @param s          codec context
 * @param mb_x       block column
 * @param mb_y       block row
 * @param p          candidate values (three read for intra, two for inter)
 * @param intra      non-zero to test an intra candidate
 * @param obmc_edged OBMC weights forwarded to get_block_rd()
 * @param best_rd    best score so far; updated when the candidate wins
 * @return 1 if the candidate was kept, 0 if the block was left unchanged
 */
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
    const int b_stride= s->b_width << s->block_max_depth;
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
    BlockNode backup= *block;
    int rd, index, value;

    assert(mb_x>=0 && mb_y>=0);
    assert(mb_x<b_stride);

    if(intra){
        block->color[0] = p[0];
        block->color[1] = p[1];
        block->color[2] = p[2];
        block->type |= BLOCK_INTRA;
    }else{
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
        /* p[1] may be negative, so scale by multiplication: left-shifting a
         * negative value is undefined behaviour */
        value= s->me_cache_generation + (p[0]>>10) + p[1]*(1<<6) + (block->ref<<12);
        if(s->me_cache[index] == value)
            return 0;
        s->me_cache[index]= value;

        block->mx= p[0];
        block->my= p[1];
        block->type &= ~BLOCK_INTRA;
    }

    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);

//FIXME chroma
    if(rd < *best_rd){
        *best_rd= rd;
        return 1;
    }else{
        *block= backup;
        return 0;
    }
}
2978

    
2979
/* special case for int[2] args we discard afterwards,
2980
 * fixes compilation problem with gcc 2.95 */
2981
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2982
    int p[2] = {p0, p1};
2983
    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2984
}
2985

    
2986
/**
 * Try one inter candidate on a whole 2x2 block group and keep it if it
 * lowers the rate-distortion score.
 *
 * A candidate already recorded in s->me_cache for the current generation is
 * rejected without evaluation.  Otherwise all four blocks of the group are
 * set to (p0, p1, ref) and scored with get_4block_rd() on plane 0.
 *
 * NOTE(review): the cache key is built from block->ref as it was before the
 * candidate is applied, not from the ref argument -- confirm this is
 * intended.
 *
 * @param s       codec context
 * @param mb_x    column of the group's top-left block (must be even)
 * @param mb_y    row of the group's top-left block (must be even)
 * @param p0      candidate motion vector, x component
 * @param p1      candidate motion vector, y component
 * @param ref     candidate reference index
 * @param best_rd best score so far; updated when the candidate wins
 * @return 1 if the candidate was kept, 0 if the blocks were left unchanged
 */
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
    const int b_stride= s->b_width << s->block_max_depth;
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
    int rd, index, value;

    assert(mb_x>=0 && mb_y>=0);
    assert(mb_x<b_stride);
    assert(((mb_x|mb_y)&1) == 0);

    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
    /* p1 may be negative, so scale by multiplication: left-shifting a
     * negative value is undefined behaviour */
    value= s->me_cache_generation + (p0>>10) + p1*(1<<6) + (block->ref<<12);
    if(s->me_cache[index] == value)
        return 0;
    s->me_cache[index]= value;

    block->mx= p0;
    block->my= p1;
    block->ref= ref;
    block->type &= ~BLOCK_INTRA;
    /* replicate the candidate to the other three blocks of the group */
    block[1]= block[b_stride]= block[b_stride+1]= *block;

    rd= get_4block_rd(s, mb_x, mb_y, 0);

//FIXME chroma
    if(rd < *best_rd){
        *best_rd= rd;
        return 1;
    }else{
        block[0]= backup[0];
        block[1]= backup[1];
        block[b_stride]= backup[2];
        block[b_stride+1]= backup[3];
        return 0;
    }
}
3022

    
3023
/**
 * Iteratively refine the block decisions (motion vector, reference, intra
 * colour) of the current frame.
 *
 * 1. encode_q_branch() is run over all top-level blocks; the range coder and
 *    block_state are saved before and restored afterwards.
 * 2. Up to 25 passes visit every block at the finest depth. A block that is
 *    still flagged BLOCK_OPT is skipped after the first pass; whenever a block
 *    changes, BLOCK_OPT is cleared on its eight neighbours so that they are
 *    visited again. The loop ends early once a pass changes nothing.
 * 3. If block_max_depth is 1, every 2x2 group whose members differ is tested
 *    against a single shared inter candidate via check_4block_inter().
 *
 * The per-pass statistics are reported at debug level through the codec
 * context.
 */
static void iterative_me(SnowContext *s){
    int pass, mb_x, mb_y;
    const int b_width = s->b_width  << s->block_max_depth;
    const int b_height= s->b_height << s->block_max_depth;
    const int b_stride= b_width;
    int color[3];

    /* initial decision; coder state is snapshotted and put back afterwards */
    {
        RangeCoder r = s->c;
        uint8_t state[sizeof(s->block_state)];
        memcpy(state, s->block_state, sizeof(s->block_state));
        for(mb_y= 0; mb_y<s->b_height; mb_y++)
            for(mb_x= 0; mb_x<s->b_width; mb_x++)
                encode_q_branch(s, 0, mb_x, mb_y);
        s->c = r;
        memcpy(s->block_state, state, sizeof(s->block_state));
    }

    for(pass=0; pass<25; pass++){
        int change= 0;

        for(mb_y= 0; mb_y<b_height; mb_y++){
            for(mb_x= 0; mb_x<b_width; mb_x++){
                int dia_change, i, j, ref;
                int best_rd= INT_MAX, ref_rd;
                BlockNode backup, ref_b;
                const int index= mb_x + mb_y * b_stride;
                BlockNode *block= &s->block[index];
                /* neighbours; NULL where the neighbour lies outside the block grid */
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
                const int b_w= (MB_SIZE >> s->block_max_depth);
                uint8_t obmc_edged[b_w*2][b_w*2];

                if(pass && (block->type & BLOCK_OPT))
                    continue;
                block->type |= BLOCK_OPT;

                backup= *block;

                /* start a new cache generation for this block */
                if(!s->me_cache_generation)
                    memset(s->me_cache, 0, sizeof(s->me_cache));
                s->me_cache_generation += 1<<22;

                //FIXME precalculate
                /* OBMC window for this block; at the grid borders the
                 * weights of the missing neighbours are folded back in */
                {
                    int x, y;
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
                    if(mb_x==0)
                        for(y=0; y<b_w*2; y++)
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
                    if(mb_x==b_stride-1)
                        for(y=0; y<b_w*2; y++)
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
                    if(mb_y==0){
                        for(x=0; x<b_w*2; x++)
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
                        for(y=1; y<b_w; y++)
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
                    }
                    if(mb_y==b_height-1){
                        for(x=0; x<b_w*2; x++)
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
                        for(y=b_w; y<b_w*2-1; y++)
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
                    }
                }

                //skip stuff outside the picture
                /* for border blocks, copy the input samples into the current
                 * picture wherever the 2*block_w window leaves the plane */
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
                    uint8_t *src= s->  input_picture.data[0];
                    uint8_t *dst= s->current_picture.data[0];
                    const int stride= s->current_picture.linesize[0];
                    const int block_w= MB_SIZE >> s->block_max_depth;
                    const int sx= block_w*mb_x - block_w/2;
                    const int sy= block_w*mb_y - block_w/2;
                    const int w= s->plane[0].width;
                    const int h= s->plane[0].height;
                    int y;

                    for(y=sy; y<0; y++)
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
                    for(y=h; y<sy+block_w*2; y++)
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
                    if(sx<0){
                        for(y=sy; y<sy+block_w*2; y++)
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
                    }
                    if(sx+block_w*2 > w){
                        for(y=sy; y<sy+block_w*2; y++)
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
                    }
                }

                // intra(black) = neighbors' contribution to the current block
                for(i=0; i<3; i++)
                    color[i]= get_dc(s, mb_x, mb_y, i);

                // get previous score (cannot be cached due to OBMC)
                if(pass > 0 && (block->type&BLOCK_INTRA)){
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
                }else
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);

                /* search every reference frame; ref_b/ref_rd track the best
                 * block state found over all references */
                ref_b= *block;
                ref_rd= best_rd;
                for(ref=0; ref < s->ref_frames; ref++){
                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
                        continue;
                    block->ref= ref;
                    best_rd= INT_MAX;

                    /* seed candidates: stored vector, zero vector and the
                     * stored vectors of the four direct neighbours */
                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
                    if(tb)
                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
                    if(lb)
                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
                    if(rb)
                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
                    if(bb)
                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);

                    /* fullpel ME */
                    //FIXME avoid subpel interpolation / round to nearest integer
                    do{
                        dia_change=0;
                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
                            for(j=0; j<i; j++){
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
                            }
                        }
                    }while(dia_change);
                    /* subpel ME */
                    do{
                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
                        dia_change=0;
                        for(i=0; i<8; i++)
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
                    }while(dia_change);
                    //FIXME or try the standard 2 pass qpel or similar

                    /* remember the vector found for this reference */
                    mvr[0][0]= block->mx;
                    mvr[0][1]= block->my;
                    if(ref_rd > best_rd){
                        ref_rd= best_rd;
                        ref_b= *block;
                    }
                }
                best_rd= ref_rd;
                *block= ref_b;
#if 1
                /* finally try intra with the colour obtained from get_dc() */
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
                //FIXME RD style color selection
#endif
                /* a changed block invalidates the "optimized" state of all
                 * eight neighbours */
                if(!same_block(block, &backup)){
                    if(tb ) tb ->type &= ~BLOCK_OPT;
                    if(lb ) lb ->type &= ~BLOCK_OPT;
                    if(rb ) rb ->type &= ~BLOCK_OPT;
                    if(bb ) bb ->type &= ~BLOCK_OPT;
                    if(tlb) tlb->type &= ~BLOCK_OPT;
                    if(trb) trb->type &= ~BLOCK_OPT;
                    if(blb) blb->type &= ~BLOCK_OPT;
                    if(brb) brb->type &= ~BLOCK_OPT;
                    change ++;
                }
            }
        }
        /* progress statistics, not an error condition */
        av_log(s->avctx, AV_LOG_DEBUG, "pass:%d changed:%d\n", pass, change);
        if(!change)
            break;
    }

    /* try to merge 2x2 groups of differing blocks into one shared candidate */
    if(s->block_max_depth == 1){
        int change= 0;
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
                int i;
                int best_rd, init_rd;
                const int index= mb_x + mb_y * b_stride;
                BlockNode *b[4];

                b[0]= &s->block[index];
                b[1]= b[0]+1;
                b[2]= b[0]+b_stride;
                b[3]= b[2]+1;
                if(same_block(b[0], b[1]) &&
                   same_block(b[0], b[2]) &&
                   same_block(b[0], b[3]))
                    continue;

                if(!s->me_cache_generation)
                    memset(s->me_cache, 0, sizeof(s->me_cache));
                s->me_cache_generation += 1<<22;

                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);

                //FIXME more multiref search?
                /* rounded mean of the four vectors, on reference 0 */
                check_4block_inter(s, mb_x, mb_y,
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);

                /* each inter member's own vector/reference for the whole group */
                for(i=0; i<4; i++)
                    if(!(b[i]->type&BLOCK_INTRA))
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);

                if(init_rd != best_rd)
                    change++;
            }
        }
        /* progress statistics, not an error condition */
        av_log(s->avctx, AV_LOG_DEBUG, "pass:4mv changed:%d\n", change*4);
    }
}
3246

    
3247
/**
 * Quantize the coefficients of subband b from src into dst.
 *
 * With LOSSLESS_QLOG the coefficients are copied unchanged. Otherwise each
 * coefficient that falls inside the zero window (the unsigned range test on
 * thres1/thres2) becomes 0, and every other coefficient has its magnitude
 * scaled by 1<<QEXPSHIFT, offset by the rounding bias and divided by the
 * quantizer step, with the sign restored afterwards.
 *
 * @param bias nonzero selects a rounding offset of 0 (plain truncation),
 *             zero selects an offset of 3/8 of the quantizer step
 */
static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
    const int w= b->width;
    const int h= b->height;
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
    int x,y, thres1, thres2;

    if(s->qlog == LOSSLESS_QLOG){
        for(y=0; y<h; y++){
            const int row= y*stride;
            for(x=0; x<w; x++)
                dst[row + x]= src[row + x];
        }
        return;
    }

    bias= bias ? 0 : (3*qmul)>>3;
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
    thres2= 2*thres1;

    /* a single loop serves both modes: adding a zero bias changes nothing */
    for(y=0; y<h; y++){
        const int row= y*stride;

        for(x=0; x<w; x++){
            const int coef= src[row + x];

            if((unsigned)(coef+thres1) > thres2){
                int level= coef>=0 ? coef : -coef;

                level<<= QEXPSHIFT;
                level= (level + bias) / qmul; //FIXME optimize
                dst[row + x]= coef>=0 ? level : -level;
            }else
                dst[row + x]= 0;
        }
    }
}
3307

    
3308
/**
 * Dequantize rows start_y..end_y-1 of subband b in place inside the slice
 * buffer sb. Nothing is done for LOSSLESS_QLOG. Zero coefficients stay zero;
 * all others are scaled by the quantizer step, offset by the qbias-derived
 * term and shifted down by QEXPSHIFT, operating on the magnitude and
 * restoring the sign afterwards. The src and stride arguments are not used.
 */
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
    const int w= b->width;
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
    int x,y;

    if(s->qlog == LOSSLESS_QLOG)
        return;

    for(y=start_y; y<end_y; y++){
        /* row y of this band inside the slice buffer */
        IDWTELEM *row= slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;

        for(x=0; x<w; x++){
            const int level= row[x];

            if(level > 0)
                row[x]=  (( level*qmul + qadd)>>(QEXPSHIFT));
            else if(level < 0)
                row[x]= -((-level*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
        }
    }
}
3330

    
3331
/**
 * Dequantize the whole subband b in place in src (row pitch `stride`).
 * Nothing is done for LOSSLESS_QLOG. Zero coefficients stay zero; all others
 * are scaled by the quantizer step, offset by the qbias-derived term and
 * shifted down by QEXPSHIFT, operating on the magnitude and restoring the
 * sign afterwards.
 */
static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
    const int w= b->width;
    const int h= b->height;
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
    int x,y;

    if(s->qlog == LOSSLESS_QLOG)
        return;

    for(y=0; y<h; y++){
        const int row= y*stride;

        for(x=0; x<w; x++){
            const int level= src[row + x];

            if(level > 0)
                src[row + x]=  (( level*qmul + qadd)>>(QEXPSHIFT));
            else if(level < 0)
                src[row + x]= -((-level*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
        }
    }
}
3352

    
3353
/**
 * Replace every coefficient of subband b by its difference to a prediction
 * formed from already-present neighbours, in place.
 *
 * The band is walked from the bottom-right to the top-left so that each
 * prediction still reads the unmodified left/upper neighbours.
 *  - first column: predicted from the sample above (the top-left sample is
 *    left untouched);
 *  - use_median set: median of left, top and top-right, falling back to the
 *    left sample in the first row and the last column;
 *  - use_median clear: median of left, top and left+top-topleft, falling back
 *    to the left sample in the first row.
 * The `inverse` argument is not used.
 */
static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
    const int w= b->width;
    const int h= b->height;
    int x,y;

    for(y=h-1; y>=0; y--){
        for(x=w-1; x>=0; x--){
            const int pos= x + y*stride;
            int pred;

            if(!x){
                if(!y)
                    continue;           /* top-left sample has no predictor */
                pred= src[pos - stride];
            }else if(use_median ? (y && x+1<w) : y){
                const int left= src[pos - 1];
                const int top = src[pos - stride];
                const int third= use_median ? src[pos - stride + 1]
                                            : left + top - src[pos - 1 - stride];

                pred= mid_pred(left, top, third);
            }else
                pred= src[pos - 1];

            src[pos] -= pred;
        }
    }
}
3376

    
3377
/**
 * Add the neighbour-based prediction back onto rows start_y..end_y-1 of
 * subband b, in place inside the slice buffer sb.
 *
 * Rows are processed top to bottom and samples left to right, so every
 * prediction reads neighbours that have already been reconstructed:
 *  - first column: the sample above (the top-left sample is left untouched);
 *  - use_median set: median of left, top and top-right, falling back to the
 *    left sample in row 0 and in the last column;
 *  - use_median clear: median of left, top and left+top-topleft, falling back
 *    to the left sample in row 0.
 * The src, stride and inverse arguments are not used.
 */
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
    const int w= b->width;
    int x,y;

    IDWTELEM *cur= 0; // silence silly "could be used without having been initialized" warning
    IDWTELEM *above;

    /* fetch the row above the first processed row, if there is one */
    if (start_y != 0)
        cur = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;

    for(y=start_y; y<end_y; y++){
        above = cur;
        cur = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;

        for(x=0; x<w; x++){
            int pred;

            if(!x){
                if(!y)
                    continue;           /* top-left sample has no predictor */
                pred= above[x];
            }else if(use_median ? (y && x+1<w) : y){
                const int left= cur[x - 1];
                const int top = above[x];
                const int third= use_median ? above[x + 1]
                                            : left + top - above[x - 1];

                pred= mid_pred(left, top, third);
            }else
                pred= cur[x - 1];

            cur[x] += pred;
        }
    }
}
3406

    
3407
/**
 * Add the neighbour-based prediction back onto every coefficient of subband
 * b, in place in src (row pitch `stride`).
 *
 * The band is walked top to bottom, left to right, so every prediction reads
 * neighbours that have already been reconstructed:
 *  - first column: the sample above (the top-left sample is left untouched);
 *  - use_median set: median of left, top and top-right, falling back to the
 *    left sample in the first row and the last column;
 *  - use_median clear: median of left, top and left+top-topleft, falling back
 *    to the left sample in the first row.
 * The `inverse` argument is not used.
 */
static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
    const int w= b->width;
    const int h= b->height;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<w; x++){
            const int pos= x + y*stride;
            int pred;

            if(!x){
                if(!y)
                    continue;           /* top-left sample has no predictor */
                pred= src[pos - stride];
            }else if(use_median ? (y && x+1<w) : y){
                const int left= src[pos - 1];
                const int top = src[pos - stride];
                const int third= use_median ? src[pos - stride + 1]
                                            : left + top - src[pos - 1 - stride];

                pred= mid_pred(left, top, third);
            }else
                pred= src[pos - 1];

            src[pos] += pred;
        }
    }
}
3430

    
3431
/**
 * Write the per-band qlog values of planes 0 and 1 to the header.
 * For each decomposition level the bands are written in orientation order;
 * orientation 0 exists only at level 0 and orientation 2 is never written.
 */
static void encode_qlogs(SnowContext *s){
    int plane_index, level, orientation;

    for(plane_index=0; plane_index<2; plane_index++){
        Plane *const plane= &s->plane[plane_index];

        for(level=0; level<s->spatial_decomposition_count; level++){
            const int first= level ? 1 : 0;

            for(orientation=first; orientation<4; orientation++){
                if(orientation != 2)
                    put_symbol(&s->c, s->header_state, plane->band[level][orientation].qlog, 1);
            }
        }
    }
}
3443

    
3444
/**
 * Write the frame header to the range coder.
 *
 * Layout, in order:
 *  - the keyframe flag (coded with a private, freshly initialised state);
 *  - on keyframes: the stream parameters followed by the qlogs;
 *  - on other frames: an "MC filter changed" flag with the filter parameters
 *    of planes 0 and 1 if set, then a "decomposition count changed" flag with
 *    the new count and qlogs if set;
 *  - always: five values coded as signed differences to the previous header.
 *
 * On keyframes, or when always_reset is set, the coder contexts and the
 * remembered "last" values are reset first so that the differences are taken
 * against zero.
 */
static void encode_header(SnowContext *s){
    int plane_index, i;
    uint8_t kstate[32];

    memset(kstate, MID_STATE, sizeof(kstate));

    put_rac(&s->c, kstate, s->keyframe);

    if(s->keyframe || s->always_reset){
        reset_contexts(s);
        s->last_block_max_depth= 0;
        s->last_mv_scale= 0;
        s->last_qbias= 0;
        s->last_qlog= 0;
        s->last_spatial_decomposition_type= 0;
        for(plane_index=0; plane_index<2; plane_index++){
            Plane *p= &s->plane[plane_index];

            p->last_htaps=0;
            p->last_diag_mc=0;
            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
        }
    }

    if(s->keyframe){
        put_symbol(&s->c, s->header_state, s->version, 0);
        put_rac(&s->c, s->header_state, s->always_reset);
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
        put_rac(&s->c, s->header_state, s->spatial_scalability);
//        put_rac(&s->c, s->header_state, s->rate_scalability);
        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);

        encode_qlogs(s);
    }else{
        const int count_changed= s->last_spatial_decomposition_count != s->spatial_decomposition_count;
        int update_mc=0;

        /* did any motion compensation filter parameter change? */
        for(plane_index=0; plane_index<2; plane_index++){
            Plane *p= &s->plane[plane_index];

            if(p->last_htaps   != p->htaps   ||
               p->last_diag_mc != p->diag_mc ||
               memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)))
                update_mc= 1;
        }
        put_rac(&s->c, s->header_state, update_mc);
        if(update_mc){
            for(plane_index=0; plane_index<2; plane_index++){
                Plane *p= &s->plane[plane_index];

                put_rac(&s->c, s->header_state, p->diag_mc);
                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
                /* magnitudes of coefficients htaps/2 down to 1 */
                for(i= p->htaps/2; i; i--)
                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
            }
        }

        put_rac(&s->c, s->header_state, count_changed);
        if(count_changed){
            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
            encode_qlogs(s);
        }
    }

    /* values coded as differences to the previous header */
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
}
3514

    
3515
/**
 * Remember the header values of the current frame as the "last" values that
 * the next header is compared against. The per-plane motion compensation
 * filter parameters (planes 0 and 1) are only recorded on non-keyframes.
 */
static void update_last_header_values(SnowContext *s){
    int plane_index;

    s->last_spatial_decomposition_count = s->spatial_decomposition_count;
    s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
    s->last_block_max_depth             = s->block_max_depth;
    s->last_mv_scale                    = s->mv_scale;
    s->last_qbias                       = s->qbias;
    s->last_qlog                        = s->qlog;

    if(s->keyframe)
        return;

    for(plane_index=0; plane_index<2; plane_index++){
        Plane *const plane= &s->plane[plane_index];

        plane->last_htaps  = plane->htaps;
        plane->last_diag_mc= plane->diag_mc;
        memcpy(plane->last_hcoeff, plane->hcoeff, sizeof(plane->hcoeff));
    }
}
3534

    
3535
/**
 * Read the per-band qlog values from the header and fill them in for all
 * three planes.
 * Only planes 0 and 1 carry coded values, and within those orientation 2 is
 * not coded: it takes the value of orientation 1 of the same level. Plane 2
 * copies every value from plane 1. Orientation 0 exists only at level 0.
 */
static void decode_qlogs(SnowContext *s){
    int plane_index, level, orientation;

    for(plane_index=0; plane_index<3; plane_index++){
        Plane *const plane= &s->plane[plane_index];

        for(level=0; level<s->spatial_decomposition_count; level++){
            for(orientation= level ? 1 : 0; orientation<4; orientation++){
                int q;

                if(plane_index != 2 && orientation != 2)
                    q= get_symbol(&s->c, s->header_state, 1);
                else if(plane_index == 2)
                    q= s->plane[1].band[level][orientation].qlog;
                else
                    q= plane->band[level][1].qlog;

                plane->band[level][orientation].qlog= q;
            }
        }
    }
}
3550

    
3551
#define GET_S(dst, check) \
3552
    tmp= get_symbol(&s->c, s->header_state, 0);\
3553
    if(!(check)){\
3554
        av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
3555
        return -1;\
3556
    }\
3557
    dst= tmp;
3558

    
3559
static int decode_header(SnowContext *s){
3560
    int plane_index, tmp;
3561
    uint8_t kstate[32];
3562

    
3563
    memset(kstate, MID_STATE, sizeof(kstate));
3564

    
3565
    s->keyframe= get_rac(&s->c, kstate);
3566
    if(s->keyframe || s->always_reset){
3567
        reset_contexts(s);
3568
        s->spatial_decomposition_type=
3569
        s->qlog=
3570
        s->qbias=
3571
        s->mv_scale=
3572
        s->block_max_depth= 0;
3573
    }
3574
    if(s->keyframe){
3575
        GET_S(s->version, tmp <= 0U)
3576
        s->always_reset= get_rac(&s->c, s->header_state);
3577
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3578
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3579
        GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3580
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3581
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3582
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3583
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3584
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3585
        GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
3586
        s->max_ref_frames++;
3587

    
3588
        decode_qlogs(s);
3589
    }
3590

    
3591
    if(!s->keyframe){
3592
        if(get_rac(&s->c, s->header_state)){
3593
            for(plane_index=0; plane_index<2; plane_index++){
3594
                int htaps, i, sum=0;
3595
                Plane *p= &s->plane[plane_index];
3596
                p->diag_mc= get_rac(&s->c, s->header_state);
3597
                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3598
                if((unsigned)htaps > HTAPS_MAX || htaps==0)
3599
                    return -1;
3600
                p->htaps= htaps;
3601
                for(i= htaps/2; i; i--){
3602
                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3603
                    sum += p->hcoeff[i];
3604
                }
3605
                p->hcoeff[0]= 32-sum;
3606
            }
3607
            s->plane[2].diag_mc= s->plane[1].diag_mc;
3608
            s->plane[2].htaps  = s->plane[1].htaps;
3609
            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3610
        }
3611
        if(get_rac(&s->c, s->header_state)){
3612
            GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3613
            decode_qlogs(s);
3614
        }
3615
    }
3616

    
3617
    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3618
    if(s->spatial_decomposition_type > 1U){
3619
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3620
        return -1;
3621
    }
3622
    if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
3623
             s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
3624
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
3625
        return -1;
3626
    }
3627

    
3628
    s->qlog           += get_symbol(&s->c, s->header_state, 1);
3629
    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
3630
    s->qbias          += get_symbol(&s->c, s->header_state, 1);
3631
    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3632
    if(s->block_max_depth > 1 || s->block_max_depth < 0){
3633
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3634
        s->block_max_depth= 0;
3635
        return -1;
3636
    }
3637

    
3638
    return 0;
3639
}
3640

    
3641
static void init_qexp(void){
3642
    int i;
3643
    double v=128;
3644

    
3645
    for(i=0; i<QROOT; i++){
3646
        qexp[i]= lrintf(v);
3647
        v *= pow(2, 1.0 / QROOT);
3648
    }
3649
}
3650

    
3651
/**
 * Initialisation shared by the encoder and the decoder.
 *
 * Sets up the dsputil function tables used for motion compensation, builds
 * the static qexp and scale_mv_ref tables, allocates the two full-frame
 * wavelet buffers, and obtains the motion-compensation-only picture together
 * with a scratch buffer sized from its luma line size.
 *
 * @return 0 on success, -1 if a buffer could not be allocated or the picture
 *         could not be obtained. Whatever was acquired before the failure
 *         stays attached to the context.
 *         NOTE(review): confirm that the codec's close path releases it when
 *         initialisation reports failure.
 */
static av_cold int common_init(AVCodecContext *avctx){
    SnowContext *s = avctx->priv_data;
    int width, height;
    int i, j;

    s->avctx= avctx;
    s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe

    dsputil_init(&s->dsp, avctx);

/* route the qpel table slots for (dx,dy) to the h264 qpel functions */
#define mcf(dx,dy)\
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];

    mcf( 0, 0)
    mcf( 4, 0)
    mcf( 8, 0)
    mcf(12, 0)
    mcf( 0, 4)
    mcf( 4, 4)
    mcf( 8, 4)
    mcf(12, 4)
    mcf( 0, 8)
    mcf( 4, 8)
    mcf( 8, 8)
    mcf(12, 8)
    mcf( 0,12)
    mcf( 4,12)
    mcf( 8,12)
    mcf(12,12)

/* route the pixel table slots for (dx,dy) to the mc_block_hpel functions */
#define mcfh(dx,dy)\
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
        mc_block_hpel ## dx ## dy ## 16;\
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
        mc_block_hpel ## dx ## dy ## 8;

    mcfh(0, 0)
    mcfh(8, 0)
    mcfh(0, 8)
    mcfh(8, 8)

    /* static tables first: they do not depend on any allocation */
    if(!qexp[0])
        init_qexp();

    for(i=0; i<MAX_REF_FRAMES; i++)
        for(j=0; j<MAX_REF_FRAMES; j++)
            scale_mv_ref[i][j] = 256*(i+1)/(j+1);

//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);

    width= s->avctx->width;
    height= s->avctx->height;

    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
    if(!s->spatial_idwt_buffer || !s->spatial_dwt_buffer)
        return -1;

    /* the scratch buffer size depends on the line size of this picture, so
     * it must not be computed if the picture could not be obtained */
    if(s->avctx->get_buffer(s->avctx, &s->mconly_picture) < 0)
        return -1;
    s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
    if(!s->scratchbuf)
        return -1;

    return 0;
}
3719

    
3720
/**
 * Set up the per-plane and per-subband layout from values that are only
 * known once the header has been processed (chroma shifts and
 * spatial_decomposition_count).
 *
 * For each of the 3 planes the plane dimensions are stored, and every
 * subband is pointed at its position inside the shared spatial DWT / IDWT
 * buffers, given its stride and size, linked to the band of the same
 * orientation one level below, and has its x_coeff array reallocated.
 *
 * @param avctx codec context whose priv_data is the SnowContext
 * @return 0 on success, -1 if an x_coeff array could not be allocated
 */
static int common_init_after_header(AVCodecContext *avctx){
    SnowContext *s = avctx->priv_data;
    int plane_index, level, orientation;

    for(plane_index=0; plane_index<3; plane_index++){
        int w= s->avctx->width;
        int h= s->avctx->height;

        /* planes 1 and 2 are subsampled by the chroma shifts */
        if(plane_index){
            w>>= s->chroma_h_shift;
            h>>= s->chroma_v_shift;
        }
        s->plane[plane_index].width = w;
        s->plane[plane_index].height= h;

        /* walk from the highest level index down to 0; w and h are halved
         * (rounding up) after each level */
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
            /* orientation 0 only exists at level 0 */
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
                SubBand *b= &s->plane[plane_index].band[level][orientation];

                b->buf= s->spatial_dwt_buffer;
                b->level= level;
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
                b->width = (w + !(orientation&1))>>1;
                b->height= (h + !(orientation>1))>>1;

                b->stride_line = 1 << (s->spatial_decomposition_count - level);
                b->buf_x_offset = 0;
                b->buf_y_offset = 0;

                /* odd orientations start in the right half of the row */
                if(orientation&1){
                    b->buf += (w+1)>>1;
                    b->buf_x_offset = (w+1)>>1;
                }
                /* orientations 2 and 3 start half a band stride further down */
                if(orientation>1){
                    b->buf += b->stride>>1;
                    b->buf_y_offset = b->stride_line >> 1;
                }
                /* ibuf is the same element offset, but inside the IDWT buffer */
                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);

                if(level)
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
                //FIXME avoid this realloc
                av_freep(&b->x_coeff);
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
                /* report allocation failure instead of leaving a NULL
                 * x_coeff behind for later code to dereference */
                if(!b->x_coeff)
                    return -1;
            }
            w= (w+1)>>1;
            h= (h+1)>>1;
        }
    }

    return 0;
}
3772

    
3773
static int qscale2qlog(int qscale){
3774
    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3775
           + 61*QROOT/8; //<64 >60
3776
}
3777

    
3778
/**
 * Single-pass rate control step.
 *
 * Estimates the frame's complexity as a sum of weighted dwt coefficients of
 * plane 0, hands it to ff_rate_estimate_qscale() and updates pict->quality,
 * s->lambda and s->qlog from the result.
 * FIXME we know exact mv bits at this point,
 * but ratecontrol isn't set up to include them.
 *
 * As a side effect each subband's ibuf area is overwritten with a copy of
 * its buf area (and passed through decorrelate() for orientation 0).
 *
 * @return the change that was applied to s->qlog, or INT_MIN if
 *         ff_rate_estimate_qscale() returned a negative quality
 */
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
{
    uint32_t complexity= 0;
    int lvl, orient, qlog_change;
    int is_intra;

    for(lvl=0; lvl<s->spatial_decomposition_count; lvl++){
        for(orient= lvl ? 1 : 0; orient<4; orient++){
            SubBand *band= &s->plane[0].band[lvl][orient];
            IDWTELEM *coeffs= band->ibuf;
            const int band_w= band->width;
            const int band_h= band->height;
            const int pitch= band->stride;
            const int qlog= av_clip(2*QROOT + band->qlog, 0, QROOT*16);
            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
            const int qdiv= (1<<16)/qmul;
            int col, row;

            //FIXME this is ugly
            for(row=0; row<band_h; row++){
                const int row_off= row*pitch;
                for(col=0; col<band_w; col++)
                    coeffs[col+row_off]= band->buf[col+row_off];
            }

            if(orient==0)
                decorrelate(s, band, coeffs, pitch, 1, 0);

            /* accumulate |coefficient| scaled by qdiv / 2^16 */
            for(row=0; row<band_h; row++){
                const int row_off= row*pitch;
                for(col=0; col<band_w; col++)
                    complexity+= abs(coeffs[col+row_off]) * qdiv >> 16;
            }
        }
    }

    /* ugly, ratecontrol just takes a sqrt again */
    complexity = (uint64_t)complexity * complexity >> 16;
    assert(complexity < INT_MAX);

    /* I frames report the sum as mb_var_sum, all others as mc_mb_var_sum;
     * the other field is cleared */
    is_intra= pict->pict_type == FF_I_TYPE;
    s->m.current_picture.mb_var_sum   = is_intra ? complexity : 0;
    s->m.current_picture.mc_mb_var_sum= is_intra ? 0 : complexity;

    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
    if (pict->quality < 0)
        return INT_MIN;

    s->lambda= pict->quality * 3/2;
    qlog_change= qscale2qlog(pict->quality) - s->qlog;
    s->qlog+= qlog_change;
    return qlog_change;
}
3829

    
3830
/**
 * Derive b->qlog for every subband of plane p.
 *
 * For each subband the IDWT buffer is cleared, a single coefficient of
 * value 256*16 is placed near the middle of the band, and the inverse
 * transform is run. The sum of squares of the resulting samples (each
 * scaled by 16) is then converted into the band's qlog.
 *
 * Clobbers s->spatial_idwt_buffer.
 */
static void calculate_visual_weight(SnowContext *s, Plane *p){
    const int plane_w= p->width;
    const int plane_h= p->height;
    int lvl, orient;

    for(lvl=0; lvl<s->spatial_decomposition_count; lvl++){
        for(orient= lvl ? 1 : 0; orient<4; orient++){
            SubBand *band= &p->band[lvl][orient];
            IDWTELEM *band_coeffs= band->ibuf;
            int64_t energy=0;
            int col, row;

            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*plane_w*plane_h);
            band_coeffs[band->width/2 + band->height/2*band->stride]= 256*16;
            ff_spatial_idwt(s->spatial_idwt_buffer, plane_w, plane_h, plane_w, s->spatial_decomposition_type, s->spatial_decomposition_count);

            for(row=0; row<plane_h; row++){
                const IDWTELEM *line= s->spatial_idwt_buffer + row*plane_w;
                for(col=0; col<plane_w; col++){
                    int64_t sample= line[col]*16;
                    energy += sample*sample;
                }
            }

            band->qlog= (int)(log(352256.0/sqrt(energy)) / log(pow(2.0, 1.0/QROOT))+0.5);
        }
    }
}
3855

    
3856
#define QUANTIZE2 0
3857

    
3858
#if QUANTIZE2==1
3859
#define Q2_STEP 8
3860

    
3861
/**
 * Accumulate squared differences between r0 and r1 into a grid of scores.
 *
 * The score array is cleared first. Every sample of the plane is then
 * mapped to a cell (sx, sy) derived from the subband step size, the
 * orientation-dependent half-step offset and Q2_STEP, and the squared
 * difference (rounded to a multiple of 16 before squaring) is added to
 * that cell.
 *
 * Only compiled when QUANTIZE2==1.
 */
static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
    const int step= 1 << (s->spatial_decomposition_count - level);
    const int xo= (orientation&1) ? step>>1 : 0;
    const int yo= (orientation&2) ? step>>1 : 0;
    int x, y;

    //FIXME bias for nonzero ?
    //FIXME optimize
    memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));

    for(y=0; y<p->height; y++){
        const int sy= (y-yo + step/2) / step / Q2_STEP;

        for(x=0; x<p->width; x++){
            const int sx= (x-xo + step/2) / step / Q2_STEP;
            const int pos= x + y*p->width;
            int diff= r0[pos] - r1[pos];

            assert(sx>=0 && sy>=0 && sx < score_stride);
            diff= ((diff+8)>>4)<<4;
            score[sx + sy*score_stride] += diff*diff;
            assert(score[sx + sy*score_stride] >= 0);
        }
    }
}
3888

    
3889
/**
 * Run dequantize() on every subband of plane p, operating on buffer.
 *
 * Each band is addressed at the same element offset inside buffer that its
 * ibuf has inside s->spatial_idwt_buffer. The width and height parameters
 * are not used.
 *
 * Only compiled when QUANTIZE2==1.
 */
static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
    int level;

    for(level=0; level<s->spatial_decomposition_count; level++){
        int orientation= level ? 1 : 0;

        for(; orientation<4; orientation++){
            SubBand *band= &p->band[level][orientation];

            dequantize(s, band, buffer + (band->ibuf - s->spatial_idwt_buffer), band->stride);
        }
    }
}
3901

    
3902
/**
 * Forward-transform and quantize a plane, then try to shrink coefficients.
 *
 * buffer is transformed in place with ff_spatial_dwt() and every subband is
 * quantized into a local array. Unless s->qbias is 0, each subband is then
 * visited once per (xs, ys) phase of a Q2_STEP x Q2_STEP grid: the
 * coefficients at that phase are moved one step towards zero in a trial
 * copy, both variants are reconstructed and scored with find_sse(), and the
 * shrink is kept wherever the trial score does not exceed the reference
 * score by more than threshold. The result is copied into
 * s->spatial_idwt_buffer.
 *
 * Only compiled when QUANTIZE2==1.
 */
static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
    int level, orientation, ys, xs, x, y;
    const int coeff_count= height * stride;
    const size_t coeff_bytes= coeff_count * sizeof(IDWTELEM);
    IDWTELEM best_dequant[coeff_count];
    IDWTELEM idwt2_buffer[coeff_count];
    const int score_stride= (width + 10)/Q2_STEP;
    const int score_count= (width + 10)/Q2_STEP * (height + 10)/Q2_STEP; //FIXME size
    int best_score[score_count];
    int score[score_count];
    const int threshold= (s->m.lambda * s->m.lambda) >> 6;

    //FIXME pass the copy cleanly ?

    ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);

    /* initial quantization of every subband into best_dequant */
    for(level=0; level<s->spatial_decomposition_count; level++){
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
            SubBand *b= &p->band[level][orientation];
            IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
             DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
            assert(src == b->buf); // code does not depend on this but it is true currently

            quantize(s, b, dst, src, b->stride, s->qbias);
        }
    }

    /* single refinement pass, skipped when qbias is 0 (keyframe) */
    if(s->qbias != 0){
        for(level=0; level<s->spatial_decomposition_count; level++){
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
                SubBand *b= &p->band[level][orientation];
                IDWTELEM *trial= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
                IDWTELEM *best = best_dequant + (b->ibuf - s->spatial_idwt_buffer);

                for(ys= 0; ys<Q2_STEP; ys++){
                    for(xs= 0; xs<Q2_STEP; xs++){
                        /* reference score for the current best coefficients */
                        memcpy(idwt2_buffer, best_dequant, coeff_bytes);
                        dequantize_all(s, p, idwt2_buffer, width, height);
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
                        find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);

                        /* trial: move the coefficients of this phase one step towards zero */
                        memcpy(idwt2_buffer, best_dequant, coeff_bytes);
                        for(y=ys; y<b->height; y+= Q2_STEP){
                            for(x=xs; x<b->width; x+= Q2_STEP){
                                const int pos= x + y*b->stride;

                                if(trial[pos]<0)
                                    trial[pos]++;
                                else if(trial[pos]>0)
                                    trial[pos]--;
                                //FIXME try more than just --
                            }
                        }
                        dequantize_all(s, p, idwt2_buffer, width, height);
                        ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
                        find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);

                        /* keep the shrink wherever it is not worse by more than threshold */
                        for(y=ys; y<b->height; y+= Q2_STEP){
                            for(x=xs; x<b->width; x+= Q2_STEP){
                                const int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
                                const int pos= x + y*b->stride;

                                if(score[score_idx] > best_score[score_idx] + threshold)
                                    continue;

                                best_score[score_idx]= score[score_idx];
                                if(best[pos]<0)
                                    best[pos]++;
                                else if(best[pos]>0)
                                    best[pos]--;
                                //FIXME copy instead
                            }
                        }
                    }
                }
            }
        }
    }

    memcpy(s->spatial_idwt_buffer, best_dequant, coeff_bytes); //FIXME work with that directly instead of copy at the end
}
3970

    
3971
#endif /* QUANTIZE2==1 */
3972

    
3973
static av_cold int encode_init(AVCodecContext *avctx)
3974
{
3975
    SnowContext *s = avctx->priv_data;
3976
    int plane_index;
3977

    
3978
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3979
        av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3980
               "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3981
        return -1;
3982
    }
3983

    
3984
    if(avctx->prediction_method == DWT_97
3985
       && (avctx->flags & CODEC_FLAG_QSCALE)
3986
       && avctx->global_quality == 0){
3987
        av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3988
        return -1;
3989
    }
3990

    
3991
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3992

    
3993
    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3994
    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3995

    
3996
    for(plane_index=0; plane_index<3; plane_index++){
3997