Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 4f90f33a

History | View | Annotate | Download (175 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

    
21
#include "avcodec.h"
22
#include "dsputil.h"
23
#include "snow.h"
24

    
25
#include "rangecoder.h"
26

    
27
#include "mpegvideo.h"
28

    
29
#undef NDEBUG
30
#include <assert.h>
31

    
32
static const int8_t quant3[256]={
33
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
49
};
50
static const int8_t quant3b[256]={
51
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
};
68
static const int8_t quant3bA[256]={
69
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
};
86
static const int8_t quant5[256]={
87
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
103
};
104
static const int8_t quant7[256]={
105
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
121
};
122
static const int8_t quant9[256]={
123
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
139
};
140
static const int8_t quant11[256]={
141
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
157
};
158
static const int8_t quant13[256]={
159
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
175
};
176

    
177
#if 0 //64*cubic
178
static const uint8_t obmc32[1024]={
179
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
180
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
181
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
182
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
183
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
184
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
185
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
186
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
187
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
188
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
189
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
190
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
191
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
192
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
193
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
194
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
195
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
196
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
197
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
198
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
199
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
200
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
201
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
202
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
203
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
204
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
205
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
206
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
207
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
208
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
209
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
210
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
211
//error:0.000022
212
};
213
static const uint8_t obmc16[256]={
214
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
215
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
216
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
217
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
218
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
219
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
220
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
221
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
222
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
223
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
224
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
225
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
226
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
227
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
228
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
229
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
230
//error:0.000033
231
};
232
#elif 1 // 64*linear
233
static const uint8_t obmc32[1024]={
234
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
235
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
236
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
237
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
238
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
239
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
240
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
241
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
242
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
243
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
244
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
245
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
246
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
247
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
248
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
249
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
250
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
251
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
252
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
253
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
254
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
255
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
256
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
257
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
258
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
259
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
260
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
261
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
262
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
263
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
264
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
265
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
266
 //error:0.000020
267
};
268
static const uint8_t obmc16[256]={
269
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
270
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
271
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
272
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
273
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
274
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
281
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
282
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
283
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
284
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
285
//error:0.000015
286
};
287
#else //64*cos
288
static const uint8_t obmc32[1024]={
289
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
290
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
291
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
292
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
293
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
294
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
295
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
296
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
297
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
298
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
299
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
300
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
301
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
302
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
303
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
304
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
305
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
306
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
307
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
308
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
309
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
310
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
311
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
312
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
313
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
314
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
315
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
316
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
317
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
318
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
319
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
320
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
321
//error:0.000022
322
};
323
static const uint8_t obmc16[256]={
324
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
325
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
326
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
327
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
328
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
329
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
330
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
331
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
332
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
333
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
334
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
335
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
336
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
337
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
338
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
339
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
340
//error:0.000022
341
};
342
#endif
343

    
344
//linear *64
345
static const uint8_t obmc8[64]={
346
  4, 12, 20, 28, 28, 20, 12,  4,
347
 12, 36, 60, 84, 84, 60, 36, 12,
348
 20, 60,100,140,140,100, 60, 20,
349
 28, 84,140,196,196,140, 84, 28,
350
 28, 84,140,196,196,140, 84, 28,
351
 20, 60,100,140,140,100, 60, 20,
352
 12, 36, 60, 84, 84, 60, 36, 12,
353
  4, 12, 20, 28, 28, 20, 12,  4,
354
//error:0.000000
355
};
356

    
357
//linear *64
358
static const uint8_t obmc4[16]={
359
 16, 48, 48, 16,
360
 48,144,144, 48,
361
 48,144,144, 48,
362
 16, 48, 48, 16,
363
//error:0.000000
364
};
365

    
366
static const uint8_t *obmc_tab[4]={
367
    obmc32, obmc16, obmc8, obmc4
368
};
369

    
370
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
371

    
372
typedef struct BlockNode{
373
    int16_t mx;
374
    int16_t my;
375
    uint8_t ref;
376
    uint8_t color[3];
377
    uint8_t type;
378
//#define TYPE_SPLIT    1
379
#define BLOCK_INTRA   1
380
#define BLOCK_OPT     2
381
//#define TYPE_NOCOLOR  4
382
    uint8_t level; //FIXME merge into type?
383
}BlockNode;
384

    
385
static const BlockNode null_block= { //FIXME add border maybe
386
    .color= {128,128,128},
387
    .mx= 0,
388
    .my= 0,
389
    .ref= 0,
390
    .type= 0,
391
    .level= 0,
392
};
393

    
394
#define LOG2_MB_SIZE 4
395
#define MB_SIZE (1<<LOG2_MB_SIZE)
396
#define ENCODER_EXTRA_BITS 4
397
#define HTAPS_MAX 8
398

    
399
typedef struct x_and_coeff{
400
    int16_t x;
401
    uint16_t coeff;
402
} x_and_coeff;
403

    
404
typedef struct SubBand{
405
    int level;
406
    int stride;
407
    int width;
408
    int height;
409
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
410
    DWTELEM *buf;
411
    IDWTELEM *ibuf;
412
    int buf_x_offset;
413
    int buf_y_offset;
414
    int stride_line; ///< Stride measured in lines, not pixels.
415
    x_and_coeff * x_coeff;
416
    struct SubBand *parent;
417
    uint8_t state[/*7*2*/ 7 + 512][32];
418
}SubBand;
419

    
420
typedef struct Plane{
421
    int width;
422
    int height;
423
    SubBand band[MAX_DECOMPOSITIONS][4];
424

    
425
    int htaps;
426
    int8_t hcoeff[HTAPS_MAX/2];
427
    int diag_mc;
428
    int fast_mc;
429

    
430
    int last_htaps;
431
    int8_t last_hcoeff[HTAPS_MAX/2];
432
    int last_diag_mc;
433
}Plane;
434

    
435
typedef struct SnowContext{
436
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
437

    
438
    AVCodecContext *avctx;
439
    RangeCoder c;
440
    DSPContext dsp;
441
    AVFrame new_picture;
442
    AVFrame input_picture;              ///< new_picture with the internal linesizes
443
    AVFrame current_picture;
444
    AVFrame last_picture[MAX_REF_FRAMES];
445
    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446
    AVFrame mconly_picture;
447
//     uint8_t q_context[16];
448
    uint8_t header_state[32];
449
    uint8_t block_state[128 + 32*128];
450
    int keyframe;
451
    int always_reset;
452
    int version;
453
    int spatial_decomposition_type;
454
    int last_spatial_decomposition_type;
455
    int temporal_decomposition_type;
456
    int spatial_decomposition_count;
457
    int temporal_decomposition_count;
458
    int max_ref_frames;
459
    int ref_frames;
460
    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
461
    uint32_t *ref_scores[MAX_REF_FRAMES];
462
    DWTELEM *spatial_dwt_buffer;
463
    IDWTELEM *spatial_idwt_buffer;
464
    int colorspace_type;
465
    int chroma_h_shift;
466
    int chroma_v_shift;
467
    int spatial_scalability;
468
    int qlog;
469
    int last_qlog;
470
    int lambda;
471
    int lambda2;
472
    int pass1_rc;
473
    int mv_scale;
474
    int last_mv_scale;
475
    int qbias;
476
    int last_qbias;
477
#define QBIAS_SHIFT 3
478
    int b_width;
479
    int b_height;
480
    int block_max_depth;
481
    int last_block_max_depth;
482
    Plane plane[MAX_PLANES];
483
    BlockNode *block;
484
#define ME_CACHE_SIZE 1024
485
    int me_cache[ME_CACHE_SIZE];
486
    int me_cache_generation;
487
    slice_buffer sb;
488

    
489
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
490
}SnowContext;
491

    
492
typedef struct {
493
    IDWTELEM *b0;
494
    IDWTELEM *b1;
495
    IDWTELEM *b2;
496
    IDWTELEM *b3;
497
    int y;
498
} dwt_compose_t;
499

    
500
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
501
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
502

    
503
static void iterative_me(SnowContext *s);
504

    
505
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
506
{
507
    int i;
508

    
509
    buf->base_buffer = base_buffer;
510
    buf->line_count = line_count;
511
    buf->line_width = line_width;
512
    buf->data_count = max_allocated_lines;
513
    buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
514
    buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
515

    
516
    for (i = 0; i < max_allocated_lines; i++)
517
    {
518
        buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
519
    }
520

    
521
    buf->data_stack_top = max_allocated_lines - 1;
522
}
523

    
524
static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
525
{
526
    int offset;
527
    IDWTELEM * buffer;
528

    
529
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
530

    
531
    assert(buf->data_stack_top >= 0);
532
//  assert(!buf->line[line]);
533
    if (buf->line[line])
534
        return buf->line[line];
535

    
536
    offset = buf->line_width * line;
537
    buffer = buf->data_stack[buf->data_stack_top];
538
    buf->data_stack_top--;
539
    buf->line[line] = buffer;
540

    
541
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
542

    
543
    return buffer;
544
}
545

    
546
static void slice_buffer_release(slice_buffer * buf, int line)
547
{
548
    int offset;
549
    IDWTELEM * buffer;
550

    
551
    assert(line >= 0 && line < buf->line_count);
552
    assert(buf->line[line]);
553

    
554
    offset = buf->line_width * line;
555
    buffer = buf->line[line];
556
    buf->data_stack_top++;
557
    buf->data_stack[buf->data_stack_top] = buffer;
558
    buf->line[line] = NULL;
559

    
560
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
561
}
562

    
563
static void slice_buffer_flush(slice_buffer * buf)
564
{
565
    int i;
566
    for (i = 0; i < buf->line_count; i++)
567
    {
568
        if (buf->line[i])
569
        {
570
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
571
            slice_buffer_release(buf, i);
572
        }
573
    }
574
}
575

    
576
static void slice_buffer_destroy(slice_buffer * buf)
577
{
578
    int i;
579
    slice_buffer_flush(buf);
580

    
581
    for (i = buf->data_count - 1; i >= 0; i--)
582
    {
583
        av_freep(&buf->data_stack[i]);
584
    }
585
    av_freep(&buf->data_stack);
586
    av_freep(&buf->line);
587
}
588

    
589
#ifdef __sgi
590
// Avoid a name clash on SGI IRIX
591
#undef qexp
592
#endif
593
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
594
static uint8_t qexp[QROOT];
595

    
596
static inline int mirror(int v, int m){
597
    while((unsigned)v > (unsigned)m){
598
        v=-v;
599
        if(v<0) v+= 2*m;
600
    }
601
    return v;
602
}
603

    
604
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
605
    int i;
606

    
607
    if(v){
608
        const int a= FFABS(v);
609
        const int e= av_log2(a);
610
#if 1
611
        const int el= FFMIN(e, 10);
612
        put_rac(c, state+0, 0);
613

    
614
        for(i=0; i<el; i++){
615
            put_rac(c, state+1+i, 1);  //1..10
616
        }
617
        for(; i<e; i++){
618
            put_rac(c, state+1+9, 1);  //1..10
619
        }
620
        put_rac(c, state+1+FFMIN(i,9), 0);
621

    
622
        for(i=e-1; i>=el; i--){
623
            put_rac(c, state+22+9, (a>>i)&1); //22..31
624
        }
625
        for(; i>=0; i--){
626
            put_rac(c, state+22+i, (a>>i)&1); //22..31
627
        }
628

    
629
        if(is_signed)
630
            put_rac(c, state+11 + el, v < 0); //11..21
631
#else
632

    
633
        put_rac(c, state+0, 0);
634
        if(e<=9){
635
            for(i=0; i<e; i++){
636
                put_rac(c, state+1+i, 1);  //1..10
637
            }
638
            put_rac(c, state+1+i, 0);
639

    
640
            for(i=e-1; i>=0; i--){
641
                put_rac(c, state+22+i, (a>>i)&1); //22..31
642
            }
643

    
644
            if(is_signed)
645
                put_rac(c, state+11 + e, v < 0); //11..21
646
        }else{
647
            for(i=0; i<e; i++){
648
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
649
            }
650
            put_rac(c, state+1+FFMIN(i,9), 0);
651

    
652
            for(i=e-1; i>=0; i--){
653
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
654
            }
655

    
656
            if(is_signed)
657
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
658
        }
659
#endif
660
    }else{
661
        put_rac(c, state+0, 1);
662
    }
663
}
664

    
665
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
666
    if(get_rac(c, state+0))
667
        return 0;
668
    else{
669
        int i, e, a;
670
        e= 0;
671
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
672
            e++;
673
        }
674

    
675
        a= 1;
676
        for(i=e-1; i>=0; i--){
677
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
678
        }
679

    
680
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
681
            return -a;
682
        else
683
            return a;
684
    }
685
}
686

    
687
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
688
    int i;
689
    int r= log2>=0 ? 1<<log2 : 1;
690

    
691
    assert(v>=0);
692
    assert(log2>=-4);
693

    
694
    while(v >= r){
695
        put_rac(c, state+4+log2, 1);
696
        v -= r;
697
        log2++;
698
        if(log2>0) r+=r;
699
    }
700
    put_rac(c, state+4+log2, 0);
701

    
702
    for(i=log2-1; i>=0; i--){
703
        put_rac(c, state+31-i, (v>>i)&1);
704
    }
705
}
706

    
707
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
708
    int i;
709
    int r= log2>=0 ? 1<<log2 : 1;
710
    int v=0;
711

    
712
    assert(log2>=-4);
713

    
714
    while(get_rac(c, state+4+log2)){
715
        v+= r;
716
        log2++;
717
        if(log2>0) r+=r;
718
    }
719

    
720
    for(i=log2-1; i>=0; i--){
721
        v+= get_rac(c, state+31-i)<<i;
722
    }
723

    
724
    return v;
725
}
726

    
727
static av_always_inline void
728
lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
729
     int dst_step, int src_step, int ref_step,
730
     int width, int mul, int add, int shift,
731
     int highpass, int inverse){
732
    const int mirror_left= !highpass;
733
    const int mirror_right= (width&1) ^ highpass;
734
    const int w= (width>>1) - 1 + (highpass & width);
735
    int i;
736

    
737
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
738
    if(mirror_left){
739
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
740
        dst += dst_step;
741
        src += src_step;
742
    }
743

    
744
    for(i=0; i<w; i++){
745
        dst[i*dst_step] =
746
            LIFT(src[i*src_step],
747
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
748
                 inverse);
749
    }
750

    
751
    if(mirror_right){
752
        dst[w*dst_step] =
753
            LIFT(src[w*src_step],
754
                 ((mul*2*ref[w*ref_step]+add)>>shift),
755
                 inverse);
756
    }
757
}
758

    
759
static av_always_inline void
760
inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
761
         int dst_step, int src_step, int ref_step,
762
         int width, int mul, int add, int shift,
763
         int highpass, int inverse){
764
    const int mirror_left= !highpass;
765
    const int mirror_right= (width&1) ^ highpass;
766
    const int w= (width>>1) - 1 + (highpass & width);
767
    int i;
768

    
769
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
770
    if(mirror_left){
771
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
772
        dst += dst_step;
773
        src += src_step;
774
    }
775

    
776
    for(i=0; i<w; i++){
777
        dst[i*dst_step] =
778
            LIFT(src[i*src_step],
779
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
780
                 inverse);
781
    }
782

    
783
    if(mirror_right){
784
        dst[w*dst_step] =
785
            LIFT(src[w*src_step],
786
                 ((mul*2*ref[w*ref_step]+add)>>shift),
787
                 inverse);
788
    }
789
}
790

    
791
#ifndef liftS
792
static av_always_inline void
793
liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
794
      int dst_step, int src_step, int ref_step,
795
      int width, int mul, int add, int shift,
796
      int highpass, int inverse){
797
    const int mirror_left= !highpass;
798
    const int mirror_right= (width&1) ^ highpass;
799
    const int w= (width>>1) - 1 + (highpass & width);
800
    int i;
801

    
802
    assert(shift == 4);
803
#define LIFTS(src, ref, inv) \
804
        ((inv) ? \
805
            (src) + (((ref) + 4*(src))>>shift): \
806
            -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
807
    if(mirror_left){
808
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
809
        dst += dst_step;
810
        src += src_step;
811
    }
812

    
813
    for(i=0; i<w; i++){
814
        dst[i*dst_step] =
815
            LIFTS(src[i*src_step],
816
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
817
                  inverse);
818
    }
819

    
820
    if(mirror_right){
821
        dst[w*dst_step] =
822
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
823
    }
824
}
825
static av_always_inline void
826
inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
827
          int dst_step, int src_step, int ref_step,
828
          int width, int mul, int add, int shift,
829
          int highpass, int inverse){
830
    const int mirror_left= !highpass;
831
    const int mirror_right= (width&1) ^ highpass;
832
    const int w= (width>>1) - 1 + (highpass & width);
833
    int i;
834

    
835
    assert(shift == 4);
836
#define LIFTS(src, ref, inv) \
837
    ((inv) ? \
838
        (src) + (((ref) + 4*(src))>>shift): \
839
        -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
840
    if(mirror_left){
841
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
842
        dst += dst_step;
843
        src += src_step;
844
    }
845

    
846
    for(i=0; i<w; i++){
847
        dst[i*dst_step] =
848
            LIFTS(src[i*src_step],
849
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
850
                  inverse);
851
    }
852

    
853
    if(mirror_right){
854
        dst[w*dst_step] =
855
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
856
    }
857
}
858
#endif
859

    
860
static void horizontal_decompose53i(DWTELEM *b, int width){
861
    DWTELEM temp[width];
862
    const int width2= width>>1;
863
    int x;
864
    const int w2= (width+1)>>1;
865

    
866
    for(x=0; x<width2; x++){
867
        temp[x   ]= b[2*x    ];
868
        temp[x+w2]= b[2*x + 1];
869
    }
870
    if(width&1)
871
        temp[x   ]= b[2*x    ];
872
#if 0
873
    {
874
    int A1,A2,A3,A4;
875
    A2= temp[1       ];
876
    A4= temp[0       ];
877
    A1= temp[0+width2];
878
    A1 -= (A2 + A4)>>1;
879
    A4 += (A1 + 1)>>1;
880
    b[0+width2] = A1;
881
    b[0       ] = A4;
882
    for(x=1; x+1<width2; x+=2){
883
        A3= temp[x+width2];
884
        A4= temp[x+1     ];
885
        A3 -= (A2 + A4)>>1;
886
        A2 += (A1 + A3 + 2)>>2;
887
        b[x+width2] = A3;
888
        b[x       ] = A2;
889

890
        A1= temp[x+1+width2];
891
        A2= temp[x+2       ];
892
        A1 -= (A2 + A4)>>1;
893
        A4 += (A1 + A3 + 2)>>2;
894
        b[x+1+width2] = A1;
895
        b[x+1       ] = A4;
896
    }
897
    A3= temp[width-1];
898
    A3 -= A2;
899
    A2 += (A1 + A3 + 2)>>2;
900
    b[width -1] = A3;
901
    b[width2-1] = A2;
902
    }
903
#else
904
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
905
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
906
#endif
907
}
908

    
909
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
910
    int i;
911

    
912
    for(i=0; i<width; i++){
913
        b1[i] -= (b0[i] + b2[i])>>1;
914
    }
915
}
916

    
917
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
918
    int i;
919

    
920
    for(i=0; i<width; i++){
921
        b1[i] += (b0[i] + b2[i] + 2)>>2;
922
    }
923
}
924

    
925
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
926
    int y;
927
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
928
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
929

    
930
    for(y=-2; y<height; y+=2){
931
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
932
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
933

    
934
{START_TIMER
935
        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
936
        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
937
STOP_TIMER("horizontal_decompose53i")}
938

    
939
{START_TIMER
940
        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
941
        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
942
STOP_TIMER("vertical_decompose53i*")}
943

    
944
        b0=b2;
945
        b1=b3;
946
    }
947
}
948

    
949
static void horizontal_decompose97i(DWTELEM *b, int width){
950
    DWTELEM temp[width];
951
    const int w2= (width+1)>>1;
952

    
953
    lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
954
    liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
955
    lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
956
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
957
}
958

    
959

    
960
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
961
    int i;
962

    
963
    for(i=0; i<width; i++){
964
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
965
    }
966
}
967

    
968
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
969
    int i;
970

    
971
    for(i=0; i<width; i++){
972
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
973
    }
974
}
975

    
976
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
977
    int i;
978

    
979
    for(i=0; i<width; i++){
980
#ifdef liftS
981
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
982
#else
983
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
984
#endif
985
    }
986
}
987

    
988
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
989
    int i;
990

    
991
    for(i=0; i<width; i++){
992
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
993
    }
994
}
995

    
996
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
997
    int y;
998
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
999
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1000
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1001
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1002

    
1003
    for(y=-4; y<height; y+=2){
1004
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1005
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1006

    
1007
{START_TIMER
1008
        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1009
        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1010
if(width>400){
1011
STOP_TIMER("horizontal_decompose97i")
1012
}}
1013

    
1014
{START_TIMER
1015
        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1016
        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1017
        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1018
        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1019

    
1020
if(width>400){
1021
STOP_TIMER("vertical_decompose97i")
1022
}}
1023

    
1024
        b0=b2;
1025
        b1=b3;
1026
        b2=b4;
1027
        b3=b5;
1028
    }
1029
}
1030

    
1031
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1032
    int level;
1033

    
1034
    for(level=0; level<decomposition_count; level++){
1035
        switch(type){
1036
        case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1037
        case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1038
        }
1039
    }
1040
}
1041

    
1042
static void horizontal_compose53i(IDWTELEM *b, int width){
1043
    IDWTELEM temp[width];
1044
    const int width2= width>>1;
1045
    const int w2= (width+1)>>1;
1046
    int x;
1047

    
1048
#if 0
1049
    int A1,A2,A3,A4;
1050
    A2= temp[1       ];
1051
    A4= temp[0       ];
1052
    A1= temp[0+width2];
1053
    A1 -= (A2 + A4)>>1;
1054
    A4 += (A1 + 1)>>1;
1055
    b[0+width2] = A1;
1056
    b[0       ] = A4;
1057
    for(x=1; x+1<width2; x+=2){
1058
        A3= temp[x+width2];
1059
        A4= temp[x+1     ];
1060
        A3 -= (A2 + A4)>>1;
1061
        A2 += (A1 + A3 + 2)>>2;
1062
        b[x+width2] = A3;
1063
        b[x       ] = A2;
1064

1065
        A1= temp[x+1+width2];
1066
        A2= temp[x+2       ];
1067
        A1 -= (A2 + A4)>>1;
1068
        A4 += (A1 + A3 + 2)>>2;
1069
        b[x+1+width2] = A1;
1070
        b[x+1       ] = A4;
1071
    }
1072
    A3= temp[width-1];
1073
    A3 -= A2;
1074
    A2 += (A1 + A3 + 2)>>2;
1075
    b[width -1] = A3;
1076
    b[width2-1] = A2;
1077
#else
1078
    inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1079
    inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1080
#endif
1081
    for(x=0; x<width2; x++){
1082
        b[2*x    ]= temp[x   ];
1083
        b[2*x + 1]= temp[x+w2];
1084
    }
1085
    if(width&1)
1086
        b[2*x    ]= temp[x   ];
1087
}
1088

    
1089
static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1090
    int i;
1091

    
1092
    for(i=0; i<width; i++){
1093
        b1[i] += (b0[i] + b2[i])>>1;
1094
    }
1095
}
1096

    
1097
static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1098
    int i;
1099

    
1100
    for(i=0; i<width; i++){
1101
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1102
    }
1103
}
1104

    
1105
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1106
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1107
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1108
    cs->y = -1;
1109
}
1110

    
1111
static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1112
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1113
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1114
    cs->y = -1;
1115
}
1116

    
1117
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1118
    int y= cs->y;
1119

    
1120
    IDWTELEM *b0= cs->b0;
1121
    IDWTELEM *b1= cs->b1;
1122
    IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1123
    IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1124

    
1125
{START_TIMER
1126
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1127
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1128
STOP_TIMER("vertical_compose53i*")}
1129

    
1130
{START_TIMER
1131
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1132
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1133
STOP_TIMER("horizontal_compose53i")}
1134

    
1135
    cs->b0 = b2;
1136
    cs->b1 = b3;
1137
    cs->y += 2;
1138
}
1139

    
1140
static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1141
    int y= cs->y;
1142
    IDWTELEM *b0= cs->b0;
1143
    IDWTELEM *b1= cs->b1;
1144
    IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1145
    IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1146

    
1147
{START_TIMER
1148
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1149
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1150
STOP_TIMER("vertical_compose53i*")}
1151

    
1152
{START_TIMER
1153
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1154
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1155
STOP_TIMER("horizontal_compose53i")}
1156

    
1157
    cs->b0 = b2;
1158
    cs->b1 = b3;
1159
    cs->y += 2;
1160
}
1161

    
1162
static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1163
    dwt_compose_t cs;
1164
    spatial_compose53i_init(&cs, buffer, height, stride);
1165
    while(cs.y <= height)
1166
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1167
}
1168

    
1169

    
1170
void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1171
    IDWTELEM temp[width];
1172
    const int w2= (width+1)>>1;
1173

    
1174
    inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1175
    inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1176
    inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
1177
    inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
1178
}
1179

    
1180
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1181
    int i;
1182

    
1183
    for(i=0; i<width; i++){
1184
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1185
    }
1186
}
1187

    
1188
static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1189
    int i;
1190

    
1191
    for(i=0; i<width; i++){
1192
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1193
    }
1194
}
1195

    
1196
static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1197
    int i;
1198

    
1199
    for(i=0; i<width; i++){
1200
#ifdef liftS
1201
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1202
#else
1203
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1204
#endif
1205
    }
1206
}
1207

    
1208
static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1209
    int i;
1210

    
1211
    for(i=0; i<width; i++){
1212
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1213
    }
1214
}
1215

    
1216
void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1217
    int i;
1218

    
1219
    for(i=0; i<width; i++){
1220
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1221
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1222
#ifdef liftS
1223
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1224
#else
1225
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1226
#endif
1227
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1228
    }
1229
}
1230

    
1231
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1232
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1233
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1234
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1235
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1236
    cs->y = -3;
1237
}
1238

    
1239
static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1240
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1241
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1242
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1243
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1244
    cs->y = -3;
1245
}
1246

    
1247
static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1248
    int y = cs->y;
1249

    
1250
    IDWTELEM *b0= cs->b0;
1251
    IDWTELEM *b1= cs->b1;
1252
    IDWTELEM *b2= cs->b2;
1253
    IDWTELEM *b3= cs->b3;
1254
    IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1255
    IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1256

    
1257
{START_TIMER
1258
    if(y>0 && y+4<height){
1259
        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1260
    }else{
1261
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1262
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1263
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1264
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1265
    }
1266
if(width>400){
1267
STOP_TIMER("vertical_compose97i")}}
1268

    
1269
{START_TIMER
1270
        if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1271
        if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1272
if(width>400 && y+0<(unsigned)height){
1273
STOP_TIMER("horizontal_compose97i")}}
1274

    
1275
    cs->b0=b2;
1276
    cs->b1=b3;
1277
    cs->b2=b4;
1278
    cs->b3=b5;
1279
    cs->y += 2;
1280
}
1281

    
1282
static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1283
    int y = cs->y;
1284
    IDWTELEM *b0= cs->b0;
1285
    IDWTELEM *b1= cs->b1;
1286
    IDWTELEM *b2= cs->b2;
1287
    IDWTELEM *b3= cs->b3;
1288
    IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1289
    IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1290

    
1291
{START_TIMER
1292
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1293
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1294
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1295
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1296
if(width>400){
1297
STOP_TIMER("vertical_compose97i")}}
1298

    
1299
{START_TIMER
1300
        if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1301
        if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1302
if(width>400 && b0 <= b2){
1303
STOP_TIMER("horizontal_compose97i")}}
1304

    
1305
    cs->b0=b2;
1306
    cs->b1=b3;
1307
    cs->b2=b4;
1308
    cs->b3=b5;
1309
    cs->y += 2;
1310
}
1311

    
1312
static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1313
    dwt_compose_t cs;
1314
    spatial_compose97i_init(&cs, buffer, height, stride);
1315
    while(cs.y <= height)
1316
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1317
}
1318

    
1319
static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1320
    int level;
1321
    for(level=decomposition_count-1; level>=0; level--){
1322
        switch(type){
1323
        case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1324
        case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1325
        }
1326
    }
1327
}
1328

    
1329
static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1330
    int level;
1331
    for(level=decomposition_count-1; level>=0; level--){
1332
        switch(type){
1333
        case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1334
        case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1335
        }
1336
    }
1337
}
1338

    
1339
static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1340
    const int support = type==1 ? 3 : 5;
1341
    int level;
1342
    if(type==2) return;
1343

    
1344
    for(level=decomposition_count-1; level>=0; level--){
1345
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1346
            switch(type){
1347
            case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1348
                    break;
1349
            case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1350
                    break;
1351
            }
1352
        }
1353
    }
1354
}
1355

    
1356
static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1357
    const int support = type==1 ? 3 : 5;
1358
    int level;
1359
    if(type==2) return;
1360

    
1361
    for(level=decomposition_count-1; level>=0; level--){
1362
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1363
            switch(type){
1364
            case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1365
                    break;
1366
            case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1367
                    break;
1368
            }
1369
        }
1370
    }
1371
}
1372

    
1373
static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1374
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1375
        int y;
1376
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1377
        for(y=0; y<height; y+=4)
1378
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1379
}
1380

    
1381
static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1382
    const int w= b->width;
1383
    const int h= b->height;
1384
    int x, y;
1385

    
1386
    if(1){
1387
        int run=0;
1388
        int runs[w*h];
1389
        int run_index=0;
1390
        int max_index;
1391

    
1392
        for(y=0; y<h; y++){
1393
            for(x=0; x<w; x++){
1394
                int v, p=0;
1395
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1396
                v= src[x + y*stride];
1397

    
1398
                if(y){
1399
                    t= src[x + (y-1)*stride];
1400
                    if(x){
1401
                        lt= src[x - 1 + (y-1)*stride];
1402
                    }
1403
                    if(x + 1 < w){
1404
                        rt= src[x + 1 + (y-1)*stride];
1405
                    }
1406
                }
1407
                if(x){
1408
                    l= src[x - 1 + y*stride];
1409
                    /*if(x > 1){
1410
                        if(orientation==1) ll= src[y + (x-2)*stride];
1411
                        else               ll= src[x - 2 + y*stride];
1412
                    }*/
1413
                }
1414
                if(parent){
1415
                    int px= x>>1;
1416
                    int py= y>>1;
1417
                    if(px<b->parent->width && py<b->parent->height)
1418
                        p= parent[px + py*2*stride];
1419
                }
1420
                if(!(/*ll|*/l|lt|t|rt|p)){
1421
                    if(v){
1422
                        runs[run_index++]= run;
1423
                        run=0;
1424
                    }else{
1425
                        run++;
1426
                    }
1427
                }
1428
            }
1429
        }
1430
        max_index= run_index;
1431
        runs[run_index++]= run;
1432
        run_index=0;
1433
        run= runs[run_index++];
1434

    
1435
        put_symbol2(&s->c, b->state[30], max_index, 0);
1436
        if(run_index <= max_index)
1437
            put_symbol2(&s->c, b->state[1], run, 3);
1438

    
1439
        for(y=0; y<h; y++){
1440
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1441
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1442
                return -1;
1443
            }
1444
            for(x=0; x<w; x++){
1445
                int v, p=0;
1446
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1447
                v= src[x + y*stride];
1448

    
1449
                if(y){
1450
                    t= src[x + (y-1)*stride];
1451
                    if(x){
1452
                        lt= src[x - 1 + (y-1)*stride];
1453
                    }
1454
                    if(x + 1 < w){
1455
                        rt= src[x + 1 + (y-1)*stride];
1456
                    }
1457
                }
1458
                if(x){
1459
                    l= src[x - 1 + y*stride];
1460
                    /*if(x > 1){
1461
                        if(orientation==1) ll= src[y + (x-2)*stride];
1462
                        else               ll= src[x - 2 + y*stride];
1463
                    }*/
1464
                }
1465
                if(parent){
1466
                    int px= x>>1;
1467
                    int py= y>>1;
1468
                    if(px<b->parent->width && py<b->parent->height)
1469
                        p= parent[px + py*2*stride];
1470
                }
1471
                if(/*ll|*/l|lt|t|rt|p){
1472
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1473

    
1474
                    put_rac(&s->c, &b->state[0][context], !!v);
1475
                }else{
1476
                    if(!run){
1477
                        run= runs[run_index++];
1478

    
1479
                        if(run_index <= max_index)
1480
                            put_symbol2(&s->c, b->state[1], run, 3);
1481
                        assert(v);
1482
                    }else{
1483
                        run--;
1484
                        assert(!v);
1485
                    }
1486
                }
1487
                if(v){
1488
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1489
                    int l2= 2*FFABS(l) + (l<0);
1490
                    int t2= 2*FFABS(t) + (t<0);
1491

    
1492
                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1493
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1494
                }
1495
            }
1496
        }
1497
    }
1498
    return 0;
1499
}
1500

    
1501
static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1502
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1503
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1504
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1505
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1506
}
1507

    
1508
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1509
    const int w= b->width;
1510
    const int h= b->height;
1511
    int x,y;
1512

    
1513
    if(1){
1514
        int run, runs;
1515
        x_and_coeff *xc= b->x_coeff;
1516
        x_and_coeff *prev_xc= NULL;
1517
        x_and_coeff *prev2_xc= xc;
1518
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1519
        x_and_coeff *prev_parent_xc= parent_xc;
1520

    
1521
        runs= get_symbol2(&s->c, b->state[30], 0);
1522
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1523
        else           run= INT_MAX;
1524

    
1525
        for(y=0; y<h; y++){
1526
            int v=0;
1527
            int lt=0, t=0, rt=0;
1528

    
1529
            if(y && prev_xc->x == 0){
1530
                rt= prev_xc->coeff;
1531
            }
1532
            for(x=0; x<w; x++){
1533
                int p=0;
1534
                const int l= v;
1535

    
1536
                lt= t; t= rt;
1537

    
1538
                if(y){
1539
                    if(prev_xc->x <= x)
1540
                        prev_xc++;
1541
                    if(prev_xc->x == x + 1)
1542
                        rt= prev_xc->coeff;
1543
                    else
1544
                        rt=0;
1545
                }
1546
                if(parent_xc){
1547
                    if(x>>1 > parent_xc->x){
1548
                        parent_xc++;
1549
                    }
1550
                    if(x>>1 == parent_xc->x){
1551
                        p= parent_xc->coeff;
1552
                    }
1553
                }
1554
                if(/*ll|*/l|lt|t|rt|p){
1555
                    int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1556

    
1557
                    v=get_rac(&s->c, &b->state[0][context]);
1558
                    if(v){
1559
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1560
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1561

    
1562
                        xc->x=x;
1563
                        (xc++)->coeff= v;
1564
                    }
1565
                }else{
1566
                    if(!run){
1567
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1568
                        else           run= INT_MAX;
1569
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1570
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1571

    
1572
                        xc->x=x;
1573
                        (xc++)->coeff= v;
1574
                    }else{
1575
                        int max_run;
1576
                        run--;
1577
                        v=0;
1578

    
1579
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1580
                        else  max_run= FFMIN(run, w-x-1);
1581
                        if(parent_xc)
1582
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1583
                        x+= max_run;
1584
                        run-= max_run;
1585
                    }
1586
                }
1587
            }
1588
            (xc++)->x= w+1; //end marker
1589
            prev_xc= prev2_xc;
1590
            prev2_xc= xc;
1591

    
1592
            if(parent_xc){
1593
                if(y&1){
1594
                    while(parent_xc->x != parent->width+1)
1595
                        parent_xc++;
1596
                    parent_xc++;
1597
                    prev_parent_xc= parent_xc;
1598
                }else{
1599
                    parent_xc= prev_parent_xc;
1600
                }
1601
            }
1602
        }
1603

    
1604
        (xc++)->x= w+1; //end marker
1605
    }
1606
}
1607

    
1608
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1609
    const int w= b->width;
1610
    int y;
1611
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1612
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1613
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1614
    int new_index = 0;
1615

    
1616
    START_TIMER
1617

    
1618
    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1619
        qadd= 0;
1620
        qmul= 1<<QEXPSHIFT;
1621
    }
1622

    
1623
    /* If we are on the second or later slice, restore our index. */
1624
    if (start_y != 0)
1625
        new_index = save_state[0];
1626

    
1627

    
1628
    for(y=start_y; y<h; y++){
1629
        int x = 0;
1630
        int v;
1631
        IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1632
        memset(line, 0, b->width*sizeof(IDWTELEM));
1633
        v = b->x_coeff[new_index].coeff;
1634
        x = b->x_coeff[new_index++].x;
1635
        while(x < w)
1636
        {
1637
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1638
            register int u= -(v&1);
1639
            line[x] = (t^u) - u;
1640

    
1641
            v = b->x_coeff[new_index].coeff;
1642
            x = b->x_coeff[new_index++].x;
1643
        }
1644
    }
1645
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1646
        STOP_TIMER("decode_subband")
1647
    }
1648

    
1649
    /* Save our variables for the next slice. */
1650
    save_state[0] = new_index;
1651

    
1652
    return;
1653
}
1654

    
1655
static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1656
    int plane_index, level, orientation;
1657

    
1658
    for(plane_index=0; plane_index<3; plane_index++){
1659
        for(level=0; level<MAX_DECOMPOSITIONS; level++){
1660
            for(orientation=level ? 1:0; orientation<4; orientation++){
1661
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1662
            }
1663
        }
1664
    }
1665
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1666
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1667
}
1668

    
1669
static int alloc_blocks(SnowContext *s){
1670
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1671
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1672

    
1673
    s->b_width = w;
1674
    s->b_height= h;
1675

    
1676
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1677
    return 0;
1678
}
1679

    
1680
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1681
    uint8_t *bytestream= d->bytestream;
1682
    uint8_t *bytestream_start= d->bytestream_start;
1683
    *d= *s;
1684
    d->bytestream= bytestream;
1685
    d->bytestream_start= bytestream_start;
1686
}
1687

    
1688
//near copy & paste from dsputil, FIXME
1689
static int pix_sum(uint8_t * pix, int line_size, int w)
1690
{
1691
    int s, i, j;
1692

    
1693
    s = 0;
1694
    for (i = 0; i < w; i++) {
1695
        for (j = 0; j < w; j++) {
1696
            s += pix[0];
1697
            pix ++;
1698
        }
1699
        pix += line_size - w;
1700
    }
1701
    return s;
1702
}
1703

    
1704
//near copy & paste from dsputil, FIXME
1705
static int pix_norm1(uint8_t * pix, int line_size, int w)
1706
{
1707
    int s, i, j;
1708
    uint32_t *sq = ff_squareTbl + 256;
1709

    
1710
    s = 0;
1711
    for (i = 0; i < w; i++) {
1712
        for (j = 0; j < w; j ++) {
1713
            s += sq[pix[0]];
1714
            pix ++;
1715
        }
1716
        pix += line_size - w;
1717
    }
1718
    return s;
1719
}
1720

    
1721
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1722
    const int w= s->b_width << s->block_max_depth;
1723
    const int rem_depth= s->block_max_depth - level;
1724
    const int index= (x + y*w) << rem_depth;
1725
    const int block_w= 1<<rem_depth;
1726
    BlockNode block;
1727
    int i,j;
1728

    
1729
    block.color[0]= l;
1730
    block.color[1]= cb;
1731
    block.color[2]= cr;
1732
    block.mx= mx;
1733
    block.my= my;
1734
    block.ref= ref;
1735
    block.type= type;
1736
    block.level= level;
1737

    
1738
    for(j=0; j<block_w; j++){
1739
        for(i=0; i<block_w; i++){
1740
            s->block[index + i + j*w]= block;
1741
        }
1742
    }
1743
}
1744

    
1745
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1746
    const int offset[3]= {
1747
          y*c->  stride + x,
1748
        ((y*c->uvstride + x)>>1),
1749
        ((y*c->uvstride + x)>>1),
1750
    };
1751
    int i;
1752
    for(i=0; i<3; i++){
1753
        c->src[0][i]= src [i];
1754
        c->ref[0][i]= ref [i] + offset[i];
1755
    }
1756
    assert(!ref_index);
1757
}
1758

    
1759
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1760
                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1761
    if(s->ref_frames == 1){
1762
        *mx = mid_pred(left->mx, top->mx, tr->mx);
1763
        *my = mid_pred(left->my, top->my, tr->my);
1764
    }else{
1765
        const int *scale = scale_mv_ref[ref];
1766
        *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1767
                       (top ->mx * scale[top ->ref] + 128) >>8,
1768
                       (tr  ->mx * scale[tr  ->ref] + 128) >>8);
1769
        *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1770
                       (top ->my * scale[top ->ref] + 128) >>8,
1771
                       (tr  ->my * scale[tr  ->ref] + 128) >>8);
1772
    }
1773
}
1774

    
1775
//FIXME copy&paste
1776
#define P_LEFT P[1]
1777
#define P_TOP P[2]
1778
#define P_TOPRIGHT P[3]
1779
#define P_MEDIAN P[4]
1780
#define P_MV1 P[9]
1781
#define FLAG_QPEL   1 //must be 1
1782

    
1783
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1784
    uint8_t p_buffer[1024];
1785
    uint8_t i_buffer[1024];
1786
    uint8_t p_state[sizeof(s->block_state)];
1787
    uint8_t i_state[sizeof(s->block_state)];
1788
    RangeCoder pc, ic;
1789
    uint8_t *pbbak= s->c.bytestream;
1790
    uint8_t *pbbak_start= s->c.bytestream_start;
1791
    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1792
    const int w= s->b_width  << s->block_max_depth;
1793
    const int h= s->b_height << s->block_max_depth;
1794
    const int rem_depth= s->block_max_depth - level;
1795
    const int index= (x + y*w) << rem_depth;
1796
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1797
    int trx= (x+1)<<rem_depth;
1798
    int try= (y+1)<<rem_depth;
1799
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1800
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1801
    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1802
    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1803
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1804
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1805
    int pl = left->color[0];
1806
    int pcb= left->color[1];
1807
    int pcr= left->color[2];
1808
    int pmx, pmy;
1809
    int mx=0, my=0;
1810
    int l,cr,cb;
1811
    const int stride= s->current_picture.linesize[0];
1812
    const int uvstride= s->current_picture.linesize[1];
1813
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
1814
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1815
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1816
    int P[10][2];
1817
    int16_t last_mv[3][2];
1818
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1819
    const int shift= 1+qpel;
1820
    MotionEstContext *c= &s->m.me;
1821
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1822
    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1823
    int my_context= av_log2(2*FFABS(left->my - top->my));
1824
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1825
    int ref, best_ref, ref_score, ref_mx, ref_my;
1826

    
1827
    assert(sizeof(s->block_state) >= 256);
1828
    if(s->keyframe){
1829
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1830
        return 0;
1831
    }
1832

    
1833
//    clip predictors / edge ?
1834

    
1835
    P_LEFT[0]= left->mx;
1836
    P_LEFT[1]= left->my;
1837
    P_TOP [0]= top->mx;
1838
    P_TOP [1]= top->my;
1839
    P_TOPRIGHT[0]= tr->mx;
1840
    P_TOPRIGHT[1]= tr->my;
1841

    
1842
    last_mv[0][0]= s->block[index].mx;
1843
    last_mv[0][1]= s->block[index].my;
1844
    last_mv[1][0]= right->mx;
1845
    last_mv[1][1]= right->my;
1846
    last_mv[2][0]= bottom->mx;
1847
    last_mv[2][1]= bottom->my;
1848

    
1849
    s->m.mb_stride=2;
1850
    s->m.mb_x=
1851
    s->m.mb_y= 0;
1852
    c->skip= 0;
1853

    
1854
    assert(c->  stride ==   stride);
1855
    assert(c->uvstride == uvstride);
1856

    
1857
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1858
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1859
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1860
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1861

    
1862
    c->xmin = - x*block_w - 16+2;
1863
    c->ymin = - y*block_w - 16+2;
1864
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1865
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1866

    
1867
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1868
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
1869
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1870
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1871
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1872
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1873
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1874

    
1875
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1876
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1877

    
1878
    if (!y) {
1879
        c->pred_x= P_LEFT[0];
1880
        c->pred_y= P_LEFT[1];
1881
    } else {
1882
        c->pred_x = P_MEDIAN[0];
1883
        c->pred_y = P_MEDIAN[1];
1884
    }
1885

    
1886
    score= INT_MAX;
1887
    best_ref= 0;
1888
    for(ref=0; ref<s->ref_frames; ref++){
1889
        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1890

    
1891
        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1892
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1893

    
1894
        assert(ref_mx >= c->xmin);
1895
        assert(ref_mx <= c->xmax);
1896
        assert(ref_my >= c->ymin);
1897
        assert(ref_my <= c->ymax);
1898

    
1899
        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1900
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1901
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1902
        if(s->ref_mvs[ref]){
1903
            s->ref_mvs[ref][index][0]= ref_mx;
1904
            s->ref_mvs[ref][index][1]= ref_my;
1905
            s->ref_scores[ref][index]= ref_score;
1906
        }
1907
        if(score > ref_score){
1908
            score= ref_score;
1909
            best_ref= ref;
1910
            mx= ref_mx;
1911
            my= ref_my;
1912
        }
1913
    }
1914
    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1915

    
1916
  //  subpel search
1917
    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1918
    pc= s->c;
1919
    pc.bytestream_start=
1920
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1921
    memcpy(p_state, s->block_state, sizeof(s->block_state));
1922

    
1923
    if(level!=s->block_max_depth)
1924
        put_rac(&pc, &p_state[4 + s_context], 1);
1925
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1926
    if(s->ref_frames > 1)
1927
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1928
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1929
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1930
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1931
    p_len= pc.bytestream - pc.bytestream_start;
1932
    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1933

    
1934
    block_s= block_w*block_w;
1935
    sum = pix_sum(current_data[0], stride, block_w);
1936
    l= (sum + block_s/2)/block_s;
1937
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1938

    
1939
    block_s= block_w*block_w>>2;
1940
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
1941
    cb= (sum + block_s/2)/block_s;
1942
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1943
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
1944
    cr= (sum + block_s/2)/block_s;
1945
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1946

    
1947
    ic= s->c;
1948
    ic.bytestream_start=
1949
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1950
    memcpy(i_state, s->block_state, sizeof(s->block_state));
1951
    if(level!=s->block_max_depth)
1952
        put_rac(&ic, &i_state[4 + s_context], 1);
1953
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1954
    put_symbol(&ic, &i_state[32],  l-pl , 1);
1955
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
1956
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
1957
    i_len= ic.bytestream - ic.bytestream_start;
1958
    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1959

    
1960
//    assert(score==256*256*256*64-1);
1961
    assert(iscore < 255*255*256 + s->lambda2*10);
1962
    assert(iscore >= 0);
1963
    assert(l>=0 && l<=255);
1964
    assert(pl>=0 && pl<=255);
1965

    
1966
    if(level==0){
1967
        int varc= iscore >> 8;
1968
        int vard= score >> 8;
1969
        if (vard <= 64 || vard < varc)
1970
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1971
        else
1972
            c->scene_change_score+= s->m.qscale;
1973
    }
1974

    
1975
    if(level!=s->block_max_depth){
1976
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
1977
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1978
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1979
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1980
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1981
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1982

    
1983
        if(score2 < score && score2 < iscore)
1984
            return score2;
1985
    }
1986

    
1987
    if(iscore < score){
1988
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1989
        memcpy(pbbak, i_buffer, i_len);
1990
        s->c= ic;
1991
        s->c.bytestream_start= pbbak_start;
1992
        s->c.bytestream= pbbak + i_len;
1993
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1994
        memcpy(s->block_state, i_state, sizeof(s->block_state));
1995
        return iscore;
1996
    }else{
1997
        memcpy(pbbak, p_buffer, p_len);
1998
        s->c= pc;
1999
        s->c.bytestream_start= pbbak_start;
2000
        s->c.bytestream= pbbak + p_len;
2001
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2002
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2003
        return score;
2004
    }
2005
}
2006

    
2007
static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2008
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2009
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2010
    }else{
2011
        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2012
    }
2013
}
2014

    
2015
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2016
    const int w= s->b_width  << s->block_max_depth;
2017
    const int rem_depth= s->block_max_depth - level;
2018
    const int index= (x + y*w) << rem_depth;
2019
    int trx= (x+1)<<rem_depth;
2020
    BlockNode *b= &s->block[index];
2021
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2022
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2023
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2024
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2025
    int pl = left->color[0];
2026
    int pcb= left->color[1];
2027
    int pcr= left->color[2];
2028
    int pmx, pmy;
2029
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2030
    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2031
    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2032
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2033

    
2034
    if(s->keyframe){
2035
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2036
        return;
2037
    }
2038

    
2039
    if(level!=s->block_max_depth){
2040
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2041
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
2042
        }else{
2043
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
2044
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2045
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2046
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2047
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2048
            return;
2049
        }
2050
    }
2051
    if(b->type & BLOCK_INTRA){
2052
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2053
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2054
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2055
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2056
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2057
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2058
    }else{
2059
        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2060
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2061
        if(s->ref_frames > 1)
2062
            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2063
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2064
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2065
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2066
    }
2067
}
2068

    
2069
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2070
    const int w= s->b_width << s->block_max_depth;
2071
    const int rem_depth= s->block_max_depth - level;
2072
    const int index= (x + y*w) << rem_depth;
2073
    int trx= (x+1)<<rem_depth;
2074
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2075
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2076
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2077
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2078
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2079

    
2080
    if(s->keyframe){
2081
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2082
        return;
2083
    }
2084

    
2085
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2086
        int type, mx, my;
2087
        int l = left->color[0];
2088
        int cb= left->color[1];
2089
        int cr= left->color[2];
2090
        int ref = 0;
2091
        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2092
        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2093
        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2094

    
2095
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2096

    
2097
        if(type){
2098
            pred_mv(s, &mx, &my, 0, left, top, tr);
2099
            l += get_symbol(&s->c, &s->block_state[32], 1);
2100
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2101
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2102
        }else{
2103
            if(s->ref_frames > 1)
2104
                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2105
            pred_mv(s, &mx, &my, ref, left, top, tr);
2106
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2107
            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2108
        }
2109
        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2110
    }else{
2111
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2112
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2113
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2114
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2115
    }
2116
}
2117

    
2118
static void encode_blocks(SnowContext *s, int search){
2119
    int x, y;
2120
    int w= s->b_width;
2121
    int h= s->b_height;
2122

    
2123
    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2124
        iterative_me(s);
2125

    
2126
    for(y=0; y<h; y++){
2127
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2128
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2129
            return;
2130
        }
2131
        for(x=0; x<w; x++){
2132
            if(s->avctx->me_method == ME_ITER || !search)
2133
                encode_q_branch2(s, 0, x, y);
2134
            else
2135
                encode_q_branch (s, 0, x, y);
2136
        }
2137
    }
2138
}
2139

    
2140
static void decode_blocks(SnowContext *s){
2141
    int x, y;
2142
    int w= s->b_width;
2143
    int h= s->b_height;
2144

    
2145
    for(y=0; y<h; y++){
2146
        for(x=0; x<w; x++){
2147
            decode_q_branch(s, 0, x, y);
2148
        }
2149
    }
2150
}
2151

    
2152
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2153
    const static uint8_t weight[64]={
2154
    8,7,6,5,4,3,2,1,
2155
    7,7,0,0,0,0,0,1,
2156
    6,0,6,0,0,0,2,0,
2157
    5,0,0,5,0,3,0,0,
2158
    4,0,0,0,4,0,0,0,
2159
    3,0,0,5,0,3,0,0,
2160
    2,0,6,0,0,0,2,0,
2161
    1,7,0,0,0,0,0,1,
2162
    };
2163

    
2164
    const static uint8_t brane[256]={
2165
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2166
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2167
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2168
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2169
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2170
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2171
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2172
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2173
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2174
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2175
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2176
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2177
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2178
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2179
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2180
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2181
    };
2182

    
2183
    const static uint8_t needs[16]={
2184
    0,1,0,0,
2185
    2,4,2,0,
2186
    0,1,0,0,
2187
    15
2188
    };
2189

    
2190
    int x, y, b, r, l;
2191
    int16_t tmpIt   [64*(32+HTAPS_MAX)];
2192
    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2193
    int16_t *tmpI= tmpIt;
2194
    uint8_t *tmp2= tmp2t[0];
2195
    uint8_t *hpel[11];
2196
START_TIMER
2197
    assert(dx<16 && dy<16);
2198
    r= brane[dx + 16*dy]&15;
2199
    l= brane[dx + 16*dy]>>4;
2200

    
2201
    b= needs[l] | needs[r];
2202
    if(p && !p->diag_mc)
2203
        b= 15;
2204

    
2205
    if(b&5){
2206
        for(y=0; y < b_h+HTAPS_MAX-1; y++){
2207
            for(x=0; x < b_w; x++){
2208
                int a_1=src[x + HTAPS_MAX/2-4];
2209
                int a0= src[x + HTAPS_MAX/2-3];
2210
                int a1= src[x + HTAPS_MAX/2-2];
2211
                int a2= src[x + HTAPS_MAX/2-1];
2212
                int a3= src[x + HTAPS_MAX/2+0];
2213
                int a4= src[x + HTAPS_MAX/2+1];
2214
                int a5= src[x + HTAPS_MAX/2+2];
2215
                int a6= src[x + HTAPS_MAX/2+3];
2216
                int am=0;
2217
                if(!p || p->fast_mc){
2218
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2219
                    tmpI[x]= am;
2220
                    am= (am+16)>>5;
2221
                }else{
2222
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2223
                    tmpI[x]= am;
2224
                    am= (am+32)>>6;
2225
                }
2226

    
2227
                if(am&(~255)) am= ~(am>>31);
2228
                tmp2[x]= am;
2229
            }
2230
            tmpI+= 64;
2231
            tmp2+= stride;
2232
            src += stride;
2233
        }
2234
        src -= stride*y;
2235
    }
2236
    src += HTAPS_MAX/2 - 1;
2237
    tmp2= tmp2t[1];
2238

    
2239
    if(b&2){
2240
        for(y=0; y < b_h; y++){
2241
            for(x=0; x < b_w+1; x++){
2242
                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2243
                int a0= src[x + (HTAPS_MAX/2-3)*stride];
2244
                int a1= src[x + (HTAPS_MAX/2-2)*stride];
2245
                int a2= src[x + (HTAPS_MAX/2-1)*stride];
2246
                int a3= src[x + (HTAPS_MAX/2+0)*stride];
2247
                int a4= src[x + (HTAPS_MAX/2+1)*stride];
2248
                int a5= src[x + (HTAPS_MAX/2+2)*stride];
2249
                int a6= src[x + (HTAPS_MAX/2+3)*stride];
2250
                int am=0;
2251
                if(!p || p->fast_mc)
2252
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2253
                else
2254
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2255

    
2256
                if(am&(~255)) am= ~(am>>31);
2257
                tmp2[x]= am;
2258
            }
2259
            src += stride;
2260
            tmp2+= stride;
2261
        }
2262
        src -= stride*y;
2263
    }
2264
    src += stride*(HTAPS_MAX/2 - 1);
2265
    tmp2= tmp2t[2];
2266
    tmpI= tmpIt;
2267
    if(b&4){
2268
        for(y=0; y < b_h; y++){
2269
            for(x=0; x < b_w; x++){
2270
                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2271
                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2272
                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2273
                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2274
                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2275
                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2276
                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2277
                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2278
                int am=0;
2279
                if(!p || p->fast_mc)
2280
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2281
                else
2282
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2283
                if(am&(~255)) am= ~(am>>31);
2284
                tmp2[x]= am;
2285
            }
2286
            tmpI+= 64;
2287
            tmp2+= stride;
2288
        }
2289
    }
2290

    
2291
    hpel[ 0]= src;
2292
    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2293
    hpel[ 2]= src + 1;
2294

    
2295
    hpel[ 4]= tmp2t[1];
2296
    hpel[ 5]= tmp2t[2];
2297
    hpel[ 6]= tmp2t[1] + 1;
2298

    
2299
    hpel[ 8]= src + stride;
2300
    hpel[ 9]= hpel[1] + stride;
2301
    hpel[10]= hpel[8] + 1;
2302

    
2303
    if(b==15){
2304
        uint8_t *src1= hpel[dx/8 + dy/8*4  ];
2305
        uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2306
        uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2307
        uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2308
        dx&=7;
2309
        dy&=7;
2310
        for(y=0; y < b_h; y++){
2311
            for(x=0; x < b_w; x++){
2312
                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2313
                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
2314
            }
2315
            src1+=stride;
2316
            src2+=stride;
2317
            src3+=stride;
2318
            src4+=stride;
2319
            dst +=stride;
2320
        }
2321
    }else{
2322
        uint8_t *src1= hpel[l];
2323
        uint8_t *src2= hpel[r];
2324
        int a= weight[((dx&7) + (8*(dy&7)))];
2325
        int b= 8-a;
2326
        for(y=0; y < b_h; y++){
2327
            for(x=0; x < b_w; x++){
2328
                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2329
            }
2330
            src1+=stride;
2331
            src2+=stride;
2332
            dst +=stride;
2333
        }
2334
    }
2335
STOP_TIMER("mc_block")
2336
}
2337

    
2338
#define mca(dx,dy,b_w)\
2339
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2340
    uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2341
    assert(h==b_w);\
2342
    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2343
}
2344

    
2345
mca( 0, 0,16)
2346
mca( 8, 0,16)
2347
mca( 0, 8,16)
2348
mca( 8, 8,16)
2349
mca( 0, 0,8)
2350
mca( 8, 0,8)
2351
mca( 0, 8,8)
2352
mca( 8, 8,8)
2353

    
2354
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2355
    if(block->type & BLOCK_INTRA){
2356
        int x, y;
2357
        const int color = block->color[plane_index];
2358
        const int color4= color*0x01010101;
2359
        if(b_w==32){
2360
            for(y=0; y < b_h; y++){
2361
                *(uint32_t*)&dst[0 + y*stride]= color4;
2362
                *(uint32_t*)&dst[4 + y*stride]= color4;
2363
                *(uint32_t*)&dst[8 + y*stride]= color4;
2364
                *(uint32_t*)&dst[12+ y*stride]= color4;
2365
                *(uint32_t*)&dst[16+ y*stride]= color4;
2366
                *(uint32_t*)&dst[20+ y*stride]= color4;
2367
                *(uint32_t*)&dst[24+ y*stride]= color4;
2368
                *(uint32_t*)&dst[28+ y*stride]= color4;
2369
            }
2370
        }else if(b_w==16){
2371
            for(y=0; y < b_h; y++){
2372
                *(uint32_t*)&dst[0 + y*stride]= color4;
2373
                *(uint32_t*)&dst[4 + y*stride]= color4;
2374
                *(uint32_t*)&dst[8 + y*stride]= color4;
2375
                *(uint32_t*)&dst[12+ y*stride]= color4;
2376
            }
2377
        }else if(b_w==8){
2378
            for(y=0; y < b_h; y++){
2379
                *(uint32_t*)&dst[0 + y*stride]= color4;
2380
                *(uint32_t*)&dst[4 + y*stride]= color4;
2381
            }
2382
        }else if(b_w==4){
2383
            for(y=0; y < b_h; y++){
2384
                *(uint32_t*)&dst[0 + y*stride]= color4;
2385
            }
2386
        }else{
2387
            for(y=0; y < b_h; y++){
2388
                for(x=0; x < b_w; x++){
2389
                    dst[x + y*stride]= color;
2390
                }
2391
            }
2392
        }
2393
    }else{
2394
        uint8_t *src= s->last_picture[block->ref].data[plane_index];
2395
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2396
        int mx= block->mx*scale;
2397
        int my= block->my*scale;
2398
        const int dx= mx&15;
2399
        const int dy= my&15;
2400
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2401
        sx += (mx>>4) - (HTAPS_MAX/2-1);
2402
        sy += (my>>4) - (HTAPS_MAX/2-1);
2403
        src += sx + sy*stride;
2404
        if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2405
           || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2406
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2407
            src= tmp + MB_SIZE;
2408
        }
2409
//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2410
//        assert(!(b_w&(b_w-1)));
2411
        assert(b_w>1 && b_h>1);
2412
        assert(tab_index>=0 && tab_index<4 || b_w==32);
2413
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2414
            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2415
        else if(b_w==32){
2416
            int y;
2417
            for(y=0; y<b_h; y+=16){
2418
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2419
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2420
            }
2421
        }else if(b_w==b_h)
2422
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2423
        else if(b_w==2*b_h){
2424
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
2425
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2426
        }else{
2427
            assert(2*b_w==b_h);
2428
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
2429
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2430
        }
2431
    }
2432
}
2433

    
2434
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2435
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2436
    int y, x;
2437
    IDWTELEM * dst;
2438
    for(y=0; y<b_h; y++){
2439
        //FIXME ugly misuse of obmc_stride
2440
        const uint8_t *obmc1= obmc + y*obmc_stride;
2441
        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2442
        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2443
        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2444
        dst = slice_buffer_get_line(sb, src_y + y);
2445
        for(x=0; x<b_w; x++){
2446
            int v=   obmc1[x] * block[3][x + y*src_stride]
2447
                    +obmc2[x] * block[2][x + y*src_stride]
2448
                    +obmc3[x] * block[1][x + y*src_stride]
2449
                    +obmc4[x] * block[0][x + y*src_stride];
2450

    
2451
            v <<= 8 - LOG2_OBMC_MAX;
2452
            if(FRAC_BITS != 8){
2453
                v >>= 8 - FRAC_BITS;
2454
            }
2455
            if(add){
2456
                v += dst[x + src_x];
2457
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2458
                if(v&(~255)) v= ~(v>>31);
2459
                dst8[x + y*src_stride] = v;
2460
            }else{
2461
                dst[x + src_x] -= v;
2462
            }
2463
        }
2464
    }
2465
}
2466

    
2467
//FIXME name clenup (b_w, block_w, b_width stuff)
2468
static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2469
    const int b_width = s->b_width  << s->block_max_depth;
2470
    const int b_height= s->b_height << s->block_max_depth;
2471
    const int b_stride= b_width;
2472
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2473
    BlockNode *rt= lt+1;
2474
    BlockNode *lb= lt+b_stride;
2475
    BlockNode *rb= lb+1;
2476
    uint8_t *block[4];
2477
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2478
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2479
    uint8_t *ptmp;
2480
    int x,y;
2481

    
2482
    if(b_x<0){
2483
        lt= rt;
2484
        lb= rb;
2485
    }else if(b_x + 1 >= b_width){
2486
        rt= lt;
2487
        rb= lb;
2488
    }
2489
    if(b_y<0){
2490
        lt= lb;
2491
        rt= rb;
2492
    }else if(b_y + 1 >= b_height){
2493
        lb= lt;
2494
        rb= rt;
2495
    }
2496

    
2497
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2498
        obmc -= src_x;
2499
        b_w += src_x;
2500
        if(!sliced && !offset_dst)
2501
            dst -= src_x;
2502
        src_x=0;
2503
    }else if(src_x + b_w > w){
2504
        b_w = w - src_x;
2505
    }
2506
    if(src_y<0){
2507
        obmc -= src_y*obmc_stride;
2508
        b_h += src_y;
2509
        if(!sliced && !offset_dst)
2510
            dst -= src_y*dst_stride;
2511
        src_y=0;
2512
    }else if(src_y + b_h> h){
2513
        b_h = h - src_y;
2514
    }
2515

    
2516
    if(b_w<=0 || b_h<=0) return;
2517

    
2518
assert(src_stride > 2*MB_SIZE + 5);
2519
    if(!sliced && offset_dst)
2520
        dst += src_x + src_y*dst_stride;
2521
    dst8+= src_x + src_y*src_stride;
2522
//    src += src_x + src_y*src_stride;
2523

    
2524
    ptmp= tmp + 3*tmp_step;
2525
    block[0]= ptmp;
2526
    ptmp+=tmp_step;
2527
    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2528

    
2529
    if(same_block(lt, rt)){
2530
        block[1]= block[0];
2531
    }else{
2532
        block[1]= ptmp;
2533
        ptmp+=tmp_step;
2534
        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2535
    }
2536

    
2537
    if(same_block(lt, lb)){
2538
        block[2]= block[0];
2539
    }else if(same_block(rt, lb)){
2540
        block[2]= block[1];
2541
    }else{
2542
        block[2]= ptmp;
2543
        ptmp+=tmp_step;
2544
        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2545
    }
2546

    
2547
    if(same_block(lt, rb) ){
2548
        block[3]= block[0];
2549
    }else if(same_block(rt, rb)){
2550
        block[3]= block[1];
2551
    }else if(same_block(lb, rb)){
2552
        block[3]= block[2];
2553
    }else{
2554
        block[3]= ptmp;
2555
        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2556
    }
2557
#if 0
2558
    for(y=0; y<b_h; y++){
2559
        for(x=0; x<b_w; x++){
2560
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2561
            if(add) dst[x + y*dst_stride] += v;
2562
            else    dst[x + y*dst_stride] -= v;
2563
        }
2564
    }
2565
    for(y=0; y<b_h; y++){
2566
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2567
        for(x=0; x<b_w; x++){
2568
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2569
            if(add) dst[x + y*dst_stride] += v;
2570
            else    dst[x + y*dst_stride] -= v;
2571
        }
2572
    }
2573
    for(y=0; y<b_h; y++){
2574
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2575
        for(x=0; x<b_w; x++){
2576
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2577
            if(add) dst[x + y*dst_stride] += v;
2578
            else    dst[x + y*dst_stride] -= v;
2579
        }
2580
    }
2581
    for(y=0; y<b_h; y++){
2582
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2583
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2584
        for(x=0; x<b_w; x++){
2585
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2586
            if(add) dst[x + y*dst_stride] += v;
2587
            else    dst[x + y*dst_stride] -= v;
2588
        }
2589
    }
2590
#else
2591
    if(sliced){
2592
        START_TIMER
2593

    
2594
        s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2595
        STOP_TIMER("inner_add_yblock")
2596
    }else
2597
    for(y=0; y<b_h; y++){
2598
        //FIXME ugly misuse of obmc_stride
2599
        const uint8_t *obmc1= obmc + y*obmc_stride;
2600
        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2601
        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2602
        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2603
        for(x=0; x<b_w; x++){
2604
            int v=   obmc1[x] * block[3][x + y*src_stride]
2605
                    +obmc2[x] * block[2][x + y*src_stride]
2606
                    +obmc3[x] * block[1][x + y*src_stride]
2607
                    +obmc4[x] * block[0][x + y*src_stride];
2608

    
2609
            v <<= 8 - LOG2_OBMC_MAX;
2610
            if(FRAC_BITS != 8){
2611
                v >>= 8 - FRAC_BITS;
2612
            }
2613
            if(add){
2614
                v += dst[x + y*dst_stride];
2615
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2616
                if(v&(~255)) v= ~(v>>31);
2617
                dst8[x + y*src_stride] = v;
2618
            }else{
2619
                dst[x + y*dst_stride] -= v;
2620
            }
2621
        }
2622
    }
2623
#endif
2624
}
2625

    
2626
static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2627
    Plane *p= &s->plane[plane_index];
2628
    const int mb_w= s->b_width  << s->block_max_depth;
2629
    const int mb_h= s->b_height << s->block_max_depth;
2630
    int x, y, mb_x;
2631
    int block_size = MB_SIZE >> s->block_max_depth;
2632
    int block_w    = plane_index ? block_size/2 : block_size;
2633
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2634
    int obmc_stride= plane_index ? block_size : 2*block_size;
2635
    int ref_stride= s->current_picture.linesize[plane_index];
2636
    uint8_t *dst8= s->current_picture.data[plane_index];
2637
    int w= p->width;
2638
    int h= p->height;
2639
    START_TIMER
2640

    
2641
    if(s->keyframe || (s->avctx->debug&512)){
2642
        if(mb_y==mb_h)
2643
            return;
2644

    
2645
        if(add){
2646
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2647
            {
2648
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2649
                IDWTELEM * line = sb->line[y];
2650
                for(x=0; x<w; x++)
2651
                {
2652
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2653
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2654
                    v >>= FRAC_BITS;
2655
                    if(v&(~255)) v= ~(v>>31);
2656
                    dst8[x + y*ref_stride]= v;
2657
                }
2658
            }
2659
        }else{
2660
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2661
            {
2662
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2663
                IDWTELEM * line = sb->line[y];
2664
                for(x=0; x<w; x++)
2665
                {
2666
                    line[x] -= 128 << FRAC_BITS;
2667
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2668
                }
2669
            }
2670
        }
2671

    
2672
        return;
2673
    }
2674

    
2675
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2676
            START_TIMER
2677

    
2678
            add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2679
                       block_w*mb_x - block_w/2,
2680
                       block_w*mb_y - block_w/2,
2681
                       block_w, block_w,
2682
                       w, h,
2683
                       w, ref_stride, obmc_stride,
2684
                       mb_x - 1, mb_y - 1,
2685
                       add, 0, plane_index);
2686

    
2687
            STOP_TIMER("add_yblock")
2688
        }
2689

    
2690
    STOP_TIMER("predict_slice")
2691
}
2692

    
2693
static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2694
    Plane *p= &s->plane[plane_index];
2695
    const int mb_w= s->b_width  << s->block_max_depth;
2696
    const int mb_h= s->b_height << s->block_max_depth;
2697
    int x, y, mb_x;
2698
    int block_size = MB_SIZE >> s->block_max_depth;
2699
    int block_w    = plane_index ? block_size/2 : block_size;
2700
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2701
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2702
    int ref_stride= s->current_picture.linesize[plane_index];
2703
    uint8_t *dst8= s->current_picture.data[plane_index];
2704
    int w= p->width;
2705
    int h= p->height;
2706
    START_TIMER
2707

    
2708
    if(s->keyframe || (s->avctx->debug&512)){
2709
        if(mb_y==mb_h)
2710
            return;
2711

    
2712
        if(add){
2713
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2714
                for(x=0; x<w; x++){
2715
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2716
                    v >>= FRAC_BITS;
2717
                    if(v&(~255)) v= ~(v>>31);
2718
                    dst8[x + y*ref_stride]= v;
2719
                }
2720
            }
2721
        }else{
2722
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2723
                for(x=0; x<w; x++){
2724
                    buf[x + y*w]-= 128<<FRAC_BITS;
2725
                }
2726
            }
2727
        }
2728

    
2729
        return;
2730
    }
2731

    
2732
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2733
            START_TIMER
2734

    
2735
            add_yblock(s, 0, NULL, buf, dst8, obmc,
2736
                       block_w*mb_x - block_w/2,
2737
                       block_w*mb_y - block_w/2,
2738
                       block_w, block_w,
2739
                       w, h,
2740
                       w, ref_stride, obmc_stride,
2741
                       mb_x - 1, mb_y - 1,
2742
                       add, 1, plane_index);
2743

    
2744
            STOP_TIMER("add_yblock")
2745
        }
2746

    
2747
    STOP_TIMER("predict_slice")
2748
}
2749

    
2750
static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2751
    const int mb_h= s->b_height << s->block_max_depth;
2752
    int mb_y;
2753
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2754
        predict_slice(s, buf, plane_index, add, mb_y);
2755
}
2756

    
2757
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2758
    int i, x2, y2;
2759
    Plane *p= &s->plane[plane_index];
2760
    const int block_size = MB_SIZE >> s->block_max_depth;
2761
    const int block_w    = plane_index ? block_size/2 : block_size;
2762
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2763
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2764
    const int ref_stride= s->current_picture.linesize[plane_index];
2765
    uint8_t *src= s-> input_picture.data[plane_index];
2766
    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2767
    const int b_stride = s->b_width << s->block_max_depth;
2768
    const int w= p->width;
2769
    const int h= p->height;
2770
    int index= mb_x + mb_y*b_stride;
2771
    BlockNode *b= &s->block[index];
2772
    BlockNode backup= *b;
2773
    int ab=0;
2774
    int aa=0;
2775

    
2776
    b->type|= BLOCK_INTRA;
2777
    b->color[plane_index]= 0;
2778
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2779

    
2780
    for(i=0; i<4; i++){
2781
        int mb_x2= mb_x + (i &1) - 1;
2782
        int mb_y2= mb_y + (i>>1) - 1;
2783
        int x= block_w*mb_x2 + block_w/2;
2784
        int y= block_w*mb_y2 + block_w/2;
2785

    
2786
        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2787
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2788

    
2789
        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2790
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2791
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2792
                int obmc_v= obmc[index];
2793
                int d;
2794
                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2795
                if(x<0) obmc_v += obmc[index + block_w];
2796
                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2797
                if(x+block_w>w) obmc_v += obmc[index - block_w];
2798
                //FIXME precalc this or simplify it somehow else
2799

    
2800
                d = -dst[index] + (1<<(FRAC_BITS-1));
2801
                dst[index] = d;
2802
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2803
                aa += obmc_v * obmc_v; //FIXME precalclate this
2804
            }
2805
        }
2806
    }
2807
    *b= backup;
2808

    
2809
    return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2810
}
2811

    
2812
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2813
    const int b_stride = s->b_width << s->block_max_depth;
2814
    const int b_height = s->b_height<< s->block_max_depth;
2815
    int index= x + y*b_stride;
2816
    const BlockNode *b     = &s->block[index];
2817
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2818
    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2819
    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2820
    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2821
    int dmx, dmy;
2822
//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2823
//  int my_context= av_log2(2*FFABS(left->my - top->my));
2824

    
2825
    if(x<0 || x>=b_stride || y>=b_height)
2826
        return 0;
2827
/*
2828
1            0      0
2829
01X          1-2    1
2830
001XX        3-6    2-3
2831
0001XXX      7-14   4-7
2832
00001XXXX   15-30   8-15
2833
*/
2834
//FIXME try accurate rate
2835
//FIXME intra and inter predictors if surrounding blocks arent the same type
2836
    if(b->type & BLOCK_INTRA){
2837
        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2838
                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
2839
                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
2840
    }else{
2841
        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2842
        dmx-= b->mx;
2843
        dmy-= b->my;
2844
        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2845
                    + av_log2(2*FFABS(dmy))
2846
                    + av_log2(2*b->ref));
2847
    }
2848
}
2849

    
2850
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2851
    Plane *p= &s->plane[plane_index];
2852
    const int block_size = MB_SIZE >> s->block_max_depth;
2853
    const int block_w    = plane_index ? block_size/2 : block_size;
2854
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2855
    const int ref_stride= s->current_picture.linesize[plane_index];
2856
    uint8_t *dst= s->current_picture.data[plane_index];
2857
    uint8_t *src= s->  input_picture.data[plane_index];
2858
    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2859
    uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2860
    uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2861
    const int b_stride = s->b_width << s->block_max_depth;
2862
    const int b_height = s->b_height<< s->block_max_depth;
2863
    const int w= p->width;
2864
    const int h= p->height;
2865
    int distortion;
2866
    int rate= 0;
2867
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2868
    int sx= block_w*mb_x - block_w/2;
2869
    int sy= block_w*mb_y - block_w/2;
2870
    int x0= FFMAX(0,-sx);
2871
    int y0= FFMAX(0,-sy);
2872
    int x1= FFMIN(block_w*2, w-sx);
2873
    int y1= FFMIN(block_w*2, h-sy);
2874
    int i,x,y;
2875

    
2876
    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2877

    
2878
    for(y=y0; y<y1; y++){
2879
        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2880
        const IDWTELEM *pred1 = pred + y*obmc_stride;
2881
        uint8_t *cur1 = cur + y*ref_stride;
2882
        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2883
        for(x=x0; x<x1; x++){
2884
#if FRAC_BITS >= LOG2_OBMC_MAX
2885
            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2886
#else
2887
            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2888
#endif
2889
            v = (v + pred1[x]) >> FRAC_BITS;
2890
            if(v&(~255)) v= ~(v>>31);
2891
            dst1[x] = v;
2892
        }
2893
    }
2894

    
2895
    /* copy the regions where obmc[] = (uint8_t)256 */
2896
    if(LOG2_OBMC_MAX == 8
2897
        && (mb_x == 0 || mb_x == b_stride-1)
2898
        && (mb_y == 0 || mb_y == b_height-1)){
2899
        if(mb_x == 0)
2900
            x1 = block_w;
2901
        else
2902
            x0 = block_w;
2903
        if(mb_y == 0)
2904
            y1 = block_w;
2905
        else
2906
            y0 = block_w;
2907
        for(y=y0; y<y1; y++)
2908
            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2909
    }
2910

    
2911
    if(block_w==16){
2912
        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2913
        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2914
        /* FIXME cmps overlap but don't cover the wavelet's whole support,
2915
         * so improving the score of one block is not strictly guaranteed to
2916
         * improve the score of the whole frame, so iterative motion est
2917
         * doesn't always converge. */
2918
        if(s->avctx->me_cmp == FF_CMP_W97)
2919
            distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2920
        else if(s->avctx->me_cmp == FF_CMP_W53)
2921
            distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2922
        else{
2923
            distortion = 0;
2924
            for(i=0; i<4; i++){
2925
                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2926
                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2927
            }
2928
        }
2929
    }else{
2930
        assert(block_w==8);
2931
        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2932
    }
2933

    
2934
    if(plane_index==0){
2935
        for(i=0; i<4; i++){
2936
/* ..RRr
2937
 * .RXx.
2938
 * rxx..
2939
 */
2940
            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2941
        }
2942
        if(mb_x == b_stride-2)
2943
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2944
    }
2945
    return distortion + rate*penalty_factor;
2946
}
2947

    
2948
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2949
    int i, y2;
2950
    Plane *p= &s->plane[plane_index];
2951
    const int block_size = MB_SIZE >> s->block_max_depth;
2952
    const int block_w    = plane_index ? block_size/2 : block_size;
2953
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2954
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2955
    const int ref_stride= s->current_picture.linesize[plane_index];
2956
    uint8_t *dst= s->current_picture.data[plane_index];
2957
    uint8_t *src= s-> input_picture.data[plane_index];
2958
    static const IDWTELEM zero_dst[4096]; //FIXME
2959
    const int b_stride = s->b_width << s->block_max_depth;
2960
    const int w= p->width;
2961
    const int h= p->height;
2962
    int distortion= 0;
2963
    int rate= 0;
2964
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2965

    
2966
    for(i=0; i<9; i++){
2967
        int mb_x2= mb_x + (i%3) - 1;
2968
        int mb_y2= mb_y + (i/3) - 1;
2969
        int x= block_w*mb_x2 + block_w/2;
2970
        int y= block_w*mb_y2 + block_w/2;
2971

    
2972
        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2973
                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2974

    
2975
        //FIXME find a cleaner/simpler way to skip the outside stuff
2976
        for(y2= y; y2<0; y2++)
2977
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2978
        for(y2= h; y2<y+block_w; y2++)
2979
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2980
        if(x<0){
2981
            for(y2= y; y2<y+block_w; y2++)
2982
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2983
        }
2984
        if(x+block_w > w){
2985
            for(y2= y; y2<y+block_w; y2++)
2986
                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2987
        }
2988

    
2989
        assert(block_w== 8 || block_w==16);
2990
        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2991
    }
2992

    
2993
    if(plane_index==0){
2994
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2995
        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2996

    
2997
/* ..RRRr
2998
 * .RXXx.
2999
 * .RXXx.
3000
 * rxxx.
3001
 */
3002
        if(merged)
3003
            rate = get_block_bits(s, mb_x, mb_y, 2);
3004
        for(i=merged?4:0; i<9; i++){
3005
            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3006
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3007
        }
3008
    }
3009
    return distortion + rate*penalty_factor;
3010
}
3011

    
3012
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3013
    const int b_stride= s->b_width << s->block_max_depth;
3014
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3015
    BlockNode backup= *block;
3016
    int rd, index, value;
3017

    
3018
    assert(mb_x>=0 && mb_y>=0);
3019
    assert(mb_x<b_stride);
3020

    
3021
    if(intra){
3022
        block->color[0] = p[0];
3023
        block->color[1] = p[1];
3024
        block->color[2] = p[2];
3025
        block->type |= BLOCK_INTRA;
3026
    }else{
3027
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3028
        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3029
        if(s->me_cache[index] == value)
3030
            return 0;
3031
        s->me_cache[index]= value;
3032

    
3033
        block->mx= p[0];
3034
        block->my= p[1];
3035
        block->type &= ~BLOCK_INTRA;
3036
    }
3037

    
3038
    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3039

    
3040
//FIXME chroma
3041
    if(rd < *best_rd){
3042
        *best_rd= rd;
3043
        return 1;
3044
    }else{
3045
        *block= backup;
3046
        return 0;
3047
    }
3048
}
3049

    
3050
/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3051
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3052
    int p[2] = {p0, p1};
3053
    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3054
}
3055

    
3056
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3057
    const int b_stride= s->b_width << s->block_max_depth;
3058
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3059
    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3060
    int rd, index, value;
3061

    
3062
    assert(mb_x>=0 && mb_y>=0);
3063
    assert(mb_x<b_stride);
3064
    assert(((mb_x|mb_y)&1) == 0);
3065

    
3066
    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3067
    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3068
    if(s->me_cache[index] == value)
3069
        return 0;
3070
    s->me_cache[index]= value;
3071

    
3072
    block->mx= p0;
3073
    block->my= p1;
3074
    block->ref= ref;
3075
    block->type &= ~BLOCK_INTRA;
3076
    block[1]= block[b_stride]= block[b_stride+1]= *block;
3077

    
3078
    rd= get_4block_rd(s, mb_x, mb_y, 0);
3079

    
3080
//FIXME chroma
3081
    if(rd < *best_rd){
3082
        *best_rd= rd;
3083
        return 1;
3084
    }else{
3085
        block[0]= backup[0];
3086
        block[1]= backup[1];
3087
        block[b_stride]= backup[2];
3088
        block[b_stride+1]= backup[3];
3089
        return 0;
3090
    }
3091
}
3092

    
3093
static void iterative_me(SnowContext *s){
3094
    int pass, mb_x, mb_y;
3095
    const int b_width = s->b_width  << s->block_max_depth;
3096
    const int b_height= s->b_height << s->block_max_depth;
3097
    const int b_stride= b_width;
3098
    int color[3];
3099

    
3100
    {
3101
        RangeCoder r = s->c;
3102
        uint8_t state[sizeof(s->block_state)];
3103
        memcpy(state, s->block_state, sizeof(s->block_state));
3104
        for(mb_y= 0; mb_y<s->b_height; mb_y++)
3105
            for(mb_x= 0; mb_x<s->b_width; mb_x++)
3106
                encode_q_branch(s, 0, mb_x, mb_y);
3107
        s->c = r;
3108
        memcpy(s->block_state, state, sizeof(s->block_state));
3109
    }
3110

    
3111
    for(pass=0; pass<25; pass++){
3112
        int change= 0;
3113

    
3114
        for(mb_y= 0; mb_y<b_height; mb_y++){
3115
            for(mb_x= 0; mb_x<b_width; mb_x++){
3116
                int dia_change, i, j, ref;
3117
                int best_rd= INT_MAX, ref_rd;
3118
                BlockNode backup, ref_b;
3119
                const int index= mb_x + mb_y * b_stride;
3120
                BlockNode *block= &s->block[index];
3121
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3122
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3123
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3124
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3125
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3126
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3127
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3128
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3129
                const int b_w= (MB_SIZE >> s->block_max_depth);
3130
                uint8_t obmc_edged[b_w*2][b_w*2];
3131

    
3132
                if(pass && (block->type & BLOCK_OPT))
3133
                    continue;
3134
                block->type |= BLOCK_OPT;
3135

    
3136
                backup= *block;
3137

    
3138
                if(!s->me_cache_generation)
3139
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3140
                s->me_cache_generation += 1<<22;
3141

    
3142
                //FIXME precalc
3143
                {
3144
                    int x, y;
3145
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3146
                    if(mb_x==0)
3147
                        for(y=0; y<b_w*2; y++)
3148
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3149
                    if(mb_x==b_stride-1)
3150
                        for(y=0; y<b_w*2; y++)
3151
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3152
                    if(mb_y==0){
3153
                        for(x=0; x<b_w*2; x++)
3154
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3155
                        for(y=1; y<b_w; y++)
3156
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3157
                    }
3158
                    if(mb_y==b_height-1){
3159
                        for(x=0; x<b_w*2; x++)
3160
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3161
                        for(y=b_w; y<b_w*2-1; y++)
3162
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3163
                    }
3164
                }
3165

    
3166
                //skip stuff outside the picture
3167
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3168
                {
3169
                    uint8_t *src= s->  input_picture.data[0];
3170
                    uint8_t *dst= s->current_picture.data[0];
3171
                    const int stride= s->current_picture.linesize[0];
3172
                    const int block_w= MB_SIZE >> s->block_max_depth;
3173
                    const int sx= block_w*mb_x - block_w/2;
3174
                    const int sy= block_w*mb_y - block_w/2;
3175
                    const int w= s->plane[0].width;
3176
                    const int h= s->plane[0].height;
3177
                    int y;
3178

    
3179
                    for(y=sy; y<0; y++)
3180
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3181
                    for(y=h; y<sy+block_w*2; y++)
3182
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3183
                    if(sx<0){
3184
                        for(y=sy; y<sy+block_w*2; y++)
3185
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3186
                    }
3187
                    if(sx+block_w*2 > w){
3188
                        for(y=sy; y<sy+block_w*2; y++)
3189
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3190
                    }
3191
                }
3192

    
3193
                // intra(black) = neighbors' contribution to the current block
3194
                for(i=0; i<3; i++)
3195
                    color[i]= get_dc(s, mb_x, mb_y, i);
3196

    
3197
                // get previous score (cannot be cached due to OBMC)
3198
                if(pass > 0 && (block->type&BLOCK_INTRA)){
3199
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
3200
                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3201
                }else
3202
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3203

    
3204
                ref_b= *block;
3205
                ref_rd= best_rd;
3206
                for(ref=0; ref < s->ref_frames; ref++){
3207
                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3208
                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3209
                        continue;
3210
                    block->ref= ref;
3211
                    best_rd= INT_MAX;
3212

    
3213
                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3214
                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3215
                    if(tb)
3216
                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3217
                    if(lb)
3218
                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3219
                    if(rb)
3220
                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3221
                    if(bb)
3222
                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3223

    
3224
                    /* fullpel ME */
3225
                    //FIXME avoid subpel interpol / round to nearest integer
3226
                    do{
3227
                        dia_change=0;
3228
                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3229
                            for(j=0; j<i; j++){
3230
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3231
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3232
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3233
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3234
                            }
3235
                        }
3236
                    }while(dia_change);
3237
                    /* subpel ME */
3238
                    do{
3239
                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3240
                        dia_change=0;
3241
                        for(i=0; i<8; i++)
3242
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3243
                    }while(dia_change);
3244
                    //FIXME or try the standard 2 pass qpel or similar
3245

    
3246
                    mvr[0][0]= block->mx;
3247
                    mvr[0][1]= block->my;
3248
                    if(ref_rd > best_rd){
3249
                        ref_rd= best_rd;
3250
                        ref_b= *block;
3251
                    }
3252
                }
3253
                best_rd= ref_rd;
3254
                *block= ref_b;
3255
#if 1
3256
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3257
                //FIXME RD style color selection
3258
#endif
3259
                if(!same_block(block, &backup)){
3260
                    if(tb ) tb ->type &= ~BLOCK_OPT;
3261
                    if(lb ) lb ->type &= ~BLOCK_OPT;
3262
                    if(rb ) rb ->type &= ~BLOCK_OPT;
3263
                    if(bb ) bb ->type &= ~BLOCK_OPT;
3264
                    if(tlb) tlb->type &= ~BLOCK_OPT;
3265
                    if(trb) trb->type &= ~BLOCK_OPT;
3266
                    if(blb) blb->type &= ~BLOCK_OPT;
3267
                    if(brb) brb->type &= ~BLOCK_OPT;
3268
                    change ++;
3269
                }
3270
            }
3271
        }
3272
        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3273
        if(!change)
3274
            break;
3275
    }
3276

    
3277
    if(s->block_max_depth == 1){
3278
        int change= 0;
3279
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
3280
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
3281
                int i;
3282
                int best_rd, init_rd;
3283
                const int index= mb_x + mb_y * b_stride;
3284
                BlockNode *b[4];
3285

    
3286
                b[0]= &s->block[index];
3287
                b[1]= b[0]+1;
3288
                b[2]= b[0]+b_stride;
3289
                b[3]= b[2]+1;
3290
                if(same_block(b[0], b[1]) &&
3291
                   same_block(b[0], b[2]) &&
3292
                   same_block(b[0], b[3]))
3293
                    continue;
3294

    
3295
                if(!s->me_cache_generation)
3296
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3297
                s->me_cache_generation += 1<<22;
3298

    
3299
                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3300

    
3301
                //FIXME more multiref search?
3302
                check_4block_inter(s, mb_x, mb_y,
3303
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3304
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3305

    
3306
                for(i=0; i<4; i++)
3307
                    if(!(b[i]->type&BLOCK_INTRA))
3308
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3309

    
3310
                if(init_rd != best_rd)
3311
                    change++;
3312
            }
3313
        }
3314
        av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3315
    }
3316
}
3317

    
3318
static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3319
    const int level= b->level;
3320
    const int w= b->width;
3321
    const int h= b->height;
3322
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3323
    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3324
    int x,y, thres1, thres2;
3325
//    START_TIMER
3326

    
3327
    if(s->qlog == LOSSLESS_QLOG){
3328
        for(y=0; y<h; y++)
3329
            for(x=0; x<w; x++)
3330
                dst[x + y*stride]= src[x + y*stride];
3331
        return;
3332
    }
3333

    
3334
    bias= bias ? 0 : (3*qmul)>>3;
3335
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3336
    thres2= 2*thres1;
3337

    
3338
    if(!bias){
3339
        for(y=0; y<h; y++){
3340
            for(x=0; x<w; x++){
3341
                int i= src[x + y*stride];
3342

    
3343
                if((unsigned)(i+thres1) > thres2){
3344
                    if(i>=0){
3345
                        i<<= QEXPSHIFT;
3346
                        i/= qmul; //FIXME optimize
3347
                        dst[x + y*stride]=  i;
3348
                    }else{
3349
                        i= -i;
3350
                        i<<= QEXPSHIFT;
3351
                        i/= qmul; //FIXME optimize
3352
                        dst[x + y*stride]= -i;
3353
                    }
3354
                }else
3355
                    dst[x + y*stride]= 0;
3356
            }
3357
        }
3358
    }else{
3359
        for(y=0; y<h; y++){
3360
            for(x=0; x<w; x++){
3361
                int i= src[x + y*stride];
3362

    
3363
                if((unsigned)(i+thres1) > thres2){
3364
                    if(i>=0){
3365
                        i<<= QEXPSHIFT;
3366
                        i= (i + bias) / qmul; //FIXME optimize
3367
                        dst[x + y*stride]=  i;
3368
                    }else{
3369
                        i= -i;
3370
                        i<<= QEXPSHIFT;
3371
                        i= (i + bias) / qmul; //FIXME optimize
3372
                        dst[x + y*stride]= -i;
3373
                    }
3374
                }else
3375
                    dst[x + y*stride]= 0;
3376
            }
3377
        }
3378
    }
3379
    if(level+1 == s->spatial_decomposition_count){
3380
//        STOP_TIMER("quantize")
3381
    }
3382
}
3383

    
3384
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3385
    const int w= b->width;
3386
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3387
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3388
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3389
    int x,y;
3390
    START_TIMER
3391

    
3392
    if(s->qlog == LOSSLESS_QLOG) return;
3393

    
3394
    for(y=start_y; y<end_y; y++){
3395
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3396
        IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3397
        for(x=0; x<w; x++){
3398
            int i= line[x];
3399
            if(i<0){
3400
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3401
            }else if(i>0){
3402
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3403
            }
3404
        }
3405
    }
3406
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3407
        STOP_TIMER("dquant")
3408
    }
3409
}
3410

    
3411
static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3412
    const int w= b->width;
3413
    const int h= b->height;
3414
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3415
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3416
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3417
    int x,y;
3418
    START_TIMER
3419

    
3420
    if(s->qlog == LOSSLESS_QLOG) return;
3421

    
3422
    for(y=0; y<h; y++){
3423
        for(x=0; x<w; x++){
3424
            int i= src[x + y*stride];
3425
            if(i<0){
3426
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3427
            }else if(i>0){
3428
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3429
            }
3430
        }
3431
    }
3432
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3433
        STOP_TIMER("dquant")
3434
    }
3435
}
3436

    
3437
static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3438
    const int w= b->width;
3439
    const int h= b->height;
3440
    int x,y;
3441

    
3442
    for(y=h-1; y>=0; y--){
3443
        for(x=w-1; x>=0; x--){
3444
            int i= x + y*stride;
3445

    
3446
            if(x){
3447
                if(use_median){
3448
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3449
                    else  src[i] -= src[i - 1];
3450
                }else{
3451
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3452
                    else  src[i] -= src[i - 1];
3453
                }
3454
            }else{
3455
                if(y) src[i] -= src[i - stride];
3456
            }
3457
        }
3458
    }
3459
}
3460

    
3461
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3462
    const int w= b->width;
3463
    int x,y;
3464

    
3465
//    START_TIMER
3466

    
3467
    IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3468
    IDWTELEM * prev;
3469

    
3470
    if (start_y != 0)
3471
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3472

    
3473
    for(y=start_y; y<end_y; y++){
3474
        prev = line;
3475
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3476
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3477
        for(x=0; x<w; x++){
3478
            if(x){
3479
                if(use_median){
3480
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3481
                    else  line[x] += line[x - 1];
3482
                }else{
3483
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3484
                    else  line[x] += line[x - 1];
3485
                }
3486
            }else{
3487
                if(y) line[x] += prev[x];
3488
            }
3489
        }
3490
    }
3491

    
3492
//    STOP_TIMER("correlate")
3493
}
3494

    
3495
static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3496
    const int w= b->width;
3497
    const int h= b->height;
3498
    int x,y;
3499

    
3500
    for(y=0; y<h; y++){
3501
        for(x=0; x<w; x++){
3502
            int i= x + y*stride;
3503

    
3504
            if(x){
3505
                if(use_median){
3506
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3507
                    else  src[i] += src[i - 1];
3508
                }else{
3509
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3510
                    else  src[i] += src[i - 1];
3511
                }
3512
            }else{
3513
                if(y) src[i] += src[i - stride];
3514
            }
3515
        }
3516
    }
3517
}
3518

    
3519
static void encode_header(SnowContext *s){
3520
    int plane_index, level, orientation, i;
3521
    uint8_t kstate[32];
3522

    
3523
    memset(kstate, MID_STATE, sizeof(kstate));
3524

    
3525
    put_rac(&s->c, kstate, s->keyframe);
3526
    if(s->keyframe || s->always_reset){
3527
        reset_contexts(s);
3528
        s->last_spatial_decomposition_type=
3529
        s->last_qlog=
3530
        s->last_qbias=
3531
        s->last_mv_scale=
3532
        s->last_block_max_depth= 0;
3533
        for(plane_index=0; plane_index<2; plane_index++){
3534
            Plane *p= &s->plane[plane_index];
3535
            p->last_htaps=0;
3536
            p->last_diag_mc=0;
3537
            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3538
        }
3539
    }
3540
    if(s->keyframe){
3541
        put_symbol(&s->c, s->header_state, s->version, 0);
3542
        put_rac(&s->c, s->header_state, s->always_reset);
3543
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3544
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3545
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3546
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3547
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3548
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3549
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3550
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3551
        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3552

    
3553
        for(plane_index=0; plane_index<2; plane_index++){
3554
            for(level=0; level<s->spatial_decomposition_count; level++){
3555
                for(orientation=level ? 1:0; orientation<4; orientation++){
3556
                    if(orientation==2) continue;
3557
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3558
                }
3559
            }
3560
        }
3561
    }
3562

    
3563
    if(!s->keyframe){
3564
        int update_mc=0;
3565
        for(plane_index=0; plane_index<2; plane_index++){
3566
            Plane *p= &s->plane[plane_index];
3567
            update_mc |= p->last_htaps   != p->htaps;
3568
            update_mc |= p->last_diag_mc != p->diag_mc;
3569
            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3570
        }
3571
        if(!s->always_reset)
3572
            put_rac(&s->c, s->header_state, update_mc);
3573
        if(update_mc){
3574
            for(plane_index=0; plane_index<2; plane_index++){
3575
                Plane *p= &s->plane[plane_index];
3576
                put_rac(&s->c, s->header_state, p->diag_mc);
3577
                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3578
                for(i= p->htaps/2; i; i--)
3579
                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3580

    
3581
                p->last_diag_mc= p->diag_mc;
3582
                p->last_htaps= p->htaps;
3583
                memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3584
            }
3585
        }
3586
    }
3587

    
3588
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3589
    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3590
    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3591
    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3592
    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3593

    
3594
    s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3595
    s->last_qlog                      = s->qlog;
3596
    s->last_qbias                     = s->qbias;
3597
    s->last_mv_scale                  = s->mv_scale;
3598
    s->last_block_max_depth           = s->block_max_depth;
3599
}
3600

    
3601
static int decode_header(SnowContext *s){
3602
    int plane_index, level, orientation;
3603
    uint8_t kstate[32];
3604

    
3605
    memset(kstate, MID_STATE, sizeof(kstate));
3606

    
3607
    s->keyframe= get_rac(&s->c, kstate);
3608
    if(s->keyframe || s->always_reset){
3609
        reset_contexts(s);
3610
        s->spatial_decomposition_type=
3611
        s->qlog=
3612
        s->qbias=
3613
        s->mv_scale=
3614
        s->block_max_depth= 0;
3615
    }
3616
    if(s->keyframe){
3617
        s->version= get_symbol(&s->c, s->header_state, 0);
3618
        if(s->version>0){
3619
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3620
            return -1;
3621
        }
3622
        s->always_reset= get_rac(&s->c, s->header_state);
3623
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3624
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3625
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3626
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3627
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3628
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3629
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3630
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3631
        s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3632

    
3633
        for(plane_index=0; plane_index<3; plane_index++){
3634
            for(level=0; level<s->spatial_decomposition_count; level++){
3635
                for(orientation=level ? 1:0; orientation<4; orientation++){
3636
                    int q;
3637
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3638
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3639
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3640
                    s->plane[plane_index].band[level][orientation].qlog= q;
3641
                }
3642
            }
3643
        }
3644
    }
3645

    
3646
    if(!s->keyframe){
3647
        if(s->always_reset || get_rac(&s->c, s->header_state)){
3648
            for(plane_index=0; plane_index<2; plane_index++){
3649
                int htaps, i, sum=0, absum=0;
3650
                Plane *p= &s->plane[plane_index];
3651
                p->diag_mc= get_rac(&s->c, s->header_state);
3652
                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3653
                if((unsigned)htaps > HTAPS_MAX || htaps==0)
3654
                    return -1;
3655
                p->htaps= htaps;
3656
                for(i= htaps/2; i; i--){
3657
                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3658
                    sum += p->hcoeff[i];
3659
                }
3660
                p->hcoeff[0]= 32-sum;
3661
            }
3662
            s->plane[2].diag_mc= s->plane[1].diag_mc;
3663
            s->plane[2].htaps  = s->plane[1].htaps;
3664
            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3665
        }
3666
    }
3667

    
3668
    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3669
    if(s->spatial_decomposition_type > 1){
3670
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3671
        return -1;
3672
    }
3673

    
3674
    s->qlog           += get_symbol(&s->c, s->header_state, 1);
3675
    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
3676
    s->qbias          += get_symbol(&s->c, s->header_state, 1);
3677
    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3678
    if(s->block_max_depth > 1 || s->block_max_depth < 0){
3679
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3680
        s->block_max_depth= 0;
3681
        return -1;
3682
    }
3683

    
3684
    return 0;
3685
}
3686

    
3687
static void init_qexp(void){
3688
    int i;
3689
    double v=128;
3690

    
3691
    for(i=0; i<QROOT; i++){
3692
        qexp[i]= lrintf(v);
3693
        v *= pow(2, 1.0 / QROOT);
3694
    }
3695
}
3696

    
3697
static int common_init(AVCodecContext *avctx){
3698
    SnowContext *s = avctx->priv_data;
3699
    int width, height;
3700
    int i, j;
3701

    
3702
    s->avctx= avctx;
3703

    
3704
    dsputil_init(&s->dsp, avctx);
3705

    
3706
#define mcf(dx,dy)\
3707
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3708
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3709
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3710
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3711
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3712
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3713

    
3714
    mcf( 0, 0)
3715
    mcf( 4, 0)
3716
    mcf( 8, 0)
3717
    mcf(12, 0)
3718
    mcf( 0, 4)
3719
    mcf( 4, 4)
3720
    mcf( 8, 4)
3721
    mcf(12, 4)
3722
    mcf( 0, 8)
3723
    mcf( 4, 8)
3724
    mcf( 8, 8)
3725
    mcf(12, 8)
3726
    mcf( 0,12)
3727
    mcf( 4,12)
3728
    mcf( 8,12)
3729
    mcf(12,12)
3730

    
3731
#define mcfh(dx,dy)\
3732
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3733
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3734
        mc_block_hpel ## dx ## dy ## 16;\
3735
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3736
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3737
        mc_block_hpel ## dx ## dy ## 8;
3738

    
3739
    mcfh(0, 0)
3740
    mcfh(8, 0)
3741
    mcfh(0, 8)
3742
    mcfh(8, 8)
3743

    
3744
    if(!qexp[0])
3745
        init_qexp();
3746

    
3747
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3748

    
3749
    width= s->avctx->width;
3750
    height= s->avctx->height;
3751

    
3752
    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3753
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here
3754

    
3755
    for(i=0; i<MAX_REF_FRAMES; i++)
3756
        for(j=0; j<MAX_REF_FRAMES; j++)
3757
            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3758

    
3759
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3760

    
3761
    return 0;
3762
}
3763

    
3764
static int common_init_after_header(AVCodecContext *avctx){
3765
    SnowContext *s = avctx->priv_data;
3766
    int plane_index, level, orientation;
3767

    
3768
    for(plane_index=0; plane_index<3; plane_index++){
3769
        int w= s->avctx->width;
3770
        int h= s->avctx->height;
3771

    
3772
        if(plane_index){
3773
            w>>= s->chroma_h_shift;
3774
            h>>= s->chroma_v_shift;
3775
        }
3776
        s->plane[plane_index].width = w;
3777
        s->plane[plane_index].height= h;
3778

    
3779
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3780
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3781
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3782
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3783

    
3784
                b->buf= s->spatial_dwt_buffer;
3785
                b->level= level;
3786
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3787
                b->width = (w + !(orientation&1))>>1;
3788
                b->height= (h + !(orientation>1))>>1;
3789

    
3790
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3791
                b->buf_x_offset = 0;
3792
                b->buf_y_offset = 0;
3793

    
3794
                if(orientation&1){
3795
                    b->buf += (w+1)>>1;
3796
                    b->buf_x_offset = (w+1)>>1;
3797
                }
3798
                if(orientation>1){
3799
                    b->buf += b->stride>>1;
3800
                    b->buf_y_offset = b->stride_line >> 1;
3801
                }
3802
                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3803

    
3804
                if(level)
3805
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3806
                //FIXME avoid this realloc
3807
                av_freep(&b->x_coeff);
3808
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3809
            }
3810
            w= (w+1)>>1;
3811
            h= (h+1)>>1;
3812
        }
3813
    }
3814

    
3815
    return 0;
3816
}
3817

    
3818
static int qscale2qlog(int qscale){
3819
    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3820
           + 61*QROOT/8; //<64 >60
3821
}
3822

    
3823
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3824
{
3825
    /* estimate the frame's complexity as a sum of weighted dwt coefs.
3826
     * FIXME we know exact mv bits at this point,
3827
     * but ratecontrol isn't set up to include them. */
3828
    uint32_t coef_sum= 0;
3829
    int level, orientation, delta_qlog;
3830

    
3831
    for(level=0; level<s->spatial_decomposition_count; level++){
3832
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3833
            SubBand *b= &s->plane[0].band[level][orientation];
3834
            IDWTELEM *buf= b->ibuf;
3835
            const int w= b->width;
3836
            const int h= b->height;
3837
            const int stride= b->stride;
3838
            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3839
            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3840
            const int qdiv= (1<<16)/qmul;
3841
            int x, y;
3842
            //FIXME this is ugly
3843
            for(y=0; y<h; y++)
3844
                for(x=0; x<w; x++)
3845
                    buf[x+y*stride]= b->buf[x+y*stride];
3846
            if(orientation==0)
3847
                decorrelate(s, b, buf, stride, 1, 0);
3848
            for(y=0; y<h; y++)
3849
                for(x=0; x<w; x++)
3850
                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3851
        }
3852
    }
3853

    
3854
    /* ugly, ratecontrol just takes a sqrt again */
3855
    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3856
    assert(coef_sum < INT_MAX);
3857

    
3858
    if(pict->pict_type == I_TYPE){
3859
        s->m.current_picture.mb_var_sum= coef_sum;
3860
        s->m.current_picture.mc_mb_var_sum= 0;
3861
    }else{
3862
        s->m.current_picture.mc_mb_var_sum= coef_sum;
3863
        s->m.current_picture.mb_var_sum= 0;
3864
    }
3865

    
3866
    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3867
    if (pict->quality < 0)
3868
        return INT_MIN;
3869
    s->lambda= pict->quality * 3/2;
3870
    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3871
    s->qlog+= delta_qlog;
3872
    return delta_qlog;
3873
}
3874

    
3875
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3876
    int width = p->width;
3877
    int height= p->height;
3878
    int level, orientation, x, y;
3879

    
3880
    for(level=0; level<s->spatial_decomposition_count; level++){
3881
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3882
            SubBand *b= &p->band[level][orientation];
3883
            IDWTELEM *ibuf= b->ibuf;
3884
            int64_t error=0;
3885

    
3886
            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3887
            ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3888
            ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3889
            for(y=0; y<height; y++){
3890
                for(x=0; x<width; x++){
3891
                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3892
                    error += d*d;
3893
                }
3894
            }
3895

    
3896
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3897
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3898
        }
3899
    }
3900
}
3901

    
3902
static int encode_init(AVCodecContext *avctx)
3903
{
3904
    SnowContext *s = avctx->priv_data;
3905
    int plane_index;
3906

    
3907
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3908
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3909
               "use vstrict=-2 / -strict -2 to use it anyway\n");
3910
        return -1;
3911
    }
3912

    
3913
    if(avctx->prediction_method == DWT_97
3914
       && (avctx->flags & CODEC_FLAG_QSCALE)
3915
       && avctx->global_quality == 0){
3916
        av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3917
        return -1;
3918
    }
3919

    
3920
    s->spatial_decomposition_count= 5;
3921
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3922

    
3923
    s->chroma_h_shift= 1; //FIXME XXX
3924
    s->chroma_v_shift= 1;
3925

    
3926
    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3927
    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3928

    
3929
    for(plane_index=0; plane_index<3; plane_index++){
3930
        s->plane[plane_index].diag_mc= 1;
3931
        s->plane[plane_index].htaps= 6;
3932
        s->plane[plane_index].hcoeff[0]=  40;
3933
        s->plane[plane_index].hcoeff[1]= -10;
3934
        s->plane[plane_index].hcoeff[2]=   2;
3935
        s->plane[plane_index].fast_mc= 1;
3936
    }
3937

    
3938
    common_init(avctx);
3939
    common_init_after_header(avctx);
3940
    alloc_blocks(s);
3941

    
3942
    s->version=0;
3943

    
3944
    s->m.avctx   = avctx;
3945
    s->m.flags   = avctx->flags;
3946
    s->m.bit_rate= avctx->bit_rate;
3947

    
3948
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3949
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3950
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3951
    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3952
    h263_encode_init(&s->m); //mv_penalty
3953

    
3954
    s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3955

    
3956
    if(avctx->flags&CODEC_FLAG_PASS1){
3957
        if(!avctx->stats_out)
3958
            avctx->stats_out = av_mallocz(256);
3959
    }
3960
    if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3961
        if(ff_rate_control_init(&s->m) < 0)
3962
            return -1;
3963
    }
3964
    s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3965

    
3966
    for(plane_index=0; plane_index<3; plane_index++){
3967
        calculate_vissual_weight(s, &s->plane[plane_index]);
3968
    }
3969

    
3970

    
3971
    avctx->coded_frame= &s->current_picture;
3972
    switch(avctx->pix_fmt){
3973
//    case PIX_FMT_YUV444P:
3974
//    case PIX_FMT_YUV422P:
3975
    case PIX_FMT_YUV420P:
3976
    case PIX_FMT_GRAY8:
3977
//    case PIX_FMT_YUV411P:
3978
//    case PIX_FMT_YUV410P:
3979
        s->colorspace_type= 0;
3980
        break;
3981
/*    case PIX_FMT_RGB32:
3982
        s->colorspace= 1;
3983
        break;*/
3984
    default:
3985
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3986
        return -1;
3987
    }
3988
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3989
    s->chroma_h_shift= 1;
3990
    s->chroma_v_shift= 1;
3991

    
3992
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3993
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3994

    
3995
    s->avctx->get_buffer(s->avctx, &s->input_picture);
3996

    
3997
    if(s->avctx->me_method == ME_ITER){
3998
        int i;
3999
        int size= s->b_width * s->b_height << 2*s->block_max_depth;
4000
        for(i=0; i<s->max_ref_frames; i++){
4001
            s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4002
            s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4003
        }
4004
    }
4005

    
4006
    return 0;
4007
}