Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 89438028

History | View | Annotate | Download (181 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This file is part of FFmpeg.
5
 *
6
 * FFmpeg is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * FFmpeg is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with FFmpeg; if not, write to the Free Software
18
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19
 */
20

    
21
#include "avcodec.h"
22
#include "dsputil.h"
23
#include "snow.h"
24

    
25
#include "rangecoder.h"
26

    
27
#include "mpegvideo.h"
28

    
29
#undef NDEBUG
30
#include <assert.h>
31

    
32
static const int8_t quant3[256]={
33
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
49
};
50
static const int8_t quant3b[256]={
51
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
};
68
static const int8_t quant3bA[256]={
69
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
};
86
static const int8_t quant5[256]={
87
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
103
};
104
static const int8_t quant7[256]={
105
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
121
};
122
static const int8_t quant9[256]={
123
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
139
};
140
static const int8_t quant11[256]={
141
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
157
};
158
static const int8_t quant13[256]={
159
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
175
};
176

    
177
#if 0 //64*cubic
178
static const uint8_t obmc32[1024]={
179
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
180
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
181
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
182
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
183
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
184
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
185
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
186
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
187
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
188
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
189
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
190
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
191
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
192
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
193
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
194
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
195
  1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
196
  0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
197
  0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
198
  0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
199
  0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
200
  0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
201
  0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
202
  0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
203
  0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
204
  0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
205
  0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
206
  0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
207
  0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
208
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
209
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
210
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
211
//error:0.000022
212
};
213
static const uint8_t obmc16[256]={
214
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
215
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
216
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
217
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
218
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
219
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
220
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
221
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
222
  4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
223
  4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
224
  0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
225
  0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
226
  0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
227
  0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
228
  0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
229
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
230
//error:0.000033
231
};
232
#elif 1 // 64*linear
233
static const uint8_t obmc32[1024]={
234
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
235
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
236
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
237
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
238
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
239
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
240
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
241
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
242
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
243
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
244
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
245
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
246
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
247
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
248
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
249
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
250
  8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
251
  8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
252
  8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
253
  8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
254
  4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
255
  4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
256
  4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
257
  4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
258
  4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
259
  4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
260
  4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
261
  4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
262
  0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
263
  0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
264
  0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
265
  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
266
 //error:0.000020
267
};
268
static const uint8_t obmc16[256]={
269
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
270
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
271
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
272
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
273
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
274
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277
 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278
 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279
 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280
  8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
281
  8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
282
  4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
283
  4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
284
  0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
285
//error:0.000015
286
};
287
#else //64*cos
288
static const uint8_t obmc32[1024]={
289
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
290
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
291
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
292
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
293
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
294
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
295
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
296
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
297
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
298
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
299
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
300
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
301
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
302
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
303
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
304
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
305
  1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
306
  0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
307
  0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
308
  0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
309
  0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
310
  0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
311
  0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
312
  0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
313
  0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
314
  0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
315
  0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
316
  0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
317
  0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
318
  0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
319
  0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
320
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
321
//error:0.000022
322
};
323
static const uint8_t obmc16[256]={
324
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
325
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
326
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
327
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
328
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
329
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
330
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
331
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
332
  0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
333
  4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
334
  4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
335
  0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
336
  0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
337
  0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
338
  0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
339
  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
340
//error:0.000022
341
};
342
#endif /* 0 */
343

    
344
//linear *64
345
static const uint8_t obmc8[64]={
346
  4, 12, 20, 28, 28, 20, 12,  4,
347
 12, 36, 60, 84, 84, 60, 36, 12,
348
 20, 60,100,140,140,100, 60, 20,
349
 28, 84,140,196,196,140, 84, 28,
350
 28, 84,140,196,196,140, 84, 28,
351
 20, 60,100,140,140,100, 60, 20,
352
 12, 36, 60, 84, 84, 60, 36, 12,
353
  4, 12, 20, 28, 28, 20, 12,  4,
354
//error:0.000000
355
};
356

    
357
//linear *64
358
static const uint8_t obmc4[16]={
359
 16, 48, 48, 16,
360
 48,144,144, 48,
361
 48,144,144, 48,
362
 16, 48, 48, 16,
363
//error:0.000000
364
};
365

    
366
static const uint8_t *obmc_tab[4]={
367
    obmc32, obmc16, obmc8, obmc4
368
};
369

    
370
static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
371

    
372
typedef struct BlockNode{
373
    int16_t mx;
374
    int16_t my;
375
    uint8_t ref;
376
    uint8_t color[3];
377
    uint8_t type;
378
//#define TYPE_SPLIT    1
379
#define BLOCK_INTRA   1
380
#define BLOCK_OPT     2
381
//#define TYPE_NOCOLOR  4
382
    uint8_t level; //FIXME merge into type?
383
}BlockNode;
384

    
385
static const BlockNode null_block= { //FIXME add border maybe
386
    .color= {128,128,128},
387
    .mx= 0,
388
    .my= 0,
389
    .ref= 0,
390
    .type= 0,
391
    .level= 0,
392
};
393

    
394
#define LOG2_MB_SIZE 4
395
#define MB_SIZE (1<<LOG2_MB_SIZE)
396
#define ENCODER_EXTRA_BITS 4
397
#define HTAPS_MAX 8
398

    
399
typedef struct x_and_coeff{
400
    int16_t x;
401
    uint16_t coeff;
402
} x_and_coeff;
403

    
404
typedef struct SubBand{
405
    int level;
406
    int stride;
407
    int width;
408
    int height;
409
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
410
    DWTELEM *buf;
411
    IDWTELEM *ibuf;
412
    int buf_x_offset;
413
    int buf_y_offset;
414
    int stride_line; ///< Stride measured in lines, not pixels.
415
    x_and_coeff * x_coeff;
416
    struct SubBand *parent;
417
    uint8_t state[/*7*2*/ 7 + 512][32];
418
}SubBand;
419

    
420
typedef struct Plane{
421
    int width;
422
    int height;
423
    SubBand band[MAX_DECOMPOSITIONS][4];
424

    
425
    int htaps;
426
    int8_t hcoeff[HTAPS_MAX/2];
427
    int diag_mc;
428
    int fast_mc;
429

    
430
    int last_htaps;
431
    int8_t last_hcoeff[HTAPS_MAX/2];
432
    int last_diag_mc;
433
}Plane;
434

    
435
typedef struct SnowContext{
436
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
437

    
438
    AVCodecContext *avctx;
439
    RangeCoder c;
440
    DSPContext dsp;
441
    AVFrame new_picture;
442
    AVFrame input_picture;              ///< new_picture with the internal linesizes
443
    AVFrame current_picture;
444
    AVFrame last_picture[MAX_REF_FRAMES];
445
    uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446
    AVFrame mconly_picture;
447
//     uint8_t q_context[16];
448
    uint8_t header_state[32];
449
    uint8_t block_state[128 + 32*128];
450
    int keyframe;
451
    int always_reset;
452
    int version;
453
    int spatial_decomposition_type;
454
    int last_spatial_decomposition_type;
455
    int temporal_decomposition_type;
456
    int spatial_decomposition_count;
457
    int last_spatial_decomposition_count;
458
    int temporal_decomposition_count;
459
    int max_ref_frames;
460
    int ref_frames;
461
    int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
462
    uint32_t *ref_scores[MAX_REF_FRAMES];
463
    DWTELEM *spatial_dwt_buffer;
464
    IDWTELEM *spatial_idwt_buffer;
465
    int colorspace_type;
466
    int chroma_h_shift;
467
    int chroma_v_shift;
468
    int spatial_scalability;
469
    int qlog;
470
    int last_qlog;
471
    int lambda;
472
    int lambda2;
473
    int pass1_rc;
474
    int mv_scale;
475
    int last_mv_scale;
476
    int qbias;
477
    int last_qbias;
478
#define QBIAS_SHIFT 3
479
    int b_width;
480
    int b_height;
481
    int block_max_depth;
482
    int last_block_max_depth;
483
    Plane plane[MAX_PLANES];
484
    BlockNode *block;
485
#define ME_CACHE_SIZE 1024
486
    int me_cache[ME_CACHE_SIZE];
487
    int me_cache_generation;
488
    slice_buffer sb;
489

    
490
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
491
}SnowContext;
492

    
493
typedef struct {
494
    IDWTELEM *b0;
495
    IDWTELEM *b1;
496
    IDWTELEM *b2;
497
    IDWTELEM *b3;
498
    int y;
499
} dwt_compose_t;
500

    
501
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
502
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
503

    
504
static void iterative_me(SnowContext *s);
505

    
506
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
507
{
508
    int i;
509

    
510
    buf->base_buffer = base_buffer;
511
    buf->line_count = line_count;
512
    buf->line_width = line_width;
513
    buf->data_count = max_allocated_lines;
514
    buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
515
    buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
516

    
517
    for (i = 0; i < max_allocated_lines; i++)
518
    {
519
        buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
520
    }
521

    
522
    buf->data_stack_top = max_allocated_lines - 1;
523
}
524

    
525
static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
526
{
527
    int offset;
528
    IDWTELEM * buffer;
529

    
530
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
531

    
532
    assert(buf->data_stack_top >= 0);
533
//  assert(!buf->line[line]);
534
    if (buf->line[line])
535
        return buf->line[line];
536

    
537
    offset = buf->line_width * line;
538
    buffer = buf->data_stack[buf->data_stack_top];
539
    buf->data_stack_top--;
540
    buf->line[line] = buffer;
541

    
542
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
543

    
544
    return buffer;
545
}
546

    
547
static void slice_buffer_release(slice_buffer * buf, int line)
548
{
549
    int offset;
550
    IDWTELEM * buffer;
551

    
552
    assert(line >= 0 && line < buf->line_count);
553
    assert(buf->line[line]);
554

    
555
    offset = buf->line_width * line;
556
    buffer = buf->line[line];
557
    buf->data_stack_top++;
558
    buf->data_stack[buf->data_stack_top] = buffer;
559
    buf->line[line] = NULL;
560

    
561
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
562
}
563

    
564
static void slice_buffer_flush(slice_buffer * buf)
565
{
566
    int i;
567
    for (i = 0; i < buf->line_count; i++)
568
    {
569
        if (buf->line[i])
570
        {
571
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
572
            slice_buffer_release(buf, i);
573
        }
574
    }
575
}
576

    
577
static void slice_buffer_destroy(slice_buffer * buf)
578
{
579
    int i;
580
    slice_buffer_flush(buf);
581

    
582
    for (i = buf->data_count - 1; i >= 0; i--)
583
    {
584
        av_freep(&buf->data_stack[i]);
585
    }
586
    av_freep(&buf->data_stack);
587
    av_freep(&buf->line);
588
}
589

    
590
#ifdef __sgi
591
// Avoid a name clash on SGI IRIX
592
#undef qexp
593
#endif
594
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
595
static uint8_t qexp[QROOT];
596

    
597
static inline int mirror(int v, int m){
598
    while((unsigned)v > (unsigned)m){
599
        v=-v;
600
        if(v<0) v+= 2*m;
601
    }
602
    return v;
603
}
604

    
605
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
606
    int i;
607

    
608
    if(v){
609
        const int a= FFABS(v);
610
        const int e= av_log2(a);
611
#if 1
612
        const int el= FFMIN(e, 10);
613
        put_rac(c, state+0, 0);
614

    
615
        for(i=0; i<el; i++){
616
            put_rac(c, state+1+i, 1);  //1..10
617
        }
618
        for(; i<e; i++){
619
            put_rac(c, state+1+9, 1);  //1..10
620
        }
621
        put_rac(c, state+1+FFMIN(i,9), 0);
622

    
623
        for(i=e-1; i>=el; i--){
624
            put_rac(c, state+22+9, (a>>i)&1); //22..31
625
        }
626
        for(; i>=0; i--){
627
            put_rac(c, state+22+i, (a>>i)&1); //22..31
628
        }
629

    
630
        if(is_signed)
631
            put_rac(c, state+11 + el, v < 0); //11..21
632
#else
633

    
634
        put_rac(c, state+0, 0);
635
        if(e<=9){
636
            for(i=0; i<e; i++){
637
                put_rac(c, state+1+i, 1);  //1..10
638
            }
639
            put_rac(c, state+1+i, 0);
640

    
641
            for(i=e-1; i>=0; i--){
642
                put_rac(c, state+22+i, (a>>i)&1); //22..31
643
            }
644

    
645
            if(is_signed)
646
                put_rac(c, state+11 + e, v < 0); //11..21
647
        }else{
648
            for(i=0; i<e; i++){
649
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
650
            }
651
            put_rac(c, state+1+FFMIN(i,9), 0);
652

    
653
            for(i=e-1; i>=0; i--){
654
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
655
            }
656

    
657
            if(is_signed)
658
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
659
        }
660
#endif /* 1 */
661
    }else{
662
        put_rac(c, state+0, 1);
663
    }
664
}
665

    
666
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
667
    if(get_rac(c, state+0))
668
        return 0;
669
    else{
670
        int i, e, a;
671
        e= 0;
672
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
673
            e++;
674
        }
675

    
676
        a= 1;
677
        for(i=e-1; i>=0; i--){
678
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
679
        }
680

    
681
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
682
            return -a;
683
        else
684
            return a;
685
    }
686
}
687

    
688
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
689
    int i;
690
    int r= log2>=0 ? 1<<log2 : 1;
691

    
692
    assert(v>=0);
693
    assert(log2>=-4);
694

    
695
    while(v >= r){
696
        put_rac(c, state+4+log2, 1);
697
        v -= r;
698
        log2++;
699
        if(log2>0) r+=r;
700
    }
701
    put_rac(c, state+4+log2, 0);
702

    
703
    for(i=log2-1; i>=0; i--){
704
        put_rac(c, state+31-i, (v>>i)&1);
705
    }
706
}
707

    
708
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
709
    int i;
710
    int r= log2>=0 ? 1<<log2 : 1;
711
    int v=0;
712

    
713
    assert(log2>=-4);
714

    
715
    while(get_rac(c, state+4+log2)){
716
        v+= r;
717
        log2++;
718
        if(log2>0) r+=r;
719
    }
720

    
721
    for(i=log2-1; i>=0; i--){
722
        v+= get_rac(c, state+31-i)<<i;
723
    }
724

    
725
    return v;
726
}
727

    
728
static av_always_inline void
729
lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
730
     int dst_step, int src_step, int ref_step,
731
     int width, int mul, int add, int shift,
732
     int highpass, int inverse){
733
    const int mirror_left= !highpass;
734
    const int mirror_right= (width&1) ^ highpass;
735
    const int w= (width>>1) - 1 + (highpass & width);
736
    int i;
737

    
738
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
739
    if(mirror_left){
740
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
741
        dst += dst_step;
742
        src += src_step;
743
    }
744

    
745
    for(i=0; i<w; i++){
746
        dst[i*dst_step] =
747
            LIFT(src[i*src_step],
748
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
749
                 inverse);
750
    }
751

    
752
    if(mirror_right){
753
        dst[w*dst_step] =
754
            LIFT(src[w*src_step],
755
                 ((mul*2*ref[w*ref_step]+add)>>shift),
756
                 inverse);
757
    }
758
}
759

    
760
static av_always_inline void
761
inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
762
         int dst_step, int src_step, int ref_step,
763
         int width, int mul, int add, int shift,
764
         int highpass, int inverse){
765
    const int mirror_left= !highpass;
766
    const int mirror_right= (width&1) ^ highpass;
767
    const int w= (width>>1) - 1 + (highpass & width);
768
    int i;
769

    
770
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
771
    if(mirror_left){
772
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
773
        dst += dst_step;
774
        src += src_step;
775
    }
776

    
777
    for(i=0; i<w; i++){
778
        dst[i*dst_step] =
779
            LIFT(src[i*src_step],
780
                 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
781
                 inverse);
782
    }
783

    
784
    if(mirror_right){
785
        dst[w*dst_step] =
786
            LIFT(src[w*src_step],
787
                 ((mul*2*ref[w*ref_step]+add)>>shift),
788
                 inverse);
789
    }
790
}
791

    
792
#ifndef liftS
793
static av_always_inline void
794
liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
795
      int dst_step, int src_step, int ref_step,
796
      int width, int mul, int add, int shift,
797
      int highpass, int inverse){
798
    const int mirror_left= !highpass;
799
    const int mirror_right= (width&1) ^ highpass;
800
    const int w= (width>>1) - 1 + (highpass & width);
801
    int i;
802

    
803
    assert(shift == 4);
804
#define LIFTS(src, ref, inv) \
805
        ((inv) ? \
806
            (src) + (((ref) + 4*(src))>>shift): \
807
            -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
808
    if(mirror_left){
809
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
810
        dst += dst_step;
811
        src += src_step;
812
    }
813

    
814
    for(i=0; i<w; i++){
815
        dst[i*dst_step] =
816
            LIFTS(src[i*src_step],
817
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
818
                  inverse);
819
    }
820

    
821
    if(mirror_right){
822
        dst[w*dst_step] =
823
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
824
    }
825
}
826
static av_always_inline void
827
inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
828
          int dst_step, int src_step, int ref_step,
829
          int width, int mul, int add, int shift,
830
          int highpass, int inverse){
831
    const int mirror_left= !highpass;
832
    const int mirror_right= (width&1) ^ highpass;
833
    const int w= (width>>1) - 1 + (highpass & width);
834
    int i;
835

    
836
    assert(shift == 4);
837
#define LIFTS(src, ref, inv) \
838
    ((inv) ? \
839
        (src) + (((ref) + 4*(src))>>shift): \
840
        -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
841
    if(mirror_left){
842
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
843
        dst += dst_step;
844
        src += src_step;
845
    }
846

    
847
    for(i=0; i<w; i++){
848
        dst[i*dst_step] =
849
            LIFTS(src[i*src_step],
850
                  mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
851
                  inverse);
852
    }
853

    
854
    if(mirror_right){
855
        dst[w*dst_step] =
856
            LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
857
    }
858
}
859
#endif
860

    
861
static void horizontal_decompose53i(DWTELEM *b, int width){
862
    DWTELEM temp[width];
863
    const int width2= width>>1;
864
    int x;
865
    const int w2= (width+1)>>1;
866

    
867
    for(x=0; x<width2; x++){
868
        temp[x   ]= b[2*x    ];
869
        temp[x+w2]= b[2*x + 1];
870
    }
871
    if(width&1)
872
        temp[x   ]= b[2*x    ];
873
#if 0
874
    {
875
    int A1,A2,A3,A4;
876
    A2= temp[1       ];
877
    A4= temp[0       ];
878
    A1= temp[0+width2];
879
    A1 -= (A2 + A4)>>1;
880
    A4 += (A1 + 1)>>1;
881
    b[0+width2] = A1;
882
    b[0       ] = A4;
883
    for(x=1; x+1<width2; x+=2){
884
        A3= temp[x+width2];
885
        A4= temp[x+1     ];
886
        A3 -= (A2 + A4)>>1;
887
        A2 += (A1 + A3 + 2)>>2;
888
        b[x+width2] = A3;
889
        b[x       ] = A2;
890

891
        A1= temp[x+1+width2];
892
        A2= temp[x+2       ];
893
        A1 -= (A2 + A4)>>1;
894
        A4 += (A1 + A3 + 2)>>2;
895
        b[x+1+width2] = A1;
896
        b[x+1       ] = A4;
897
    }
898
    A3= temp[width-1];
899
    A3 -= A2;
900
    A2 += (A1 + A3 + 2)>>2;
901
    b[width -1] = A3;
902
    b[width2-1] = A2;
903
    }
904
#else
905
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
906
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
907
#endif
908
}
909

    
910
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
911
    int i;
912

    
913
    for(i=0; i<width; i++){
914
        b1[i] -= (b0[i] + b2[i])>>1;
915
    }
916
}
917

    
918
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
919
    int i;
920

    
921
    for(i=0; i<width; i++){
922
        b1[i] += (b0[i] + b2[i] + 2)>>2;
923
    }
924
}
925

    
926
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
927
    int y;
928
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
929
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
930

    
931
    for(y=-2; y<height; y+=2){
932
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
933
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
934

    
935
{START_TIMER
936
        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
937
        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
938
STOP_TIMER("horizontal_decompose53i")}
939

    
940
{START_TIMER
941
        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
942
        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
943
STOP_TIMER("vertical_decompose53i*")}
944

    
945
        b0=b2;
946
        b1=b3;
947
    }
948
}
949

    
950
static void horizontal_decompose97i(DWTELEM *b, int width){
951
    DWTELEM temp[width];
952
    const int w2= (width+1)>>1;
953

    
954
    lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
955
    liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
956
    lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
957
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
958
}
959

    
960

    
961
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
962
    int i;
963

    
964
    for(i=0; i<width; i++){
965
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
966
    }
967
}
968

    
969
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
970
    int i;
971

    
972
    for(i=0; i<width; i++){
973
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
974
    }
975
}
976

    
977
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
978
    int i;
979

    
980
    for(i=0; i<width; i++){
981
#ifdef liftS
982
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
983
#else
984
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
985
#endif
986
    }
987
}
988

    
989
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
990
    int i;
991

    
992
    for(i=0; i<width; i++){
993
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
994
    }
995
}
996

    
997
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
998
    int y;
999
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1000
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1001
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1002
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1003

    
1004
    for(y=-4; y<height; y+=2){
1005
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1006
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1007

    
1008
{START_TIMER
1009
        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1010
        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1011
if(width>400){
1012
STOP_TIMER("horizontal_decompose97i")
1013
}}
1014

    
1015
{START_TIMER
1016
        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1017
        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1018
        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1019
        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1020

    
1021
if(width>400){
1022
STOP_TIMER("vertical_decompose97i")
1023
}}
1024

    
1025
        b0=b2;
1026
        b1=b3;
1027
        b2=b4;
1028
        b3=b5;
1029
    }
1030
}
1031

    
1032
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1033
    int level;
1034

    
1035
    for(level=0; level<decomposition_count; level++){
1036
        switch(type){
1037
        case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1038
        case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1039
        }
1040
    }
1041
}
1042

    
1043
static void horizontal_compose53i(IDWTELEM *b, int width){
1044
    IDWTELEM temp[width];
1045
    const int width2= width>>1;
1046
    const int w2= (width+1)>>1;
1047
    int x;
1048

    
1049
#if 0
1050
    int A1,A2,A3,A4;
1051
    A2= temp[1       ];
1052
    A4= temp[0       ];
1053
    A1= temp[0+width2];
1054
    A1 -= (A2 + A4)>>1;
1055
    A4 += (A1 + 1)>>1;
1056
    b[0+width2] = A1;
1057
    b[0       ] = A4;
1058
    for(x=1; x+1<width2; x+=2){
1059
        A3= temp[x+width2];
1060
        A4= temp[x+1     ];
1061
        A3 -= (A2 + A4)>>1;
1062
        A2 += (A1 + A3 + 2)>>2;
1063
        b[x+width2] = A3;
1064
        b[x       ] = A2;
1065

1066
        A1= temp[x+1+width2];
1067
        A2= temp[x+2       ];
1068
        A1 -= (A2 + A4)>>1;
1069
        A4 += (A1 + A3 + 2)>>2;
1070
        b[x+1+width2] = A1;
1071
        b[x+1       ] = A4;
1072
    }
1073
    A3= temp[width-1];
1074
    A3 -= A2;
1075
    A2 += (A1 + A3 + 2)>>2;
1076
    b[width -1] = A3;
1077
    b[width2-1] = A2;
1078
#else
1079
    inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1080
    inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1081
#endif /* 0 */
1082
    for(x=0; x<width2; x++){
1083
        b[2*x    ]= temp[x   ];
1084
        b[2*x + 1]= temp[x+w2];
1085
    }
1086
    if(width&1)
1087
        b[2*x    ]= temp[x   ];
1088
}
1089

    
1090
static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1091
    int i;
1092

    
1093
    for(i=0; i<width; i++){
1094
        b1[i] += (b0[i] + b2[i])>>1;
1095
    }
1096
}
1097

    
1098
static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1099
    int i;
1100

    
1101
    for(i=0; i<width; i++){
1102
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1103
    }
1104
}
1105

    
1106
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1107
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1108
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1109
    cs->y = -1;
1110
}
1111

    
1112
static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1113
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1114
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1115
    cs->y = -1;
1116
}
1117

    
1118
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1119
    int y= cs->y;
1120

    
1121
    IDWTELEM *b0= cs->b0;
1122
    IDWTELEM *b1= cs->b1;
1123
    IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1124
    IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1125

    
1126
{START_TIMER
1127
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1128
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1129
STOP_TIMER("vertical_compose53i*")}
1130

    
1131
{START_TIMER
1132
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1133
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1134
STOP_TIMER("horizontal_compose53i")}
1135

    
1136
    cs->b0 = b2;
1137
    cs->b1 = b3;
1138
    cs->y += 2;
1139
}
1140

    
1141
static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1142
    int y= cs->y;
1143
    IDWTELEM *b0= cs->b0;
1144
    IDWTELEM *b1= cs->b1;
1145
    IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1146
    IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1147

    
1148
{START_TIMER
1149
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1150
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1151
STOP_TIMER("vertical_compose53i*")}
1152

    
1153
{START_TIMER
1154
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1155
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1156
STOP_TIMER("horizontal_compose53i")}
1157

    
1158
    cs->b0 = b2;
1159
    cs->b1 = b3;
1160
    cs->y += 2;
1161
}
1162

    
1163
static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1164
    dwt_compose_t cs;
1165
    spatial_compose53i_init(&cs, buffer, height, stride);
1166
    while(cs.y <= height)
1167
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1168
}
1169

    
1170

    
1171
void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1172
    IDWTELEM temp[width];
1173
    const int w2= (width+1)>>1;
1174

    
1175
    inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1176
    inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1177
    inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
1178
    inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
1179
}
1180

    
1181
static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1182
    int i;
1183

    
1184
    for(i=0; i<width; i++){
1185
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1186
    }
1187
}
1188

    
1189
static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1190
    int i;
1191

    
1192
    for(i=0; i<width; i++){
1193
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1194
    }
1195
}
1196

    
1197
static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1198
    int i;
1199

    
1200
    for(i=0; i<width; i++){
1201
#ifdef liftS
1202
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1203
#else
1204
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1205
#endif
1206
    }
1207
}
1208

    
1209
static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1210
    int i;
1211

    
1212
    for(i=0; i<width; i++){
1213
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1214
    }
1215
}
1216

    
1217
void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1218
    int i;
1219

    
1220
    for(i=0; i<width; i++){
1221
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1222
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1223
#ifdef liftS
1224
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1225
#else
1226
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1227
#endif
1228
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1229
    }
1230
}
1231

    
1232
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1233
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1234
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1235
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1236
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1237
    cs->y = -3;
1238
}
1239

    
1240
static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1241
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1242
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1243
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1244
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1245
    cs->y = -3;
1246
}
1247

    
1248
static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1249
    int y = cs->y;
1250

    
1251
    IDWTELEM *b0= cs->b0;
1252
    IDWTELEM *b1= cs->b1;
1253
    IDWTELEM *b2= cs->b2;
1254
    IDWTELEM *b3= cs->b3;
1255
    IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1256
    IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1257

    
1258
{START_TIMER
1259
    if(y>0 && y+4<height){
1260
        dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1261
    }else{
1262
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1263
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1264
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1265
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1266
    }
1267
if(width>400){
1268
STOP_TIMER("vertical_compose97i")}}
1269

    
1270
{START_TIMER
1271
        if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1272
        if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1273
if(width>400 && y+0<(unsigned)height){
1274
STOP_TIMER("horizontal_compose97i")}}
1275

    
1276
    cs->b0=b2;
1277
    cs->b1=b3;
1278
    cs->b2=b4;
1279
    cs->b3=b5;
1280
    cs->y += 2;
1281
}
1282

    
1283
static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1284
    int y = cs->y;
1285
    IDWTELEM *b0= cs->b0;
1286
    IDWTELEM *b1= cs->b1;
1287
    IDWTELEM *b2= cs->b2;
1288
    IDWTELEM *b3= cs->b3;
1289
    IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1290
    IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1291

    
1292
{START_TIMER
1293
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1294
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1295
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1296
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1297
if(width>400){
1298
STOP_TIMER("vertical_compose97i")}}
1299

    
1300
{START_TIMER
1301
        if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1302
        if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1303
if(width>400 && b0 <= b2){
1304
STOP_TIMER("horizontal_compose97i")}}
1305

    
1306
    cs->b0=b2;
1307
    cs->b1=b3;
1308
    cs->b2=b4;
1309
    cs->b3=b5;
1310
    cs->y += 2;
1311
}
1312

    
1313
static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1314
    dwt_compose_t cs;
1315
    spatial_compose97i_init(&cs, buffer, height, stride);
1316
    while(cs.y <= height)
1317
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1318
}
1319

    
1320
static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1321
    int level;
1322
    for(level=decomposition_count-1; level>=0; level--){
1323
        switch(type){
1324
        case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1325
        case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1326
        }
1327
    }
1328
}
1329

    
1330
static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1331
    int level;
1332
    for(level=decomposition_count-1; level>=0; level--){
1333
        switch(type){
1334
        case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1335
        case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1336
        }
1337
    }
1338
}
1339

    
1340
static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1341
    const int support = type==1 ? 3 : 5;
1342
    int level;
1343
    if(type==2) return;
1344

    
1345
    for(level=decomposition_count-1; level>=0; level--){
1346
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1347
            switch(type){
1348
            case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1349
                    break;
1350
            case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1351
                    break;
1352
            }
1353
        }
1354
    }
1355
}
1356

    
1357
static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1358
    const int support = type==1 ? 3 : 5;
1359
    int level;
1360
    if(type==2) return;
1361

    
1362
    for(level=decomposition_count-1; level>=0; level--){
1363
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1364
            switch(type){
1365
            case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1366
                    break;
1367
            case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1368
                    break;
1369
            }
1370
        }
1371
    }
1372
}
1373

    
1374
static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1375
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1376
        int y;
1377
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1378
        for(y=0; y<height; y+=4)
1379
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1380
}
1381

    
1382
static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1383
    const int w= b->width;
1384
    const int h= b->height;
1385
    int x, y;
1386

    
1387
    if(1){
1388
        int run=0;
1389
        int runs[w*h];
1390
        int run_index=0;
1391
        int max_index;
1392

    
1393
        for(y=0; y<h; y++){
1394
            for(x=0; x<w; x++){
1395
                int v, p=0;
1396
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1397
                v= src[x + y*stride];
1398

    
1399
                if(y){
1400
                    t= src[x + (y-1)*stride];
1401
                    if(x){
1402
                        lt= src[x - 1 + (y-1)*stride];
1403
                    }
1404
                    if(x + 1 < w){
1405
                        rt= src[x + 1 + (y-1)*stride];
1406
                    }
1407
                }
1408
                if(x){
1409
                    l= src[x - 1 + y*stride];
1410
                    /*if(x > 1){
1411
                        if(orientation==1) ll= src[y + (x-2)*stride];
1412
                        else               ll= src[x - 2 + y*stride];
1413
                    }*/
1414
                }
1415
                if(parent){
1416
                    int px= x>>1;
1417
                    int py= y>>1;
1418
                    if(px<b->parent->width && py<b->parent->height)
1419
                        p= parent[px + py*2*stride];
1420
                }
1421
                if(!(/*ll|*/l|lt|t|rt|p)){
1422
                    if(v){
1423
                        runs[run_index++]= run;
1424
                        run=0;
1425
                    }else{
1426
                        run++;
1427
                    }
1428
                }
1429
            }
1430
        }
1431
        max_index= run_index;
1432
        runs[run_index++]= run;
1433
        run_index=0;
1434
        run= runs[run_index++];
1435

    
1436
        put_symbol2(&s->c, b->state[30], max_index, 0);
1437
        if(run_index <= max_index)
1438
            put_symbol2(&s->c, b->state[1], run, 3);
1439

    
1440
        for(y=0; y<h; y++){
1441
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1442
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1443
                return -1;
1444
            }
1445
            for(x=0; x<w; x++){
1446
                int v, p=0;
1447
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1448
                v= src[x + y*stride];
1449

    
1450
                if(y){
1451
                    t= src[x + (y-1)*stride];
1452
                    if(x){
1453
                        lt= src[x - 1 + (y-1)*stride];
1454
                    }
1455
                    if(x + 1 < w){
1456
                        rt= src[x + 1 + (y-1)*stride];
1457
                    }
1458
                }
1459
                if(x){
1460
                    l= src[x - 1 + y*stride];
1461
                    /*if(x > 1){
1462
                        if(orientation==1) ll= src[y + (x-2)*stride];
1463
                        else               ll= src[x - 2 + y*stride];
1464
                    }*/
1465
                }
1466
                if(parent){
1467
                    int px= x>>1;
1468
                    int py= y>>1;
1469
                    if(px<b->parent->width && py<b->parent->height)
1470
                        p= parent[px + py*2*stride];
1471
                }
1472
                if(/*ll|*/l|lt|t|rt|p){
1473
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1474

    
1475
                    put_rac(&s->c, &b->state[0][context], !!v);
1476
                }else{
1477
                    if(!run){
1478
                        run= runs[run_index++];
1479

    
1480
                        if(run_index <= max_index)
1481
                            put_symbol2(&s->c, b->state[1], run, 3);
1482
                        assert(v);
1483
                    }else{
1484
                        run--;
1485
                        assert(!v);
1486
                    }
1487
                }
1488
                if(v){
1489
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1490
                    int l2= 2*FFABS(l) + (l<0);
1491
                    int t2= 2*FFABS(t) + (t<0);
1492

    
1493
                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1494
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1495
                }
1496
            }
1497
        }
1498
    }
1499
    return 0;
1500
}
1501

    
1502
static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1503
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1504
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1505
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1506
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1507
}
1508

    
1509
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1510
    const int w= b->width;
1511
    const int h= b->height;
1512
    int x,y;
1513

    
1514
    if(1){
1515
        int run, runs;
1516
        x_and_coeff *xc= b->x_coeff;
1517
        x_and_coeff *prev_xc= NULL;
1518
        x_and_coeff *prev2_xc= xc;
1519
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1520
        x_and_coeff *prev_parent_xc= parent_xc;
1521

    
1522
        runs= get_symbol2(&s->c, b->state[30], 0);
1523
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1524
        else           run= INT_MAX;
1525

    
1526
        for(y=0; y<h; y++){
1527
            int v=0;
1528
            int lt=0, t=0, rt=0;
1529

    
1530
            if(y && prev_xc->x == 0){
1531
                rt= prev_xc->coeff;
1532
            }
1533
            for(x=0; x<w; x++){
1534
                int p=0;
1535
                const int l= v;
1536

    
1537
                lt= t; t= rt;
1538

    
1539
                if(y){
1540
                    if(prev_xc->x <= x)
1541
                        prev_xc++;
1542
                    if(prev_xc->x == x + 1)
1543
                        rt= prev_xc->coeff;
1544
                    else
1545
                        rt=0;
1546
                }
1547
                if(parent_xc){
1548
                    if(x>>1 > parent_xc->x){
1549
                        parent_xc++;
1550
                    }
1551
                    if(x>>1 == parent_xc->x){
1552
                        p= parent_xc->coeff;
1553
                    }
1554
                }
1555
                if(/*ll|*/l|lt|t|rt|p){
1556
                    int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1557

    
1558
                    v=get_rac(&s->c, &b->state[0][context]);
1559
                    if(v){
1560
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1561
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1562

    
1563
                        xc->x=x;
1564
                        (xc++)->coeff= v;
1565
                    }
1566
                }else{
1567
                    if(!run){
1568
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1569
                        else           run= INT_MAX;
1570
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1571
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1572

    
1573
                        xc->x=x;
1574
                        (xc++)->coeff= v;
1575
                    }else{
1576
                        int max_run;
1577
                        run--;
1578
                        v=0;
1579

    
1580
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1581
                        else  max_run= FFMIN(run, w-x-1);
1582
                        if(parent_xc)
1583
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1584
                        x+= max_run;
1585
                        run-= max_run;
1586
                    }
1587
                }
1588
            }
1589
            (xc++)->x= w+1; //end marker
1590
            prev_xc= prev2_xc;
1591
            prev2_xc= xc;
1592

    
1593
            if(parent_xc){
1594
                if(y&1){
1595
                    while(parent_xc->x != parent->width+1)
1596
                        parent_xc++;
1597
                    parent_xc++;
1598
                    prev_parent_xc= parent_xc;
1599
                }else{
1600
                    parent_xc= prev_parent_xc;
1601
                }
1602
            }
1603
        }
1604

    
1605
        (xc++)->x= w+1; //end marker
1606
    }
1607
}
1608

    
1609
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1610
    const int w= b->width;
1611
    int y;
1612
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1613
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1614
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1615
    int new_index = 0;
1616

    
1617
    START_TIMER
1618

    
1619
    if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1620
        qadd= 0;
1621
        qmul= 1<<QEXPSHIFT;
1622
    }
1623

    
1624
    /* If we are on the second or later slice, restore our index. */
1625
    if (start_y != 0)
1626
        new_index = save_state[0];
1627

    
1628

    
1629
    for(y=start_y; y<h; y++){
1630
        int x = 0;
1631
        int v;
1632
        IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1633
        memset(line, 0, b->width*sizeof(IDWTELEM));
1634
        v = b->x_coeff[new_index].coeff;
1635
        x = b->x_coeff[new_index++].x;
1636
        while(x < w)
1637
        {
1638
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1639
            register int u= -(v&1);
1640
            line[x] = (t^u) - u;
1641

    
1642
            v = b->x_coeff[new_index].coeff;
1643
            x = b->x_coeff[new_index++].x;
1644
        }
1645
    }
1646
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1647
        STOP_TIMER("decode_subband")
1648
    }
1649

    
1650
    /* Save our variables for the next slice. */
1651
    save_state[0] = new_index;
1652

    
1653
    return;
1654
}
1655

    
1656
static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1657
    int plane_index, level, orientation;
1658

    
1659
    for(plane_index=0; plane_index<3; plane_index++){
1660
        for(level=0; level<MAX_DECOMPOSITIONS; level++){
1661
            for(orientation=level ? 1:0; orientation<4; orientation++){
1662
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1663
            }
1664
        }
1665
    }
1666
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1667
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1668
}
1669

    
1670
static int alloc_blocks(SnowContext *s){
1671
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1672
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1673

    
1674
    s->b_width = w;
1675
    s->b_height= h;
1676

    
1677
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1678
    return 0;
1679
}
1680

    
1681
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1682
    uint8_t *bytestream= d->bytestream;
1683
    uint8_t *bytestream_start= d->bytestream_start;
1684
    *d= *s;
1685
    d->bytestream= bytestream;
1686
    d->bytestream_start= bytestream_start;
1687
}
1688

    
1689
//near copy & paste from dsputil, FIXME
1690
static int pix_sum(uint8_t * pix, int line_size, int w)
1691
{
1692
    int s, i, j;
1693

    
1694
    s = 0;
1695
    for (i = 0; i < w; i++) {
1696
        for (j = 0; j < w; j++) {
1697
            s += pix[0];
1698
            pix ++;
1699
        }
1700
        pix += line_size - w;
1701
    }
1702
    return s;
1703
}
1704

    
1705
//near copy & paste from dsputil, FIXME
1706
static int pix_norm1(uint8_t * pix, int line_size, int w)
1707
{
1708
    int s, i, j;
1709
    uint32_t *sq = ff_squareTbl + 256;
1710

    
1711
    s = 0;
1712
    for (i = 0; i < w; i++) {
1713
        for (j = 0; j < w; j ++) {
1714
            s += sq[pix[0]];
1715
            pix ++;
1716
        }
1717
        pix += line_size - w;
1718
    }
1719
    return s;
1720
}
1721

    
1722
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1723
    const int w= s->b_width << s->block_max_depth;
1724
    const int rem_depth= s->block_max_depth - level;
1725
    const int index= (x + y*w) << rem_depth;
1726
    const int block_w= 1<<rem_depth;
1727
    BlockNode block;
1728
    int i,j;
1729

    
1730
    block.color[0]= l;
1731
    block.color[1]= cb;
1732
    block.color[2]= cr;
1733
    block.mx= mx;
1734
    block.my= my;
1735
    block.ref= ref;
1736
    block.type= type;
1737
    block.level= level;
1738

    
1739
    for(j=0; j<block_w; j++){
1740
        for(i=0; i<block_w; i++){
1741
            s->block[index + i + j*w]= block;
1742
        }
1743
    }
1744
}
1745

    
1746
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1747
    const int offset[3]= {
1748
          y*c->  stride + x,
1749
        ((y*c->uvstride + x)>>1),
1750
        ((y*c->uvstride + x)>>1),
1751
    };
1752
    int i;
1753
    for(i=0; i<3; i++){
1754
        c->src[0][i]= src [i];
1755
        c->ref[0][i]= ref [i] + offset[i];
1756
    }
1757
    assert(!ref_index);
1758
}
1759

    
1760
static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1761
                           const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1762
    if(s->ref_frames == 1){
1763
        *mx = mid_pred(left->mx, top->mx, tr->mx);
1764
        *my = mid_pred(left->my, top->my, tr->my);
1765
    }else{
1766
        const int *scale = scale_mv_ref[ref];
1767
        *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1768
                       (top ->mx * scale[top ->ref] + 128) >>8,
1769
                       (tr  ->mx * scale[tr  ->ref] + 128) >>8);
1770
        *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1771
                       (top ->my * scale[top ->ref] + 128) >>8,
1772
                       (tr  ->my * scale[tr  ->ref] + 128) >>8);
1773
    }
1774
}
1775

    
1776
//FIXME copy&paste
1777
#define P_LEFT P[1]
1778
#define P_TOP P[2]
1779
#define P_TOPRIGHT P[3]
1780
#define P_MEDIAN P[4]
1781
#define P_MV1 P[9]
1782
#define FLAG_QPEL   1 //must be 1
1783

    
1784
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1785
    uint8_t p_buffer[1024];
1786
    uint8_t i_buffer[1024];
1787
    uint8_t p_state[sizeof(s->block_state)];
1788
    uint8_t i_state[sizeof(s->block_state)];
1789
    RangeCoder pc, ic;
1790
    uint8_t *pbbak= s->c.bytestream;
1791
    uint8_t *pbbak_start= s->c.bytestream_start;
1792
    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1793
    const int w= s->b_width  << s->block_max_depth;
1794
    const int h= s->b_height << s->block_max_depth;
1795
    const int rem_depth= s->block_max_depth - level;
1796
    const int index= (x + y*w) << rem_depth;
1797
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1798
    int trx= (x+1)<<rem_depth;
1799
    int try= (y+1)<<rem_depth;
1800
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1801
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1802
    const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1803
    const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1804
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1805
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1806
    int pl = left->color[0];
1807
    int pcb= left->color[1];
1808
    int pcr= left->color[2];
1809
    int pmx, pmy;
1810
    int mx=0, my=0;
1811
    int l,cr,cb;
1812
    const int stride= s->current_picture.linesize[0];
1813
    const int uvstride= s->current_picture.linesize[1];
1814
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
1815
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1816
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1817
    int P[10][2];
1818
    int16_t last_mv[3][2];
1819
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1820
    const int shift= 1+qpel;
1821
    MotionEstContext *c= &s->m.me;
1822
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1823
    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1824
    int my_context= av_log2(2*FFABS(left->my - top->my));
1825
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1826
    int ref, best_ref, ref_score, ref_mx, ref_my;
1827

    
1828
    assert(sizeof(s->block_state) >= 256);
1829
    if(s->keyframe){
1830
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1831
        return 0;
1832
    }
1833

    
1834
//    clip predictors / edge ?
1835

    
1836
    P_LEFT[0]= left->mx;
1837
    P_LEFT[1]= left->my;
1838
    P_TOP [0]= top->mx;
1839
    P_TOP [1]= top->my;
1840
    P_TOPRIGHT[0]= tr->mx;
1841
    P_TOPRIGHT[1]= tr->my;
1842

    
1843
    last_mv[0][0]= s->block[index].mx;
1844
    last_mv[0][1]= s->block[index].my;
1845
    last_mv[1][0]= right->mx;
1846
    last_mv[1][1]= right->my;
1847
    last_mv[2][0]= bottom->mx;
1848
    last_mv[2][1]= bottom->my;
1849

    
1850
    s->m.mb_stride=2;
1851
    s->m.mb_x=
1852
    s->m.mb_y= 0;
1853
    c->skip= 0;
1854

    
1855
    assert(c->  stride ==   stride);
1856
    assert(c->uvstride == uvstride);
1857

    
1858
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1859
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1860
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1861
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1862

    
1863
    c->xmin = - x*block_w - 16+2;
1864
    c->ymin = - y*block_w - 16+2;
1865
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1866
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1867

    
1868
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1869
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
1870
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1871
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1872
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1873
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1874
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1875

    
1876
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1877
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1878

    
1879
    if (!y) {
1880
        c->pred_x= P_LEFT[0];
1881
        c->pred_y= P_LEFT[1];
1882
    } else {
1883
        c->pred_x = P_MEDIAN[0];
1884
        c->pred_y = P_MEDIAN[1];
1885
    }
1886

    
1887
    score= INT_MAX;
1888
    best_ref= 0;
1889
    for(ref=0; ref<s->ref_frames; ref++){
1890
        init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1891

    
1892
        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1893
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1894

    
1895
        assert(ref_mx >= c->xmin);
1896
        assert(ref_mx <= c->xmax);
1897
        assert(ref_my >= c->ymin);
1898
        assert(ref_my <= c->ymax);
1899

    
1900
        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1901
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1902
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1903
        if(s->ref_mvs[ref]){
1904
            s->ref_mvs[ref][index][0]= ref_mx;
1905
            s->ref_mvs[ref][index][1]= ref_my;
1906
            s->ref_scores[ref][index]= ref_score;
1907
        }
1908
        if(score > ref_score){
1909
            score= ref_score;
1910
            best_ref= ref;
1911
            mx= ref_mx;
1912
            my= ref_my;
1913
        }
1914
    }
1915
    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1916

    
1917
  //  subpel search
1918
    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1919
    pc= s->c;
1920
    pc.bytestream_start=
1921
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1922
    memcpy(p_state, s->block_state, sizeof(s->block_state));
1923

    
1924
    if(level!=s->block_max_depth)
1925
        put_rac(&pc, &p_state[4 + s_context], 1);
1926
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1927
    if(s->ref_frames > 1)
1928
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1929
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1930
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1931
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1932
    p_len= pc.bytestream - pc.bytestream_start;
1933
    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1934

    
1935
    block_s= block_w*block_w;
1936
    sum = pix_sum(current_data[0], stride, block_w);
1937
    l= (sum + block_s/2)/block_s;
1938
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1939

    
1940
    block_s= block_w*block_w>>2;
1941
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
1942
    cb= (sum + block_s/2)/block_s;
1943
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1944
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
1945
    cr= (sum + block_s/2)/block_s;
1946
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1947

    
1948
    ic= s->c;
1949
    ic.bytestream_start=
1950
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1951
    memcpy(i_state, s->block_state, sizeof(s->block_state));
1952
    if(level!=s->block_max_depth)
1953
        put_rac(&ic, &i_state[4 + s_context], 1);
1954
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1955
    put_symbol(&ic, &i_state[32],  l-pl , 1);
1956
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
1957
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
1958
    i_len= ic.bytestream - ic.bytestream_start;
1959
    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1960

    
1961
//    assert(score==256*256*256*64-1);
1962
    assert(iscore < 255*255*256 + s->lambda2*10);
1963
    assert(iscore >= 0);
1964
    assert(l>=0 && l<=255);
1965
    assert(pl>=0 && pl<=255);
1966

    
1967
    if(level==0){
1968
        int varc= iscore >> 8;
1969
        int vard= score >> 8;
1970
        if (vard <= 64 || vard < varc)
1971
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1972
        else
1973
            c->scene_change_score+= s->m.qscale;
1974
    }
1975

    
1976
    if(level!=s->block_max_depth){
1977
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
1978
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1979
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1980
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1981
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1982
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1983

    
1984
        if(score2 < score && score2 < iscore)
1985
            return score2;
1986
    }
1987

    
1988
    if(iscore < score){
1989
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1990
        memcpy(pbbak, i_buffer, i_len);
1991
        s->c= ic;
1992
        s->c.bytestream_start= pbbak_start;
1993
        s->c.bytestream= pbbak + i_len;
1994
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1995
        memcpy(s->block_state, i_state, sizeof(s->block_state));
1996
        return iscore;
1997
    }else{
1998
        memcpy(pbbak, p_buffer, p_len);
1999
        s->c= pc;
2000
        s->c.bytestream_start= pbbak_start;
2001
        s->c.bytestream= pbbak + p_len;
2002
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2003
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2004
        return score;
2005
    }
2006
}
2007

    
2008
static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2009
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2010
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2011
    }else{
2012
        return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2013
    }
2014
}
2015

    
2016
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2017
    const int w= s->b_width  << s->block_max_depth;
2018
    const int rem_depth= s->block_max_depth - level;
2019
    const int index= (x + y*w) << rem_depth;
2020
    int trx= (x+1)<<rem_depth;
2021
    BlockNode *b= &s->block[index];
2022
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2023
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2024
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2025
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2026
    int pl = left->color[0];
2027
    int pcb= left->color[1];
2028
    int pcr= left->color[2];
2029
    int pmx, pmy;
2030
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2031
    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2032
    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2033
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2034

    
2035
    if(s->keyframe){
2036
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2037
        return;
2038
    }
2039

    
2040
    if(level!=s->block_max_depth){
2041
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2042
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
2043
        }else{
2044
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
2045
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2046
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2047
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2048
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2049
            return;
2050
        }
2051
    }
2052
    if(b->type & BLOCK_INTRA){
2053
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2054
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2055
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2056
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2057
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2058
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2059
    }else{
2060
        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2061
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2062
        if(s->ref_frames > 1)
2063
            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2064
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2065
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2066
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2067
    }
2068
}
2069

    
2070
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2071
    const int w= s->b_width << s->block_max_depth;
2072
    const int rem_depth= s->block_max_depth - level;
2073
    const int index= (x + y*w) << rem_depth;
2074
    int trx= (x+1)<<rem_depth;
2075
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2076
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
2077
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2078
    const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2079
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2080

    
2081
    if(s->keyframe){
2082
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2083
        return;
2084
    }
2085

    
2086
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2087
        int type, mx, my;
2088
        int l = left->color[0];
2089
        int cb= left->color[1];
2090
        int cr= left->color[2];
2091
        int ref = 0;
2092
        int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2093
        int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2094
        int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2095

    
2096
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2097

    
2098
        if(type){
2099
            pred_mv(s, &mx, &my, 0, left, top, tr);
2100
            l += get_symbol(&s->c, &s->block_state[32], 1);
2101
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2102
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2103
        }else{
2104
            if(s->ref_frames > 1)
2105
                ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2106
            pred_mv(s, &mx, &my, ref, left, top, tr);
2107
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2108
            my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2109
        }
2110
        set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2111
    }else{
2112
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2113
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2114
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2115
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2116
    }
2117
}
2118

    
2119
static void encode_blocks(SnowContext *s, int search){
2120
    int x, y;
2121
    int w= s->b_width;
2122
    int h= s->b_height;
2123

    
2124
    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2125
        iterative_me(s);
2126

    
2127
    for(y=0; y<h; y++){
2128
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2129
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2130
            return;
2131
        }
2132
        for(x=0; x<w; x++){
2133
            if(s->avctx->me_method == ME_ITER || !search)
2134
                encode_q_branch2(s, 0, x, y);
2135
            else
2136
                encode_q_branch (s, 0, x, y);
2137
        }
2138
    }
2139
}
2140

    
2141
static void decode_blocks(SnowContext *s){
2142
    int x, y;
2143
    int w= s->b_width;
2144
    int h= s->b_height;
2145

    
2146
    for(y=0; y<h; y++){
2147
        for(x=0; x<w; x++){
2148
            decode_q_branch(s, 0, x, y);
2149
        }
2150
    }
2151
}
2152

    
2153
static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2154
    const static uint8_t weight[64]={
2155
    8,7,6,5,4,3,2,1,
2156
    7,7,0,0,0,0,0,1,
2157
    6,0,6,0,0,0,2,0,
2158
    5,0,0,5,0,3,0,0,
2159
    4,0,0,0,4,0,0,0,
2160
    3,0,0,5,0,3,0,0,
2161
    2,0,6,0,0,0,2,0,
2162
    1,7,0,0,0,0,0,1,
2163
    };
2164

    
2165
    const static uint8_t brane[256]={
2166
    0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2167
    0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2168
    0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2169
    0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2170
    0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2171
    0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2172
    0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2173
    0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2174
    0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2175
    0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2176
    0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2177
    0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2178
    0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2179
    0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2180
    0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2181
    0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2182
    };
2183

    
2184
    const static uint8_t needs[16]={
2185
    0,1,0,0,
2186
    2,4,2,0,
2187
    0,1,0,0,
2188
    15
2189
    };
2190

    
2191
    int x, y, b, r, l;
2192
    int16_t tmpIt   [64*(32+HTAPS_MAX)];
2193
    uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2194
    int16_t *tmpI= tmpIt;
2195
    uint8_t *tmp2= tmp2t[0];
2196
    const uint8_t *hpel[11];
2197
START_TIMER
2198
    assert(dx<16 && dy<16);
2199
    r= brane[dx + 16*dy]&15;
2200
    l= brane[dx + 16*dy]>>4;
2201

    
2202
    b= needs[l] | needs[r];
2203
    if(p && !p->diag_mc)
2204
        b= 15;
2205

    
2206
    if(b&5){
2207
        for(y=0; y < b_h+HTAPS_MAX-1; y++){
2208
            for(x=0; x < b_w; x++){
2209
                int a_1=src[x + HTAPS_MAX/2-4];
2210
                int a0= src[x + HTAPS_MAX/2-3];
2211
                int a1= src[x + HTAPS_MAX/2-2];
2212
                int a2= src[x + HTAPS_MAX/2-1];
2213
                int a3= src[x + HTAPS_MAX/2+0];
2214
                int a4= src[x + HTAPS_MAX/2+1];
2215
                int a5= src[x + HTAPS_MAX/2+2];
2216
                int a6= src[x + HTAPS_MAX/2+3];
2217
                int am=0;
2218
                if(!p || p->fast_mc){
2219
                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2220
                    tmpI[x]= am;
2221
                    am= (am+16)>>5;
2222
                }else{
2223
                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2224
                    tmpI[x]= am;
2225
                    am= (am+32)>>6;
2226
                }
2227

    
2228
                if(am&(~255)) am= ~(am>>31);
2229
                tmp2[x]= am;
2230
            }
2231
            tmpI+= 64;
2232
            tmp2+= stride;
2233
            src += stride;
2234
        }
2235
        src -= stride*y;
2236
    }
2237
    src += HTAPS_MAX/2 - 1;
2238
    tmp2= tmp2t[1];
2239

    
2240
    if(b&2){
2241
        for(y=0; y < b_h; y++){
2242
            for(x=0; x < b_w+1; x++){
2243
                int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2244
                int a0= src[x + (HTAPS_MAX/2-3)*stride];
2245
                int a1= src[x + (HTAPS_MAX/2-2)*stride];
2246
                int a2= src[x + (HTAPS_MAX/2-1)*stride];
2247
                int a3= src[x + (HTAPS_MAX/2+0)*stride];
2248
                int a4= src[x + (HTAPS_MAX/2+1)*stride];
2249
                int a5= src[x + (HTAPS_MAX/2+2)*stride];
2250
                int a6= src[x + (HTAPS_MAX/2+3)*stride];
2251
                int am=0;
2252
                if(!p || p->fast_mc)
2253
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2254
                else
2255
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2256

    
2257
                if(am&(~255)) am= ~(am>>31);
2258
                tmp2[x]= am;
2259
            }
2260
            src += stride;
2261
            tmp2+= stride;
2262
        }
2263
        src -= stride*y;
2264
    }
2265
    src += stride*(HTAPS_MAX/2 - 1);
2266
    tmp2= tmp2t[2];
2267
    tmpI= tmpIt;
2268
    if(b&4){
2269
        for(y=0; y < b_h; y++){
2270
            for(x=0; x < b_w; x++){
2271
                int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2272
                int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2273
                int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2274
                int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2275
                int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2276
                int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2277
                int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2278
                int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2279
                int am=0;
2280
                if(!p || p->fast_mc)
2281
                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2282
                else
2283
                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2284
                if(am&(~255)) am= ~(am>>31);
2285
                tmp2[x]= am;
2286
            }
2287
            tmpI+= 64;
2288
            tmp2+= stride;
2289
        }
2290
    }
2291

    
2292
    hpel[ 0]= src;
2293
    hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2294
    hpel[ 2]= src + 1;
2295

    
2296
    hpel[ 4]= tmp2t[1];
2297
    hpel[ 5]= tmp2t[2];
2298
    hpel[ 6]= tmp2t[1] + 1;
2299

    
2300
    hpel[ 8]= src + stride;
2301
    hpel[ 9]= hpel[1] + stride;
2302
    hpel[10]= hpel[8] + 1;
2303

    
2304
    if(b==15){
2305
        const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
2306
        const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2307
        const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2308
        const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2309
        dx&=7;
2310
        dy&=7;
2311
        for(y=0; y < b_h; y++){
2312
            for(x=0; x < b_w; x++){
2313
                dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2314
                         (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
2315
            }
2316
            src1+=stride;
2317
            src2+=stride;
2318
            src3+=stride;
2319
            src4+=stride;
2320
            dst +=stride;
2321
        }
2322
    }else{
2323
        const uint8_t *src1= hpel[l];
2324
        const uint8_t *src2= hpel[r];
2325
        int a= weight[((dx&7) + (8*(dy&7)))];
2326
        int b= 8-a;
2327
        for(y=0; y < b_h; y++){
2328
            for(x=0; x < b_w; x++){
2329
                dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2330
            }
2331
            src1+=stride;
2332
            src2+=stride;
2333
            dst +=stride;
2334
        }
2335
    }
2336
STOP_TIMER("mc_block")
2337
}
2338

    
2339
#define mca(dx,dy,b_w)\
2340
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2341
    uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2342
    assert(h==b_w);\
2343
    mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2344
}
2345

    
2346
mca( 0, 0,16)
2347
mca( 8, 0,16)
2348
mca( 0, 8,16)
2349
mca( 8, 8,16)
2350
mca( 0, 0,8)
2351
mca( 8, 0,8)
2352
mca( 0, 8,8)
2353
mca( 8, 8,8)
2354

    
2355
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2356
    if(block->type & BLOCK_INTRA){
2357
        int x, y;
2358
        const int color = block->color[plane_index];
2359
        const int color4= color*0x01010101;
2360
        if(b_w==32){
2361
            for(y=0; y < b_h; y++){
2362
                *(uint32_t*)&dst[0 + y*stride]= color4;
2363
                *(uint32_t*)&dst[4 + y*stride]= color4;
2364
                *(uint32_t*)&dst[8 + y*stride]= color4;
2365
                *(uint32_t*)&dst[12+ y*stride]= color4;
2366
                *(uint32_t*)&dst[16+ y*stride]= color4;
2367
                *(uint32_t*)&dst[20+ y*stride]= color4;
2368
                *(uint32_t*)&dst[24+ y*stride]= color4;
2369
                *(uint32_t*)&dst[28+ y*stride]= color4;
2370
            }
2371
        }else if(b_w==16){
2372
            for(y=0; y < b_h; y++){
2373
                *(uint32_t*)&dst[0 + y*stride]= color4;
2374
                *(uint32_t*)&dst[4 + y*stride]= color4;
2375
                *(uint32_t*)&dst[8 + y*stride]= color4;
2376
                *(uint32_t*)&dst[12+ y*stride]= color4;
2377
            }
2378
        }else if(b_w==8){
2379
            for(y=0; y < b_h; y++){
2380
                *(uint32_t*)&dst[0 + y*stride]= color4;
2381
                *(uint32_t*)&dst[4 + y*stride]= color4;
2382
            }
2383
        }else if(b_w==4){
2384
            for(y=0; y < b_h; y++){
2385
                *(uint32_t*)&dst[0 + y*stride]= color4;
2386
            }
2387
        }else{
2388
            for(y=0; y < b_h; y++){
2389
                for(x=0; x < b_w; x++){
2390
                    dst[x + y*stride]= color;
2391
                }
2392
            }
2393
        }
2394
    }else{
2395
        uint8_t *src= s->last_picture[block->ref].data[plane_index];
2396
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2397
        int mx= block->mx*scale;
2398
        int my= block->my*scale;
2399
        const int dx= mx&15;
2400
        const int dy= my&15;
2401
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2402
        sx += (mx>>4) - (HTAPS_MAX/2-1);
2403
        sy += (my>>4) - (HTAPS_MAX/2-1);
2404
        src += sx + sy*stride;
2405
        if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2406
           || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2407
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2408
            src= tmp + MB_SIZE;
2409
        }
2410
//        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2411
//        assert(!(b_w&(b_w-1)));
2412
        assert(b_w>1 && b_h>1);
2413
        assert((tab_index>=0 && tab_index<4) || b_w==32);
2414
        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2415
            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2416
        else if(b_w==32){
2417
            int y;
2418
            for(y=0; y<b_h; y+=16){
2419
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2420
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2421
            }
2422
        }else if(b_w==b_h)
2423
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2424
        else if(b_w==2*b_h){
2425
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
2426
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2427
        }else{
2428
            assert(2*b_w==b_h);
2429
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
2430
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2431
        }
2432
    }
2433
}
2434

    
2435
void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2436
                              int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2437
    int y, x;
2438
    IDWTELEM * dst;
2439
    for(y=0; y<b_h; y++){
2440
        //FIXME ugly misuse of obmc_stride
2441
        const uint8_t *obmc1= obmc + y*obmc_stride;
2442
        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2443
        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2444
        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2445
        dst = slice_buffer_get_line(sb, src_y + y);
2446
        for(x=0; x<b_w; x++){
2447
            int v=   obmc1[x] * block[3][x + y*src_stride]
2448
                    +obmc2[x] * block[2][x + y*src_stride]
2449
                    +obmc3[x] * block[1][x + y*src_stride]
2450
                    +obmc4[x] * block[0][x + y*src_stride];
2451

    
2452
            v <<= 8 - LOG2_OBMC_MAX;
2453
            if(FRAC_BITS != 8){
2454
                v >>= 8 - FRAC_BITS;
2455
            }
2456
            if(add){
2457
                v += dst[x + src_x];
2458
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2459
                if(v&(~255)) v= ~(v>>31);
2460
                dst8[x + y*src_stride] = v;
2461
            }else{
2462
                dst[x + src_x] -= v;
2463
            }
2464
        }
2465
    }
2466
}
2467

    
2468
//FIXME name clenup (b_w, block_w, b_width stuff)
2469
static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2470
    const int b_width = s->b_width  << s->block_max_depth;
2471
    const int b_height= s->b_height << s->block_max_depth;
2472
    const int b_stride= b_width;
2473
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2474
    BlockNode *rt= lt+1;
2475
    BlockNode *lb= lt+b_stride;
2476
    BlockNode *rb= lb+1;
2477
    uint8_t *block[4];
2478
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2479
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2480
    uint8_t *ptmp;
2481
    int x,y;
2482

    
2483
    if(b_x<0){
2484
        lt= rt;
2485
        lb= rb;
2486
    }else if(b_x + 1 >= b_width){
2487
        rt= lt;
2488
        rb= lb;
2489
    }
2490
    if(b_y<0){
2491
        lt= lb;
2492
        rt= rb;
2493
    }else if(b_y + 1 >= b_height){
2494
        lb= lt;
2495
        rb= rt;
2496
    }
2497

    
2498
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2499
        obmc -= src_x;
2500
        b_w += src_x;
2501
        if(!sliced && !offset_dst)
2502
            dst -= src_x;
2503
        src_x=0;
2504
    }else if(src_x + b_w > w){
2505
        b_w = w - src_x;
2506
    }
2507
    if(src_y<0){
2508
        obmc -= src_y*obmc_stride;
2509
        b_h += src_y;
2510
        if(!sliced && !offset_dst)
2511
            dst -= src_y*dst_stride;
2512
        src_y=0;
2513
    }else if(src_y + b_h> h){
2514
        b_h = h - src_y;
2515
    }
2516

    
2517
    if(b_w<=0 || b_h<=0) return;
2518

    
2519
assert(src_stride > 2*MB_SIZE + 5);
2520
    if(!sliced && offset_dst)
2521
        dst += src_x + src_y*dst_stride;
2522
    dst8+= src_x + src_y*src_stride;
2523
//    src += src_x + src_y*src_stride;
2524

    
2525
    ptmp= tmp + 3*tmp_step;
2526
    block[0]= ptmp;
2527
    ptmp+=tmp_step;
2528
    pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2529

    
2530
    if(same_block(lt, rt)){
2531
        block[1]= block[0];
2532
    }else{
2533
        block[1]= ptmp;
2534
        ptmp+=tmp_step;
2535
        pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2536
    }
2537

    
2538
    if(same_block(lt, lb)){
2539
        block[2]= block[0];
2540
    }else if(same_block(rt, lb)){
2541
        block[2]= block[1];
2542
    }else{
2543
        block[2]= ptmp;
2544
        ptmp+=tmp_step;
2545
        pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2546
    }
2547

    
2548
    if(same_block(lt, rb) ){
2549
        block[3]= block[0];
2550
    }else if(same_block(rt, rb)){
2551
        block[3]= block[1];
2552
    }else if(same_block(lb, rb)){
2553
        block[3]= block[2];
2554
    }else{
2555
        block[3]= ptmp;
2556
        pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2557
    }
2558
#if 0
2559
    for(y=0; y<b_h; y++){
2560
        for(x=0; x<b_w; x++){
2561
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2562
            if(add) dst[x + y*dst_stride] += v;
2563
            else    dst[x + y*dst_stride] -= v;
2564
        }
2565
    }
2566
    for(y=0; y<b_h; y++){
2567
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2568
        for(x=0; x<b_w; x++){
2569
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2570
            if(add) dst[x + y*dst_stride] += v;
2571
            else    dst[x + y*dst_stride] -= v;
2572
        }
2573
    }
2574
    for(y=0; y<b_h; y++){
2575
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2576
        for(x=0; x<b_w; x++){
2577
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2578
            if(add) dst[x + y*dst_stride] += v;
2579
            else    dst[x + y*dst_stride] -= v;
2580
        }
2581
    }
2582
    for(y=0; y<b_h; y++){
2583
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2584
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2585
        for(x=0; x<b_w; x++){
2586
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2587
            if(add) dst[x + y*dst_stride] += v;
2588
            else    dst[x + y*dst_stride] -= v;
2589
        }
2590
    }
2591
#else
2592
    if(sliced){
2593
        START_TIMER
2594

    
2595
        s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2596
        STOP_TIMER("inner_add_yblock")
2597
    }else
2598
    for(y=0; y<b_h; y++){
2599
        //FIXME ugly misuse of obmc_stride
2600
        const uint8_t *obmc1= obmc + y*obmc_stride;
2601
        const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2602
        const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2603
        const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2604
        for(x=0; x<b_w; x++){
2605
            int v=   obmc1[x] * block[3][x + y*src_stride]
2606
                    +obmc2[x] * block[2][x + y*src_stride]
2607
                    +obmc3[x] * block[1][x + y*src_stride]
2608
                    +obmc4[x] * block[0][x + y*src_stride];
2609

    
2610
            v <<= 8 - LOG2_OBMC_MAX;
2611
            if(FRAC_BITS != 8){
2612
                v >>= 8 - FRAC_BITS;
2613
            }
2614
            if(add){
2615
                v += dst[x + y*dst_stride];
2616
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2617
                if(v&(~255)) v= ~(v>>31);
2618
                dst8[x + y*src_stride] = v;
2619
            }else{
2620
                dst[x + y*dst_stride] -= v;
2621
            }
2622
        }
2623
    }
2624
#endif /* 0 */
2625
}
2626

    
2627
static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2628
    Plane *p= &s->plane[plane_index];
2629
    const int mb_w= s->b_width  << s->block_max_depth;
2630
    const int mb_h= s->b_height << s->block_max_depth;
2631
    int x, y, mb_x;
2632
    int block_size = MB_SIZE >> s->block_max_depth;
2633
    int block_w    = plane_index ? block_size/2 : block_size;
2634
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2635
    int obmc_stride= plane_index ? block_size : 2*block_size;
2636
    int ref_stride= s->current_picture.linesize[plane_index];
2637
    uint8_t *dst8= s->current_picture.data[plane_index];
2638
    int w= p->width;
2639
    int h= p->height;
2640
    START_TIMER
2641

    
2642
    if(s->keyframe || (s->avctx->debug&512)){
2643
        if(mb_y==mb_h)
2644
            return;
2645

    
2646
        if(add){
2647
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2648
            {
2649
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2650
                IDWTELEM * line = sb->line[y];
2651
                for(x=0; x<w; x++)
2652
                {
2653
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2654
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2655
                    v >>= FRAC_BITS;
2656
                    if(v&(~255)) v= ~(v>>31);
2657
                    dst8[x + y*ref_stride]= v;
2658
                }
2659
            }
2660
        }else{
2661
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2662
            {
2663
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2664
                IDWTELEM * line = sb->line[y];
2665
                for(x=0; x<w; x++)
2666
                {
2667
                    line[x] -= 128 << FRAC_BITS;
2668
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2669
                }
2670
            }
2671
        }
2672

    
2673
        return;
2674
    }
2675

    
2676
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2677
            START_TIMER
2678

    
2679
            add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2680
                       block_w*mb_x - block_w/2,
2681
                       block_w*mb_y - block_w/2,
2682
                       block_w, block_w,
2683
                       w, h,
2684
                       w, ref_stride, obmc_stride,
2685
                       mb_x - 1, mb_y - 1,
2686
                       add, 0, plane_index);
2687

    
2688
            STOP_TIMER("add_yblock")
2689
        }
2690

    
2691
    STOP_TIMER("predict_slice")
2692
}
2693

    
2694
static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2695
    Plane *p= &s->plane[plane_index];
2696
    const int mb_w= s->b_width  << s->block_max_depth;
2697
    const int mb_h= s->b_height << s->block_max_depth;
2698
    int x, y, mb_x;
2699
    int block_size = MB_SIZE >> s->block_max_depth;
2700
    int block_w    = plane_index ? block_size/2 : block_size;
2701
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2702
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2703
    int ref_stride= s->current_picture.linesize[plane_index];
2704
    uint8_t *dst8= s->current_picture.data[plane_index];
2705
    int w= p->width;
2706
    int h= p->height;
2707
    START_TIMER
2708

    
2709
    if(s->keyframe || (s->avctx->debug&512)){
2710
        if(mb_y==mb_h)
2711
            return;
2712

    
2713
        if(add){
2714
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2715
                for(x=0; x<w; x++){
2716
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2717
                    v >>= FRAC_BITS;
2718
                    if(v&(~255)) v= ~(v>>31);
2719
                    dst8[x + y*ref_stride]= v;
2720
                }
2721
            }
2722
        }else{
2723
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2724
                for(x=0; x<w; x++){
2725
                    buf[x + y*w]-= 128<<FRAC_BITS;
2726
                }
2727
            }
2728
        }
2729

    
2730
        return;
2731
    }
2732

    
2733
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2734
            START_TIMER
2735

    
2736
            add_yblock(s, 0, NULL, buf, dst8, obmc,
2737
                       block_w*mb_x - block_w/2,
2738
                       block_w*mb_y - block_w/2,
2739
                       block_w, block_w,
2740
                       w, h,
2741
                       w, ref_stride, obmc_stride,
2742
                       mb_x - 1, mb_y - 1,
2743
                       add, 1, plane_index);
2744

    
2745
            STOP_TIMER("add_yblock")
2746
        }
2747

    
2748
    STOP_TIMER("predict_slice")
2749
}
2750

    
2751
static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2752
    const int mb_h= s->b_height << s->block_max_depth;
2753
    int mb_y;
2754
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2755
        predict_slice(s, buf, plane_index, add, mb_y);
2756
}
2757

    
2758
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2759
    int i, x2, y2;
2760
    Plane *p= &s->plane[plane_index];
2761
    const int block_size = MB_SIZE >> s->block_max_depth;
2762
    const int block_w    = plane_index ? block_size/2 : block_size;
2763
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2764
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2765
    const int ref_stride= s->current_picture.linesize[plane_index];
2766
    uint8_t *src= s-> input_picture.data[plane_index];
2767
    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2768
    const int b_stride = s->b_width << s->block_max_depth;
2769
    const int w= p->width;
2770
    const int h= p->height;
2771
    int index= mb_x + mb_y*b_stride;
2772
    BlockNode *b= &s->block[index];
2773
    BlockNode backup= *b;
2774
    int ab=0;
2775
    int aa=0;
2776

    
2777
    b->type|= BLOCK_INTRA;
2778
    b->color[plane_index]= 0;
2779
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2780

    
2781
    for(i=0; i<4; i++){
2782
        int mb_x2= mb_x + (i &1) - 1;
2783
        int mb_y2= mb_y + (i>>1) - 1;
2784
        int x= block_w*mb_x2 + block_w/2;
2785
        int y= block_w*mb_y2 + block_w/2;
2786

    
2787
        add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2788
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2789

    
2790
        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2791
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2792
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2793
                int obmc_v= obmc[index];
2794
                int d;
2795
                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2796
                if(x<0) obmc_v += obmc[index + block_w];
2797
                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2798
                if(x+block_w>w) obmc_v += obmc[index - block_w];
2799
                //FIXME precalc this or simplify it somehow else
2800

    
2801
                d = -dst[index] + (1<<(FRAC_BITS-1));
2802
                dst[index] = d;
2803
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2804
                aa += obmc_v * obmc_v; //FIXME precalclate this
2805
            }
2806
        }
2807
    }
2808
    *b= backup;
2809

    
2810
    return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2811
}
2812

    
2813
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2814
    const int b_stride = s->b_width << s->block_max_depth;
2815
    const int b_height = s->b_height<< s->block_max_depth;
2816
    int index= x + y*b_stride;
2817
    const BlockNode *b     = &s->block[index];
2818
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
2819
    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
2820
    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
2821
    const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2822
    int dmx, dmy;
2823
//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2824
//  int my_context= av_log2(2*FFABS(left->my - top->my));
2825

    
2826
    if(x<0 || x>=b_stride || y>=b_height)
2827
        return 0;
2828
/*
2829
1            0      0
2830
01X          1-2    1
2831
001XX        3-6    2-3
2832
0001XXX      7-14   4-7
2833
00001XXXX   15-30   8-15
2834
*/
2835
//FIXME try accurate rate
2836
//FIXME intra and inter predictors if surrounding blocks arent the same type
2837
    if(b->type & BLOCK_INTRA){
2838
        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2839
                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
2840
                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
2841
    }else{
2842
        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2843
        dmx-= b->mx;
2844
        dmy-= b->my;
2845
        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2846
                    + av_log2(2*FFABS(dmy))
2847
                    + av_log2(2*b->ref));
2848
    }
2849
}
2850

    
2851
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2852
    Plane *p= &s->plane[plane_index];
2853
    const int block_size = MB_SIZE >> s->block_max_depth;
2854
    const int block_w    = plane_index ? block_size/2 : block_size;
2855
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2856
    const int ref_stride= s->current_picture.linesize[plane_index];
2857
    uint8_t *dst= s->current_picture.data[plane_index];
2858
    uint8_t *src= s->  input_picture.data[plane_index];
2859
    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2860
    uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2861
    uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2862
    const int b_stride = s->b_width << s->block_max_depth;
2863
    const int b_height = s->b_height<< s->block_max_depth;
2864
    const int w= p->width;
2865
    const int h= p->height;
2866
    int distortion;
2867
    int rate= 0;
2868
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2869
    int sx= block_w*mb_x - block_w/2;
2870
    int sy= block_w*mb_y - block_w/2;
2871
    int x0= FFMAX(0,-sx);
2872
    int y0= FFMAX(0,-sy);
2873
    int x1= FFMIN(block_w*2, w-sx);
2874
    int y1= FFMIN(block_w*2, h-sy);
2875
    int i,x,y;
2876

    
2877
    pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2878

    
2879
    for(y=y0; y<y1; y++){
2880
        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2881
        const IDWTELEM *pred1 = pred + y*obmc_stride;
2882
        uint8_t *cur1 = cur + y*ref_stride;
2883
        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2884
        for(x=x0; x<x1; x++){
2885
#if FRAC_BITS >= LOG2_OBMC_MAX
2886
            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2887
#else
2888
            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2889
#endif
2890
            v = (v + pred1[x]) >> FRAC_BITS;
2891
            if(v&(~255)) v= ~(v>>31);
2892
            dst1[x] = v;
2893
        }
2894
    }
2895

    
2896
    /* copy the regions where obmc[] = (uint8_t)256 */
2897
    if(LOG2_OBMC_MAX == 8
2898
        && (mb_x == 0 || mb_x == b_stride-1)
2899
        && (mb_y == 0 || mb_y == b_height-1)){
2900
        if(mb_x == 0)
2901
            x1 = block_w;
2902
        else
2903
            x0 = block_w;
2904
        if(mb_y == 0)
2905
            y1 = block_w;
2906
        else
2907
            y0 = block_w;
2908
        for(y=y0; y<y1; y++)
2909
            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2910
    }
2911

    
2912
    if(block_w==16){
2913
        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2914
        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2915
        /* FIXME cmps overlap but don't cover the wavelet's whole support,
2916
         * so improving the score of one block is not strictly guaranteed to
2917
         * improve the score of the whole frame, so iterative motion est
2918
         * doesn't always converge. */
2919
        if(s->avctx->me_cmp == FF_CMP_W97)
2920
            distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2921
        else if(s->avctx->me_cmp == FF_CMP_W53)
2922
            distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2923
        else{
2924
            distortion = 0;
2925
            for(i=0; i<4; i++){
2926
                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2927
                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2928
            }
2929
        }
2930
    }else{
2931
        assert(block_w==8);
2932
        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2933
    }
2934

    
2935
    if(plane_index==0){
2936
        for(i=0; i<4; i++){
2937
/* ..RRr
2938
 * .RXx.
2939
 * rxx..
2940
 */
2941
            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2942
        }
2943
        if(mb_x == b_stride-2)
2944
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2945
    }
2946
    return distortion + rate*penalty_factor;
2947
}
2948

    
2949
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2950
    int i, y2;
2951
    Plane *p= &s->plane[plane_index];
2952
    const int block_size = MB_SIZE >> s->block_max_depth;
2953
    const int block_w    = plane_index ? block_size/2 : block_size;
2954
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2955
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2956
    const int ref_stride= s->current_picture.linesize[plane_index];
2957
    uint8_t *dst= s->current_picture.data[plane_index];
2958
    uint8_t *src= s-> input_picture.data[plane_index];
2959
    static const IDWTELEM zero_dst[4096]; //FIXME
2960
    const int b_stride = s->b_width << s->block_max_depth;
2961
    const int w= p->width;
2962
    const int h= p->height;
2963
    int distortion= 0;
2964
    int rate= 0;
2965
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2966

    
2967
    for(i=0; i<9; i++){
2968
        int mb_x2= mb_x + (i%3) - 1;
2969
        int mb_y2= mb_y + (i/3) - 1;
2970
        int x= block_w*mb_x2 + block_w/2;
2971
        int y= block_w*mb_y2 + block_w/2;
2972

    
2973
        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2974
                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2975

    
2976
        //FIXME find a cleaner/simpler way to skip the outside stuff
2977
        for(y2= y; y2<0; y2++)
2978
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2979
        for(y2= h; y2<y+block_w; y2++)
2980
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2981
        if(x<0){
2982
            for(y2= y; y2<y+block_w; y2++)
2983
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2984
        }
2985
        if(x+block_w > w){
2986
            for(y2= y; y2<y+block_w; y2++)
2987
                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2988
        }
2989

    
2990
        assert(block_w== 8 || block_w==16);
2991
        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2992
    }
2993

    
2994
    if(plane_index==0){
2995
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2996
        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2997

    
2998
/* ..RRRr
2999
 * .RXXx.
3000
 * .RXXx.
3001
 * rxxx.
3002
 */
3003
        if(merged)
3004
            rate = get_block_bits(s, mb_x, mb_y, 2);
3005
        for(i=merged?4:0; i<9; i++){
3006
            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3007
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3008
        }
3009
    }
3010
    return distortion + rate*penalty_factor;
3011
}
3012

    
3013
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3014
    const int b_stride= s->b_width << s->block_max_depth;
3015
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3016
    BlockNode backup= *block;
3017
    int rd, index, value;
3018

    
3019
    assert(mb_x>=0 && mb_y>=0);
3020
    assert(mb_x<b_stride);
3021

    
3022
    if(intra){
3023
        block->color[0] = p[0];
3024
        block->color[1] = p[1];
3025
        block->color[2] = p[2];
3026
        block->type |= BLOCK_INTRA;
3027
    }else{
3028
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3029
        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3030
        if(s->me_cache[index] == value)
3031
            return 0;
3032
        s->me_cache[index]= value;
3033

    
3034
        block->mx= p[0];
3035
        block->my= p[1];
3036
        block->type &= ~BLOCK_INTRA;
3037
    }
3038

    
3039
    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3040

    
3041
//FIXME chroma
3042
    if(rd < *best_rd){
3043
        *best_rd= rd;
3044
        return 1;
3045
    }else{
3046
        *block= backup;
3047
        return 0;
3048
    }
3049
}
3050

    
3051
/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3052
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3053
    int p[2] = {p0, p1};
3054
    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3055
}
3056

    
3057
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3058
    const int b_stride= s->b_width << s->block_max_depth;
3059
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3060
    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3061
    int rd, index, value;
3062

    
3063
    assert(mb_x>=0 && mb_y>=0);
3064
    assert(mb_x<b_stride);
3065
    assert(((mb_x|mb_y)&1) == 0);
3066

    
3067
    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3068
    value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3069
    if(s->me_cache[index] == value)
3070
        return 0;
3071
    s->me_cache[index]= value;
3072

    
3073
    block->mx= p0;
3074
    block->my= p1;
3075
    block->ref= ref;
3076
    block->type &= ~BLOCK_INTRA;
3077
    block[1]= block[b_stride]= block[b_stride+1]= *block;
3078

    
3079
    rd= get_4block_rd(s, mb_x, mb_y, 0);
3080

    
3081
//FIXME chroma
3082
    if(rd < *best_rd){
3083
        *best_rd= rd;
3084
        return 1;
3085
    }else{
3086
        block[0]= backup[0];
3087
        block[1]= backup[1];
3088
        block[b_stride]= backup[2];
3089
        block[b_stride+1]= backup[3];
3090
        return 0;
3091
    }
3092
}
3093

    
3094
static void iterative_me(SnowContext *s){
3095
    int pass, mb_x, mb_y;
3096
    const int b_width = s->b_width  << s->block_max_depth;
3097
    const int b_height= s->b_height << s->block_max_depth;
3098
    const int b_stride= b_width;
3099
    int color[3];
3100

    
3101
    {
3102
        RangeCoder r = s->c;
3103
        uint8_t state[sizeof(s->block_state)];
3104
        memcpy(state, s->block_state, sizeof(s->block_state));
3105
        for(mb_y= 0; mb_y<s->b_height; mb_y++)
3106
            for(mb_x= 0; mb_x<s->b_width; mb_x++)
3107
                encode_q_branch(s, 0, mb_x, mb_y);
3108
        s->c = r;
3109
        memcpy(s->block_state, state, sizeof(s->block_state));
3110
    }
3111

    
3112
    for(pass=0; pass<25; pass++){
3113
        int change= 0;
3114

    
3115
        for(mb_y= 0; mb_y<b_height; mb_y++){
3116
            for(mb_x= 0; mb_x<b_width; mb_x++){
3117
                int dia_change, i, j, ref;
3118
                int best_rd= INT_MAX, ref_rd;
3119
                BlockNode backup, ref_b;
3120
                const int index= mb_x + mb_y * b_stride;
3121
                BlockNode *block= &s->block[index];
3122
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3123
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3124
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3125
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3126
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3127
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3128
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3129
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3130
                const int b_w= (MB_SIZE >> s->block_max_depth);
3131
                uint8_t obmc_edged[b_w*2][b_w*2];
3132

    
3133
                if(pass && (block->type & BLOCK_OPT))
3134
                    continue;
3135
                block->type |= BLOCK_OPT;
3136

    
3137
                backup= *block;
3138

    
3139
                if(!s->me_cache_generation)
3140
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3141
                s->me_cache_generation += 1<<22;
3142

    
3143
                //FIXME precalc
3144
                {
3145
                    int x, y;
3146
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3147
                    if(mb_x==0)
3148
                        for(y=0; y<b_w*2; y++)
3149
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3150
                    if(mb_x==b_stride-1)
3151
                        for(y=0; y<b_w*2; y++)
3152
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3153
                    if(mb_y==0){
3154
                        for(x=0; x<b_w*2; x++)
3155
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3156
                        for(y=1; y<b_w; y++)
3157
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3158
                    }
3159
                    if(mb_y==b_height-1){
3160
                        for(x=0; x<b_w*2; x++)
3161
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3162
                        for(y=b_w; y<b_w*2-1; y++)
3163
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3164
                    }
3165
                }
3166

    
3167
                //skip stuff outside the picture
3168
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3169
                {
3170
                    uint8_t *src= s->  input_picture.data[0];
3171
                    uint8_t *dst= s->current_picture.data[0];
3172
                    const int stride= s->current_picture.linesize[0];
3173
                    const int block_w= MB_SIZE >> s->block_max_depth;
3174
                    const int sx= block_w*mb_x - block_w/2;
3175
                    const int sy= block_w*mb_y - block_w/2;
3176
                    const int w= s->plane[0].width;
3177
                    const int h= s->plane[0].height;
3178
                    int y;
3179

    
3180
                    for(y=sy; y<0; y++)
3181
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3182
                    for(y=h; y<sy+block_w*2; y++)
3183
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3184
                    if(sx<0){
3185
                        for(y=sy; y<sy+block_w*2; y++)
3186
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3187
                    }
3188
                    if(sx+block_w*2 > w){
3189
                        for(y=sy; y<sy+block_w*2; y++)
3190
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3191
                    }
3192
                }
3193

    
3194
                // intra(black) = neighbors' contribution to the current block
3195
                for(i=0; i<3; i++)
3196
                    color[i]= get_dc(s, mb_x, mb_y, i);
3197

    
3198
                // get previous score (cannot be cached due to OBMC)
3199
                if(pass > 0 && (block->type&BLOCK_INTRA)){
3200
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
3201
                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3202
                }else
3203
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3204

    
3205
                ref_b= *block;
3206
                ref_rd= best_rd;
3207
                for(ref=0; ref < s->ref_frames; ref++){
3208
                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3209
                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3210
                        continue;
3211
                    block->ref= ref;
3212
                    best_rd= INT_MAX;
3213

    
3214
                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3215
                    check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3216
                    if(tb)
3217
                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3218
                    if(lb)
3219
                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3220
                    if(rb)
3221
                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3222
                    if(bb)
3223
                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3224

    
3225
                    /* fullpel ME */
3226
                    //FIXME avoid subpel interpol / round to nearest integer
3227
                    do{
3228
                        dia_change=0;
3229
                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3230
                            for(j=0; j<i; j++){
3231
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3232
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3233
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3234
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3235
                            }
3236
                        }
3237
                    }while(dia_change);
3238
                    /* subpel ME */
3239
                    do{
3240
                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3241
                        dia_change=0;
3242
                        for(i=0; i<8; i++)
3243
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3244
                    }while(dia_change);
3245
                    //FIXME or try the standard 2 pass qpel or similar
3246

    
3247
                    mvr[0][0]= block->mx;
3248
                    mvr[0][1]= block->my;
3249
                    if(ref_rd > best_rd){
3250
                        ref_rd= best_rd;
3251
                        ref_b= *block;
3252
                    }
3253
                }
3254
                best_rd= ref_rd;
3255
                *block= ref_b;
3256
#if 1
3257
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3258
                //FIXME RD style color selection
3259
#endif
3260
                if(!same_block(block, &backup)){
3261
                    if(tb ) tb ->type &= ~BLOCK_OPT;
3262
                    if(lb ) lb ->type &= ~BLOCK_OPT;
3263
                    if(rb ) rb ->type &= ~BLOCK_OPT;
3264
                    if(bb ) bb ->type &= ~BLOCK_OPT;
3265
                    if(tlb) tlb->type &= ~BLOCK_OPT;
3266
                    if(trb) trb->type &= ~BLOCK_OPT;
3267
                    if(blb) blb->type &= ~BLOCK_OPT;
3268
                    if(brb) brb->type &= ~BLOCK_OPT;
3269
                    change ++;
3270
                }
3271
            }
3272
        }
3273
        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3274
        if(!change)
3275
            break;
3276
    }
3277

    
3278
    if(s->block_max_depth == 1){
3279
        int change= 0;
3280
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
3281
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
3282
                int i;
3283
                int best_rd, init_rd;
3284
                const int index= mb_x + mb_y * b_stride;
3285
                BlockNode *b[4];
3286

    
3287
                b[0]= &s->block[index];
3288
                b[1]= b[0]+1;
3289
                b[2]= b[0]+b_stride;
3290
                b[3]= b[2]+1;
3291
                if(same_block(b[0], b[1]) &&
3292
                   same_block(b[0], b[2]) &&
3293
                   same_block(b[0], b[3]))
3294
                    continue;
3295

    
3296
                if(!s->me_cache_generation)
3297
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3298
                s->me_cache_generation += 1<<22;
3299

    
3300
                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3301

    
3302
                //FIXME more multiref search?
3303
                check_4block_inter(s, mb_x, mb_y,
3304
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3305
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3306

    
3307
                for(i=0; i<4; i++)
3308
                    if(!(b[i]->type&BLOCK_INTRA))
3309
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3310

    
3311
                if(init_rd != best_rd)
3312
                    change++;
3313
            }
3314
        }
3315
        av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3316
    }
3317
}
3318

    
3319
static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3320
    const int level= b->level;
3321
    const int w= b->width;
3322
    const int h= b->height;
3323
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3324
    const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3325
    int x,y, thres1, thres2;
3326
//    START_TIMER
3327

    
3328
    if(s->qlog == LOSSLESS_QLOG){
3329
        for(y=0; y<h; y++)
3330
            for(x=0; x<w; x++)
3331
                dst[x + y*stride]= src[x + y*stride];
3332
        return;
3333
    }
3334

    
3335
    bias= bias ? 0 : (3*qmul)>>3;
3336
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3337
    thres2= 2*thres1;
3338

    
3339
    if(!bias){
3340
        for(y=0; y<h; y++){
3341
            for(x=0; x<w; x++){
3342
                int i= src[x + y*stride];
3343

    
3344
                if((unsigned)(i+thres1) > thres2){
3345
                    if(i>=0){
3346
                        i<<= QEXPSHIFT;
3347
                        i/= qmul; //FIXME optimize
3348
                        dst[x + y*stride]=  i;
3349
                    }else{
3350
                        i= -i;
3351
                        i<<= QEXPSHIFT;
3352
                        i/= qmul; //FIXME optimize
3353
                        dst[x + y*stride]= -i;
3354
                    }
3355
                }else
3356
                    dst[x + y*stride]= 0;
3357
            }
3358
        }
3359
    }else{
3360
        for(y=0; y<h; y++){
3361
            for(x=0; x<w; x++){
3362
                int i= src[x + y*stride];
3363

    
3364
                if((unsigned)(i+thres1) > thres2){
3365
                    if(i>=0){
3366
                        i<<= QEXPSHIFT;
3367
                        i= (i + bias) / qmul; //FIXME optimize
3368
                        dst[x + y*stride]=  i;
3369
                    }else{
3370
                        i= -i;
3371
                        i<<= QEXPSHIFT;
3372
                        i= (i + bias) / qmul; //FIXME optimize
3373
                        dst[x + y*stride]= -i;
3374
                    }
3375
                }else
3376
                    dst[x + y*stride]= 0;
3377
            }
3378
        }
3379
    }
3380
    if(level+1 == s->spatial_decomposition_count){
3381
//        STOP_TIMER("quantize")
3382
    }
3383
}
3384

    
3385
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3386
    const int w= b->width;
3387
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3388
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3389
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3390
    int x,y;
3391
    START_TIMER
3392

    
3393
    if(s->qlog == LOSSLESS_QLOG) return;
3394

    
3395
    for(y=start_y; y<end_y; y++){
3396
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3397
        IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3398
        for(x=0; x<w; x++){
3399
            int i= line[x];
3400
            if(i<0){
3401
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3402
            }else if(i>0){
3403
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3404
            }
3405
        }
3406
    }
3407
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3408
        STOP_TIMER("dquant")
3409
    }
3410
}
3411

    
3412
static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3413
    const int w= b->width;
3414
    const int h= b->height;
3415
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3416
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3417
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3418
    int x,y;
3419
    START_TIMER
3420

    
3421
    if(s->qlog == LOSSLESS_QLOG) return;
3422

    
3423
    for(y=0; y<h; y++){
3424
        for(x=0; x<w; x++){
3425
            int i= src[x + y*stride];
3426
            if(i<0){
3427
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3428
            }else if(i>0){
3429
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3430
            }
3431
        }
3432
    }
3433
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3434
        STOP_TIMER("dquant")
3435
    }
3436
}
3437

    
3438
static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3439
    const int w= b->width;
3440
    const int h= b->height;
3441
    int x,y;
3442

    
3443
    for(y=h-1; y>=0; y--){
3444
        for(x=w-1; x>=0; x--){
3445
            int i= x + y*stride;
3446

    
3447
            if(x){
3448
                if(use_median){
3449
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3450
                    else  src[i] -= src[i - 1];
3451
                }else{
3452
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3453
                    else  src[i] -= src[i - 1];
3454
                }
3455
            }else{
3456
                if(y) src[i] -= src[i - stride];
3457
            }
3458
        }
3459
    }
3460
}
3461

    
3462
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3463
    const int w= b->width;
3464
    int x,y;
3465

    
3466
//    START_TIMER
3467

    
3468
    IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3469
    IDWTELEM * prev;
3470

    
3471
    if (start_y != 0)
3472
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3473

    
3474
    for(y=start_y; y<end_y; y++){
3475
        prev = line;
3476
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3477
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3478
        for(x=0; x<w; x++){
3479
            if(x){
3480
                if(use_median){
3481
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3482
                    else  line[x] += line[x - 1];
3483
                }else{
3484
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3485
                    else  line[x] += line[x - 1];
3486
                }
3487
            }else{
3488
                if(y) line[x] += prev[x];
3489
            }
3490
        }
3491
    }
3492

    
3493
//    STOP_TIMER("correlate")
3494
}
3495

    
3496
static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3497
    const int w= b->width;
3498
    const int h= b->height;
3499
    int x,y;
3500

    
3501
    for(y=0; y<h; y++){
3502
        for(x=0; x<w; x++){
3503
            int i= x + y*stride;
3504

    
3505
            if(x){
3506
                if(use_median){
3507
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3508
                    else  src[i] += src[i - 1];
3509
                }else{
3510
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3511
                    else  src[i] += src[i - 1];
3512
                }
3513
            }else{
3514
                if(y) src[i] += src[i - stride];
3515
            }
3516
        }
3517
    }
3518
}
3519

    
3520
static void encode_qlogs(SnowContext *s){
3521
    int plane_index, level, orientation;
3522

    
3523
    for(plane_index=0; plane_index<2; plane_index++){
3524
        for(level=0; level<s->spatial_decomposition_count; level++){
3525
            for(orientation=level ? 1:0; orientation<4; orientation++){
3526
                if(orientation==2) continue;
3527
                put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3528
            }
3529
        }
3530
    }
3531
}
3532

    
3533
static void encode_header(SnowContext *s){
3534
    int plane_index, i;
3535
    uint8_t kstate[32];
3536

    
3537
    memset(kstate, MID_STATE, sizeof(kstate));
3538

    
3539
    put_rac(&s->c, kstate, s->keyframe);
3540
    if(s->keyframe || s->always_reset){
3541
        reset_contexts(s);
3542
        s->last_spatial_decomposition_type=
3543
        s->last_qlog=
3544
        s->last_qbias=
3545
        s->last_mv_scale=
3546
        s->last_block_max_depth= 0;
3547
        for(plane_index=0; plane_index<2; plane_index++){
3548
            Plane *p= &s->plane[plane_index];
3549
            p->last_htaps=0;
3550
            p->last_diag_mc=0;
3551
            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3552
        }
3553
    }
3554
    if(s->keyframe){
3555
        put_symbol(&s->c, s->header_state, s->version, 0);
3556
        put_rac(&s->c, s->header_state, s->always_reset);
3557
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3558
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3559
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3560
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3561
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3562
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3563
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3564
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3565
        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3566

    
3567
        encode_qlogs(s);
3568
    }
3569

    
3570
    if(!s->keyframe){
3571
        int update_mc=0;
3572
        for(plane_index=0; plane_index<2; plane_index++){
3573
            Plane *p= &s->plane[plane_index];
3574
            update_mc |= p->last_htaps   != p->htaps;
3575
            update_mc |= p->last_diag_mc != p->diag_mc;
3576
            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3577
        }
3578
        put_rac(&s->c, s->header_state, update_mc);
3579
        if(update_mc){
3580
            for(plane_index=0; plane_index<2; plane_index++){
3581
                Plane *p= &s->plane[plane_index];
3582
                put_rac(&s->c, s->header_state, p->diag_mc);
3583
                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3584
                for(i= p->htaps/2; i; i--)
3585
                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3586
            }
3587
        }
3588
        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3589
            put_rac(&s->c, s->header_state, 1);
3590
            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3591
            encode_qlogs(s);
3592
        }else
3593
            put_rac(&s->c, s->header_state, 0);
3594
    }
3595

    
3596
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3597
    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
3598
    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
3599
    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
3600
    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3601

    
3602
}
3603

    
3604
static void update_last_header_values(SnowContext *s){
3605
    int plane_index;
3606

    
3607
    if(!s->keyframe){
3608
        for(plane_index=0; plane_index<2; plane_index++){
3609
            Plane *p= &s->plane[plane_index];
3610
            p->last_diag_mc= p->diag_mc;
3611
            p->last_htaps  = p->htaps;
3612
            memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3613
        }
3614
    }
3615

    
3616
    s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3617
    s->last_qlog                      = s->qlog;
3618
    s->last_qbias                     = s->qbias;
3619
    s->last_mv_scale                  = s->mv_scale;
3620
    s->last_block_max_depth           = s->block_max_depth;
3621
    s->last_spatial_decomposition_count= s->spatial_decomposition_count;
3622
}
3623

    
3624
static void decode_qlogs(SnowContext *s){
3625
    int plane_index, level, orientation;
3626

    
3627
    for(plane_index=0; plane_index<3; plane_index++){
3628
        for(level=0; level<s->spatial_decomposition_count; level++){
3629
            for(orientation=level ? 1:0; orientation<4; orientation++){
3630
                int q;
3631
                if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3632
                else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3633
                else                    q= get_symbol(&s->c, s->header_state, 1);
3634
                s->plane[plane_index].band[level][orientation].qlog= q;
3635
            }
3636
        }
3637
    }
3638
}
3639

    
3640
static int decode_header(SnowContext *s){
3641
    int plane_index;
3642
    uint8_t kstate[32];
3643

    
3644
    memset(kstate, MID_STATE, sizeof(kstate));
3645

    
3646
    s->keyframe= get_rac(&s->c, kstate);
3647
    if(s->keyframe || s->always_reset){
3648
        reset_contexts(s);
3649
        s->spatial_decomposition_type=
3650
        s->qlog=
3651
        s->qbias=
3652
        s->mv_scale=
3653
        s->block_max_depth= 0;
3654
    }
3655
    if(s->keyframe){
3656
        s->version= get_symbol(&s->c, s->header_state, 0);
3657
        if(s->version>0){
3658
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3659
            return -1;
3660
        }
3661
        s->always_reset= get_rac(&s->c, s->header_state);
3662
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3663
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3664
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3665
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3666
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3667
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3668
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3669
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3670
        s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3671

    
3672
        decode_qlogs(s);
3673
    }
3674

    
3675
    if(!s->keyframe){
3676
        if(get_rac(&s->c, s->header_state)){
3677
            for(plane_index=0; plane_index<2; plane_index++){
3678
                int htaps, i, sum=0;
3679
                Plane *p= &s->plane[plane_index];
3680
                p->diag_mc= get_rac(&s->c, s->header_state);
3681
                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3682
                if((unsigned)htaps > HTAPS_MAX || htaps==0)
3683
                    return -1;
3684
                p->htaps= htaps;
3685
                for(i= htaps/2; i; i--){
3686
                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3687
                    sum += p->hcoeff[i];
3688
                }
3689
                p->hcoeff[0]= 32-sum;
3690
            }
3691
            s->plane[2].diag_mc= s->plane[1].diag_mc;
3692
            s->plane[2].htaps  = s->plane[1].htaps;
3693
            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3694
        }
3695
        if(get_rac(&s->c, s->header_state)){
3696
            s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3697
            decode_qlogs(s);
3698
        }
3699
    }
3700

    
3701
    s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3702
    if(s->spatial_decomposition_type > 1){
3703
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3704
        return -1;
3705
    }
3706

    
3707
    s->qlog           += get_symbol(&s->c, s->header_state, 1);
3708
    s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
3709
    s->qbias          += get_symbol(&s->c, s->header_state, 1);
3710
    s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3711
    if(s->block_max_depth > 1 || s->block_max_depth < 0){
3712
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3713
        s->block_max_depth= 0;
3714
        return -1;
3715
    }
3716

    
3717
    return 0;
3718
}
3719

    
3720
static void init_qexp(void){
3721
    int i;
3722
    double v=128;
3723

    
3724
    for(i=0; i<QROOT; i++){
3725
        qexp[i]= lrintf(v);
3726
        v *= pow(2, 1.0 / QROOT);
3727
    }
3728
}
3729

    
3730
static int common_init(AVCodecContext *avctx){
3731
    SnowContext *s = avctx->priv_data;
3732
    int width, height;
3733
    int i, j;
3734

    
3735
    s->avctx= avctx;
3736

    
3737
    dsputil_init(&s->dsp, avctx);
3738

    
3739
#define mcf(dx,dy)\
3740
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3741
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3742
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3743
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3744
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3745
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3746

    
3747
    mcf( 0, 0)
3748
    mcf( 4, 0)
3749
    mcf( 8, 0)
3750
    mcf(12, 0)
3751
    mcf( 0, 4)
3752
    mcf( 4, 4)
3753
    mcf( 8, 4)
3754
    mcf(12, 4)
3755
    mcf( 0, 8)
3756
    mcf( 4, 8)
3757
    mcf( 8, 8)
3758
    mcf(12, 8)
3759
    mcf( 0,12)
3760
    mcf( 4,12)
3761
    mcf( 8,12)
3762
    mcf(12,12)
3763

    
3764
#define mcfh(dx,dy)\
3765
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3766
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3767
        mc_block_hpel ## dx ## dy ## 16;\
3768
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3769
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3770
        mc_block_hpel ## dx ## dy ## 8;
3771

    
3772
    mcfh(0, 0)
3773
    mcfh(8, 0)
3774
    mcfh(0, 8)
3775
    mcfh(8, 8)
3776

    
3777
    if(!qexp[0])
3778
        init_qexp();
3779

    
3780
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3781

    
3782
    width= s->avctx->width;
3783
    height= s->avctx->height;
3784

    
3785
    s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3786
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here
3787

    
3788
    for(i=0; i<MAX_REF_FRAMES; i++)
3789
        for(j=0; j<MAX_REF_FRAMES; j++)
3790
            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3791

    
3792
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3793

    
3794
    return 0;
3795
}
3796

    
3797
static int common_init_after_header(AVCodecContext *avctx){
3798
    SnowContext *s = avctx->priv_data;
3799
    int plane_index, level, orientation;
3800

    
3801
    for(plane_index=0; plane_index<3; plane_index++){
3802
        int w= s->avctx->width;
3803
        int h= s->avctx->height;
3804

    
3805
        if(plane_index){
3806
            w>>= s->chroma_h_shift;
3807
            h>>= s->chroma_v_shift;
3808
        }
3809
        s->plane[plane_index].width = w;
3810
        s->plane[plane_index].height= h;
3811

    
3812
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3813
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3814
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3815
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3816

    
3817
                b->buf= s->spatial_dwt_buffer;
3818
                b->level= level;
3819
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3820
                b->width = (w + !(orientation&1))>>1;
3821
                b->height= (h + !(orientation>1))>>1;
3822

    
3823
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3824
                b->buf_x_offset = 0;
3825
                b->buf_y_offset = 0;
3826

    
3827
                if(orientation&1){
3828
                    b->buf += (w+1)>>1;
3829
                    b->buf_x_offset = (w+1)>>1;
3830
                }
3831
                if(orientation>1){
3832
                    b->buf += b->stride>>1;
3833
                    b->buf_y_offset = b->stride_line >> 1;
3834
                }
3835
                b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3836

    
3837
                if(level)
3838
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3839
                //FIXME avoid this realloc
3840
                av_freep(&b->x_coeff);
3841
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3842
            }
3843
            w= (w+1)>>1;
3844
            h= (h+1)>>1;
3845
        }
3846
    }
3847

    
3848
    return 0;
3849
}
3850

    
3851
static int qscale2qlog(int qscale){
3852
    return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3853
           + 61*QROOT/8; //<64 >60
3854
}
3855

    
3856
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3857
{
3858
    /* estimate the frame's complexity as a sum of weighted dwt coefs.
3859
     * FIXME we know exact mv bits at this point,
3860
     * but ratecontrol isn't set up to include them. */
3861
    uint32_t coef_sum= 0;
3862
    int level, orientation, delta_qlog;
3863

    
3864
    for(level=0; level<s->spatial_decomposition_count; level++){
3865
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3866
            SubBand *b= &s->plane[0].band[level][orientation];
3867
            IDWTELEM *buf= b->ibuf;
3868
            const int w= b->width;
3869
            const int h= b->height;
3870
            const int stride= b->stride;
3871
            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3872
            const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3873
            const int qdiv= (1<<16)/qmul;
3874
            int x, y;
3875
            //FIXME this is ugly
3876
            for(y=0; y<h; y++)
3877
                for(x=0; x<w; x++)
3878
                    buf[x+y*stride]= b->buf[x+y*stride];
3879
            if(orientation==0)
3880
                decorrelate(s, b, buf, stride, 1, 0);
3881
            for(y=0; y<h; y++)
3882
                for(x=0; x<w; x++)
3883
                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3884
        }
3885
    }
3886

    
3887
    /* ugly, ratecontrol just takes a sqrt again */
3888
    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3889
    assert(coef_sum < INT_MAX);
3890

    
3891
    if(pict->pict_type == I_TYPE){
3892
        s->m.current_picture.mb_var_sum= coef_sum;
3893
        s->m.current_picture.mc_mb_var_sum= 0;
3894
    }else{
3895
        s->m.current_picture.mc_mb_var_sum= coef_sum;
3896
        s->m.current_picture.mb_var_sum= 0;
3897
    }
3898

    
3899
    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3900
    if (pict->quality < 0)
3901
        return INT_MIN;
3902
    s->lambda= pict->quality * 3/2;
3903
    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3904
    s->qlog+= delta_qlog;
3905
    return delta_qlog;
3906
}
3907

    
3908
static void calculate_visual_weight(SnowContext *s, Plane *p){
3909
    int width = p->width;
3910
    int height= p->height;
3911
    int level, orientation, x, y;
3912

    
3913
    for(level=0; level<s->spatial_decomposition_count; level++){
3914
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3915
            SubBand *b= &p->band[level][orientation];
3916
            IDWTELEM *ibuf= b->ibuf;
3917
            int64_t error=0;
3918

    
3919
            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3920
            ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3921
            ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3922
            for(y=0; y<height; y++){
3923
                for(x=0; x<width; x++){
3924
                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3925
                    error += d*d;
3926
                }
3927
            }
3928

    
3929
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3930
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3931
        }
3932
    }
3933
}
3934

    
3935
#define QUANTIZE2 0
3936

    
3937
#if QUANTIZE2==1
3938
#define Q2_STEP 8
3939

    
3940
static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3941
    SubBand *b= &p->band[level][orientation];
3942
    int x, y;
3943
    int xo=0;
3944
    int yo=0;
3945
    int step= 1 << (s->spatial_decomposition_count - level);
3946

    
3947
    if(orientation&1)
3948
        xo= step>>1;
3949
    if(orientation&2)
3950
        yo= step>>1;
3951

    
3952
    //FIXME bias for non zero ?
3953
    //FIXME optimize
3954
    memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3955
    for(y=0; y<p->height; y++){
3956
        for(x=0; x<p->width; x++){
3957
            int sx= (x-xo + step/2) / step / Q2_STEP;
3958
            int sy= (y-yo + step/2) / step / Q2_STEP;
3959
            int v= r0[x + y*p->width] - r1[x + y*p->width];
3960
            assert(sx>=0 && sy>=0 && sx < score_stride);
3961
            v= ((v+8)>>4)<<4;
3962
            score[sx + sy*score_stride] += v*v;
3963
            assert(score[sx + sy*score_stride] >= 0);
3964
        }
3965
    }
3966
}
3967

    
3968
static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3969
    int level, orientation;
3970

    
3971
    for(level=0; level<s->spatial_decomposition_count; level++){
3972
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3973
            SubBand *b= &p->band[level][orientation];
3974
            IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3975

    
3976
            dequantize(s, b, dst, b->stride);
3977
        }
3978
    }
3979
}
3980

    
3981
static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3982
    int level, orientation, ys, xs, x, y, pass;
3983
    IDWTELEM best_dequant[height * stride];
3984
    IDWTELEM idwt2_buffer[height * stride];
3985
    const int score_stride= (width + 10)/Q2_STEP;
3986
    int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3987
    int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3988
    int threshold= (s->m.lambda * s->m.lambda) >> 6;
3989

    
3990
    //FIXME pass the copy cleanly ?
3991

    
3992
//    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3993
    ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3994

    
3995
    for(level=0; level<s->spatial_decomposition_count; level++){
3996
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3997
            SubBand *b= &p->band[level][orientation];
3998
            IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3999
             DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
4000
            assert(src == b->buf); // code doesnt depen on this but its true currently
4001

    
4002
            quantize(s, b, dst, src, b->stride, s->qbias);
4003
        }