Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 5509bffa

History | View | Annotate | Download (154 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22

    
23
#include "rangecoder.h"
24
#define MID_STATE 128
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QSHIFT 5
35
#define QROOT (1<<QSHIFT)
36
#define LOSSLESS_QLOG -128
37
#define FRAC_BITS 8
38

    
39
static const int8_t quant3[256]={
40
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56
};
57
static const int8_t quant3b[256]={
58
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74
};
75
static const int8_t quant3bA[256]={
76
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92
};
93
static const int8_t quant5[256]={
94
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110
};
111
static const int8_t quant7[256]={
112
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128
};
129
static const int8_t quant9[256]={
130
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146
};
147
static const int8_t quant11[256]={
148
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164
};
165
static const int8_t quant13[256]={
166
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182
};
183

    
184
#define LOG2_OBMC_MAX 6
185
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
186
#if 0 //64*cubic
187
static const uint8_t obmc32[1024]={
188
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220
//error:0.000022
221
};
222
static const uint8_t obmc16[256]={
223
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239
//error:0.000033
240
};
241
#elif 1 // 64*linear
242
static const uint8_t obmc32[1024]={
243
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275
 //error:0.000020
276
};
277
static const uint8_t obmc16[256]={
278
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294
//error:0.000015
295
};
296
#else //64*cos
297
static const uint8_t obmc32[1024]={
298
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330
//error:0.000022
331
};
332
static const uint8_t obmc16[256]={
333
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349
//error:0.000022
350
};
351
#endif
352

    
353
//linear *64
354
static const uint8_t obmc8[64]={
355
 1, 3, 5, 7, 7, 5, 3, 1,
356
 3, 9,15,21,21,15, 9, 3,
357
 5,15,25,35,35,25,15, 5,
358
 7,21,35,49,49,35,21, 7,
359
 7,21,35,49,49,35,21, 7,
360
 5,15,25,35,35,25,15, 5,
361
 3, 9,15,21,21,15, 9, 3,
362
 1, 3, 5, 7, 7, 5, 3, 1,
363
//error:0.000000
364
};
365

    
366
//linear *64
367
static const uint8_t obmc4[16]={
368
 4,12,12, 4,
369
12,36,36,12,
370
12,36,36,12,
371
 4,12,12, 4,
372
//error:0.000000
373
};
374

    
375
static const uint8_t *obmc_tab[4]={
376
    obmc32, obmc16, obmc8, obmc4
377
};
378

    
379
typedef struct BlockNode{
380
    int16_t mx;
381
    int16_t my;
382
    uint8_t color[3];
383
    uint8_t type;
384
//#define TYPE_SPLIT    1
385
#define BLOCK_INTRA   1
386
#define BLOCK_OPT     2
387
//#define TYPE_NOCOLOR  4
388
    uint8_t level; //FIXME merge into type?
389
}BlockNode;
390

    
391
static const BlockNode null_block= { //FIXME add border maybe
392
    .color= {128,128,128},
393
    .mx= 0,
394
    .my= 0,
395
    .type= 0,
396
    .level= 0,
397
};
398

    
399
#define LOG2_MB_SIZE 4
400
#define MB_SIZE (1<<LOG2_MB_SIZE)
401

    
402
typedef struct x_and_coeff{
403
    int16_t x;
404
    uint16_t coeff;
405
} x_and_coeff;
406

    
407
typedef struct SubBand{
408
    int level;
409
    int stride;
410
    int width;
411
    int height;
412
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
413
    DWTELEM *buf;
414
    int buf_x_offset;
415
    int buf_y_offset;
416
    int stride_line; ///< Stride measured in lines, not pixels.
417
    x_and_coeff * x_coeff;
418
    struct SubBand *parent;
419
    uint8_t state[/*7*2*/ 7 + 512][32];
420
}SubBand;
421

    
422
typedef struct Plane{
423
    int width;
424
    int height;
425
    SubBand band[MAX_DECOMPOSITIONS][4];
426
}Plane;
427

    
428
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
429
typedef struct {
430
    DWTELEM * * line; ///< For use by idwt and predict_slices.
431
    DWTELEM * * data_stack; ///< Used for internal purposes.
432
    int data_stack_top;
433
    int line_count;
434
    int line_width;
435
    int data_count;
436
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
437
} slice_buffer;
438

    
439
typedef struct SnowContext{
440
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
441

    
442
    AVCodecContext *avctx;
443
    RangeCoder c;
444
    DSPContext dsp;
445
    AVFrame new_picture;
446
    AVFrame input_picture;              ///< new_picture with the internal linesizes
447
    AVFrame current_picture;
448
    AVFrame last_picture;
449
    AVFrame mconly_picture;
450
//     uint8_t q_context[16];
451
    uint8_t header_state[32];
452
    uint8_t block_state[128 + 32*128];
453
    int keyframe;
454
    int always_reset;
455
    int version;
456
    int spatial_decomposition_type;
457
    int temporal_decomposition_type;
458
    int spatial_decomposition_count;
459
    int temporal_decomposition_count;
460
    DWTELEM *spatial_dwt_buffer;
461
    int colorspace_type;
462
    int chroma_h_shift;
463
    int chroma_v_shift;
464
    int spatial_scalability;
465
    int qlog;
466
    int lambda;
467
    int lambda2;
468
    int mv_scale;
469
    int qbias;
470
#define QBIAS_SHIFT 3
471
    int b_width;
472
    int b_height;
473
    int block_max_depth;
474
    Plane plane[MAX_PLANES];
475
    BlockNode *block;
476
#define ME_CACHE_SIZE 1024
477
    int me_cache[ME_CACHE_SIZE];
478
    int me_cache_generation;
479
    slice_buffer sb;
480

    
481
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
482
}SnowContext;
483

    
484
typedef struct {
485
    DWTELEM *b0;
486
    DWTELEM *b1;
487
    DWTELEM *b2;
488
    DWTELEM *b3;
489
    int y;
490
} dwt_compose_t;
491

    
492
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
493
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
494

    
495
static void iterative_me(SnowContext *s);
496

    
497
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
498
{
499
    int i;
500

    
501
    buf->base_buffer = base_buffer;
502
    buf->line_count = line_count;
503
    buf->line_width = line_width;
504
    buf->data_count = max_allocated_lines;
505
    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
506
    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
507

    
508
    for (i = 0; i < max_allocated_lines; i++)
509
    {
510
      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
511
    }
512

    
513
    buf->data_stack_top = max_allocated_lines - 1;
514
}
515

    
516
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
517
{
518
    int offset;
519
    DWTELEM * buffer;
520

    
521
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
522

    
523
    assert(buf->data_stack_top >= 0);
524
//  assert(!buf->line[line]);
525
    if (buf->line[line])
526
        return buf->line[line];
527

    
528
    offset = buf->line_width * line;
529
    buffer = buf->data_stack[buf->data_stack_top];
530
    buf->data_stack_top--;
531
    buf->line[line] = buffer;
532

    
533
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
534

    
535
    return buffer;
536
}
537

    
538
static void slice_buffer_release(slice_buffer * buf, int line)
539
{
540
    int offset;
541
    DWTELEM * buffer;
542

    
543
    assert(line >= 0 && line < buf->line_count);
544
    assert(buf->line[line]);
545

    
546
    offset = buf->line_width * line;
547
    buffer = buf->line[line];
548
    buf->data_stack_top++;
549
    buf->data_stack[buf->data_stack_top] = buffer;
550
    buf->line[line] = NULL;
551

    
552
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
553
}
554

    
555
static void slice_buffer_flush(slice_buffer * buf)
556
{
557
    int i;
558
    for (i = 0; i < buf->line_count; i++)
559
    {
560
        if (buf->line[i])
561
        {
562
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
563
            slice_buffer_release(buf, i);
564
        }
565
    }
566
}
567

    
568
static void slice_buffer_destroy(slice_buffer * buf)
569
{
570
    int i;
571
    slice_buffer_flush(buf);
572

    
573
    for (i = buf->data_count - 1; i >= 0; i--)
574
    {
575
        assert(buf->data_stack[i]);
576
        av_free(buf->data_stack[i]);
577
    }
578
    assert(buf->data_stack);
579
    av_free(buf->data_stack);
580
    assert(buf->line);
581
    av_free(buf->line);
582
}
583

    
584
#ifdef __sgi
585
// Avoid a name clash on SGI IRIX
586
#undef qexp
587
#endif
588
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
589
static uint8_t qexp[QROOT];
590

    
591
static inline int mirror(int v, int m){
592
    while((unsigned)v > (unsigned)m){
593
        v=-v;
594
        if(v<0) v+= 2*m;
595
    }
596
    return v;
597
}
598

    
599
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
600
    int i;
601

    
602
    if(v){
603
        const int a= ABS(v);
604
        const int e= av_log2(a);
605
#if 1
606
        const int el= FFMIN(e, 10);
607
        put_rac(c, state+0, 0);
608

    
609
        for(i=0; i<el; i++){
610
            put_rac(c, state+1+i, 1);  //1..10
611
        }
612
        for(; i<e; i++){
613
            put_rac(c, state+1+9, 1);  //1..10
614
        }
615
        put_rac(c, state+1+FFMIN(i,9), 0);
616

    
617
        for(i=e-1; i>=el; i--){
618
            put_rac(c, state+22+9, (a>>i)&1); //22..31
619
        }
620
        for(; i>=0; i--){
621
            put_rac(c, state+22+i, (a>>i)&1); //22..31
622
        }
623

    
624
        if(is_signed)
625
            put_rac(c, state+11 + el, v < 0); //11..21
626
#else
627

    
628
        put_rac(c, state+0, 0);
629
        if(e<=9){
630
            for(i=0; i<e; i++){
631
                put_rac(c, state+1+i, 1);  //1..10
632
            }
633
            put_rac(c, state+1+i, 0);
634

    
635
            for(i=e-1; i>=0; i--){
636
                put_rac(c, state+22+i, (a>>i)&1); //22..31
637
            }
638

    
639
            if(is_signed)
640
                put_rac(c, state+11 + e, v < 0); //11..21
641
        }else{
642
            for(i=0; i<e; i++){
643
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
644
            }
645
            put_rac(c, state+1+FFMIN(i,9), 0);
646

    
647
            for(i=e-1; i>=0; i--){
648
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
649
            }
650

    
651
            if(is_signed)
652
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
653
        }
654
#endif
655
    }else{
656
        put_rac(c, state+0, 1);
657
    }
658
}
659

    
660
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
661
    if(get_rac(c, state+0))
662
        return 0;
663
    else{
664
        int i, e, a;
665
        e= 0;
666
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
667
            e++;
668
        }
669

    
670
        a= 1;
671
        for(i=e-1; i>=0; i--){
672
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
673
        }
674

    
675
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
676
            return -a;
677
        else
678
            return a;
679
    }
680
}
681

    
682
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
683
    int i;
684
    int r= log2>=0 ? 1<<log2 : 1;
685

    
686
    assert(v>=0);
687
    assert(log2>=-4);
688

    
689
    while(v >= r){
690
        put_rac(c, state+4+log2, 1);
691
        v -= r;
692
        log2++;
693
        if(log2>0) r+=r;
694
    }
695
    put_rac(c, state+4+log2, 0);
696

    
697
    for(i=log2-1; i>=0; i--){
698
        put_rac(c, state+31-i, (v>>i)&1);
699
    }
700
}
701

    
702
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
703
    int i;
704
    int r= log2>=0 ? 1<<log2 : 1;
705
    int v=0;
706

    
707
    assert(log2>=-4);
708

    
709
    while(get_rac(c, state+4+log2)){
710
        v+= r;
711
        log2++;
712
        if(log2>0) r+=r;
713
    }
714

    
715
    for(i=log2-1; i>=0; i--){
716
        v+= get_rac(c, state+31-i)<<i;
717
    }
718

    
719
    return v;
720
}
721

    
722
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
723
    const int mirror_left= !highpass;
724
    const int mirror_right= (width&1) ^ highpass;
725
    const int w= (width>>1) - 1 + (highpass & width);
726
    int i;
727

    
728
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
729
    if(mirror_left){
730
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
731
        dst += dst_step;
732
        src += src_step;
733
    }
734

    
735
    for(i=0; i<w; i++){
736
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
737
    }
738

    
739
    if(mirror_right){
740
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
741
    }
742
}
743

    
744
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
745
    const int mirror_left= !highpass;
746
    const int mirror_right= (width&1) ^ highpass;
747
    const int w= (width>>1) - 1 + (highpass & width);
748
    int i;
749

    
750
    if(mirror_left){
751
        int r= 3*2*ref[0];
752
        r += r>>4;
753
        r += r>>8;
754
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
755
        dst += dst_step;
756
        src += src_step;
757
    }
758

    
759
    for(i=0; i<w; i++){
760
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
761
        r += r>>4;
762
        r += r>>8;
763
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
764
    }
765

    
766
    if(mirror_right){
767
        int r= 3*2*ref[w*ref_step];
768
        r += r>>4;
769
        r += r>>8;
770
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
771
    }
772
}
773

    
774
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
775
    const int mirror_left= !highpass;
776
    const int mirror_right= (width&1) ^ highpass;
777
    const int w= (width>>1) - 1 + (highpass & width);
778
    int i;
779

    
780
    assert(shift == 4);
781
#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
782
    if(mirror_left){
783
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
784
        dst += dst_step;
785
        src += src_step;
786
    }
787

    
788
    for(i=0; i<w; i++){
789
        dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
790
    }
791

    
792
    if(mirror_right){
793
        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
794
    }
795
}
796

    
797

    
798
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
799
    int x, i;
800

    
801
    for(x=start; x<width; x+=2){
802
        int64_t sum=0;
803

    
804
        for(i=0; i<n; i++){
805
            int x2= x + 2*i - n + 1;
806
            if     (x2<     0) x2= -x2;
807
            else if(x2>=width) x2= 2*width-x2-2;
808
            sum += coeffs[i]*(int64_t)dst[x2];
809
        }
810
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
811
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
812
    }
813
}
814

    
815
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
816
    int x, y, i;
817
    for(y=start; y<height; y+=2){
818
        for(x=0; x<width; x++){
819
            int64_t sum=0;
820

    
821
            for(i=0; i<n; i++){
822
                int y2= y + 2*i - n + 1;
823
                if     (y2<      0) y2= -y2;
824
                else if(y2>=height) y2= 2*height-y2-2;
825
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
826
            }
827
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
828
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
829
        }
830
    }
831
}
832

    
833
#define SCALEX 1
834
#define LX0 0
835
#define LX1 1
836

    
837
#if 0 // more accurate 9/7
838
#define N1 2
839
#define SHIFT1 14
840
#define COEFFS1 (int[]){-25987,-25987}
841
#define N2 2
842
#define SHIFT2 19
843
#define COEFFS2 (int[]){-27777,-27777}
844
#define N3 2
845
#define SHIFT3 15
846
#define COEFFS3 (int[]){28931,28931}
847
#define N4 2
848
#define SHIFT4 15
849
#define COEFFS4 (int[]){14533,14533}
850
#elif 1 // 13/7 CRF
851
#define N1 4
852
#define SHIFT1 4
853
#define COEFFS1 (int[]){1,-9,-9,1}
854
#define N2 4
855
#define SHIFT2 4
856
#define COEFFS2 (int[]){-1,5,5,-1}
857
#define N3 0
858
#define SHIFT3 1
859
#define COEFFS3 NULL
860
#define N4 0
861
#define SHIFT4 1
862
#define COEFFS4 NULL
863
#elif 1 // 3/5
864
#define LX0 1
865
#define LX1 0
866
#define SCALEX 0.5
867
#define N1 2
868
#define SHIFT1 1
869
#define COEFFS1 (int[]){1,1}
870
#define N2 2
871
#define SHIFT2 2
872
#define COEFFS2 (int[]){-1,-1}
873
#define N3 0
874
#define SHIFT3 0
875
#define COEFFS3 NULL
876
#define N4 0
877
#define SHIFT4 0
878
#define COEFFS4 NULL
879
#elif 1 // 11/5
880
#define N1 0
881
#define SHIFT1 1
882
#define COEFFS1 NULL
883
#define N2 2
884
#define SHIFT2 2
885
#define COEFFS2 (int[]){-1,-1}
886
#define N3 2
887
#define SHIFT3 0
888
#define COEFFS3 (int[]){-1,-1}
889
#define N4 4
890
#define SHIFT4 7
891
#define COEFFS4 (int[]){-5,29,29,-5}
892
#define SCALEX 4
893
#elif 1 // 9/7 CDF
894
#define N1 2
895
#define SHIFT1 7
896
#define COEFFS1 (int[]){-203,-203}
897
#define N2 2
898
#define SHIFT2 12
899
#define COEFFS2 (int[]){-217,-217}
900
#define N3 2
901
#define SHIFT3 7
902
#define COEFFS3 (int[]){113,113}
903
#define N4 2
904
#define SHIFT4 9
905
#define COEFFS4 (int[]){227,227}
906
#define SCALEX 1
907
#elif 1 // 7/5 CDF
908
#define N1 0
909
#define SHIFT1 1
910
#define COEFFS1 NULL
911
#define N2 2
912
#define SHIFT2 2
913
#define COEFFS2 (int[]){-1,-1}
914
#define N3 2
915
#define SHIFT3 0
916
#define COEFFS3 (int[]){-1,-1}
917
#define N4 2
918
#define SHIFT4 4
919
#define COEFFS4 (int[]){3,3}
920
#elif 1 // 9/7 MN
921
#define N1 4
922
#define SHIFT1 4
923
#define COEFFS1 (int[]){1,-9,-9,1}
924
#define N2 2
925
#define SHIFT2 2
926
#define COEFFS2 (int[]){1,1}
927
#define N3 0
928
#define SHIFT3 1
929
#define COEFFS3 NULL
930
#define N4 0
931
#define SHIFT4 1
932
#define COEFFS4 NULL
933
#else // 13/7 CRF
934
#define N1 4
935
#define SHIFT1 4
936
#define COEFFS1 (int[]){1,-9,-9,1}
937
#define N2 4
938
#define SHIFT2 4
939
#define COEFFS2 (int[]){-1,5,5,-1}
940
#define N3 0
941
#define SHIFT3 1
942
#define COEFFS3 NULL
943
#define N4 0
944
#define SHIFT4 1
945
#define COEFFS4 NULL
946
#endif
947
static void horizontal_decomposeX(DWTELEM *b, int width){
948
    DWTELEM temp[width];
949
    const int width2= width>>1;
950
    const int w2= (width+1)>>1;
951
    int x;
952

    
953
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
954
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
955
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
956
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
957

    
958
    for(x=0; x<width2; x++){
959
        temp[x   ]= b[2*x    ];
960
        temp[x+w2]= b[2*x + 1];
961
    }
962
    if(width&1)
963
        temp[x   ]= b[2*x    ];
964
    memcpy(b, temp, width*sizeof(int));
965
}
966

    
967
static void horizontal_composeX(DWTELEM *b, int width){
968
    DWTELEM temp[width];
969
    const int width2= width>>1;
970
    int x;
971
    const int w2= (width+1)>>1;
972

    
973
    memcpy(temp, b, width*sizeof(int));
974
    for(x=0; x<width2; x++){
975
        b[2*x    ]= temp[x   ];
976
        b[2*x + 1]= temp[x+w2];
977
    }
978
    if(width&1)
979
        b[2*x    ]= temp[x   ];
980

    
981
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
982
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
983
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
984
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
985
}
986

    
987
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
988
    int x, y;
989

    
990
    for(y=0; y<height; y++){
991
        for(x=0; x<width; x++){
992
            buffer[y*stride + x] *= SCALEX;
993
        }
994
    }
995

    
996
    for(y=0; y<height; y++){
997
        horizontal_decomposeX(buffer + y*stride, width);
998
    }
999

    
1000
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
1001
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1002
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1003
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
1004
}
1005

    
1006
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1007
    int x, y;
1008

    
1009
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1010
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1011
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1012
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1013

    
1014
    for(y=0; y<height; y++){
1015
        horizontal_composeX(buffer + y*stride, width);
1016
    }
1017

    
1018
    for(y=0; y<height; y++){
1019
        for(x=0; x<width; x++){
1020
            buffer[y*stride + x] /= SCALEX;
1021
        }
1022
    }
1023
}
1024

    
1025
static void horizontal_decompose53i(DWTELEM *b, int width){
1026
    DWTELEM temp[width];
1027
    const int width2= width>>1;
1028
    int x;
1029
    const int w2= (width+1)>>1;
1030

    
1031
    for(x=0; x<width2; x++){
1032
        temp[x   ]= b[2*x    ];
1033
        temp[x+w2]= b[2*x + 1];
1034
    }
1035
    if(width&1)
1036
        temp[x   ]= b[2*x    ];
1037
#if 0
1038
    {
1039
    int A1,A2,A3,A4;
1040
    A2= temp[1       ];
1041
    A4= temp[0       ];
1042
    A1= temp[0+width2];
1043
    A1 -= (A2 + A4)>>1;
1044
    A4 += (A1 + 1)>>1;
1045
    b[0+width2] = A1;
1046
    b[0       ] = A4;
1047
    for(x=1; x+1<width2; x+=2){
1048
        A3= temp[x+width2];
1049
        A4= temp[x+1     ];
1050
        A3 -= (A2 + A4)>>1;
1051
        A2 += (A1 + A3 + 2)>>2;
1052
        b[x+width2] = A3;
1053
        b[x       ] = A2;
1054

1055
        A1= temp[x+1+width2];
1056
        A2= temp[x+2       ];
1057
        A1 -= (A2 + A4)>>1;
1058
        A4 += (A1 + A3 + 2)>>2;
1059
        b[x+1+width2] = A1;
1060
        b[x+1       ] = A4;
1061
    }
1062
    A3= temp[width-1];
1063
    A3 -= A2;
1064
    A2 += (A1 + A3 + 2)>>2;
1065
    b[width -1] = A3;
1066
    b[width2-1] = A2;
1067
    }
1068
#else
1069
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1070
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
1071
#endif
1072
}
1073

    
1074
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1075
    int i;
1076

    
1077
    for(i=0; i<width; i++){
1078
        b1[i] -= (b0[i] + b2[i])>>1;
1079
    }
1080
}
1081

    
1082
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1083
    int i;
1084

    
1085
    for(i=0; i<width; i++){
1086
        b1[i] += (b0[i] + b2[i] + 2)>>2;
1087
    }
1088
}
1089

    
1090
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1091
    int y;
1092
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1093
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
1094

    
1095
    for(y=-2; y<height; y+=2){
1096
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1097
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1098

    
1099
{START_TIMER
1100
        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1101
        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1102
STOP_TIMER("horizontal_decompose53i")}
1103

    
1104
{START_TIMER
1105
        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1106
        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1107
STOP_TIMER("vertical_decompose53i*")}
1108

    
1109
        b0=b2;
1110
        b1=b3;
1111
    }
1112
}
1113

    
1114
#define liftS lift
1115
#define lift5 lift
1116
#if 1
1117
#define W_AM 3
1118
#define W_AO 0
1119
#define W_AS 1
1120

    
1121
#undef liftS
1122
#define W_BM 1
1123
#define W_BO 8
1124
#define W_BS 4
1125

    
1126
#define W_CM 1
1127
#define W_CO 0
1128
#define W_CS 0
1129

    
1130
#define W_DM 3
1131
#define W_DO 4
1132
#define W_DS 3
1133
#elif 0
1134
#define W_AM 55
1135
#define W_AO 16
1136
#define W_AS 5
1137

    
1138
#define W_BM 3
1139
#define W_BO 32
1140
#define W_BS 6
1141

    
1142
#define W_CM 127
1143
#define W_CO 64
1144
#define W_CS 7
1145

    
1146
#define W_DM 7
1147
#define W_DO 8
1148
#define W_DS 4
1149
#elif 0
1150
#define W_AM 97
1151
#define W_AO 32
1152
#define W_AS 6
1153

    
1154
#define W_BM 63
1155
#define W_BO 512
1156
#define W_BS 10
1157

    
1158
#define W_CM 13
1159
#define W_CO 8
1160
#define W_CS 4
1161

    
1162
#define W_DM 15
1163
#define W_DO 16
1164
#define W_DS 5
1165

    
1166
#else
1167

    
1168
#define W_AM 203
1169
#define W_AO 64
1170
#define W_AS 7
1171

    
1172
#define W_BM 217
1173
#define W_BO 2048
1174
#define W_BS 12
1175

    
1176
#define W_CM 113
1177
#define W_CO 64
1178
#define W_CS 7
1179

    
1180
#define W_DM 227
1181
#define W_DO 128
1182
#define W_DS 9
1183
#endif
1184
static void horizontal_decompose97i(DWTELEM *b, int width){
1185
    DWTELEM temp[width];
1186
    const int w2= (width+1)>>1;
1187

    
1188
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1189
    liftS(temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1190
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1191
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1192
}
1193

    
1194

    
1195
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1196
    int i;
1197

    
1198
    for(i=0; i<width; i++){
1199
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1200
    }
1201
}
1202

    
1203
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1204
    int i;
1205

    
1206
    for(i=0; i<width; i++){
1207
#ifdef lift5
1208
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1209
#else
1210
        int r= 3*(b0[i] + b2[i]);
1211
        r+= r>>4;
1212
        r+= r>>8;
1213
        b1[i] += (r+W_CO)>>W_CS;
1214
#endif
1215
    }
1216
}
1217

    
1218
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1219
    int i;
1220

    
1221
    for(i=0; i<width; i++){
1222
#ifdef liftS
1223
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1224
#else
1225
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1226
#endif
1227
    }
1228
}
1229

    
1230
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1231
    int i;
1232

    
1233
    for(i=0; i<width; i++){
1234
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1235
    }
1236
}
1237

    
1238
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1239
    int y;
1240
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1241
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1242
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1243
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1244

    
1245
    for(y=-4; y<height; y+=2){
1246
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1247
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1248

    
1249
{START_TIMER
1250
        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1251
        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1252
if(width>400){
1253
STOP_TIMER("horizontal_decompose97i")
1254
}}
1255

    
1256
{START_TIMER
1257
        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1258
        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1259
        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1260
        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1261

    
1262
if(width>400){
1263
STOP_TIMER("vertical_decompose97i")
1264
}}
1265

    
1266
        b0=b2;
1267
        b1=b3;
1268
        b2=b4;
1269
        b3=b5;
1270
    }
1271
}
1272

    
1273
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1274
    int level;
1275

    
1276
    for(level=0; level<decomposition_count; level++){
1277
        switch(type){
1278
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1279
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1280
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1281
        }
1282
    }
1283
}
1284

    
1285
static void horizontal_compose53i(DWTELEM *b, int width){
1286
    DWTELEM temp[width];
1287
    const int width2= width>>1;
1288
    const int w2= (width+1)>>1;
1289
    int x;
1290

    
1291
#if 0
1292
    int A1,A2,A3,A4;
1293
    A2= temp[1       ];
1294
    A4= temp[0       ];
1295
    A1= temp[0+width2];
1296
    A1 -= (A2 + A4)>>1;
1297
    A4 += (A1 + 1)>>1;
1298
    b[0+width2] = A1;
1299
    b[0       ] = A4;
1300
    for(x=1; x+1<width2; x+=2){
1301
        A3= temp[x+width2];
1302
        A4= temp[x+1     ];
1303
        A3 -= (A2 + A4)>>1;
1304
        A2 += (A1 + A3 + 2)>>2;
1305
        b[x+width2] = A3;
1306
        b[x       ] = A2;
1307

1308
        A1= temp[x+1+width2];
1309
        A2= temp[x+2       ];
1310
        A1 -= (A2 + A4)>>1;
1311
        A4 += (A1 + A3 + 2)>>2;
1312
        b[x+1+width2] = A1;
1313
        b[x+1       ] = A4;
1314
    }
1315
    A3= temp[width-1];
1316
    A3 -= A2;
1317
    A2 += (A1 + A3 + 2)>>2;
1318
    b[width -1] = A3;
1319
    b[width2-1] = A2;
1320
#else
1321
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1322
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1323
#endif
1324
    for(x=0; x<width2; x++){
1325
        b[2*x    ]= temp[x   ];
1326
        b[2*x + 1]= temp[x+w2];
1327
    }
1328
    if(width&1)
1329
        b[2*x    ]= temp[x   ];
1330
}
1331

    
1332
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1333
    int i;
1334

    
1335
    for(i=0; i<width; i++){
1336
        b1[i] += (b0[i] + b2[i])>>1;
1337
    }
1338
}
1339

    
1340
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1341
    int i;
1342

    
1343
    for(i=0; i<width; i++){
1344
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1345
    }
1346
}
1347

    
1348
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1349
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1350
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1351
    cs->y = -1;
1352
}
1353

    
1354
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1355
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1356
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1357
    cs->y = -1;
1358
}
1359

    
1360
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1361
    int y= cs->y;
1362

    
1363
    DWTELEM *b0= cs->b0;
1364
    DWTELEM *b1= cs->b1;
1365
    DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1366
    DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1367

    
1368
{START_TIMER
1369
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1370
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1371
STOP_TIMER("vertical_compose53i*")}
1372

    
1373
{START_TIMER
1374
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1375
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1376
STOP_TIMER("horizontal_compose53i")}
1377

    
1378
    cs->b0 = b2;
1379
    cs->b1 = b3;
1380
    cs->y += 2;
1381
}
1382

    
1383
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1384
    int y= cs->y;
1385
    DWTELEM *b0= cs->b0;
1386
    DWTELEM *b1= cs->b1;
1387
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1388
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1389

    
1390
{START_TIMER
1391
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1392
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1393
STOP_TIMER("vertical_compose53i*")}
1394

    
1395
{START_TIMER
1396
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1397
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1398
STOP_TIMER("horizontal_compose53i")}
1399

    
1400
    cs->b0 = b2;
1401
    cs->b1 = b3;
1402
    cs->y += 2;
1403
}
1404

    
1405
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1406
    dwt_compose_t cs;
1407
    spatial_compose53i_init(&cs, buffer, height, stride);
1408
    while(cs.y <= height)
1409
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1410
}
1411

    
1412

    
1413
static void horizontal_compose97i(DWTELEM *b, int width){
1414
    DWTELEM temp[width];
1415
    const int w2= (width+1)>>1;
1416

    
1417
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1418
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1419
    liftS(b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1420
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1421
}
1422

    
1423
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1424
    int i;
1425

    
1426
    for(i=0; i<width; i++){
1427
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1428
    }
1429
}
1430

    
1431
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1432
    int i;
1433

    
1434
    for(i=0; i<width; i++){
1435
#ifdef lift5
1436
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1437
#else
1438
        int r= 3*(b0[i] + b2[i]);
1439
        r+= r>>4;
1440
        r+= r>>8;
1441
        b1[i] -= (r+W_CO)>>W_CS;
1442
#endif
1443
    }
1444
}
1445

    
1446
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1447
    int i;
1448

    
1449
    for(i=0; i<width; i++){
1450
#ifdef liftS
1451
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1452
#else
1453
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1454
#endif
1455
    }
1456
}
1457

    
1458
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1459
    int i;
1460

    
1461
    for(i=0; i<width; i++){
1462
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1463
    }
1464
}
1465

    
1466
static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1467
    int i;
1468

    
1469
    for(i=0; i<width; i++){
1470
#ifndef lift5
1471
        int r;
1472
#endif
1473
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1474
#ifdef lift5
1475
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1476
#else
1477
        r= 3*(b2[i] + b4[i]);
1478
        r+= r>>4;
1479
        r+= r>>8;
1480
        b3[i] -= (r+W_CO)>>W_CS;
1481
#endif
1482
#ifdef liftS
1483
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1484
#else
1485
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1486
#endif
1487
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1488
    }
1489
}
1490

    
1491
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1492
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1493
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1494
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1495
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1496
    cs->y = -3;
1497
}
1498

    
1499
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1500
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1501
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1502
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1503
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1504
    cs->y = -3;
1505
}
1506

    
1507
static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1508
    int y = cs->y;
1509

    
1510
    DWTELEM *b0= cs->b0;
1511
    DWTELEM *b1= cs->b1;
1512
    DWTELEM *b2= cs->b2;
1513
    DWTELEM *b3= cs->b3;
1514
    DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1515
    DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1516

    
1517
{START_TIMER
1518
    if(y>0 && y+4<height){
1519
        vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1520
    }else{
1521
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1522
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1523
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1524
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1525
    }
1526
if(width>400){
1527
STOP_TIMER("vertical_compose97i")}}
1528

    
1529
{START_TIMER
1530
        if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1531
        if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
1532
if(width>400 && y+0<(unsigned)height){
1533
STOP_TIMER("horizontal_compose97i")}}
1534

    
1535
    cs->b0=b2;
1536
    cs->b1=b3;
1537
    cs->b2=b4;
1538
    cs->b3=b5;
1539
    cs->y += 2;
1540
}
1541

    
1542
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1543
    int y = cs->y;
1544
    DWTELEM *b0= cs->b0;
1545
    DWTELEM *b1= cs->b1;
1546
    DWTELEM *b2= cs->b2;
1547
    DWTELEM *b3= cs->b3;
1548
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1549
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1550

    
1551
{START_TIMER
1552
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1553
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1554
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1555
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1556
if(width>400){
1557
STOP_TIMER("vertical_compose97i")}}
1558

    
1559
{START_TIMER
1560
        if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1561
        if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
1562
if(width>400 && b0 <= b2){
1563
STOP_TIMER("horizontal_compose97i")}}
1564

    
1565
    cs->b0=b2;
1566
    cs->b1=b3;
1567
    cs->b2=b4;
1568
    cs->b3=b5;
1569
    cs->y += 2;
1570
}
1571

    
1572
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1573
    dwt_compose_t cs;
1574
    spatial_compose97i_init(&cs, buffer, height, stride);
1575
    while(cs.y <= height)
1576
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1577
}
1578

    
1579
void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1580
    int level;
1581
    for(level=decomposition_count-1; level>=0; level--){
1582
        switch(type){
1583
        case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1584
        case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1585
        /* not slicified yet */
1586
        case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1587
          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1588
        }
1589
    }
1590
}
1591

    
1592
void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1593
    int level;
1594
    for(level=decomposition_count-1; level>=0; level--){
1595
        switch(type){
1596
        case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1597
        case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1598
        /* not slicified yet */
1599
        case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1600
        }
1601
    }
1602
}
1603

    
1604
void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1605
    const int support = type==1 ? 3 : 5;
1606
    int level;
1607
    if(type==2) return;
1608

    
1609
    for(level=decomposition_count-1; level>=0; level--){
1610
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1611
            switch(type){
1612
            case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1613
                    break;
1614
            case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1615
                    break;
1616
            case 2: break;
1617
            }
1618
        }
1619
    }
1620
}
1621

    
1622
void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1623
    const int support = type==1 ? 3 : 5;
1624
    int level;
1625
    if(type==2) return;
1626

    
1627
    for(level=decomposition_count-1; level>=0; level--){
1628
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1629
            switch(type){
1630
            case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1631
                    break;
1632
            case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1633
                    break;
1634
            case 2: break;
1635
            }
1636
        }
1637
    }
1638
}
1639

    
1640
void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1641
    if(type==2){
1642
        int level;
1643
        for(level=decomposition_count-1; level>=0; level--)
1644
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
1645
    }else{
1646
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1647
        int y;
1648
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1649
        for(y=0; y<height; y+=4)
1650
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1651
    }
1652
}
1653

    
1654
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1655
    const int w= b->width;
1656
    const int h= b->height;
1657
    int x, y;
1658

    
1659
    if(1){
1660
        int run=0;
1661
        int runs[w*h];
1662
        int run_index=0;
1663
        int max_index;
1664

    
1665
        for(y=0; y<h; y++){
1666
            for(x=0; x<w; x++){
1667
                int v, p=0;
1668
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1669
                v= src[x + y*stride];
1670

    
1671
                if(y){
1672
                    t= src[x + (y-1)*stride];
1673
                    if(x){
1674
                        lt= src[x - 1 + (y-1)*stride];
1675
                    }
1676
                    if(x + 1 < w){
1677
                        rt= src[x + 1 + (y-1)*stride];
1678
                    }
1679
                }
1680
                if(x){
1681
                    l= src[x - 1 + y*stride];
1682
                    /*if(x > 1){
1683
                        if(orientation==1) ll= src[y + (x-2)*stride];
1684
                        else               ll= src[x - 2 + y*stride];
1685
                    }*/
1686
                }
1687
                if(parent){
1688
                    int px= x>>1;
1689
                    int py= y>>1;
1690
                    if(px<b->parent->width && py<b->parent->height)
1691
                        p= parent[px + py*2*stride];
1692
                }
1693
                if(!(/*ll|*/l|lt|t|rt|p)){
1694
                    if(v){
1695
                        runs[run_index++]= run;
1696
                        run=0;
1697
                    }else{
1698
                        run++;
1699
                    }
1700
                }
1701
            }
1702
        }
1703
        max_index= run_index;
1704
        runs[run_index++]= run;
1705
        run_index=0;
1706
        run= runs[run_index++];
1707

    
1708
        put_symbol2(&s->c, b->state[30], max_index, 0);
1709
        if(run_index <= max_index)
1710
            put_symbol2(&s->c, b->state[1], run, 3);
1711

    
1712
        for(y=0; y<h; y++){
1713
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1714
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1715
                return -1;
1716
            }
1717
            for(x=0; x<w; x++){
1718
                int v, p=0;
1719
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1720
                v= src[x + y*stride];
1721

    
1722
                if(y){
1723
                    t= src[x + (y-1)*stride];
1724
                    if(x){
1725
                        lt= src[x - 1 + (y-1)*stride];
1726
                    }
1727
                    if(x + 1 < w){
1728
                        rt= src[x + 1 + (y-1)*stride];
1729
                    }
1730
                }
1731
                if(x){
1732
                    l= src[x - 1 + y*stride];
1733
                    /*if(x > 1){
1734
                        if(orientation==1) ll= src[y + (x-2)*stride];
1735
                        else               ll= src[x - 2 + y*stride];
1736
                    }*/
1737
                }
1738
                if(parent){
1739
                    int px= x>>1;
1740
                    int py= y>>1;
1741
                    if(px<b->parent->width && py<b->parent->height)
1742
                        p= parent[px + py*2*stride];
1743
                }
1744
                if(/*ll|*/l|lt|t|rt|p){
1745
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1746

    
1747
                    put_rac(&s->c, &b->state[0][context], !!v);
1748
                }else{
1749
                    if(!run){
1750
                        run= runs[run_index++];
1751

    
1752
                        if(run_index <= max_index)
1753
                            put_symbol2(&s->c, b->state[1], run, 3);
1754
                        assert(v);
1755
                    }else{
1756
                        run--;
1757
                        assert(!v);
1758
                    }
1759
                }
1760
                if(v){
1761
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1762
                    int l2= 2*ABS(l) + (l<0);
1763
                    int t2= 2*ABS(t) + (t<0);
1764

    
1765
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1766
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1767
                }
1768
            }
1769
        }
1770
    }
1771
    return 0;
1772
}
1773

    
1774
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1775
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1776
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1777
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1778
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1779
}
1780

    
1781
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1782
    const int w= b->width;
1783
    const int h= b->height;
1784
    int x,y;
1785

    
1786
    if(1){
1787
        int run, runs;
1788
        x_and_coeff *xc= b->x_coeff;
1789
        x_and_coeff *prev_xc= NULL;
1790
        x_and_coeff *prev2_xc= xc;
1791
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1792
        x_and_coeff *prev_parent_xc= parent_xc;
1793

    
1794
        runs= get_symbol2(&s->c, b->state[30], 0);
1795
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1796
        else           run= INT_MAX;
1797

    
1798
        for(y=0; y<h; y++){
1799
            int v=0;
1800
            int lt=0, t=0, rt=0;
1801

    
1802
            if(y && prev_xc->x == 0){
1803
                rt= prev_xc->coeff;
1804
            }
1805
            for(x=0; x<w; x++){
1806
                int p=0;
1807
                const int l= v;
1808

    
1809
                lt= t; t= rt;
1810

    
1811
                if(y){
1812
                    if(prev_xc->x <= x)
1813
                        prev_xc++;
1814
                    if(prev_xc->x == x + 1)
1815
                        rt= prev_xc->coeff;
1816
                    else
1817
                        rt=0;
1818
                }
1819
                if(parent_xc){
1820
                    if(x>>1 > parent_xc->x){
1821
                        parent_xc++;
1822
                    }
1823
                    if(x>>1 == parent_xc->x){
1824
                        p= parent_xc->coeff;
1825
                    }
1826
                }
1827
                if(/*ll|*/l|lt|t|rt|p){
1828
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1829

    
1830
                    v=get_rac(&s->c, &b->state[0][context]);
1831
                    if(v){
1832
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1833
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1834

    
1835
                        xc->x=x;
1836
                        (xc++)->coeff= v;
1837
                    }
1838
                }else{
1839
                    if(!run){
1840
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1841
                        else           run= INT_MAX;
1842
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1843
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1844

    
1845
                        xc->x=x;
1846
                        (xc++)->coeff= v;
1847
                    }else{
1848
                        int max_run;
1849
                        run--;
1850
                        v=0;
1851

    
1852
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1853
                        else  max_run= FFMIN(run, w-x-1);
1854
                        if(parent_xc)
1855
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1856
                        x+= max_run;
1857
                        run-= max_run;
1858
                    }
1859
                }
1860
            }
1861
            (xc++)->x= w+1; //end marker
1862
            prev_xc= prev2_xc;
1863
            prev2_xc= xc;
1864

    
1865
            if(parent_xc){
1866
                if(y&1){
1867
                    while(parent_xc->x != parent->width+1)
1868
                        parent_xc++;
1869
                    parent_xc++;
1870
                    prev_parent_xc= parent_xc;
1871
                }else{
1872
                    parent_xc= prev_parent_xc;
1873
                }
1874
            }
1875
        }
1876

    
1877
        (xc++)->x= w+1; //end marker
1878
    }
1879
}
1880

    
1881
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1882
    const int w= b->width;
1883
    int y;
1884
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1885
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1886
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1887
    int new_index = 0;
1888

    
1889
    START_TIMER
1890

    
1891
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1892
        qadd= 0;
1893
        qmul= 1<<QEXPSHIFT;
1894
    }
1895

    
1896
    /* If we are on the second or later slice, restore our index. */
1897
    if (start_y != 0)
1898
        new_index = save_state[0];
1899

    
1900

    
1901
    for(y=start_y; y<h; y++){
1902
        int x = 0;
1903
        int v;
1904
        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1905
        memset(line, 0, b->width*sizeof(DWTELEM));
1906
        v = b->x_coeff[new_index].coeff;
1907
        x = b->x_coeff[new_index++].x;
1908
        while(x < w)
1909
        {
1910
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1911
            register int u= -(v&1);
1912
            line[x] = (t^u) - u;
1913

    
1914
            v = b->x_coeff[new_index].coeff;
1915
            x = b->x_coeff[new_index++].x;
1916
        }
1917
    }
1918
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1919
        STOP_TIMER("decode_subband")
1920
    }
1921

    
1922
    /* Save our variables for the next slice. */
1923
    save_state[0] = new_index;
1924

    
1925
    return;
1926
}
1927

    
1928
static void reset_contexts(SnowContext *s){
1929
    int plane_index, level, orientation;
1930

    
1931
    for(plane_index=0; plane_index<3; plane_index++){
1932
        for(level=0; level<s->spatial_decomposition_count; level++){
1933
            for(orientation=level ? 1:0; orientation<4; orientation++){
1934
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1935
            }
1936
        }
1937
    }
1938
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1939
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1940
}
1941

    
1942
static int alloc_blocks(SnowContext *s){
1943
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1944
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1945

    
1946
    s->b_width = w;
1947
    s->b_height= h;
1948

    
1949
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1950
    return 0;
1951
}
1952

    
1953
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1954
    uint8_t *bytestream= d->bytestream;
1955
    uint8_t *bytestream_start= d->bytestream_start;
1956
    *d= *s;
1957
    d->bytestream= bytestream;
1958
    d->bytestream_start= bytestream_start;
1959
}
1960

    
1961
//near copy & paste from dsputil, FIXME
1962
static int pix_sum(uint8_t * pix, int line_size, int w)
1963
{
1964
    int s, i, j;
1965

    
1966
    s = 0;
1967
    for (i = 0; i < w; i++) {
1968
        for (j = 0; j < w; j++) {
1969
            s += pix[0];
1970
            pix ++;
1971
        }
1972
        pix += line_size - w;
1973
    }
1974
    return s;
1975
}
1976

    
1977
//near copy & paste from dsputil, FIXME
1978
static int pix_norm1(uint8_t * pix, int line_size, int w)
1979
{
1980
    int s, i, j;
1981
    uint32_t *sq = squareTbl + 256;
1982

    
1983
    s = 0;
1984
    for (i = 0; i < w; i++) {
1985
        for (j = 0; j < w; j ++) {
1986
            s += sq[pix[0]];
1987
            pix ++;
1988
        }
1989
        pix += line_size - w;
1990
    }
1991
    return s;
1992
}
1993

    
1994
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1995
    const int w= s->b_width << s->block_max_depth;
1996
    const int rem_depth= s->block_max_depth - level;
1997
    const int index= (x + y*w) << rem_depth;
1998
    const int block_w= 1<<rem_depth;
1999
    BlockNode block;
2000
    int i,j;
2001

    
2002
    block.color[0]= l;
2003
    block.color[1]= cb;
2004
    block.color[2]= cr;
2005
    block.mx= mx;
2006
    block.my= my;
2007
    block.type= type;
2008
    block.level= level;
2009

    
2010
    for(j=0; j<block_w; j++){
2011
        for(i=0; i<block_w; i++){
2012
            s->block[index + i + j*w]= block;
2013
        }
2014
    }
2015
}
2016

    
2017
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2018
    const int offset[3]= {
2019
          y*c->  stride + x,
2020
        ((y*c->uvstride + x)>>1),
2021
        ((y*c->uvstride + x)>>1),
2022
    };
2023
    int i;
2024
    for(i=0; i<3; i++){
2025
        c->src[0][i]= src [i];
2026
        c->ref[0][i]= ref [i] + offset[i];
2027
    }
2028
    assert(!ref_index);
2029
}
2030

    
2031
//FIXME copy&paste
2032
#define P_LEFT P[1]
2033
#define P_TOP P[2]
2034
#define P_TOPRIGHT P[3]
2035
#define P_MEDIAN P[4]
2036
#define P_MV1 P[9]
2037
#define FLAG_QPEL   1 //must be 1
2038

    
2039
static int encode_q_branch(SnowContext *s, int level, int x, int y){
2040
    uint8_t p_buffer[1024];
2041
    uint8_t i_buffer[1024];
2042
    uint8_t p_state[sizeof(s->block_state)];
2043
    uint8_t i_state[sizeof(s->block_state)];
2044
    RangeCoder pc, ic;
2045
    uint8_t *pbbak= s->c.bytestream;
2046
    uint8_t *pbbak_start= s->c.bytestream_start;
2047
    int score, score2, iscore, i_len, p_len, block_s, sum;
2048
    const int w= s->b_width  << s->block_max_depth;
2049
    const int h= s->b_height << s->block_max_depth;
2050
    const int rem_depth= s->block_max_depth - level;
2051
    const int index= (x + y*w) << rem_depth;
2052
    const int block_w= 1<<(LOG2_MB_SIZE - level);
2053
    int trx= (x+1)<<rem_depth;
2054
    int try= (y+1)<<rem_depth;
2055
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2056
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2057
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2058
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2059
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2060
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2061
    int pl = left->color[0];
2062
    int pcb= left->color[1];
2063
    int pcr= left->color[2];
2064
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2065
    int pmy= mid_pred(left->my, top->my, tr->my);
2066
    int mx=0, my=0;
2067
    int l,cr,cb;
2068
    const int stride= s->current_picture.linesize[0];
2069
    const int uvstride= s->current_picture.linesize[1];
2070
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
2071
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2072
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2073
    int P[10][2];
2074
    int16_t last_mv[3][2];
2075
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2076
    const int shift= 1+qpel;
2077
    MotionEstContext *c= &s->m.me;
2078
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2079
    int my_context= av_log2(2*ABS(left->my - top->my));
2080
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2081

    
2082
    assert(sizeof(s->block_state) >= 256);
2083
    if(s->keyframe){
2084
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2085
        return 0;
2086
    }
2087

    
2088
//    clip predictors / edge ?
2089

    
2090
    P_LEFT[0]= left->mx;
2091
    P_LEFT[1]= left->my;
2092
    P_TOP [0]= top->mx;
2093
    P_TOP [1]= top->my;
2094
    P_TOPRIGHT[0]= tr->mx;
2095
    P_TOPRIGHT[1]= tr->my;
2096

    
2097
    last_mv[0][0]= s->block[index].mx;
2098
    last_mv[0][1]= s->block[index].my;
2099
    last_mv[1][0]= right->mx;
2100
    last_mv[1][1]= right->my;
2101
    last_mv[2][0]= bottom->mx;
2102
    last_mv[2][1]= bottom->my;
2103

    
2104
    s->m.mb_stride=2;
2105
    s->m.mb_x=
2106
    s->m.mb_y= 0;
2107
    s->m.me.skip= 0;
2108

    
2109
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2110

    
2111
    assert(s->m.me.  stride ==   stride);
2112
    assert(s->m.me.uvstride == uvstride);
2113

    
2114
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2115
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2116
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2117
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2118

    
2119
    c->xmin = - x*block_w - 16+2;
2120
    c->ymin = - y*block_w - 16+2;
2121
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2122
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2123

    
2124
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2125
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
2126
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2127
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2128
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2129
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2130
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2131

    
2132
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2133
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2134

    
2135
    if (!y) {
2136
        c->pred_x= P_LEFT[0];
2137
        c->pred_y= P_LEFT[1];
2138
    } else {
2139
        c->pred_x = P_MEDIAN[0];
2140
        c->pred_y = P_MEDIAN[1];
2141
    }
2142

    
2143
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2144
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2145

    
2146
    assert(mx >= c->xmin);
2147
    assert(mx <= c->xmax);
2148
    assert(my >= c->ymin);
2149
    assert(my <= c->ymax);
2150

    
2151
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2152
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2153
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2154

    
2155
  //  subpel search
2156
    pc= s->c;
2157
    pc.bytestream_start=
2158
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2159
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2160

    
2161
    if(level!=s->block_max_depth)
2162
        put_rac(&pc, &p_state[4 + s_context], 1);
2163
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2164
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2165
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2166
    p_len= pc.bytestream - pc.bytestream_start;
2167
    score += (s->lambda2*(p_len*8
2168
              + (pc.outstanding_count - s->c.outstanding_count)*8
2169
              + (-av_log2(pc.range)    + av_log2(s->c.range))
2170
             ))>>FF_LAMBDA_SHIFT;
2171

    
2172
    block_s= block_w*block_w;
2173
    sum = pix_sum(current_data[0], stride, block_w);
2174
    l= (sum + block_s/2)/block_s;
2175
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2176

    
2177
    block_s= block_w*block_w>>2;
2178
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
2179
    cb= (sum + block_s/2)/block_s;
2180
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2181
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
2182
    cr= (sum + block_s/2)/block_s;
2183
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2184

    
2185
    ic= s->c;
2186
    ic.bytestream_start=
2187
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2188
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2189
    if(level!=s->block_max_depth)
2190
        put_rac(&ic, &i_state[4 + s_context], 1);
2191
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2192
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2193
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2194
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2195
    i_len= ic.bytestream - ic.bytestream_start;
2196
    iscore += (s->lambda2*(i_len*8
2197
              + (ic.outstanding_count - s->c.outstanding_count)*8
2198
              + (-av_log2(ic.range)    + av_log2(s->c.range))
2199
             ))>>FF_LAMBDA_SHIFT;
2200

    
2201
//    assert(score==256*256*256*64-1);
2202
    assert(iscore < 255*255*256 + s->lambda2*10);
2203
    assert(iscore >= 0);
2204
    assert(l>=0 && l<=255);
2205
    assert(pl>=0 && pl<=255);
2206

    
2207
    if(level==0){
2208
        int varc= iscore >> 8;
2209
        int vard= score >> 8;
2210
        if (vard <= 64 || vard < varc)
2211
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2212
        else
2213
            c->scene_change_score+= s->m.qscale;
2214
    }
2215

    
2216
    if(level!=s->block_max_depth){
2217
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2218
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2219
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2220
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2221
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2222
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2223

    
2224
        if(score2 < score && score2 < iscore)
2225
            return score2;
2226
    }
2227

    
2228
    if(iscore < score){
2229
        memcpy(pbbak, i_buffer, i_len);
2230
        s->c= ic;
2231
        s->c.bytestream_start= pbbak_start;
2232
        s->c.bytestream= pbbak + i_len;
2233
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2234
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2235
        return iscore;
2236
    }else{
2237
        memcpy(pbbak, p_buffer, p_len);
2238
        s->c= pc;
2239
        s->c.bytestream_start= pbbak_start;
2240
        s->c.bytestream= pbbak + p_len;
2241
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2242
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2243
        return score;
2244
    }
2245
}
2246

    
2247
static always_inline int same_block(BlockNode *a, BlockNode *b){
2248
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2249
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2250
    }else{
2251
        return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
2252
    }
2253
}
2254

    
2255
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2256
    const int w= s->b_width  << s->block_max_depth;
2257
    const int rem_depth= s->block_max_depth - level;
2258
    const int index= (x + y*w) << rem_depth;
2259
    int trx= (x+1)<<rem_depth;
2260
    BlockNode *b= &s->block[index];
2261
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2262
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2263
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2264
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2265
    int pl = left->color[0];
2266
    int pcb= left->color[1];
2267
    int pcr= left->color[2];
2268
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2269
    int pmy= mid_pred(left->my, top->my, tr->my);
2270
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2271
    int my_context= av_log2(2*ABS(left->my - top->my));
2272
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2273

    
2274
    if(s->keyframe){
2275
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2276
        return;
2277
    }
2278

    
2279
    if(level!=s->block_max_depth){
2280
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2281
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
2282
        }else{
2283
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
2284
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2285
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2286
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2287
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2288
            return;
2289
        }
2290
    }
2291
    if(b->type & BLOCK_INTRA){
2292
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2293
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2294
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2295
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2296
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
2297
    }else{
2298
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2299
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2300
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2301
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
2302
    }
2303
}
2304

    
2305
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2306
    const int w= s->b_width << s->block_max_depth;
2307
    const int rem_depth= s->block_max_depth - level;
2308
    const int index= (x + y*w) << rem_depth;
2309
    int trx= (x+1)<<rem_depth;
2310
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2311
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2312
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2313
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2314
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2315

    
2316
    if(s->keyframe){
2317
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2318
        return;
2319
    }
2320

    
2321
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2322
        int type;
2323
        int l = left->color[0];
2324
        int cb= left->color[1];
2325
        int cr= left->color[2];
2326
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2327
        int my= mid_pred(left->my, top->my, tr->my);
2328
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2329
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2330

    
2331
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2332

    
2333
        if(type){
2334
            l += get_symbol(&s->c, &s->block_state[32], 1);
2335
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2336
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2337
        }else{
2338
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2339
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2340
        }
2341
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2342
    }else{
2343
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2344
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2345
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2346
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2347
    }
2348
}
2349

    
2350
static void encode_blocks(SnowContext *s){
2351
    int x, y;
2352
    int w= s->b_width;
2353
    int h= s->b_height;
2354

    
2355
    if(s->avctx->me_method == ME_ITER && !s->keyframe)
2356
        iterative_me(s);
2357

    
2358
    for(y=0; y<h; y++){
2359
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2360
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2361
            return;
2362
        }
2363
        for(x=0; x<w; x++){
2364
            if(s->avctx->me_method == ME_ITER)
2365
                encode_q_branch2(s, 0, x, y);
2366
            else
2367
                encode_q_branch (s, 0, x, y);
2368
        }
2369
    }
2370
}
2371

    
2372
static void decode_blocks(SnowContext *s){
2373
    int x, y;
2374
    int w= s->b_width;
2375
    int h= s->b_height;
2376

    
2377
    for(y=0; y<h; y++){
2378
        for(x=0; x<w; x++){
2379
            decode_q_branch(s, 0, x, y);
2380
        }
2381
    }
2382
}
2383

    
2384
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2385
    int x, y;
2386
START_TIMER
2387
    for(y=0; y < b_h+5; y++){
2388
        for(x=0; x < b_w; x++){
2389
            int a0= src[x    ];
2390
            int a1= src[x + 1];
2391
            int a2= src[x + 2];
2392
            int a3= src[x + 3];
2393
            int a4= src[x + 4];
2394
            int a5= src[x + 5];
2395
//            int am= 9*(a1+a2) - (a0+a3);
2396
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2397
//            int am= 18*(a2+a3) - 2*(a1+a4);
2398
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2399
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2400

    
2401
//            if(b_w==16) am= 8*(a1+a2);
2402

    
2403
            if(dx<8) am = (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2404
            else     am = (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2405

    
2406
            /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2407
            if(am&(~255)) am= ~(am>>31);
2408

    
2409
            tmp[x] = am;
2410

    
2411
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2412
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2413
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2414
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2415
        }
2416
        tmp += stride;
2417
        src += stride;
2418
    }
2419
    tmp -= (b_h+5)*stride;
2420

    
2421
    for(y=0; y < b_h; y++){
2422
        for(x=0; x < b_w; x++){
2423
            int a0= tmp[x + 0*stride];
2424
            int a1= tmp[x + 1*stride];
2425
            int a2= tmp[x + 2*stride];
2426
            int a3= tmp[x + 3*stride];
2427
            int a4= tmp[x + 4*stride];
2428
            int a5= tmp[x + 5*stride];
2429
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2430
//            int am= 18*(a2+a3) - 2*(a1+a4);
2431
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2432
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2433

    
2434
//            if(b_w==16) am= 8*(a1+a2);
2435

    
2436
            if(dy<8) am =  (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2437
            else     am = (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2438

    
2439
            if(am&(~255)) am= ~(am>>31);
2440

    
2441
            dst[x] = am;
2442
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2443
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2444
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2445
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2446
        }
2447
        dst += stride;
2448
        tmp += stride;
2449
    }
2450
STOP_TIMER("mc_block")
2451
}
2452

    
2453
#define mca(dx,dy,b_w)\
2454
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2455
    uint8_t tmp[stride*(b_w+5)];\
2456
    assert(h==b_w);\
2457
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2458
}
2459

    
2460
mca( 0, 0,16)
2461
mca( 8, 0,16)
2462
mca( 0, 8,16)
2463
mca( 8, 8,16)
2464
mca( 0, 0,8)
2465
mca( 8, 0,8)
2466
mca( 0, 8,8)
2467
mca( 8, 8,8)
2468

    
2469
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2470
    if(block->type & BLOCK_INTRA){
2471
        int x, y;
2472
        const int color = block->color[plane_index];
2473
        const int color4= color*0x01010101;
2474
        if(b_w==32){
2475
            for(y=0; y < b_h; y++){
2476
                *(uint32_t*)&dst[0 + y*stride]= color4;
2477
                *(uint32_t*)&dst[4 + y*stride]= color4;
2478
                *(uint32_t*)&dst[8 + y*stride]= color4;
2479
                *(uint32_t*)&dst[12+ y*stride]= color4;
2480
                *(uint32_t*)&dst[16+ y*stride]= color4;
2481
                *(uint32_t*)&dst[20+ y*stride]= color4;
2482
                *(uint32_t*)&dst[24+ y*stride]= color4;
2483
                *(uint32_t*)&dst[28+ y*stride]= color4;
2484
            }
2485
        }else if(b_w==16){
2486
            for(y=0; y < b_h; y++){
2487
                *(uint32_t*)&dst[0 + y*stride]= color4;
2488
                *(uint32_t*)&dst[4 + y*stride]= color4;
2489
                *(uint32_t*)&dst[8 + y*stride]= color4;
2490
                *(uint32_t*)&dst[12+ y*stride]= color4;
2491
            }
2492
        }else if(b_w==8){
2493
            for(y=0; y < b_h; y++){
2494
                *(uint32_t*)&dst[0 + y*stride]= color4;
2495
                *(uint32_t*)&dst[4 + y*stride]= color4;
2496
            }
2497
        }else if(b_w==4){
2498
            for(y=0; y < b_h; y++){
2499
                *(uint32_t*)&dst[0 + y*stride]= color4;
2500
            }
2501
        }else{
2502
            for(y=0; y < b_h; y++){
2503
                for(x=0; x < b_w; x++){
2504
                    dst[x + y*stride]= color;
2505
                }
2506
            }
2507
        }
2508
    }else{
2509
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2510
        int mx= block->mx*scale;
2511
        int my= block->my*scale;
2512
        const int dx= mx&15;
2513
        const int dy= my&15;
2514
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2515
        sx += (mx>>4) - 2;
2516
        sy += (my>>4) - 2;
2517
        src += sx + sy*stride;
2518
        if(   (unsigned)sx >= w - b_w - 4
2519
           || (unsigned)sy >= h - b_h - 4){
2520
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2521
            src= tmp + MB_SIZE;
2522
        }
2523
        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2524
        assert(!(b_w&(b_w-1)));
2525
        assert(b_w>1 && b_h>1);
2526
        assert(tab_index>=0 && tab_index<4 || b_w==32);
2527
        if((dx&3) || (dy&3))
2528
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2529
        else if(b_w==32){
2530
            int y;
2531
            for(y=0; y<b_h; y+=16){
2532
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2533
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2534
            }
2535
        }else if(b_w==b_h)
2536
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2537
        else if(b_w==2*b_h){
2538
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 2       + 2*stride,stride);
2539
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2540
        }else{
2541
            assert(2*b_w==b_h);
2542
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 2 + 2*stride           ,stride);
2543
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2544
        }
2545
    }
2546
}
2547

    
2548
//FIXME name clenup (b_w, block_w, b_width stuff)
2549
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2550
    DWTELEM * dst = NULL;
2551
    const int b_width = s->b_width  << s->block_max_depth;
2552
    const int b_height= s->b_height << s->block_max_depth;
2553
    const int b_stride= b_width;
2554
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2555
    BlockNode *rt= lt+1;
2556
    BlockNode *lb= lt+b_stride;
2557
    BlockNode *rb= lb+1;
2558
    uint8_t *block[4];
2559
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2560
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2561
    uint8_t *ptmp;
2562
    int x,y;
2563

    
2564
    if(b_x<0){
2565
        lt= rt;
2566
        lb= rb;
2567
    }else if(b_x + 1 >= b_width){
2568
        rt= lt;
2569
        rb= lb;
2570
    }
2571
    if(b_y<0){
2572
        lt= lb;
2573
        rt= rb;
2574
    }else if(b_y + 1 >= b_height){
2575
        lb= lt;
2576
        rb= rt;
2577
    }
2578

    
2579
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2580
        obmc -= src_x;
2581
        b_w += src_x;
2582
        src_x=0;
2583
    }else if(src_x + b_w > w){
2584
        b_w = w - src_x;
2585
    }
2586
    if(src_y<0){
2587
        obmc -= src_y*obmc_stride;
2588
        b_h += src_y;
2589
        src_y=0;
2590
    }else if(src_y + b_h> h){
2591
        b_h = h - src_y;
2592
    }
2593

    
2594
    if(b_w<=0 || b_h<=0) return;
2595

    
2596
assert(src_stride > 2*MB_SIZE + 5);
2597
//    old_dst += src_x + src_y*dst_stride;
2598
    dst8+= src_x + src_y*src_stride;
2599
//    src += src_x + src_y*src_stride;
2600

    
2601
    ptmp= tmp + 3*tmp_step;
2602
    block[0]= ptmp;
2603
    ptmp+=tmp_step;
2604
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2605

    
2606
    if(same_block(lt, rt)){
2607
        block[1]= block[0];
2608
    }else{
2609
        block[1]= ptmp;
2610
        ptmp+=tmp_step;
2611
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2612
    }
2613

    
2614
    if(same_block(lt, lb)){
2615
        block[2]= block[0];
2616
    }else if(same_block(rt, lb)){
2617
        block[2]= block[1];
2618
    }else{
2619
        block[2]= ptmp;
2620
        ptmp+=tmp_step;
2621
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2622
    }
2623

    
2624
    if(same_block(lt, rb) ){
2625
        block[3]= block[0];
2626
    }else if(same_block(rt, rb)){
2627
        block[3]= block[1];
2628
    }else if(same_block(lb, rb)){
2629
        block[3]= block[2];
2630
    }else{
2631
        block[3]= ptmp;
2632
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2633
    }
2634
#if 0
2635
    for(y=0; y<b_h; y++){
2636
        for(x=0; x<b_w; x++){
2637
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2638
            if(add) dst[x + y*dst_stride] += v;
2639
            else    dst[x + y*dst_stride] -= v;
2640
        }
2641
    }
2642
    for(y=0; y<b_h; y++){
2643
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2644
        for(x=0; x<b_w; x++){
2645
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2646
            if(add) dst[x + y*dst_stride] += v;
2647
            else    dst[x + y*dst_stride] -= v;
2648
        }
2649
    }
2650
    for(y=0; y<b_h; y++){
2651
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2652
        for(x=0; x<b_w; x++){
2653
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2654
            if(add) dst[x + y*dst_stride] += v;
2655
            else    dst[x + y*dst_stride] -= v;
2656
        }
2657
    }
2658
    for(y=0; y<b_h; y++){
2659
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2660
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2661
        for(x=0; x<b_w; x++){
2662
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2663
            if(add) dst[x + y*dst_stride] += v;
2664
            else    dst[x + y*dst_stride] -= v;
2665
        }
2666
    }
2667
#else
2668
{
2669

    
2670
    START_TIMER
2671

    
2672
    for(y=0; y<b_h; y++){
2673
        //FIXME ugly missue of obmc_stride
2674
        uint8_t *obmc1= obmc + y*obmc_stride;
2675
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2676
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2677
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2678
        dst = slice_buffer_get_line(sb, src_y + y);
2679
        for(x=0; x<b_w; x++){
2680
            int v=   obmc1[x] * block[3][x + y*src_stride]
2681
                    +obmc2[x] * block[2][x + y*src_stride]
2682
                    +obmc3[x] * block[1][x + y*src_stride]
2683
                    +obmc4[x] * block[0][x + y*src_stride];
2684

    
2685
            v <<= 8 - LOG2_OBMC_MAX;
2686
            if(FRAC_BITS != 8){
2687
                v += 1<<(7 - FRAC_BITS);
2688
                v >>= 8 - FRAC_BITS;
2689
            }
2690
            if(add){
2691
//                v += old_dst[x + y*dst_stride];
2692
                v += dst[x + src_x];
2693
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2694
                if(v&(~255)) v= ~(v>>31);
2695
                dst8[x + y*src_stride] = v;
2696
            }else{
2697
//                old_dst[x + y*dst_stride] -= v;
2698
                dst[x + src_x] -= v;
2699
            }
2700
        }
2701
    }
2702
        STOP_TIMER("Inner add y block")
2703
}
2704
#endif
2705
}
2706

    
2707
//FIXME name clenup (b_w, block_w, b_width stuff)
2708
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2709
    const int b_width = s->b_width  << s->block_max_depth;
2710
    const int b_height= s->b_height << s->block_max_depth;
2711
    const int b_stride= b_width;
2712
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2713
    BlockNode *rt= lt+1;
2714
    BlockNode *lb= lt+b_stride;
2715
    BlockNode *rb= lb+1;
2716
    uint8_t *block[4];
2717
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2718
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2719
    uint8_t *ptmp;
2720
    int x,y;
2721

    
2722
    if(b_x<0){
2723
        lt= rt;
2724
        lb= rb;
2725
    }else if(b_x + 1 >= b_width){
2726
        rt= lt;
2727
        rb= lb;
2728
    }
2729
    if(b_y<0){
2730
        lt= lb;
2731
        rt= rb;
2732
    }else if(b_y + 1 >= b_height){
2733
        lb= lt;
2734
        rb= rt;
2735
    }
2736

    
2737
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2738
        obmc -= src_x;
2739
        b_w += src_x;
2740
        if(!offset_dst)
2741
            dst -= src_x;
2742
        src_x=0;
2743
    }else if(src_x + b_w > w){
2744
        b_w = w - src_x;
2745
    }
2746
    if(src_y<0){
2747
        obmc -= src_y*obmc_stride;
2748
        b_h += src_y;
2749
        if(!offset_dst)
2750
            dst -= src_y*dst_stride;
2751
        src_y=0;
2752
    }else if(src_y + b_h> h){
2753
        b_h = h - src_y;
2754
    }
2755

    
2756
    if(b_w<=0 || b_h<=0) return;
2757

    
2758
assert(src_stride > 2*MB_SIZE + 5);
2759
    if(offset_dst)
2760
        dst += src_x + src_y*dst_stride;
2761
    dst8+= src_x + src_y*src_stride;
2762
//    src += src_x + src_y*src_stride;
2763

    
2764
    ptmp= tmp + 3*tmp_step;
2765
    block[0]= ptmp;
2766
    ptmp+=tmp_step;
2767
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2768

    
2769
    if(same_block(lt, rt)){
2770
        block[1]= block[0];
2771
    }else{
2772
        block[1]= ptmp;
2773
        ptmp+=tmp_step;
2774
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2775
    }
2776

    
2777
    if(same_block(lt, lb)){
2778
        block[2]= block[0];
2779
    }else if(same_block(rt, lb)){
2780
        block[2]= block[1];
2781
    }else{
2782
        block[2]= ptmp;
2783
        ptmp+=tmp_step;
2784
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2785
    }
2786

    
2787
    if(same_block(lt, rb) ){
2788
        block[3]= block[0];
2789
    }else if(same_block(rt, rb)){
2790
        block[3]= block[1];
2791
    }else if(same_block(lb, rb)){
2792
        block[3]= block[2];
2793
    }else{
2794
        block[3]= ptmp;
2795
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2796
    }
2797
#if 0
2798
    for(y=0; y<b_h; y++){
2799
        for(x=0; x<b_w; x++){
2800
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2801
            if(add) dst[x + y*dst_stride] += v;
2802
            else    dst[x + y*dst_stride] -= v;
2803
        }
2804
    }
2805
    for(y=0; y<b_h; y++){
2806
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2807
        for(x=0; x<b_w; x++){
2808
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2809
            if(add) dst[x + y*dst_stride] += v;
2810
            else    dst[x + y*dst_stride] -= v;
2811
        }
2812
    }
2813
    for(y=0; y<b_h; y++){
2814
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2815
        for(x=0; x<b_w; x++){
2816
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2817
            if(add) dst[x + y*dst_stride] += v;
2818
            else    dst[x + y*dst_stride] -= v;
2819
        }
2820
    }
2821
    for(y=0; y<b_h; y++){
2822
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2823
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2824
        for(x=0; x<b_w; x++){
2825
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2826
            if(add) dst[x + y*dst_stride] += v;
2827
            else    dst[x + y*dst_stride] -= v;
2828
        }
2829
    }
2830
#else
2831
    for(y=0; y<b_h; y++){
2832
        //FIXME ugly missue of obmc_stride
2833
        uint8_t *obmc1= obmc + y*obmc_stride;
2834
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2835
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2836
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2837
        for(x=0; x<b_w; x++){
2838
            int v=   obmc1[x] * block[3][x + y*src_stride]
2839
                    +obmc2[x] * block[2][x + y*src_stride]
2840
                    +obmc3[x] * block[1][x + y*src_stride]
2841
                    +obmc4[x] * block[0][x + y*src_stride];
2842

    
2843
            v <<= 8 - LOG2_OBMC_MAX;
2844
            if(FRAC_BITS != 8){
2845
                v += 1<<(7 - FRAC_BITS);
2846
                v >>= 8 - FRAC_BITS;
2847
            }
2848
            if(add){
2849
                v += dst[x + y*dst_stride];
2850
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2851
                if(v&(~255)) v= ~(v>>31);
2852
                dst8[x + y*src_stride] = v;
2853
            }else{
2854
                dst[x + y*dst_stride] -= v;
2855
            }
2856
        }
2857
    }
2858
#endif
2859
}
2860

    
2861
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2862
    Plane *p= &s->plane[plane_index];
2863
    const int mb_w= s->b_width  << s->block_max_depth;
2864
    const int mb_h= s->b_height << s->block_max_depth;
2865
    int x, y, mb_x;
2866
    int block_size = MB_SIZE >> s->block_max_depth;
2867
    int block_w    = plane_index ? block_size/2 : block_size;
2868
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2869
    int obmc_stride= plane_index ? block_size : 2*block_size;
2870
    int ref_stride= s->current_picture.linesize[plane_index];
2871
    uint8_t *ref  = s->last_picture.data[plane_index];
2872
    uint8_t *dst8= s->current_picture.data[plane_index];
2873
    int w= p->width;
2874
    int h= p->height;
2875
    START_TIMER
2876

    
2877
    if(s->keyframe || (s->avctx->debug&512)){
2878
        if(mb_y==mb_h)
2879
            return;
2880

    
2881
        if(add){
2882
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2883
            {
2884
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2885
                DWTELEM * line = sb->line[y];
2886
                for(x=0; x<w; x++)
2887
                {
2888
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2889
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2890
                    v >>= FRAC_BITS;
2891
                    if(v&(~255)) v= ~(v>>31);
2892
                    dst8[x + y*ref_stride]= v;
2893
                }
2894
            }
2895
        }else{
2896
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2897
            {
2898
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2899
                DWTELEM * line = sb->line[y];
2900
                for(x=0; x<w; x++)
2901
                {
2902
                    line[x] -= 128 << FRAC_BITS;
2903
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2904
                }
2905
            }
2906
        }
2907

    
2908
        return;
2909
    }
2910

    
2911
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2912
            START_TIMER
2913

    
2914
            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
2915
                       block_w*mb_x - block_w/2,
2916
                       block_w*mb_y - block_w/2,
2917
                       block_w, block_w,
2918
                       w, h,
2919
                       w, ref_stride, obmc_stride,
2920
                       mb_x - 1, mb_y - 1,
2921
                       add, plane_index);
2922

    
2923
            STOP_TIMER("add_yblock")
2924
        }
2925

    
2926
    STOP_TIMER("predict_slice")
2927
}
2928

    
2929
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2930
    Plane *p= &s->plane[plane_index];
2931
    const int mb_w= s->b_width  << s->block_max_depth;
2932
    const int mb_h= s->b_height << s->block_max_depth;
2933
    int x, y, mb_x;
2934
    int block_size = MB_SIZE >> s->block_max_depth;
2935
    int block_w    = plane_index ? block_size/2 : block_size;
2936
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2937
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2938
    int ref_stride= s->current_picture.linesize[plane_index];
2939
    uint8_t *ref  = s->last_picture.data[plane_index];
2940
    uint8_t *dst8= s->current_picture.data[plane_index];
2941
    int w= p->width;
2942
    int h= p->height;
2943
    START_TIMER
2944

    
2945
    if(s->keyframe || (s->avctx->debug&512)){
2946
        if(mb_y==mb_h)
2947
            return;
2948

    
2949
        if(add){
2950
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2951
                for(x=0; x<w; x++){
2952
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2953
                    v >>= FRAC_BITS;
2954
                    if(v&(~255)) v= ~(v>>31);
2955
                    dst8[x + y*ref_stride]= v;
2956
                }
2957
            }
2958
        }else{
2959
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2960
                for(x=0; x<w; x++){
2961
                    buf[x + y*w]-= 128<<FRAC_BITS;
2962
                }
2963
            }
2964
        }
2965

    
2966
        return;
2967
    }
2968

    
2969
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2970
            START_TIMER
2971

    
2972
            add_yblock(s, buf, dst8, ref, obmc,
2973
                       block_w*mb_x - block_w/2,
2974
                       block_w*mb_y - block_w/2,
2975
                       block_w, block_w,
2976
                       w, h,
2977
                       w, ref_stride, obmc_stride,
2978
                       mb_x - 1, mb_y - 1,
2979
                       add, 1, plane_index);
2980

    
2981
            STOP_TIMER("add_yblock")
2982
        }
2983

    
2984
    STOP_TIMER("predict_slice")
2985
}
2986

    
2987
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2988
    const int mb_h= s->b_height << s->block_max_depth;
2989
    int mb_y;
2990
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2991
        predict_slice(s, buf, plane_index, add, mb_y);
2992
}
2993

    
2994
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2995
    int i, x2, y2;
2996
    Plane *p= &s->plane[plane_index];
2997
    const int block_size = MB_SIZE >> s->block_max_depth;
2998
    const int block_w    = plane_index ? block_size/2 : block_size;
2999
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3000
    const int obmc_stride= plane_index ? block_size : 2*block_size;
3001
    const int ref_stride= s->current_picture.linesize[plane_index];
3002
    uint8_t *ref= s->   last_picture.data[plane_index];
3003
    uint8_t *src= s-> input_picture.data[plane_index];
3004
    DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3005
    const int b_stride = s->b_width << s->block_max_depth;
3006
    const int w= p->width;
3007
    const int h= p->height;
3008
    int index= mb_x + mb_y*b_stride;
3009
    BlockNode *b= &s->block[index];
3010
    BlockNode backup= *b;
3011
    int ab=0;
3012
    int aa=0;
3013

    
3014
    b->type|= BLOCK_INTRA;
3015
    b->color[plane_index]= 0;
3016
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
3017

    
3018
    for(i=0; i<4; i++){
3019
        int mb_x2= mb_x + (i &1) - 1;
3020
        int mb_y2= mb_y + (i>>1) - 1;
3021
        int x= block_w*mb_x2 + block_w/2;
3022
        int y= block_w*mb_y2 + block_w/2;
3023

    
3024
        add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc, 
3025
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
3026

    
3027
        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3028
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3029
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3030
                int obmc_v= obmc[index];
3031
                int d;
3032
                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3033
                if(x<0) obmc_v += obmc[index + block_w];
3034
                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3035
                if(x+block_w>w) obmc_v += obmc[index - block_w];
3036
                //FIXME precalc this or simplify it somehow else
3037

    
3038
                d = -dst[index] + (1<<(FRAC_BITS-1));
3039
                dst[index] = d;
3040
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
3041
                aa += obmc_v * obmc_v; //FIXME precalclate this
3042
            }
3043
        }
3044
    }
3045
    *b= backup;
3046

    
3047
    return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
3048
}
3049

    
3050
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3051
    Plane *p= &s->plane[plane_index];
3052
    const int block_size = MB_SIZE >> s->block_max_depth;
3053
    const int block_w    = plane_index ? block_size/2 : block_size;
3054
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3055
    const int obmc_stride= plane_index ? block_size : 2*block_size;
3056
    const int ref_stride= s->current_picture.linesize[plane_index];
3057
    uint8_t *ref= s->   last_picture.data[plane_index];
3058
    uint8_t *dst= s->current_picture.data[plane_index];
3059
    uint8_t *src= s->  input_picture.data[plane_index];
3060
    DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3061
    uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
3062
    uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
3063
    const int b_stride = s->b_width << s->block_max_depth;
3064
    const int b_height = s->b_height<< s->block_max_depth;
3065
    const int w= p->width;
3066
    const int h= p->height;
3067
    int distortion;
3068
    int rate= 0;
3069
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3070
    int sx= block_w*mb_x - block_w/2;
3071
    int sy= block_w*mb_y - block_w/2;
3072
    const int x0= FFMAX(0,-sx);
3073
    const int y0= FFMAX(0,-sy);
3074
    const int x1= FFMIN(block_w*2, w-sx);
3075
    const int y1= FFMIN(block_w*2, h-sy);
3076
    int i,x,y;
3077

    
3078
    pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3079

    
3080
    for(y=y0; y<y1; y++){
3081
        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3082
        const DWTELEM *pred1 = pred + y*obmc_stride;
3083
        uint8_t *cur1 = cur + y*ref_stride;
3084
        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3085
        for(x=x0; x<x1; x++){
3086
            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3087
            v = (v + pred1[x]) >> FRAC_BITS;
3088
            if(v&(~255)) v= ~(v>>31);
3089
            dst1[x] = v;
3090
        }
3091
    }
3092

    
3093
    //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
3094
    if(block_w==16){
3095
        distortion = 0;
3096
        for(i=0; i<4; i++){
3097
            int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3098
            distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3099
        }
3100
    }else{
3101
        assert(block_w==8);
3102
        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
3103
    }
3104

    
3105
    if(plane_index==0){
3106
        for(i=0; i<4; i++){
3107
/* ..RRr
3108
 * .RXx.
3109
 * rxx..
3110
 */
3111
            int x= mb_x + (i&1) - (i>>1);
3112
            int y= mb_y + (i>>1);
3113
            int index= x + y*b_stride;
3114
            BlockNode *b     = &s->block[index];
3115
            BlockNode *left  = x ? &s->block[index-1] : &null_block;
3116
            BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
3117
            BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
3118
            BlockNode *tr    = y && x+1<b_stride ? &s->block[index-b_stride+1] : tl;
3119
            int dmx, dmy;
3120
//        int mx_context= av_log2(2*ABS(left->mx - top->mx));
3121
//        int my_context= av_log2(2*ABS(left->my - top->my));
3122

    
3123
            if(x<0 || x>=b_stride || y>=b_height)
3124
                continue;
3125
            dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3126
            dmy= b->my - mid_pred(left->my, top->my, tr->my);
3127
/*
3128
1            0      0
3129
01X          1-2    1
3130
001XX        3-6    2-3
3131
0001XXX      7-14   4-7
3132
00001XXXX   15-30   8-15
3133
*/
3134
//FIXME try accurate rate
3135
//FIXME intra and inter predictors if surrounding blocks arent the same type
3136
            if(b->type & BLOCK_INTRA){
3137
                rate += 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3138
                            + av_log2(2*ABS(left->color[1] - b->color[1]))
3139
                            + av_log2(2*ABS(left->color[2] - b->color[2])));
3140
            }else
3141
                rate += 2*(1 + av_log2(2*ABS(dmx))
3142
                             + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
3143
        }
3144
    }
3145

    
3146
    return distortion + rate*penalty_factor;
3147
}
3148

    
3149
static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3150
    const int b_stride= s->b_width << s->block_max_depth;
3151
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3152
    BlockNode backup= *block;
3153
    int rd, index, value;
3154

    
3155
    assert(mb_x>=0 && mb_y>=0);
3156
    assert(mb_x<b_stride);
3157

    
3158
    if(intra){
3159
        block->color[0] = p[0];
3160
        block->color[1] = p[1];
3161
        block->color[2] = p[2];
3162
        block->type |= BLOCK_INTRA;
3163
    }else{
3164
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3165
        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
3166
        if(s->me_cache[index] == value)
3167
            return 0;
3168
        s->me_cache[index]= value;
3169

    
3170
        block->mx= p[0];
3171
        block->my= p[1];
3172
        block->type &= ~BLOCK_INTRA;
3173
    }
3174

    
3175
    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3176

    
3177
//FIXME chroma
3178
    if(rd < *best_rd){
3179
        *best_rd= rd;
3180
        return 1;
3181
    }else{
3182
        *block= backup;
3183
        return 0;
3184
    }
3185
}
3186

    
3187
/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3188
static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, const uint8_t *obmc_edged, int *best_rd){
3189
    int p[2] = {p0, p1};
3190
    return check_block(s, mb_x, mb_y, p, intra, obmc_edged, best_rd);
3191
}
3192

    
3193
static void iterative_me(SnowContext *s){
3194
    int pass, mb_x, mb_y;
3195
    const int b_width = s->b_width  << s->block_max_depth;
3196
    const int b_height= s->b_height << s->block_max_depth;
3197
    const int b_stride= b_width;
3198
    int color[3];
3199

    
3200
    for(pass=0; pass<50; pass++){
3201
        int change= 0;
3202

    
3203
        for(mb_y= 0; mb_y<b_height; mb_y++){
3204
            for(mb_x= 0; mb_x<b_width; mb_x++){
3205
                int dia_change, i, j;
3206
                int best_rd= INT_MAX;
3207
                BlockNode backup;
3208
                const int index= mb_x + mb_y * b_stride;
3209
                BlockNode *block= &s->block[index];
3210
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : &null_block;
3211
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : &null_block;
3212
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : &null_block;
3213
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : &null_block;
3214
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : &null_block;
3215
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : &null_block;
3216
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : &null_block;
3217
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : &null_block;
3218
                const int b_w= (MB_SIZE >> s->block_max_depth);
3219
                uint8_t obmc_edged[b_w*2][b_w*2];
3220

    
3221
                if(pass && (block->type & BLOCK_OPT))
3222
                    continue;
3223
                block->type |= BLOCK_OPT;
3224

    
3225
                backup= *block;
3226

    
3227
                if(!s->me_cache_generation)
3228
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3229
                s->me_cache_generation += 1<<22;
3230

    
3231
                //FIXME precalc
3232
                {
3233
                    int x, y;
3234
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3235
                    if(mb_x==0)
3236
                        for(y=0; y<b_w*2; y++)
3237
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3238
                    if(mb_x==b_stride-1)
3239
                        for(y=0; y<b_w*2; y++)
3240
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3241
                    if(mb_y==0){
3242
                        for(x=0; x<b_w*2; x++)
3243
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3244
                        for(y=1; y<b_w; y++)
3245
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3246
                    }
3247
                    if(mb_y==b_height-1){
3248
                        for(x=0; x<b_w*2; x++)
3249
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3250
                        for(y=b_w; y<b_w*2-1; y++)
3251
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3252
                    }
3253
                }
3254

    
3255
                //skip stuff outside the picture
3256
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3257
                {
3258
                    uint8_t *src= s->  input_picture.data[0];
3259
                    uint8_t *dst= s->current_picture.data[0];
3260
                    const int stride= s->current_picture.linesize[0];
3261
                    const int block_w= MB_SIZE >> s->block_max_depth;
3262
                    const int sx= block_w*mb_x - block_w/2;
3263
                    const int sy= block_w*mb_y - block_w/2;
3264
                    const int w= s->plane[0].width;
3265
                    const int h= s->plane[0].height;
3266
                    int y;
3267

    
3268
                    for(y=sy; y<0; y++)
3269
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3270
                    for(y=h; y<sy+block_w*2; y++)
3271
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3272
                    if(sx<0){
3273
                        for(y=sy; y<sy+block_w*2; y++)
3274
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3275
                    }
3276
                    if(sx+block_w*2 > w){
3277
                        for(y=sy; y<sy+block_w*2; y++)
3278
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3279
                    }
3280
                }
3281

    
3282
                // intra(black) = neighbors' contribution to the current block
3283
                for(i=0; i<3; i++)
3284
                    color[i]= get_dc(s, mb_x, mb_y, i);
3285

    
3286
                // get previous score (cant be cached due to OBMC)
3287
                check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, *obmc_edged, &best_rd);
3288
                check_block_inter(s, mb_x, mb_y, 0, 0, 0, *obmc_edged, &best_rd);
3289
                check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, *obmc_edged, &best_rd);
3290
                check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, *obmc_edged, &best_rd);
3291
                check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, *obmc_edged, &best_rd);
3292
                check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, *obmc_edged, &best_rd);
3293

    
3294
                /* fullpel ME */
3295
                //FIXME avoid subpel interpol / round to nearest integer
3296
                do{
3297
                    dia_change=0;
3298
                    for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3299
                        for(j=0; j<i; j++){
3300
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd);
3301
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
3302
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
3303
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd);
3304
                        }
3305
                    }
3306
                }while(dia_change);
3307
                /* subpel ME */
3308
                do{
3309
                    static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3310
                    dia_change=0;
3311
                    for(i=0; i<8; i++)
3312
                        dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, *obmc_edged, &best_rd);
3313
                }while(dia_change);
3314
                //FIXME or try the standard 2 pass qpel or similar
3315
#if 1
3316
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3317
                //FIXME RD style color selection
3318
#endif
3319
                if(!same_block(block, &backup)){
3320
                    if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3321
                    if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3322
                    if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3323
                    if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3324
                    if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3325
                    if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3326
                    if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3327
                    if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
3328
                    change ++;
3329
                }
3330
            }
3331
        }
3332
        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3333
        if(!change)
3334
            break;
3335
    }
3336
}
3337

    
3338
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3339
    const int level= b->level;
3340
    const int w= b->width;
3341
    const int h= b->height;
3342
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3343
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3344
    int x,y, thres1, thres2;
3345
//    START_TIMER
3346

    
3347
    if(s->qlog == LOSSLESS_QLOG) return;
3348

    
3349
    bias= bias ? 0 : (3*qmul)>>3;
3350
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3351
    thres2= 2*thres1;
3352

    
3353
    if(!bias){
3354
        for(y=0; y<h; y++){
3355
            for(x=0; x<w; x++){
3356
                int i= src[x + y*stride];
3357

    
3358
                if((unsigned)(i+thres1) > thres2){
3359
                    if(i>=0){
3360
                        i<<= QEXPSHIFT;
3361
                        i/= qmul; //FIXME optimize
3362
                        src[x + y*stride]=  i;
3363
                    }else{
3364
                        i= -i;
3365
                        i<<= QEXPSHIFT;
3366
                        i/= qmul; //FIXME optimize
3367
                        src[x + y*stride]= -i;
3368
                    }
3369
                }else
3370
                    src[x + y*stride]= 0;
3371
            }
3372
        }
3373
    }else{
3374
        for(y=0; y<h; y++){
3375
            for(x=0; x<w; x++){
3376
                int i= src[x + y*stride];
3377

    
3378
                if((unsigned)(i+thres1) > thres2){
3379
                    if(i>=0){
3380
                        i<<= QEXPSHIFT;
3381
                        i= (i + bias) / qmul; //FIXME optimize
3382
                        src[x + y*stride]=  i;
3383
                    }else{
3384
                        i= -i;
3385
                        i<<= QEXPSHIFT;
3386
                        i= (i + bias) / qmul; //FIXME optimize
3387
                        src[x + y*stride]= -i;
3388
                    }
3389
                }else
3390
                    src[x + y*stride]= 0;
3391
            }
3392
        }
3393
    }
3394
    if(level+1 == s->spatial_decomposition_count){
3395
//        STOP_TIMER("quantize")
3396
    }
3397
}
3398

    
3399
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3400
    const int w= b->width;
3401
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3402
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3403
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3404
    int x,y;
3405
    START_TIMER
3406

    
3407
    if(s->qlog == LOSSLESS_QLOG) return;
3408

    
3409
    for(y=start_y; y<end_y; y++){
3410
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3411
        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3412
        for(x=0; x<w; x++){
3413
            int i= line[x];
3414
            if(i<0){
3415
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3416
            }else if(i>0){
3417
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3418
            }
3419
        }
3420
    }
3421
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3422
        STOP_TIMER("dquant")
3423
    }
3424
}
3425

    
3426
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3427
    const int w= b->width;
3428
    const int h= b->height;
3429
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3430
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3431
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3432
    int x,y;
3433
    START_TIMER
3434

    
3435
    if(s->qlog == LOSSLESS_QLOG) return;
3436

    
3437
    for(y=0; y<h; y++){
3438
        for(x=0; x<w; x++){
3439
            int i= src[x + y*stride];
3440
            if(i<0){
3441
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3442
            }else if(i>0){
3443
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3444
            }
3445
        }
3446
    }
3447
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3448
        STOP_TIMER("dquant")
3449
    }
3450
}
3451

    
3452
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3453
    const int w= b->width;
3454
    const int h= b->height;
3455
    int x,y;
3456

    
3457
    for(y=h-1; y>=0; y--){
3458
        for(x=w-1; x>=0; x--){
3459
            int i= x + y*stride;
3460

    
3461
            if(x){
3462
                if(use_median){
3463
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3464
                    else  src[i] -= src[i - 1];
3465
                }else{
3466
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3467
                    else  src[i] -= src[i - 1];
3468
                }
3469
            }else{
3470
                if(y) src[i] -= src[i - stride];
3471
            }
3472
        }
3473
    }
3474
}
3475

    
3476
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3477
    const int w= b->width;
3478
    int x,y;
3479

    
3480
//    START_TIMER
3481

    
3482
    DWTELEM * line;
3483
    DWTELEM * prev;
3484

    
3485
    if (start_y != 0)
3486
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3487

    
3488
    for(y=start_y; y<end_y; y++){
3489
        prev = line;
3490
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3491
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3492
        for(x=0; x<w; x++){
3493
            if(x){
3494
                if(use_median){
3495
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3496
                    else  line[x] += line[x - 1];
3497
                }else{
3498
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3499
                    else  line[x] += line[x - 1];
3500
                }
3501
            }else{
3502
                if(y) line[x] += prev[x];
3503
            }
3504
        }
3505
    }
3506

    
3507
//    STOP_TIMER("correlate")
3508
}
3509

    
3510
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3511
    const int w= b->width;
3512
    const int h= b->height;
3513
    int x,y;
3514

    
3515
    for(y=0; y<h; y++){
3516
        for(x=0; x<w; x++){
3517
            int i= x + y*stride;
3518

    
3519
            if(x){
3520
                if(use_median){
3521
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3522
                    else  src[i] += src[i - 1];
3523
                }else{
3524
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3525
                    else  src[i] += src[i - 1];
3526
                }
3527
            }else{
3528
                if(y) src[i] += src[i - stride];
3529
            }
3530
        }
3531
    }
3532
}
3533

    
3534
static void encode_header(SnowContext *s){
3535
    int plane_index, level, orientation;
3536
    uint8_t kstate[32];
3537

    
3538
    memset(kstate, MID_STATE, sizeof(kstate));
3539

    
3540
    put_rac(&s->c, kstate, s->keyframe);
3541
    if(s->keyframe || s->always_reset)
3542
        reset_contexts(s);
3543
    if(s->keyframe){
3544
        put_symbol(&s->c, s->header_state, s->version, 0);
3545
        put_rac(&s->c, s->header_state, s->always_reset);
3546
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3547
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3548
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3549
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3550
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3551
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3552
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3553
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3554

    
3555
        for(plane_index=0; plane_index<2; plane_index++){
3556
            for(level=0; level<s->spatial_decomposition_count; level++){
3557
                for(orientation=level ? 1:0; orientation<4; orientation++){
3558
                    if(orientation==2) continue;
3559
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3560
                }
3561
            }
3562
        }
3563
    }
3564
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3565
    put_symbol(&s->c, s->header_state, s->qlog, 1);
3566
    put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3567
    put_symbol(&s->c, s->header_state, s->qbias, 1);
3568
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3569
}
3570

    
3571
static int decode_header(SnowContext *s){
3572
    int plane_index, level, orientation;
3573
    uint8_t kstate[32];
3574

    
3575
    memset(kstate, MID_STATE, sizeof(kstate));
3576

    
3577
    s->keyframe= get_rac(&s->c, kstate);
3578
    if(s->keyframe || s->always_reset)
3579
        reset_contexts(s);
3580
    if(s->keyframe){
3581
        s->version= get_symbol(&s->c, s->header_state, 0);
3582
        if(s->version>0){
3583
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3584
            return -1;
3585
        }
3586
        s->always_reset= get_rac(&s->c, s->header_state);
3587
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3588
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3589
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3590
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3591
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3592
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3593
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3594
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3595

    
3596
        for(plane_index=0; plane_index<3; plane_index++){
3597
            for(level=0; level<s->spatial_decomposition_count; level++){
3598
                for(orientation=level ? 1:0; orientation<4; orientation++){
3599
                    int q;
3600
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3601
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3602
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3603
                    s->plane[plane_index].band[level][orientation].qlog= q;
3604
                }
3605
            }
3606
        }
3607
    }
3608

    
3609
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3610
    if(s->spatial_decomposition_type > 2){
3611
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3612
        return -1;
3613
    }
3614

    
3615
    s->qlog= get_symbol(&s->c, s->header_state, 1);
3616
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3617
    s->qbias= get_symbol(&s->c, s->header_state, 1);
3618
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3619
    if(s->block_max_depth > 1){
3620
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3621
        s->block_max_depth= 0;
3622
        return -1;
3623
    }
3624

    
3625
    return 0;
3626
}
3627

    
3628
static void init_qexp(){
3629
    int i;
3630
    double v=128;
3631

    
3632
    for(i=0; i<QROOT; i++){
3633
        qexp[i]= lrintf(v);
3634
        v *= pow(2, 1.0 / QROOT);
3635
    }
3636
}
3637

    
3638
static int common_init(AVCodecContext *avctx){
3639
    SnowContext *s = avctx->priv_data;
3640
    int width, height;
3641
    int level, orientation, plane_index, dec;
3642

    
3643
    s->avctx= avctx;
3644

    
3645
    dsputil_init(&s->dsp, avctx);
3646

    
3647
#define mcf(dx,dy)\
3648
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3649
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3650
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3651
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3652
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3653
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3654

    
3655
    mcf( 0, 0)
3656
    mcf( 4, 0)
3657
    mcf( 8, 0)
3658
    mcf(12, 0)
3659
    mcf( 0, 4)
3660
    mcf( 4, 4)
3661
    mcf( 8, 4)
3662
    mcf(12, 4)
3663
    mcf( 0, 8)
3664
    mcf( 4, 8)
3665
    mcf( 8, 8)
3666
    mcf(12, 8)
3667
    mcf( 0,12)
3668
    mcf( 4,12)
3669
    mcf( 8,12)
3670
    mcf(12,12)
3671

    
3672
#define mcfh(dx,dy)\
3673
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3674
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3675
        mc_block_hpel ## dx ## dy ## 16;\
3676
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3677
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3678
        mc_block_hpel ## dx ## dy ## 8;
3679

    
3680
    mcfh(0, 0)
3681
    mcfh(8, 0)
3682
    mcfh(0, 8)
3683
    mcfh(8, 8)
3684

    
3685
    if(!qexp[0])
3686
        init_qexp();
3687

    
3688
    dec= s->spatial_decomposition_count= 5;
3689
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3690

    
3691
    s->chroma_h_shift= 1; //FIXME XXX
3692
    s->chroma_v_shift= 1;
3693

    
3694
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3695

    
3696
    width= s->avctx->width;
3697
    height= s->avctx->height;
3698

    
3699
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3700

    
3701
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3702
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3703

    
3704
    for(plane_index=0; plane_index<3; plane_index++){
3705
        int w= s->avctx->width;
3706
        int h= s->avctx->height;
3707

    
3708
        if(plane_index){
3709
            w>>= s->chroma_h_shift;
3710
            h>>= s->chroma_v_shift;
3711
        }
3712
        s->plane[plane_index].width = w;
3713
        s->plane[plane_index].height= h;
3714
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3715
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3716
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3717
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3718

    
3719
                b->buf= s->spatial_dwt_buffer;
3720
                b->level= level;
3721
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3722
                b->width = (w + !(orientation&1))>>1;
3723
                b->height= (h + !(orientation>1))>>1;
3724

    
3725
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3726
                b->buf_x_offset = 0;
3727
                b->buf_y_offset = 0;
3728

    
3729
                if(orientation&1){
3730
                    b->buf += (w+1)>>1;
3731
                    b->buf_x_offset = (w+1)>>1;
3732
                }
3733
                if(orientation>1){
3734
                    b->buf += b->stride>>1;
3735
                    b->buf_y_offset = b->stride_line >> 1;
3736
                }
3737

    
3738
                if(level)
3739
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3740
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3741
            }
3742
            w= (w+1)>>1;
3743
            h= (h+1)>>1;
3744
        }
3745
    }
3746

    
3747
    reset_contexts(s);
3748
/*
3749
    width= s->width= avctx->width;
3750
    height= s->height= avctx->height;
3751

3752
    assert(width && height);
3753
*/
3754
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3755

    
3756
    return 0;
3757
}
3758

    
3759

    
3760
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3761
    int width = p->width;
3762
    int height= p->height;
3763
    int level, orientation, x, y;
3764

    
3765
    for(level=0; level<s->spatial_decomposition_count; level++){
3766
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3767
            SubBand *b= &p->band[level][orientation];
3768
            DWTELEM *buf= b->buf;
3769
            int64_t error=0;
3770

    
3771
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3772
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
3773
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3774
            for(y=0; y<height; y++){
3775
                for(x=0; x<width; x++){
3776
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
3777
                    error += d*d;
3778
                }
3779
            }
3780

    
3781
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3782
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3783
        }
3784
    }
3785
}
3786

    
3787
static int encode_init(AVCodecContext *avctx)
3788
{
3789
    SnowContext *s = avctx->priv_data;
3790
    int plane_index;
3791

    
3792
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3793
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3794
               "use vstrict=-2 / -strict -2 to use it anyway\n");
3795
        return -1;
3796
    }
3797

    
3798
    common_init(avctx);
3799
    alloc_blocks(s);
3800

    
3801
    s->version=0;
3802

    
3803
    s->m.avctx   = avctx;
3804
    s->m.flags   = avctx->flags;
3805
    s->m.bit_rate= avctx->bit_rate;
3806

    
3807
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3808
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3809
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3810
    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3811
    h263_encode_init(&s->m); //mv_penalty
3812

    
3813
    if(avctx->flags&CODEC_FLAG_PASS1){
3814
        if(!avctx->stats_out)
3815
            avctx->stats_out = av_mallocz(256);
3816
    }
3817
    if(avctx->flags&CODEC_FLAG_PASS2){
3818
        if(ff_rate_control_init(&s->m) < 0)
3819
            return -1;
3820
    }
3821

    
3822
    for(plane_index=0; plane_index<3; plane_index++){
3823
        calculate_vissual_weight(s, &s->plane[plane_index]);
3824
    }
3825

    
3826

    
3827
    avctx->coded_frame= &s->current_picture;
3828
    switch(avctx->pix_fmt){
3829
//    case PIX_FMT_YUV444P:
3830
//    case PIX_FMT_YUV422P:
3831
    case PIX_FMT_YUV420P:
3832
    case PIX_FMT_GRAY8:
3833
//    case PIX_FMT_YUV411P:
3834
//    case PIX_FMT_YUV410P:
3835
        s->colorspace_type= 0;
3836
        break;
3837
/*    case PIX_FMT_RGBA32:
3838
        s->colorspace= 1;
3839
        break;*/
3840
    default:
3841
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3842
        return -1;
3843
    }
3844
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3845
    s->chroma_h_shift= 1;
3846
    s->chroma_v_shift= 1;
3847

    
3848
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3849
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3850

    
3851
    s->avctx->get_buffer(s->avctx, &s->input_picture);
3852

    
3853
    return 0;
3854
}
3855

    
3856
static int frame_start(SnowContext *s){
3857
   AVFrame tmp;
3858
   int w= s->avctx->width; //FIXME round up to x16 ?
3859
   int h= s->avctx->height;
3860

    
3861
    if(s->current_picture.data[0]){
3862
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
3863
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3864
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3865
    }
3866

    
3867
    tmp= s->last_picture;
3868
    s->last_picture= s->current_picture;
3869
    s->current_picture= tmp;
3870

    
3871
    s->current_picture.reference= 1;
3872
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3873
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3874
        return -1;
3875
    }
3876

    
3877
    return 0;
3878
}
3879

    
3880
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3881
    SnowContext *s = avctx->priv_data;
3882
    RangeCoder * const c= &s->c;
3883
    AVFrame *pict = data;
3884
    const int width= s->avctx->width;
3885
    const int height= s->avctx->height;
3886
    int level, orientation, plane_index, i, y;
3887

    
3888
    ff_init_range_encoder(c, buf, buf_size);
3889
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3890

    
3891
    for(i=0; i<3; i++){
3892
        int shift= !!i;
3893
        for(y=0; y<(height>>shift); y++)
3894
            memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3895
                   &pict->data[i][y * pict->linesize[i]],
3896
                   width>>shift);
3897
    }
3898
    s->new_picture = *pict;
3899

    
3900
    if(avctx->flags&CODEC_FLAG_PASS2){
3901
        s->m.pict_type =
3902
        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3903
        s->keyframe= pict->pict_type==FF_I_TYPE;
3904
        s->m.picture_number= avctx->frame_number;
3905
        pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3906
    }else{
3907
        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3908
        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3909
    }
3910

    
3911
    if(pict->quality){
3912
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3913
        //<64 >60
3914
        s->qlog += 61*QROOT/8;
3915
    }else{
3916
        s->qlog= LOSSLESS_QLOG;
3917
    }
3918

    
3919
    frame_start(s);
3920
    s->current_picture.key_frame= s->keyframe;
3921

    
3922
    s->m.current_picture_ptr= &s->m.current_picture;
3923
    if(pict->pict_type == P_TYPE){
3924
        int block_width = (width +15)>>4;
3925
        int block_height= (height+15)>>4;
3926
        int stride= s->current_picture.linesize[0];
3927

    
3928
        assert(s->current_picture.data[0]);
3929
        assert(s->last_picture.data[0]);
3930

    
3931
        s->m.avctx= s->avctx;
3932
        s->m.current_picture.data[0]= s->current_picture.data[0];
3933
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
3934
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3935
        s->m.   last_picture_ptr= &s->m.   last_picture;
3936
        s->m.linesize=
3937
        s->m.   last_picture.linesize[0]=
3938
        s->m.    new_picture.linesize[0]=
3939
        s->m.current_picture.linesize[0]= stride;
3940
        s->m.uvlinesize= s->current_picture.linesize[1];
3941
        s->m.width = width;
3942
        s->m.height= height;
3943
        s->m.mb_width = block_width;
3944
        s->m.mb_height= block_height;
3945
        s->m.mb_stride=   s->m.mb_width+1;
3946
        s->m.b8_stride= 2*s->m.mb_width+1;
3947
        s->m.f_code=1;
3948
        s->m.pict_type= pict->pict_type;
3949
        s->m.me_method= s->avctx->me_method;
3950
        s->m.me.scene_change_score=0;
3951
        s->m.flags= s->avctx->flags;
3952
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3953
        s->m.out_format= FMT_H263;
3954
        s->m.unrestricted_mv= 1;
3955

    
3956
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3957
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3958
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3959

    
3960
        s->m.dsp= s->dsp; //move
3961
        ff_init_me(&s->m);
3962
        s->dsp= s->m.dsp;
3963
    }
3964

    
3965
redo_frame:
3966

    
3967
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3968

    
3969
    encode_header(s);
3970
    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3971
    encode_blocks(s);
3972
    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3973

    
3974
    for(plane_index=0; plane_index<3; plane_index++){
3975
        Plane *p= &s->plane[plane_index];
3976
        int w= p->width;
3977
        int h= p->height;
3978
        int x, y;
3979
//        int bits= put_bits_count(&s->c.pb);
3980

    
3981
        //FIXME optimize
3982
     if(pict->data[plane_index]) //FIXME gray hack
3983
        for(y=0; y<h; y++){
3984
            for(x=0; x<w; x++){
3985
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3986
            }
3987
        }
3988
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3989

    
3990
        if(   plane_index==0
3991
           && pict->pict_type == P_TYPE
3992
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3993
            ff_init_range_encoder(c, buf, buf_size);
3994
            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3995
            pict->pict_type= FF_I_TYPE;
3996
            s->keyframe=1;
3997
            reset_contexts(s);
3998
            goto redo_frame;
3999
        }
4000

    
4001
        if(s->qlog == LOSSLESS_QLOG){
4002
            for(y=0; y<h; y++){
4003
                for(x=0; x<w; x++){
4004
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4005
                }
4006
            }
4007
        }
4008

    
4009
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4010

    
4011
        for(level=0; level<s->spatial_decomposition_count; level++){
4012
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
4013
                SubBand *b= &p->band[level][orientation];
4014

    
4015
                quantize(s, b, b->buf, b->stride, s->qbias);
4016
                if(orientation==0)
4017
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4018
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4019
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
4020
                if(orientation==0)
4021
                    correlate(s, b, b->buf, b->stride, 1, 0);
4022
            }
4023
        }
4024
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
4025

    
4026
        for(level=0; level<s->spatial_decomposition_count; level++){
4027
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
4028
                SubBand *b= &p->band[level][orientation];
4029

    
4030
                dequantize(s, b, b->buf, b->stride);
4031
            }
4032
        }
4033

    
4034
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4035
        if(s->qlog == LOSSLESS_QLOG){
4036
            for(y=0; y<h; y++){
4037
                for(x=0; x<w; x++){
4038
                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4039
                }
4040
            }
4041
        }
4042
{START_TIMER
4043
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4044
STOP_TIMER("pred-conv")}
4045
        if(s->avctx->flags&CODEC_FLAG_PSNR){
4046
            int64_t error= 0;
4047

    
4048
    if(pict->data[plane_index]) //FIXME gray hack
4049
            for(y=0; y<h; y++){
4050
                for(x=0; x<w; x++){
4051
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4052
                    error += d*d;
4053
                }
4054
            }
4055
            s->avctx->error[plane_index] += error;
4056
            s->current_picture.error[plane_index] = error;
4057
        }
4058
    }
4059

    
4060
    if(s->last_picture.data[0])
4061
        avctx->release_buffer(avctx, &s->last_picture);
4062

    
4063
    s->current_picture.coded_picture_number = avctx->frame_number;
4064
    s->current_picture.pict_type = pict->pict_type;
4065
    s->current_picture.quality = pict->quality;
4066
    if(avctx->flags&CODEC_FLAG_PASS1){
4067
        s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
4068
        s->m.current_picture.display_picture_number =
4069
        s->m.current_picture.coded_picture_number = avctx->frame_number;
4070
        s->m.pict_type = pict->pict_type;
4071
        s->m.current_picture.quality = pict->quality;
4072
        ff_write_pass1_stats(&s->m);
4073
    }
4074
    if(avctx->flags&CODEC_FLAG_PASS2){
4075
        s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4076
    }
4077

    
4078
    emms_c();
4079

    
4080
    return ff_rac_terminate(c);
4081
}
4082

    
4083
static void common_end(SnowContext *s){
4084
    int plane_index, level, orientation;
4085

    
4086
    av_freep(&s->spatial_dwt_buffer);
4087

    
4088
    av_freep(&s->m.me.scrat