Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 48d1b9a1

History | View | Annotate | Download (159 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22

    
23
#include "rangecoder.h"
24
#define MID_STATE 128
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QSHIFT 5
35
#define QROOT (1<<QSHIFT)
36
#define LOSSLESS_QLOG -128
37
#define FRAC_BITS 8
38

    
39
static const int8_t quant3[256]={
40
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56
};
57
static const int8_t quant3b[256]={
58
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74
};
75
static const int8_t quant3bA[256]={
76
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92
};
93
static const int8_t quant5[256]={
94
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110
};
111
static const int8_t quant7[256]={
112
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128
};
129
static const int8_t quant9[256]={
130
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146
};
147
static const int8_t quant11[256]={
148
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164
};
165
static const int8_t quant13[256]={
166
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182
};
183

    
184
#define LOG2_OBMC_MAX 6
185
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
186
#if 0 //64*cubic
187
static const uint8_t obmc32[1024]={
188
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220
//error:0.000022
221
};
222
static const uint8_t obmc16[256]={
223
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239
//error:0.000033
240
};
241
#elif 1 // 64*linear
242
static const uint8_t obmc32[1024]={
243
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275
 //error:0.000020
276
};
277
static const uint8_t obmc16[256]={
278
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294
//error:0.000015
295
};
296
#else //64*cos
297
static const uint8_t obmc32[1024]={
298
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330
//error:0.000022
331
};
332
static const uint8_t obmc16[256]={
333
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349
//error:0.000022
350
};
351
#endif
352

    
353
//linear *64
354
static const uint8_t obmc8[64]={
355
 1, 3, 5, 7, 7, 5, 3, 1,
356
 3, 9,15,21,21,15, 9, 3,
357
 5,15,25,35,35,25,15, 5,
358
 7,21,35,49,49,35,21, 7,
359
 7,21,35,49,49,35,21, 7,
360
 5,15,25,35,35,25,15, 5,
361
 3, 9,15,21,21,15, 9, 3,
362
 1, 3, 5, 7, 7, 5, 3, 1,
363
//error:0.000000
364
};
365

    
366
//linear *64
367
static const uint8_t obmc4[16]={
368
 4,12,12, 4,
369
12,36,36,12,
370
12,36,36,12,
371
 4,12,12, 4,
372
//error:0.000000
373
};
374

    
375
static const uint8_t *obmc_tab[4]={
376
    obmc32, obmc16, obmc8, obmc4
377
};
378

    
379
typedef struct BlockNode{
380
    int16_t mx;
381
    int16_t my;
382
    uint8_t color[3];
383
    uint8_t type;
384
//#define TYPE_SPLIT    1
385
#define BLOCK_INTRA   1
386
#define BLOCK_OPT     2
387
//#define TYPE_NOCOLOR  4
388
    uint8_t level; //FIXME merge into type?
389
}BlockNode;
390

    
391
static const BlockNode null_block= { //FIXME add border maybe
392
    .color= {128,128,128},
393
    .mx= 0,
394
    .my= 0,
395
    .type= 0,
396
    .level= 0,
397
};
398

    
399
#define LOG2_MB_SIZE 4
400
#define MB_SIZE (1<<LOG2_MB_SIZE)
401

    
402
typedef struct x_and_coeff{
403
    int16_t x;
404
    uint16_t coeff;
405
} x_and_coeff;
406

    
407
typedef struct SubBand{
408
    int level;
409
    int stride;
410
    int width;
411
    int height;
412
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
413
    DWTELEM *buf;
414
    int buf_x_offset;
415
    int buf_y_offset;
416
    int stride_line; ///< Stride measured in lines, not pixels.
417
    x_and_coeff * x_coeff;
418
    struct SubBand *parent;
419
    uint8_t state[/*7*2*/ 7 + 512][32];
420
}SubBand;
421

    
422
typedef struct Plane{
423
    int width;
424
    int height;
425
    SubBand band[MAX_DECOMPOSITIONS][4];
426
}Plane;
427

    
428
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
429
typedef struct {
430
    DWTELEM * * line; ///< For use by idwt and predict_slices.
431
    DWTELEM * * data_stack; ///< Used for internal purposes.
432
    int data_stack_top;
433
    int line_count;
434
    int line_width;
435
    int data_count;
436
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
437
} slice_buffer;
438

    
439
typedef struct SnowContext{
440
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
441

    
442
    AVCodecContext *avctx;
443
    RangeCoder c;
444
    DSPContext dsp;
445
    AVFrame new_picture;
446
    AVFrame input_picture;              ///< new_picture with the internal linesizes
447
    AVFrame current_picture;
448
    AVFrame last_picture;
449
    AVFrame mconly_picture;
450
//     uint8_t q_context[16];
451
    uint8_t header_state[32];
452
    uint8_t block_state[128 + 32*128];
453
    int keyframe;
454
    int always_reset;
455
    int version;
456
    int spatial_decomposition_type;
457
    int temporal_decomposition_type;
458
    int spatial_decomposition_count;
459
    int temporal_decomposition_count;
460
    DWTELEM *spatial_dwt_buffer;
461
    int colorspace_type;
462
    int chroma_h_shift;
463
    int chroma_v_shift;
464
    int spatial_scalability;
465
    int qlog;
466
    int lambda;
467
    int lambda2;
468
    int mv_scale;
469
    int qbias;
470
#define QBIAS_SHIFT 3
471
    int b_width;
472
    int b_height;
473
    int block_max_depth;
474
    Plane plane[MAX_PLANES];
475
    BlockNode *block;
476
#define ME_CACHE_SIZE 1024
477
    int me_cache[ME_CACHE_SIZE];
478
    int me_cache_generation;
479
    slice_buffer sb;
480

    
481
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
482
}SnowContext;
483

    
484
typedef struct {
485
    DWTELEM *b0;
486
    DWTELEM *b1;
487
    DWTELEM *b2;
488
    DWTELEM *b3;
489
    int y;
490
} dwt_compose_t;
491

    
492
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
493
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
494

    
495
static void iterative_me(SnowContext *s);
496

    
497
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
498
{
499
    int i;
500

    
501
    buf->base_buffer = base_buffer;
502
    buf->line_count = line_count;
503
    buf->line_width = line_width;
504
    buf->data_count = max_allocated_lines;
505
    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
506
    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
507

    
508
    for (i = 0; i < max_allocated_lines; i++)
509
    {
510
      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
511
    }
512

    
513
    buf->data_stack_top = max_allocated_lines - 1;
514
}
515

    
516
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
517
{
518
    int offset;
519
    DWTELEM * buffer;
520

    
521
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
522

    
523
    assert(buf->data_stack_top >= 0);
524
//  assert(!buf->line[line]);
525
    if (buf->line[line])
526
        return buf->line[line];
527

    
528
    offset = buf->line_width * line;
529
    buffer = buf->data_stack[buf->data_stack_top];
530
    buf->data_stack_top--;
531
    buf->line[line] = buffer;
532

    
533
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
534

    
535
    return buffer;
536
}
537

    
538
static void slice_buffer_release(slice_buffer * buf, int line)
539
{
540
    int offset;
541
    DWTELEM * buffer;
542

    
543
    assert(line >= 0 && line < buf->line_count);
544
    assert(buf->line[line]);
545

    
546
    offset = buf->line_width * line;
547
    buffer = buf->line[line];
548
    buf->data_stack_top++;
549
    buf->data_stack[buf->data_stack_top] = buffer;
550
    buf->line[line] = NULL;
551

    
552
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
553
}
554

    
555
static void slice_buffer_flush(slice_buffer * buf)
556
{
557
    int i;
558
    for (i = 0; i < buf->line_count; i++)
559
    {
560
        if (buf->line[i])
561
        {
562
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
563
            slice_buffer_release(buf, i);
564
        }
565
    }
566
}
567

    
568
static void slice_buffer_destroy(slice_buffer * buf)
569
{
570
    int i;
571
    slice_buffer_flush(buf);
572

    
573
    for (i = buf->data_count - 1; i >= 0; i--)
574
    {
575
        assert(buf->data_stack[i]);
576
        av_free(buf->data_stack[i]);
577
    }
578
    assert(buf->data_stack);
579
    av_free(buf->data_stack);
580
    assert(buf->line);
581
    av_free(buf->line);
582
}
583

    
584
#ifdef __sgi
585
// Avoid a name clash on SGI IRIX
586
#undef qexp
587
#endif
588
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
589
static uint8_t qexp[QROOT];
590

    
591
static inline int mirror(int v, int m){
592
    while((unsigned)v > (unsigned)m){
593
        v=-v;
594
        if(v<0) v+= 2*m;
595
    }
596
    return v;
597
}
598

    
599
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
600
    int i;
601

    
602
    if(v){
603
        const int a= ABS(v);
604
        const int e= av_log2(a);
605
#if 1
606
        const int el= FFMIN(e, 10);
607
        put_rac(c, state+0, 0);
608

    
609
        for(i=0; i<el; i++){
610
            put_rac(c, state+1+i, 1);  //1..10
611
        }
612
        for(; i<e; i++){
613
            put_rac(c, state+1+9, 1);  //1..10
614
        }
615
        put_rac(c, state+1+FFMIN(i,9), 0);
616

    
617
        for(i=e-1; i>=el; i--){
618
            put_rac(c, state+22+9, (a>>i)&1); //22..31
619
        }
620
        for(; i>=0; i--){
621
            put_rac(c, state+22+i, (a>>i)&1); //22..31
622
        }
623

    
624
        if(is_signed)
625
            put_rac(c, state+11 + el, v < 0); //11..21
626
#else
627

    
628
        put_rac(c, state+0, 0);
629
        if(e<=9){
630
            for(i=0; i<e; i++){
631
                put_rac(c, state+1+i, 1);  //1..10
632
            }
633
            put_rac(c, state+1+i, 0);
634

    
635
            for(i=e-1; i>=0; i--){
636
                put_rac(c, state+22+i, (a>>i)&1); //22..31
637
            }
638

    
639
            if(is_signed)
640
                put_rac(c, state+11 + e, v < 0); //11..21
641
        }else{
642
            for(i=0; i<e; i++){
643
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
644
            }
645
            put_rac(c, state+1+FFMIN(i,9), 0);
646

    
647
            for(i=e-1; i>=0; i--){
648
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
649
            }
650

    
651
            if(is_signed)
652
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
653
        }
654
#endif
655
    }else{
656
        put_rac(c, state+0, 1);
657
    }
658
}
659

    
660
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
661
    if(get_rac(c, state+0))
662
        return 0;
663
    else{
664
        int i, e, a;
665
        e= 0;
666
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
667
            e++;
668
        }
669

    
670
        a= 1;
671
        for(i=e-1; i>=0; i--){
672
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
673
        }
674

    
675
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
676
            return -a;
677
        else
678
            return a;
679
    }
680
}
681

    
682
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
683
    int i;
684
    int r= log2>=0 ? 1<<log2 : 1;
685

    
686
    assert(v>=0);
687
    assert(log2>=-4);
688

    
689
    while(v >= r){
690
        put_rac(c, state+4+log2, 1);
691
        v -= r;
692
        log2++;
693
        if(log2>0) r+=r;
694
    }
695
    put_rac(c, state+4+log2, 0);
696

    
697
    for(i=log2-1; i>=0; i--){
698
        put_rac(c, state+31-i, (v>>i)&1);
699
    }
700
}
701

    
702
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
703
    int i;
704
    int r= log2>=0 ? 1<<log2 : 1;
705
    int v=0;
706

    
707
    assert(log2>=-4);
708

    
709
    while(get_rac(c, state+4+log2)){
710
        v+= r;
711
        log2++;
712
        if(log2>0) r+=r;
713
    }
714

    
715
    for(i=log2-1; i>=0; i--){
716
        v+= get_rac(c, state+31-i)<<i;
717
    }
718

    
719
    return v;
720
}
721

    
722
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
723
    const int mirror_left= !highpass;
724
    const int mirror_right= (width&1) ^ highpass;
725
    const int w= (width>>1) - 1 + (highpass & width);
726
    int i;
727

    
728
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
729
    if(mirror_left){
730
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
731
        dst += dst_step;
732
        src += src_step;
733
    }
734

    
735
    for(i=0; i<w; i++){
736
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
737
    }
738

    
739
    if(mirror_right){
740
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
741
    }
742
}
743

    
744
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
745
    const int mirror_left= !highpass;
746
    const int mirror_right= (width&1) ^ highpass;
747
    const int w= (width>>1) - 1 + (highpass & width);
748
    int i;
749

    
750
    if(mirror_left){
751
        int r= 3*2*ref[0];
752
        r += r>>4;
753
        r += r>>8;
754
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
755
        dst += dst_step;
756
        src += src_step;
757
    }
758

    
759
    for(i=0; i<w; i++){
760
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
761
        r += r>>4;
762
        r += r>>8;
763
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
764
    }
765

    
766
    if(mirror_right){
767
        int r= 3*2*ref[w*ref_step];
768
        r += r>>4;
769
        r += r>>8;
770
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
771
    }
772
}
773

    
774
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
775
    const int mirror_left= !highpass;
776
    const int mirror_right= (width&1) ^ highpass;
777
    const int w= (width>>1) - 1 + (highpass & width);
778
    int i;
779

    
780
    assert(shift == 4);
781
#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
782
    if(mirror_left){
783
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
784
        dst += dst_step;
785
        src += src_step;
786
    }
787

    
788
    for(i=0; i<w; i++){
789
        dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
790
    }
791

    
792
    if(mirror_right){
793
        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
794
    }
795
}
796

    
797

    
798
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
799
    int x, i;
800

    
801
    for(x=start; x<width; x+=2){
802
        int64_t sum=0;
803

    
804
        for(i=0; i<n; i++){
805
            int x2= x + 2*i - n + 1;
806
            if     (x2<     0) x2= -x2;
807
            else if(x2>=width) x2= 2*width-x2-2;
808
            sum += coeffs[i]*(int64_t)dst[x2];
809
        }
810
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
811
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
812
    }
813
}
814

    
815
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
816
    int x, y, i;
817
    for(y=start; y<height; y+=2){
818
        for(x=0; x<width; x++){
819
            int64_t sum=0;
820

    
821
            for(i=0; i<n; i++){
822
                int y2= y + 2*i - n + 1;
823
                if     (y2<      0) y2= -y2;
824
                else if(y2>=height) y2= 2*height-y2-2;
825
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
826
            }
827
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
828
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
829
        }
830
    }
831
}
832

    
833
#define SCALEX 1
834
#define LX0 0
835
#define LX1 1
836

    
837
#if 0 // more accurate 9/7
838
#define N1 2
839
#define SHIFT1 14
840
#define COEFFS1 (int[]){-25987,-25987}
841
#define N2 2
842
#define SHIFT2 19
843
#define COEFFS2 (int[]){-27777,-27777}
844
#define N3 2
845
#define SHIFT3 15
846
#define COEFFS3 (int[]){28931,28931}
847
#define N4 2
848
#define SHIFT4 15
849
#define COEFFS4 (int[]){14533,14533}
850
#elif 1 // 13/7 CRF
851
#define N1 4
852
#define SHIFT1 4
853
#define COEFFS1 (int[]){1,-9,-9,1}
854
#define N2 4
855
#define SHIFT2 4
856
#define COEFFS2 (int[]){-1,5,5,-1}
857
#define N3 0
858
#define SHIFT3 1
859
#define COEFFS3 NULL
860
#define N4 0
861
#define SHIFT4 1
862
#define COEFFS4 NULL
863
#elif 1 // 3/5
864
#define LX0 1
865
#define LX1 0
866
#define SCALEX 0.5
867
#define N1 2
868
#define SHIFT1 1
869
#define COEFFS1 (int[]){1,1}
870
#define N2 2
871
#define SHIFT2 2
872
#define COEFFS2 (int[]){-1,-1}
873
#define N3 0
874
#define SHIFT3 0
875
#define COEFFS3 NULL
876
#define N4 0
877
#define SHIFT4 0
878
#define COEFFS4 NULL
879
#elif 1 // 11/5
880
#define N1 0
881
#define SHIFT1 1
882
#define COEFFS1 NULL
883
#define N2 2
884
#define SHIFT2 2
885
#define COEFFS2 (int[]){-1,-1}
886
#define N3 2
887
#define SHIFT3 0
888
#define COEFFS3 (int[]){-1,-1}
889
#define N4 4
890
#define SHIFT4 7
891
#define COEFFS4 (int[]){-5,29,29,-5}
892
#define SCALEX 4
893
#elif 1 // 9/7 CDF
894
#define N1 2
895
#define SHIFT1 7
896
#define COEFFS1 (int[]){-203,-203}
897
#define N2 2
898
#define SHIFT2 12
899
#define COEFFS2 (int[]){-217,-217}
900
#define N3 2
901
#define SHIFT3 7
902
#define COEFFS3 (int[]){113,113}
903
#define N4 2
904
#define SHIFT4 9
905
#define COEFFS4 (int[]){227,227}
906
#define SCALEX 1
907
#elif 1 // 7/5 CDF
908
#define N1 0
909
#define SHIFT1 1
910
#define COEFFS1 NULL
911
#define N2 2
912
#define SHIFT2 2
913
#define COEFFS2 (int[]){-1,-1}
914
#define N3 2
915
#define SHIFT3 0
916
#define COEFFS3 (int[]){-1,-1}
917
#define N4 2
918
#define SHIFT4 4
919
#define COEFFS4 (int[]){3,3}
920
#elif 1 // 9/7 MN
921
#define N1 4
922
#define SHIFT1 4
923
#define COEFFS1 (int[]){1,-9,-9,1}
924
#define N2 2
925
#define SHIFT2 2
926
#define COEFFS2 (int[]){1,1}
927
#define N3 0
928
#define SHIFT3 1
929
#define COEFFS3 NULL
930
#define N4 0
931
#define SHIFT4 1
932
#define COEFFS4 NULL
933
#else // 13/7 CRF
934
#define N1 4
935
#define SHIFT1 4
936
#define COEFFS1 (int[]){1,-9,-9,1}
937
#define N2 4
938
#define SHIFT2 4
939
#define COEFFS2 (int[]){-1,5,5,-1}
940
#define N3 0
941
#define SHIFT3 1
942
#define COEFFS3 NULL
943
#define N4 0
944
#define SHIFT4 1
945
#define COEFFS4 NULL
946
#endif
947
static void horizontal_decomposeX(DWTELEM *b, int width){
948
    DWTELEM temp[width];
949
    const int width2= width>>1;
950
    const int w2= (width+1)>>1;
951
    int x;
952

    
953
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
954
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
955
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
956
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
957

    
958
    for(x=0; x<width2; x++){
959
        temp[x   ]= b[2*x    ];
960
        temp[x+w2]= b[2*x + 1];
961
    }
962
    if(width&1)
963
        temp[x   ]= b[2*x    ];
964
    memcpy(b, temp, width*sizeof(int));
965
}
966

    
967
static void horizontal_composeX(DWTELEM *b, int width){
968
    DWTELEM temp[width];
969
    const int width2= width>>1;
970
    int x;
971
    const int w2= (width+1)>>1;
972

    
973
    memcpy(temp, b, width*sizeof(int));
974
    for(x=0; x<width2; x++){
975
        b[2*x    ]= temp[x   ];
976
        b[2*x + 1]= temp[x+w2];
977
    }
978
    if(width&1)
979
        b[2*x    ]= temp[x   ];
980

    
981
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
982
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
983
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
984
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
985
}
986

    
987
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
988
    int x, y;
989

    
990
    for(y=0; y<height; y++){
991
        for(x=0; x<width; x++){
992
            buffer[y*stride + x] *= SCALEX;
993
        }
994
    }
995

    
996
    for(y=0; y<height; y++){
997
        horizontal_decomposeX(buffer + y*stride, width);
998
    }
999

    
1000
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
1001
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1002
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1003
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
1004
}
1005

    
1006
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1007
    int x, y;
1008

    
1009
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1010
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1011
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1012
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1013

    
1014
    for(y=0; y<height; y++){
1015
        horizontal_composeX(buffer + y*stride, width);
1016
    }
1017

    
1018
    for(y=0; y<height; y++){
1019
        for(x=0; x<width; x++){
1020
            buffer[y*stride + x] /= SCALEX;
1021
        }
1022
    }
1023
}
1024

    
1025
static void horizontal_decompose53i(DWTELEM *b, int width){
1026
    DWTELEM temp[width];
1027
    const int width2= width>>1;
1028
    int x;
1029
    const int w2= (width+1)>>1;
1030

    
1031
    for(x=0; x<width2; x++){
1032
        temp[x   ]= b[2*x    ];
1033
        temp[x+w2]= b[2*x + 1];
1034
    }
1035
    if(width&1)
1036
        temp[x   ]= b[2*x    ];
1037
#if 0
1038
    {
1039
    int A1,A2,A3,A4;
1040
    A2= temp[1       ];
1041
    A4= temp[0       ];
1042
    A1= temp[0+width2];
1043
    A1 -= (A2 + A4)>>1;
1044
    A4 += (A1 + 1)>>1;
1045
    b[0+width2] = A1;
1046
    b[0       ] = A4;
1047
    for(x=1; x+1<width2; x+=2){
1048
        A3= temp[x+width2];
1049
        A4= temp[x+1     ];
1050
        A3 -= (A2 + A4)>>1;
1051
        A2 += (A1 + A3 + 2)>>2;
1052
        b[x+width2] = A3;
1053
        b[x       ] = A2;
1054

1055
        A1= temp[x+1+width2];
1056
        A2= temp[x+2       ];
1057
        A1 -= (A2 + A4)>>1;
1058
        A4 += (A1 + A3 + 2)>>2;
1059
        b[x+1+width2] = A1;
1060
        b[x+1       ] = A4;
1061
    }
1062
    A3= temp[width-1];
1063
    A3 -= A2;
1064
    A2 += (A1 + A3 + 2)>>2;
1065
    b[width -1] = A3;
1066
    b[width2-1] = A2;
1067
    }
1068
#else
1069
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1070
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
1071
#endif
1072
}
1073

    
1074
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1075
    int i;
1076

    
1077
    for(i=0; i<width; i++){
1078
        b1[i] -= (b0[i] + b2[i])>>1;
1079
    }
1080
}
1081

    
1082
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1083
    int i;
1084

    
1085
    for(i=0; i<width; i++){
1086
        b1[i] += (b0[i] + b2[i] + 2)>>2;
1087
    }
1088
}
1089

    
1090
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1091
    int y;
1092
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1093
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
1094

    
1095
    for(y=-2; y<height; y+=2){
1096
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1097
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1098

    
1099
{START_TIMER
1100
        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1101
        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1102
STOP_TIMER("horizontal_decompose53i")}
1103

    
1104
{START_TIMER
1105
        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1106
        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1107
STOP_TIMER("vertical_decompose53i*")}
1108

    
1109
        b0=b2;
1110
        b1=b3;
1111
    }
1112
}
1113

    
1114
#define liftS lift
1115
#define lift5 lift
1116
#if 1
1117
#define W_AM 3
1118
#define W_AO 0
1119
#define W_AS 1
1120

    
1121
#undef liftS
1122
#define W_BM 1
1123
#define W_BO 8
1124
#define W_BS 4
1125

    
1126
#define W_CM 1
1127
#define W_CO 0
1128
#define W_CS 0
1129

    
1130
#define W_DM 3
1131
#define W_DO 4
1132
#define W_DS 3
1133
#elif 0
1134
#define W_AM 55
1135
#define W_AO 16
1136
#define W_AS 5
1137

    
1138
#define W_BM 3
1139
#define W_BO 32
1140
#define W_BS 6
1141

    
1142
#define W_CM 127
1143
#define W_CO 64
1144
#define W_CS 7
1145

    
1146
#define W_DM 7
1147
#define W_DO 8
1148
#define W_DS 4
1149
#elif 0
1150
#define W_AM 97
1151
#define W_AO 32
1152
#define W_AS 6
1153

    
1154
#define W_BM 63
1155
#define W_BO 512
1156
#define W_BS 10
1157

    
1158
#define W_CM 13
1159
#define W_CO 8
1160
#define W_CS 4
1161

    
1162
#define W_DM 15
1163
#define W_DO 16
1164
#define W_DS 5
1165

    
1166
#else
1167

    
1168
#define W_AM 203
1169
#define W_AO 64
1170
#define W_AS 7
1171

    
1172
#define W_BM 217
1173
#define W_BO 2048
1174
#define W_BS 12
1175

    
1176
#define W_CM 113
1177
#define W_CO 64
1178
#define W_CS 7
1179

    
1180
#define W_DM 227
1181
#define W_DO 128
1182
#define W_DS 9
1183
#endif
1184
static void horizontal_decompose97i(DWTELEM *b, int width){
1185
    DWTELEM temp[width];
1186
    const int w2= (width+1)>>1;
1187

    
1188
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1189
    liftS(temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1190
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1191
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1192
}
1193

    
1194

    
1195
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1196
    int i;
1197

    
1198
    for(i=0; i<width; i++){
1199
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1200
    }
1201
}
1202

    
1203
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1204
    int i;
1205

    
1206
    for(i=0; i<width; i++){
1207
#ifdef lift5
1208
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1209
#else
1210
        int r= 3*(b0[i] + b2[i]);
1211
        r+= r>>4;
1212
        r+= r>>8;
1213
        b1[i] += (r+W_CO)>>W_CS;
1214
#endif
1215
    }
1216
}
1217

    
1218
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1219
    int i;
1220

    
1221
    for(i=0; i<width; i++){
1222
#ifdef liftS
1223
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1224
#else
1225
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1226
#endif
1227
    }
1228
}
1229

    
1230
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1231
    int i;
1232

    
1233
    for(i=0; i<width; i++){
1234
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1235
    }
1236
}
1237

    
1238
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1239
    int y;
1240
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1241
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1242
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1243
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1244

    
1245
    for(y=-4; y<height; y+=2){
1246
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1247
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1248

    
1249
{START_TIMER
1250
        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1251
        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1252
if(width>400){
1253
STOP_TIMER("horizontal_decompose97i")
1254
}}
1255

    
1256
{START_TIMER
1257
        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1258
        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1259
        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1260
        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1261

    
1262
if(width>400){
1263
STOP_TIMER("vertical_decompose97i")
1264
}}
1265

    
1266
        b0=b2;
1267
        b1=b3;
1268
        b2=b4;
1269
        b3=b5;
1270
    }
1271
}
1272

    
1273
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1274
    int level;
1275

    
1276
    for(level=0; level<decomposition_count; level++){
1277
        switch(type){
1278
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1279
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1280
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1281
        }
1282
    }
1283
}
1284

    
1285
static void horizontal_compose53i(DWTELEM *b, int width){
1286
    DWTELEM temp[width];
1287
    const int width2= width>>1;
1288
    const int w2= (width+1)>>1;
1289
    int x;
1290

    
1291
#if 0
1292
    int A1,A2,A3,A4;
1293
    A2= temp[1       ];
1294
    A4= temp[0       ];
1295
    A1= temp[0+width2];
1296
    A1 -= (A2 + A4)>>1;
1297
    A4 += (A1 + 1)>>1;
1298
    b[0+width2] = A1;
1299
    b[0       ] = A4;
1300
    for(x=1; x+1<width2; x+=2){
1301
        A3= temp[x+width2];
1302
        A4= temp[x+1     ];
1303
        A3 -= (A2 + A4)>>1;
1304
        A2 += (A1 + A3 + 2)>>2;
1305
        b[x+width2] = A3;
1306
        b[x       ] = A2;
1307

1308
        A1= temp[x+1+width2];
1309
        A2= temp[x+2       ];
1310
        A1 -= (A2 + A4)>>1;
1311
        A4 += (A1 + A3 + 2)>>2;
1312
        b[x+1+width2] = A1;
1313
        b[x+1       ] = A4;
1314
    }
1315
    A3= temp[width-1];
1316
    A3 -= A2;
1317
    A2 += (A1 + A3 + 2)>>2;
1318
    b[width -1] = A3;
1319
    b[width2-1] = A2;
1320
#else
1321
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1322
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1323
#endif
1324
    for(x=0; x<width2; x++){
1325
        b[2*x    ]= temp[x   ];
1326
        b[2*x + 1]= temp[x+w2];
1327
    }
1328
    if(width&1)
1329
        b[2*x    ]= temp[x   ];
1330
}
1331

    
1332
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1333
    int i;
1334

    
1335
    for(i=0; i<width; i++){
1336
        b1[i] += (b0[i] + b2[i])>>1;
1337
    }
1338
}
1339

    
1340
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1341
    int i;
1342

    
1343
    for(i=0; i<width; i++){
1344
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1345
    }
1346
}
1347

    
1348
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1349
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1350
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1351
    cs->y = -1;
1352
}
1353

    
1354
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1355
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1356
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1357
    cs->y = -1;
1358
}
1359

    
1360
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1361
    int y= cs->y;
1362

    
1363
    DWTELEM *b0= cs->b0;
1364
    DWTELEM *b1= cs->b1;
1365
    DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1366
    DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1367

    
1368
{START_TIMER
1369
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1370
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1371
STOP_TIMER("vertical_compose53i*")}
1372

    
1373
{START_TIMER
1374
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1375
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1376
STOP_TIMER("horizontal_compose53i")}
1377

    
1378
    cs->b0 = b2;
1379
    cs->b1 = b3;
1380
    cs->y += 2;
1381
}
1382

    
1383
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1384
    int y= cs->y;
1385
    DWTELEM *b0= cs->b0;
1386
    DWTELEM *b1= cs->b1;
1387
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1388
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1389

    
1390
{START_TIMER
1391
        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1392
        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1393
STOP_TIMER("vertical_compose53i*")}
1394

    
1395
{START_TIMER
1396
        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1397
        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1398
STOP_TIMER("horizontal_compose53i")}
1399

    
1400
    cs->b0 = b2;
1401
    cs->b1 = b3;
1402
    cs->y += 2;
1403
}
1404

    
1405
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1406
    dwt_compose_t cs;
1407
    spatial_compose53i_init(&cs, buffer, height, stride);
1408
    while(cs.y <= height)
1409
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1410
}
1411

    
1412

    
1413
static void horizontal_compose97i(DWTELEM *b, int width){
1414
    DWTELEM temp[width];
1415
    const int w2= (width+1)>>1;
1416

    
1417
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1418
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1419
    liftS(b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1420
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1421
}
1422

    
1423
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1424
    int i;
1425

    
1426
    for(i=0; i<width; i++){
1427
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1428
    }
1429
}
1430

    
1431
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1432
    int i;
1433

    
1434
    for(i=0; i<width; i++){
1435
#ifdef lift5
1436
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1437
#else
1438
        int r= 3*(b0[i] + b2[i]);
1439
        r+= r>>4;
1440
        r+= r>>8;
1441
        b1[i] -= (r+W_CO)>>W_CS;
1442
#endif
1443
    }
1444
}
1445

    
1446
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1447
    int i;
1448

    
1449
    for(i=0; i<width; i++){
1450
#ifdef liftS
1451
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1452
#else
1453
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1454
#endif
1455
    }
1456
}
1457

    
1458
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1459
    int i;
1460

    
1461
    for(i=0; i<width; i++){
1462
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1463
    }
1464
}
1465

    
1466
static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1467
    int i;
1468

    
1469
    for(i=0; i<width; i++){
1470
#ifndef lift5
1471
        int r;
1472
#endif
1473
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1474
#ifdef lift5
1475
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1476
#else
1477
        r= 3*(b2[i] + b4[i]);
1478
        r+= r>>4;
1479
        r+= r>>8;
1480
        b3[i] -= (r+W_CO)>>W_CS;
1481
#endif
1482
#ifdef liftS
1483
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1484
#else
1485
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1486
#endif
1487
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1488
    }
1489
}
1490

    
1491
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1492
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1493
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1494
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1495
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1496
    cs->y = -3;
1497
}
1498

    
1499
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1500
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1501
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1502
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1503
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1504
    cs->y = -3;
1505
}
1506

    
1507
static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1508
    int y = cs->y;
1509

    
1510
    DWTELEM *b0= cs->b0;
1511
    DWTELEM *b1= cs->b1;
1512
    DWTELEM *b2= cs->b2;
1513
    DWTELEM *b3= cs->b3;
1514
    DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1515
    DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1516

    
1517
{START_TIMER
1518
    if(y>0 && y+4<height){
1519
        vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1520
    }else{
1521
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1522
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1523
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1524
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1525
    }
1526
if(width>400){
1527
STOP_TIMER("vertical_compose97i")}}
1528

    
1529
{START_TIMER
1530
        if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1531
        if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
1532
if(width>400 && y+0<(unsigned)height){
1533
STOP_TIMER("horizontal_compose97i")}}
1534

    
1535
    cs->b0=b2;
1536
    cs->b1=b3;
1537
    cs->b2=b4;
1538
    cs->b3=b5;
1539
    cs->y += 2;
1540
}
1541

    
1542
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1543
    int y = cs->y;
1544
    DWTELEM *b0= cs->b0;
1545
    DWTELEM *b1= cs->b1;
1546
    DWTELEM *b2= cs->b2;
1547
    DWTELEM *b3= cs->b3;
1548
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1549
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1550

    
1551
{START_TIMER
1552
        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1553
        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1554
        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1555
        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1556
if(width>400){
1557
STOP_TIMER("vertical_compose97i")}}
1558

    
1559
{START_TIMER
1560
        if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1561
        if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
1562
if(width>400 && b0 <= b2){
1563
STOP_TIMER("horizontal_compose97i")}}
1564

    
1565
    cs->b0=b2;
1566
    cs->b1=b3;
1567
    cs->b2=b4;
1568
    cs->b3=b5;
1569
    cs->y += 2;
1570
}
1571

    
1572
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1573
    dwt_compose_t cs;
1574
    spatial_compose97i_init(&cs, buffer, height, stride);
1575
    while(cs.y <= height)
1576
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1577
}
1578

    
1579
void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1580
    int level;
1581
    for(level=decomposition_count-1; level>=0; level--){
1582
        switch(type){
1583
        case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1584
        case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1585
        /* not slicified yet */
1586
        case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1587
          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1588
        }
1589
    }
1590
}
1591

    
1592
void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1593
    int level;
1594
    for(level=decomposition_count-1; level>=0; level--){
1595
        switch(type){
1596
        case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1597
        case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1598
        /* not slicified yet */
1599
        case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1600
        }
1601
    }
1602
}
1603

    
1604
void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1605
    const int support = type==1 ? 3 : 5;
1606
    int level;
1607
    if(type==2) return;
1608

    
1609
    for(level=decomposition_count-1; level>=0; level--){
1610
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1611
            switch(type){
1612
            case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1613
                    break;
1614
            case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1615
                    break;
1616
            case 2: break;
1617
            }
1618
        }
1619
    }
1620
}
1621

    
1622
void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1623
    const int support = type==1 ? 3 : 5;
1624
    int level;
1625
    if(type==2) return;
1626

    
1627
    for(level=decomposition_count-1; level>=0; level--){
1628
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1629
            switch(type){
1630
            case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1631
                    break;
1632
            case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1633
                    break;
1634
            case 2: break;
1635
            }
1636
        }
1637
    }
1638
}
1639

    
1640
void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1641
    if(type==2){
1642
        int level;
1643
        for(level=decomposition_count-1; level>=0; level--)
1644
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
1645
    }else{
1646
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1647
        int y;
1648
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1649
        for(y=0; y<height; y+=4)
1650
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1651
    }
1652
}
1653

    
1654
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1655
    const int w= b->width;
1656
    const int h= b->height;
1657
    int x, y;
1658

    
1659
    if(1){
1660
        int run=0;
1661
        int runs[w*h];
1662
        int run_index=0;
1663
        int max_index;
1664

    
1665
        for(y=0; y<h; y++){
1666
            for(x=0; x<w; x++){
1667
                int v, p=0;
1668
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1669
                v= src[x + y*stride];
1670

    
1671
                if(y){
1672
                    t= src[x + (y-1)*stride];
1673
                    if(x){
1674
                        lt= src[x - 1 + (y-1)*stride];
1675
                    }
1676
                    if(x + 1 < w){
1677
                        rt= src[x + 1 + (y-1)*stride];
1678
                    }
1679
                }
1680
                if(x){
1681
                    l= src[x - 1 + y*stride];
1682
                    /*if(x > 1){
1683
                        if(orientation==1) ll= src[y + (x-2)*stride];
1684
                        else               ll= src[x - 2 + y*stride];
1685
                    }*/
1686
                }
1687
                if(parent){
1688
                    int px= x>>1;
1689
                    int py= y>>1;
1690
                    if(px<b->parent->width && py<b->parent->height)
1691
                        p= parent[px + py*2*stride];
1692
                }
1693
                if(!(/*ll|*/l|lt|t|rt|p)){
1694
                    if(v){
1695
                        runs[run_index++]= run;
1696
                        run=0;
1697
                    }else{
1698
                        run++;
1699
                    }
1700
                }
1701
            }
1702
        }
1703
        max_index= run_index;
1704
        runs[run_index++]= run;
1705
        run_index=0;
1706
        run= runs[run_index++];
1707

    
1708
        put_symbol2(&s->c, b->state[30], max_index, 0);
1709
        if(run_index <= max_index)
1710
            put_symbol2(&s->c, b->state[1], run, 3);
1711

    
1712
        for(y=0; y<h; y++){
1713
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1714
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1715
                return -1;
1716
            }
1717
            for(x=0; x<w; x++){
1718
                int v, p=0;
1719
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1720
                v= src[x + y*stride];
1721

    
1722
                if(y){
1723
                    t= src[x + (y-1)*stride];
1724
                    if(x){
1725
                        lt= src[x - 1 + (y-1)*stride];
1726
                    }
1727
                    if(x + 1 < w){
1728
                        rt= src[x + 1 + (y-1)*stride];
1729
                    }
1730
                }
1731
                if(x){
1732
                    l= src[x - 1 + y*stride];
1733
                    /*if(x > 1){
1734
                        if(orientation==1) ll= src[y + (x-2)*stride];
1735
                        else               ll= src[x - 2 + y*stride];
1736
                    }*/
1737
                }
1738
                if(parent){
1739
                    int px= x>>1;
1740
                    int py= y>>1;
1741
                    if(px<b->parent->width && py<b->parent->height)
1742
                        p= parent[px + py*2*stride];
1743
                }
1744
                if(/*ll|*/l|lt|t|rt|p){
1745
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1746

    
1747
                    put_rac(&s->c, &b->state[0][context], !!v);
1748
                }else{
1749
                    if(!run){
1750
                        run= runs[run_index++];
1751

    
1752
                        if(run_index <= max_index)
1753
                            put_symbol2(&s->c, b->state[1], run, 3);
1754
                        assert(v);
1755
                    }else{
1756
                        run--;
1757
                        assert(!v);
1758
                    }
1759
                }
1760
                if(v){
1761
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1762
                    int l2= 2*ABS(l) + (l<0);
1763
                    int t2= 2*ABS(t) + (t<0);
1764

    
1765
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1766
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1767
                }
1768
            }
1769
        }
1770
    }
1771
    return 0;
1772
}
1773

    
1774
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1775
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1776
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1777
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1778
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1779
}
1780

    
1781
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1782
    const int w= b->width;
1783
    const int h= b->height;
1784
    int x,y;
1785

    
1786
    if(1){
1787
        int run, runs;
1788
        x_and_coeff *xc= b->x_coeff;
1789
        x_and_coeff *prev_xc= NULL;
1790
        x_and_coeff *prev2_xc= xc;
1791
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1792
        x_and_coeff *prev_parent_xc= parent_xc;
1793

    
1794
        runs= get_symbol2(&s->c, b->state[30], 0);
1795
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1796
        else           run= INT_MAX;
1797

    
1798
        for(y=0; y<h; y++){
1799
            int v=0;
1800
            int lt=0, t=0, rt=0;
1801

    
1802
            if(y && prev_xc->x == 0){
1803
                rt= prev_xc->coeff;
1804
            }
1805
            for(x=0; x<w; x++){
1806
                int p=0;
1807
                const int l= v;
1808

    
1809
                lt= t; t= rt;
1810

    
1811
                if(y){
1812
                    if(prev_xc->x <= x)
1813
                        prev_xc++;
1814
                    if(prev_xc->x == x + 1)
1815
                        rt= prev_xc->coeff;
1816
                    else
1817
                        rt=0;
1818
                }
1819
                if(parent_xc){
1820
                    if(x>>1 > parent_xc->x){
1821
                        parent_xc++;
1822
                    }
1823
                    if(x>>1 == parent_xc->x){
1824
                        p= parent_xc->coeff;
1825
                    }
1826
                }
1827
                if(/*ll|*/l|lt|t|rt|p){
1828
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1829

    
1830
                    v=get_rac(&s->c, &b->state[0][context]);
1831
                    if(v){
1832
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1833
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1834

    
1835
                        xc->x=x;
1836
                        (xc++)->coeff= v;
1837
                    }
1838
                }else{
1839
                    if(!run){
1840
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1841
                        else           run= INT_MAX;
1842
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1843
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1844

    
1845
                        xc->x=x;
1846
                        (xc++)->coeff= v;
1847
                    }else{
1848
                        int max_run;
1849
                        run--;
1850
                        v=0;
1851

    
1852
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1853
                        else  max_run= FFMIN(run, w-x-1);
1854
                        if(parent_xc)
1855
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1856
                        x+= max_run;
1857
                        run-= max_run;
1858
                    }
1859
                }
1860
            }
1861
            (xc++)->x= w+1; //end marker
1862
            prev_xc= prev2_xc;
1863
            prev2_xc= xc;
1864

    
1865
            if(parent_xc){
1866
                if(y&1){
1867
                    while(parent_xc->x != parent->width+1)
1868
                        parent_xc++;
1869
                    parent_xc++;
1870
                    prev_parent_xc= parent_xc;
1871
                }else{
1872
                    parent_xc= prev_parent_xc;
1873
                }
1874
            }
1875
        }
1876

    
1877
        (xc++)->x= w+1; //end marker
1878
    }
1879
}
1880

    
1881
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1882
    const int w= b->width;
1883
    int y;
1884
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1885
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1886
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1887
    int new_index = 0;
1888

    
1889
    START_TIMER
1890

    
1891
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1892
        qadd= 0;
1893
        qmul= 1<<QEXPSHIFT;
1894
    }
1895

    
1896
    /* If we are on the second or later slice, restore our index. */
1897
    if (start_y != 0)
1898
        new_index = save_state[0];
1899

    
1900

    
1901
    for(y=start_y; y<h; y++){
1902
        int x = 0;
1903
        int v;
1904
        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1905
        memset(line, 0, b->width*sizeof(DWTELEM));
1906
        v = b->x_coeff[new_index].coeff;
1907
        x = b->x_coeff[new_index++].x;
1908
        while(x < w)
1909
        {
1910
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1911
            register int u= -(v&1);
1912
            line[x] = (t^u) - u;
1913

    
1914
            v = b->x_coeff[new_index].coeff;
1915
            x = b->x_coeff[new_index++].x;
1916
        }
1917
    }
1918
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1919
        STOP_TIMER("decode_subband")
1920
    }
1921

    
1922
    /* Save our variables for the next slice. */
1923
    save_state[0] = new_index;
1924

    
1925
    return;
1926
}
1927

    
1928
static void reset_contexts(SnowContext *s){
1929
    int plane_index, level, orientation;
1930

    
1931
    for(plane_index=0; plane_index<3; plane_index++){
1932
        for(level=0; level<s->spatial_decomposition_count; level++){
1933
            for(orientation=level ? 1:0; orientation<4; orientation++){
1934
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1935
            }
1936
        }
1937
    }
1938
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1939
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1940
}
1941

    
1942
static int alloc_blocks(SnowContext *s){
1943
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1944
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1945

    
1946
    s->b_width = w;
1947
    s->b_height= h;
1948

    
1949
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1950
    return 0;
1951
}
1952

    
1953
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1954
    uint8_t *bytestream= d->bytestream;
1955
    uint8_t *bytestream_start= d->bytestream_start;
1956
    *d= *s;
1957
    d->bytestream= bytestream;
1958
    d->bytestream_start= bytestream_start;
1959
}
1960

    
1961
//near copy & paste from dsputil, FIXME
1962
static int pix_sum(uint8_t * pix, int line_size, int w)
1963
{
1964
    int s, i, j;
1965

    
1966
    s = 0;
1967
    for (i = 0; i < w; i++) {
1968
        for (j = 0; j < w; j++) {
1969
            s += pix[0];
1970
            pix ++;
1971
        }
1972
        pix += line_size - w;
1973
    }
1974
    return s;
1975
}
1976

    
1977
//near copy & paste from dsputil, FIXME
1978
static int pix_norm1(uint8_t * pix, int line_size, int w)
1979
{
1980
    int s, i, j;
1981
    uint32_t *sq = squareTbl + 256;
1982

    
1983
    s = 0;
1984
    for (i = 0; i < w; i++) {
1985
        for (j = 0; j < w; j ++) {
1986
            s += sq[pix[0]];
1987
            pix ++;
1988
        }
1989
        pix += line_size - w;
1990
    }
1991
    return s;
1992
}
1993

    
1994
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1995
    const int w= s->b_width << s->block_max_depth;
1996
    const int rem_depth= s->block_max_depth - level;
1997
    const int index= (x + y*w) << rem_depth;
1998
    const int block_w= 1<<rem_depth;
1999
    BlockNode block;
2000
    int i,j;
2001

    
2002
    block.color[0]= l;
2003
    block.color[1]= cb;
2004
    block.color[2]= cr;
2005
    block.mx= mx;
2006
    block.my= my;
2007
    block.type= type;
2008
    block.level= level;
2009

    
2010
    for(j=0; j<block_w; j++){
2011
        for(i=0; i<block_w; i++){
2012
            s->block[index + i + j*w]= block;
2013
        }
2014
    }
2015
}
2016

    
2017
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2018
    const int offset[3]= {
2019
          y*c->  stride + x,
2020
        ((y*c->uvstride + x)>>1),
2021
        ((y*c->uvstride + x)>>1),
2022
    };
2023
    int i;
2024
    for(i=0; i<3; i++){
2025
        c->src[0][i]= src [i];
2026
        c->ref[0][i]= ref [i] + offset[i];
2027
    }
2028
    assert(!ref_index);
2029
}
2030

    
2031
//FIXME copy&paste
2032
#define P_LEFT P[1]
2033
#define P_TOP P[2]
2034
#define P_TOPRIGHT P[3]
2035
#define P_MEDIAN P[4]
2036
#define P_MV1 P[9]
2037
#define FLAG_QPEL   1 //must be 1
2038

    
2039
static int encode_q_branch(SnowContext *s, int level, int x, int y){
2040
    uint8_t p_buffer[1024];
2041
    uint8_t i_buffer[1024];
2042
    uint8_t p_state[sizeof(s->block_state)];
2043
    uint8_t i_state[sizeof(s->block_state)];
2044
    RangeCoder pc, ic;
2045
    uint8_t *pbbak= s->c.bytestream;
2046
    uint8_t *pbbak_start= s->c.bytestream_start;
2047
    int score, score2, iscore, i_len, p_len, block_s, sum;
2048
    const int w= s->b_width  << s->block_max_depth;
2049
    const int h= s->b_height << s->block_max_depth;
2050
    const int rem_depth= s->block_max_depth - level;
2051
    const int index= (x + y*w) << rem_depth;
2052
    const int block_w= 1<<(LOG2_MB_SIZE - level);
2053
    int trx= (x+1)<<rem_depth;
2054
    int try= (y+1)<<rem_depth;
2055
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2056
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2057
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2058
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2059
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2060
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2061
    int pl = left->color[0];
2062
    int pcb= left->color[1];
2063
    int pcr= left->color[2];
2064
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2065
    int pmy= mid_pred(left->my, top->my, tr->my);
2066
    int mx=0, my=0;
2067
    int l,cr,cb;
2068
    const int stride= s->current_picture.linesize[0];
2069
    const int uvstride= s->current_picture.linesize[1];
2070
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
2071
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2072
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2073
    int P[10][2];
2074
    int16_t last_mv[3][2];
2075
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2076
    const int shift= 1+qpel;
2077
    MotionEstContext *c= &s->m.me;
2078
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2079
    int my_context= av_log2(2*ABS(left->my - top->my));
2080
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2081

    
2082
    assert(sizeof(s->block_state) >= 256);
2083
    if(s->keyframe){
2084
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2085
        return 0;
2086
    }
2087

    
2088
//    clip predictors / edge ?
2089

    
2090
    P_LEFT[0]= left->mx;
2091
    P_LEFT[1]= left->my;
2092
    P_TOP [0]= top->mx;
2093
    P_TOP [1]= top->my;
2094
    P_TOPRIGHT[0]= tr->mx;
2095
    P_TOPRIGHT[1]= tr->my;
2096

    
2097
    last_mv[0][0]= s->block[index].mx;
2098
    last_mv[0][1]= s->block[index].my;
2099
    last_mv[1][0]= right->mx;
2100
    last_mv[1][1]= right->my;
2101
    last_mv[2][0]= bottom->mx;
2102
    last_mv[2][1]= bottom->my;
2103

    
2104
    s->m.mb_stride=2;
2105
    s->m.mb_x=
2106
    s->m.mb_y= 0;
2107
    s->m.me.skip= 0;
2108

    
2109
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2110

    
2111
    assert(s->m.me.  stride ==   stride);
2112
    assert(s->m.me.uvstride == uvstride);
2113

    
2114
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2115
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2116
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2117
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2118

    
2119
    c->xmin = - x*block_w - 16+2;
2120
    c->ymin = - y*block_w - 16+2;
2121
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2122
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2123

    
2124
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2125
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
2126
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2127
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2128
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2129
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2130
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2131

    
2132
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2133
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2134

    
2135
    if (!y) {
2136
        c->pred_x= P_LEFT[0];
2137
        c->pred_y= P_LEFT[1];
2138
    } else {
2139
        c->pred_x = P_MEDIAN[0];
2140
        c->pred_y = P_MEDIAN[1];
2141
    }
2142

    
2143
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2144
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2145

    
2146
    assert(mx >= c->xmin);
2147
    assert(mx <= c->xmax);
2148
    assert(my >= c->ymin);
2149
    assert(my <= c->ymax);
2150

    
2151
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2152
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2153
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2154

    
2155
  //  subpel search
2156
    pc= s->c;
2157
    pc.bytestream_start=
2158
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2159
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2160

    
2161
    if(level!=s->block_max_depth)
2162
        put_rac(&pc, &p_state[4 + s_context], 1);
2163
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2164
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2165
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2166
    p_len= pc.bytestream - pc.bytestream_start;
2167
    score += (s->lambda2*(p_len*8
2168
              + (pc.outstanding_count - s->c.outstanding_count)*8
2169
              + (-av_log2(pc.range)    + av_log2(s->c.range))
2170
             ))>>FF_LAMBDA_SHIFT;
2171

    
2172
    block_s= block_w*block_w;
2173
    sum = pix_sum(current_data[0], stride, block_w);
2174
    l= (sum + block_s/2)/block_s;
2175
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2176

    
2177
    block_s= block_w*block_w>>2;
2178
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
2179
    cb= (sum + block_s/2)/block_s;
2180
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2181
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
2182
    cr= (sum + block_s/2)/block_s;
2183
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2184

    
2185
    ic= s->c;
2186
    ic.bytestream_start=
2187
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2188
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2189
    if(level!=s->block_max_depth)
2190
        put_rac(&ic, &i_state[4 + s_context], 1);
2191
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2192
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2193
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2194
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2195
    i_len= ic.bytestream - ic.bytestream_start;
2196
    iscore += (s->lambda2*(i_len*8
2197
              + (ic.outstanding_count - s->c.outstanding_count)*8
2198
              + (-av_log2(ic.range)    + av_log2(s->c.range))
2199
             ))>>FF_LAMBDA_SHIFT;
2200

    
2201
//    assert(score==256*256*256*64-1);
2202
    assert(iscore < 255*255*256 + s->lambda2*10);
2203
    assert(iscore >= 0);
2204
    assert(l>=0 && l<=255);
2205
    assert(pl>=0 && pl<=255);
2206

    
2207
    if(level==0){
2208
        int varc= iscore >> 8;
2209
        int vard= score >> 8;
2210
        if (vard <= 64 || vard < varc)
2211
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2212
        else
2213
            c->scene_change_score+= s->m.qscale;
2214
    }
2215

    
2216
    if(level!=s->block_max_depth){
2217
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2218
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2219
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2220
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2221
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2222
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2223

    
2224
        if(score2 < score && score2 < iscore)
2225
            return score2;
2226
    }
2227

    
2228
    if(iscore < score){
2229
        memcpy(pbbak, i_buffer, i_len);
2230
        s->c= ic;
2231
        s->c.bytestream_start= pbbak_start;
2232
        s->c.bytestream= pbbak + i_len;
2233
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2234
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2235
        return iscore;
2236
    }else{
2237
        memcpy(pbbak, p_buffer, p_len);
2238
        s->c= pc;
2239
        s->c.bytestream_start= pbbak_start;
2240
        s->c.bytestream= pbbak + p_len;
2241
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2242
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2243
        return score;
2244
    }
2245
}
2246

    
2247
static always_inline int same_block(BlockNode *a, BlockNode *b){
2248
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2249
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2250
    }else{
2251
        return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
2252
    }
2253
}
2254

    
2255
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2256
    const int w= s->b_width  << s->block_max_depth;
2257
    const int rem_depth= s->block_max_depth - level;
2258
    const int index= (x + y*w) << rem_depth;
2259
    int trx= (x+1)<<rem_depth;
2260
    BlockNode *b= &s->block[index];
2261
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2262
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2263
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2264
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2265
    int pl = left->color[0];
2266
    int pcb= left->color[1];
2267
    int pcr= left->color[2];
2268
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2269
    int pmy= mid_pred(left->my, top->my, tr->my);
2270
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2271
    int my_context= av_log2(2*ABS(left->my - top->my));
2272
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2273

    
2274
    if(s->keyframe){
2275
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2276
        return;
2277
    }
2278

    
2279
    if(level!=s->block_max_depth){
2280
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2281
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
2282
        }else{
2283
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
2284
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2285
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2286
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2287
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2288
            return;
2289
        }
2290
    }
2291
    if(b->type & BLOCK_INTRA){
2292
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2293
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2294
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2295
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2296
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
2297
    }else{
2298
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2299
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2300
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2301
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
2302
    }
2303
}
2304

    
2305
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2306
    const int w= s->b_width << s->block_max_depth;
2307
    const int rem_depth= s->block_max_depth - level;
2308
    const int index= (x + y*w) << rem_depth;
2309
    int trx= (x+1)<<rem_depth;
2310
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2311
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2312
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2313
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2314
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2315

    
2316
    if(s->keyframe){
2317
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2318
        return;
2319
    }
2320

    
2321
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2322
        int type;
2323
        int l = left->color[0];
2324
        int cb= left->color[1];
2325
        int cr= left->color[2];
2326
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2327
        int my= mid_pred(left->my, top->my, tr->my);
2328
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2329
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2330

    
2331
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2332

    
2333
        if(type){
2334
            l += get_symbol(&s->c, &s->block_state[32], 1);
2335
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2336
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2337
        }else{
2338
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2339
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2340
        }
2341
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2342
    }else{
2343
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2344
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2345
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2346
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2347
    }
2348
}
2349

    
2350
static void encode_blocks(SnowContext *s){
2351
    int x, y;
2352
    int w= s->b_width;
2353
    int h= s->b_height;
2354

    
2355
    if(s->avctx->me_method == ME_ITER && !s->keyframe)
2356
        iterative_me(s);
2357

    
2358
    for(y=0; y<h; y++){
2359
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2360
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2361
            return;
2362
        }
2363
        for(x=0; x<w; x++){
2364
            if(s->avctx->me_method == ME_ITER)
2365
                encode_q_branch2(s, 0, x, y);
2366
            else
2367
                encode_q_branch (s, 0, x, y);
2368
        }
2369
    }
2370
}
2371

    
2372
static void decode_blocks(SnowContext *s){
2373
    int x, y;
2374
    int w= s->b_width;
2375
    int h= s->b_height;
2376

    
2377
    for(y=0; y<h; y++){
2378
        for(x=0; x<w; x++){
2379
            decode_q_branch(s, 0, x, y);
2380
        }
2381
    }
2382
}
2383

    
2384
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2385
    int x, y;
2386
START_TIMER
2387
    for(y=0; y < b_h+5; y++){
2388
        for(x=0; x < b_w; x++){
2389
            int a0= src[x    ];
2390
            int a1= src[x + 1];
2391
            int a2= src[x + 2];
2392
            int a3= src[x + 3];
2393
            int a4= src[x + 4];
2394
            int a5= src[x + 5];
2395
//            int am= 9*(a1+a2) - (a0+a3);
2396
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2397
//            int am= 18*(a2+a3) - 2*(a1+a4);
2398
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2399
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2400

    
2401
//            if(b_w==16) am= 8*(a1+a2);
2402

    
2403
            if(dx<8) am = (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2404
            else     am = (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2405

    
2406
            /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2407
            if(am&(~255)) am= ~(am>>31);
2408

    
2409
            tmp[x] = am;
2410

    
2411
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2412
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2413
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2414
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2415
        }
2416
        tmp += stride;
2417
        src += stride;
2418
    }
2419
    tmp -= (b_h+5)*stride;
2420

    
2421
    for(y=0; y < b_h; y++){
2422
        for(x=0; x < b_w; x++){
2423
            int a0= tmp[x + 0*stride];
2424
            int a1= tmp[x + 1*stride];
2425
            int a2= tmp[x + 2*stride];
2426
            int a3= tmp[x + 3*stride];
2427
            int a4= tmp[x + 4*stride];
2428
            int a5= tmp[x + 5*stride];
2429
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2430
//            int am= 18*(a2+a3) - 2*(a1+a4);
2431
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2432
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2433

    
2434
//            if(b_w==16) am= 8*(a1+a2);
2435

    
2436
            if(dy<8) am =  (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2437
            else     am = (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2438

    
2439
            if(am&(~255)) am= ~(am>>31);
2440

    
2441
            dst[x] = am;
2442
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2443
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2444
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2445
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2446
        }
2447
        dst += stride;
2448
        tmp += stride;
2449
    }
2450
STOP_TIMER("mc_block")
2451
}
2452

    
2453
#define mca(dx,dy,b_w)\
2454
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2455
    uint8_t tmp[stride*(b_w+5)];\
2456
    assert(h==b_w);\
2457
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2458
}
2459

    
2460
mca( 0, 0,16)
2461
mca( 8, 0,16)
2462
mca( 0, 8,16)
2463
mca( 8, 8,16)
2464
mca( 0, 0,8)
2465
mca( 8, 0,8)
2466
mca( 0, 8,8)
2467
mca( 8, 8,8)
2468

    
2469
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2470
    if(block->type & BLOCK_INTRA){
2471
        int x, y;
2472
        const int color = block->color[plane_index];
2473
        const int color4= color*0x01010101;
2474
        if(b_w==32){
2475
            for(y=0; y < b_h; y++){
2476
                *(uint32_t*)&dst[0 + y*stride]= color4;
2477
                *(uint32_t*)&dst[4 + y*stride]= color4;
2478
                *(uint32_t*)&dst[8 + y*stride]= color4;
2479
                *(uint32_t*)&dst[12+ y*stride]= color4;
2480
                *(uint32_t*)&dst[16+ y*stride]= color4;
2481
                *(uint32_t*)&dst[20+ y*stride]= color4;
2482
                *(uint32_t*)&dst[24+ y*stride]= color4;
2483
                *(uint32_t*)&dst[28+ y*stride]= color4;
2484
            }
2485
        }else if(b_w==16){
2486
            for(y=0; y < b_h; y++){
2487
                *(uint32_t*)&dst[0 + y*stride]= color4;
2488
                *(uint32_t*)&dst[4 + y*stride]= color4;
2489
                *(uint32_t*)&dst[8 + y*stride]= color4;
2490
                *(uint32_t*)&dst[12+ y*stride]= color4;
2491
            }
2492
        }else if(b_w==8){
2493
            for(y=0; y < b_h; y++){
2494
                *(uint32_t*)&dst[0 + y*stride]= color4;
2495
                *(uint32_t*)&dst[4 + y*stride]= color4;
2496
            }
2497
        }else if(b_w==4){
2498
            for(y=0; y < b_h; y++){
2499
                *(uint32_t*)&dst[0 + y*stride]= color4;
2500
            }
2501
        }else{
2502
            for(y=0; y < b_h; y++){
2503
                for(x=0; x < b_w; x++){
2504
                    dst[x + y*stride]= color;
2505
                }
2506
            }
2507
        }
2508
    }else{
2509
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2510
        int mx= block->mx*scale;
2511
        int my= block->my*scale;
2512
        const int dx= mx&15;
2513
        const int dy= my&15;
2514
        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2515
        sx += (mx>>4) - 2;
2516
        sy += (my>>4) - 2;
2517
        src += sx + sy*stride;
2518
        if(   (unsigned)sx >= w - b_w - 4
2519
           || (unsigned)sy >= h - b_h - 4){
2520
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2521
            src= tmp + MB_SIZE;
2522
        }
2523
        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2524
        assert(!(b_w&(b_w-1)));
2525
        assert(b_w>1 && b_h>1);
2526
        assert(tab_index>=0 && tab_index<4 || b_w==32);
2527
        if((dx&3) || (dy&3))
2528
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2529
        else if(b_w==32){
2530
            int y;
2531
            for(y=0; y<b_h; y+=16){
2532
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2533
                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2534
            }
2535
        }else if(b_w==b_h)
2536
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2537
        else if(b_w==2*b_h){
2538
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 2       + 2*stride,stride);
2539
            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2540
        }else{
2541
            assert(2*b_w==b_h);
2542
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 2 + 2*stride           ,stride);
2543
            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2544
        }
2545
    }
2546
}
2547

    
2548
//FIXME name clenup (b_w, block_w, b_width stuff)
2549
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2550
    DWTELEM * dst = NULL;
2551
    const int b_width = s->b_width  << s->block_max_depth;
2552
    const int b_height= s->b_height << s->block_max_depth;
2553
    const int b_stride= b_width;
2554
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2555
    BlockNode *rt= lt+1;
2556
    BlockNode *lb= lt+b_stride;
2557
    BlockNode *rb= lb+1;
2558
    uint8_t *block[4];
2559
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2560
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2561
    uint8_t *ptmp;
2562
    int x,y;
2563

    
2564
    if(b_x<0){
2565
        lt= rt;
2566
        lb= rb;
2567
    }else if(b_x + 1 >= b_width){
2568
        rt= lt;
2569
        rb= lb;
2570
    }
2571
    if(b_y<0){
2572
        lt= lb;
2573
        rt= rb;
2574
    }else if(b_y + 1 >= b_height){
2575
        lb= lt;
2576
        rb= rt;
2577
    }
2578

    
2579
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2580
        obmc -= src_x;
2581
        b_w += src_x;
2582
        src_x=0;
2583
    }else if(src_x + b_w > w){
2584
        b_w = w - src_x;
2585
    }
2586
    if(src_y<0){
2587
        obmc -= src_y*obmc_stride;
2588
        b_h += src_y;
2589
        src_y=0;
2590
    }else if(src_y + b_h> h){
2591
        b_h = h - src_y;
2592
    }
2593

    
2594
    if(b_w<=0 || b_h<=0) return;
2595

    
2596
assert(src_stride > 2*MB_SIZE + 5);
2597
//    old_dst += src_x + src_y*dst_stride;
2598
    dst8+= src_x + src_y*src_stride;
2599
//    src += src_x + src_y*src_stride;
2600

    
2601
    ptmp= tmp + 3*tmp_step;
2602
    block[0]= ptmp;
2603
    ptmp+=tmp_step;
2604
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2605

    
2606
    if(same_block(lt, rt)){
2607
        block[1]= block[0];
2608
    }else{
2609
        block[1]= ptmp;
2610
        ptmp+=tmp_step;
2611
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2612
    }
2613

    
2614
    if(same_block(lt, lb)){
2615
        block[2]= block[0];
2616
    }else if(same_block(rt, lb)){
2617
        block[2]= block[1];
2618
    }else{
2619
        block[2]= ptmp;
2620
        ptmp+=tmp_step;
2621
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2622
    }
2623

    
2624
    if(same_block(lt, rb) ){
2625
        block[3]= block[0];
2626
    }else if(same_block(rt, rb)){
2627
        block[3]= block[1];
2628
    }else if(same_block(lb, rb)){
2629
        block[3]= block[2];
2630
    }else{
2631
        block[3]= ptmp;
2632
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2633
    }
2634
#if 0
2635
    for(y=0; y<b_h; y++){
2636
        for(x=0; x<b_w; x++){
2637
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2638
            if(add) dst[x + y*dst_stride] += v;
2639
            else    dst[x + y*dst_stride] -= v;
2640
        }
2641
    }
2642
    for(y=0; y<b_h; y++){
2643
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2644
        for(x=0; x<b_w; x++){
2645
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2646
            if(add) dst[x + y*dst_stride] += v;
2647
            else    dst[x + y*dst_stride] -= v;
2648
        }
2649
    }
2650
    for(y=0; y<b_h; y++){
2651
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2652
        for(x=0; x<b_w; x++){
2653
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2654
            if(add) dst[x + y*dst_stride] += v;
2655
            else    dst[x + y*dst_stride] -= v;
2656
        }
2657
    }
2658
    for(y=0; y<b_h; y++){
2659
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2660
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2661
        for(x=0; x<b_w; x++){
2662
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2663
            if(add) dst[x + y*dst_stride] += v;
2664
            else    dst[x + y*dst_stride] -= v;
2665
        }
2666
    }
2667
#else
2668
{
2669

    
2670
    START_TIMER
2671

    
2672
    for(y=0; y<b_h; y++){
2673
        //FIXME ugly missue of obmc_stride
2674
        uint8_t *obmc1= obmc + y*obmc_stride;
2675
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2676
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2677
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2678
        dst = slice_buffer_get_line(sb, src_y + y);
2679
        for(x=0; x<b_w; x++){
2680
            int v=   obmc1[x] * block[3][x + y*src_stride]
2681
                    +obmc2[x] * block[2][x + y*src_stride]
2682
                    +obmc3[x] * block[1][x + y*src_stride]
2683
                    +obmc4[x] * block[0][x + y*src_stride];
2684

    
2685
            v <<= 8 - LOG2_OBMC_MAX;
2686
            if(FRAC_BITS != 8){
2687
                v += 1<<(7 - FRAC_BITS);
2688
                v >>= 8 - FRAC_BITS;
2689
            }
2690
            if(add){
2691
//                v += old_dst[x + y*dst_stride];
2692
                v += dst[x + src_x];
2693
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2694
                if(v&(~255)) v= ~(v>>31);
2695
                dst8[x + y*src_stride] = v;
2696
            }else{
2697
//                old_dst[x + y*dst_stride] -= v;
2698
                dst[x + src_x] -= v;
2699
            }
2700
        }
2701
    }
2702
        STOP_TIMER("Inner add y block")
2703
}
2704
#endif
2705
}
2706

    
2707
//FIXME name clenup (b_w, block_w, b_width stuff)
2708
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2709
    const int b_width = s->b_width  << s->block_max_depth;
2710
    const int b_height= s->b_height << s->block_max_depth;
2711
    const int b_stride= b_width;
2712
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2713
    BlockNode *rt= lt+1;
2714
    BlockNode *lb= lt+b_stride;
2715
    BlockNode *rb= lb+1;
2716
    uint8_t *block[4];
2717
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2718
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2719
    uint8_t *ptmp;
2720
    int x,y;
2721

    
2722
    if(b_x<0){
2723
        lt= rt;
2724
        lb= rb;
2725
    }else if(b_x + 1 >= b_width){
2726
        rt= lt;
2727
        rb= lb;
2728
    }
2729
    if(b_y<0){
2730
        lt= lb;
2731
        rt= rb;
2732
    }else if(b_y + 1 >= b_height){
2733
        lb= lt;
2734
        rb= rt;
2735
    }
2736

    
2737
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2738
        obmc -= src_x;
2739
        b_w += src_x;
2740
        if(!offset_dst)
2741
            dst -= src_x;
2742
        src_x=0;
2743
    }else if(src_x + b_w > w){
2744
        b_w = w - src_x;
2745
    }
2746
    if(src_y<0){
2747
        obmc -= src_y*obmc_stride;
2748
        b_h += src_y;
2749
        if(!offset_dst)
2750
            dst -= src_y*dst_stride;
2751
        src_y=0;
2752
    }else if(src_y + b_h> h){
2753
        b_h = h - src_y;
2754
    }
2755

    
2756
    if(b_w<=0 || b_h<=0) return;
2757

    
2758
assert(src_stride > 2*MB_SIZE + 5);
2759
    if(offset_dst)
2760
        dst += src_x + src_y*dst_stride;
2761
    dst8+= src_x + src_y*src_stride;
2762
//    src += src_x + src_y*src_stride;
2763

    
2764
    ptmp= tmp + 3*tmp_step;
2765
    block[0]= ptmp;
2766
    ptmp+=tmp_step;
2767
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2768

    
2769
    if(same_block(lt, rt)){
2770
        block[1]= block[0];
2771
    }else{
2772
        block[1]= ptmp;
2773
        ptmp+=tmp_step;
2774
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2775
    }
2776

    
2777
    if(same_block(lt, lb)){
2778
        block[2]= block[0];
2779
    }else if(same_block(rt, lb)){
2780
        block[2]= block[1];
2781
    }else{
2782
        block[2]= ptmp;
2783
        ptmp+=tmp_step;
2784
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2785
    }
2786

    
2787
    if(same_block(lt, rb) ){
2788
        block[3]= block[0];
2789
    }else if(same_block(rt, rb)){
2790
        block[3]= block[1];
2791
    }else if(same_block(lb, rb)){
2792
        block[3]= block[2];
2793
    }else{
2794
        block[3]= ptmp;
2795
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2796
    }
2797
#if 0
2798
    for(y=0; y<b_h; y++){
2799
        for(x=0; x<b_w; x++){
2800
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2801
            if(add) dst[x + y*dst_stride] += v;
2802
            else    dst[x + y*dst_stride] -= v;
2803
        }
2804
    }
2805
    for(y=0; y<b_h; y++){
2806
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2807
        for(x=0; x<b_w; x++){
2808
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2809
            if(add) dst[x + y*dst_stride] += v;
2810
            else    dst[x + y*dst_stride] -= v;
2811
        }
2812
    }
2813
    for(y=0; y<b_h; y++){
2814
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2815
        for(x=0; x<b_w; x++){
2816
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2817
            if(add) dst[x + y*dst_stride] += v;
2818
            else    dst[x + y*dst_stride] -= v;
2819
        }
2820
    }
2821
    for(y=0; y<b_h; y++){
2822
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2823
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2824
        for(x=0; x<b_w; x++){
2825
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2826
            if(add) dst[x + y*dst_stride] += v;
2827
            else    dst[x + y*dst_stride] -= v;
2828
        }
2829
    }
2830
#else
2831
    for(y=0; y<b_h; y++){
2832
        //FIXME ugly missue of obmc_stride
2833
        uint8_t *obmc1= obmc + y*obmc_stride;
2834
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2835
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2836
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2837
        for(x=0; x<b_w; x++){
2838
            int v=   obmc1[x] * block[3][x + y*src_stride]
2839
                    +obmc2[x] * block[2][x + y*src_stride]
2840
                    +obmc3[x] * block[1][x + y*src_stride]
2841
                    +obmc4[x] * block[0][x + y*src_stride];
2842

    
2843
            v <<= 8 - LOG2_OBMC_MAX;
2844
            if(FRAC_BITS != 8){
2845
                v += 1<<(7 - FRAC_BITS);
2846
                v >>= 8 - FRAC_BITS;
2847
            }
2848
            if(add){
2849
                v += dst[x + y*dst_stride];
2850
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2851
                if(v&(~255)) v= ~(v>>31);
2852
                dst8[x + y*src_stride] = v;
2853
            }else{
2854
                dst[x + y*dst_stride] -= v;
2855
            }
2856
        }
2857
    }
2858
#endif
2859
}
2860

    
2861
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2862
    Plane *p= &s->plane[plane_index];
2863
    const int mb_w= s->b_width  << s->block_max_depth;
2864
    const int mb_h= s->b_height << s->block_max_depth;
2865
    int x, y, mb_x;
2866
    int block_size = MB_SIZE >> s->block_max_depth;
2867
    int block_w    = plane_index ? block_size/2 : block_size;
2868
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2869
    int obmc_stride= plane_index ? block_size : 2*block_size;
2870
    int ref_stride= s->current_picture.linesize[plane_index];
2871
    uint8_t *ref  = s->last_picture.data[plane_index];
2872
    uint8_t *dst8= s->current_picture.data[plane_index];
2873
    int w= p->width;
2874
    int h= p->height;
2875
    START_TIMER
2876

    
2877
    if(s->keyframe || (s->avctx->debug&512)){
2878
        if(mb_y==mb_h)
2879
            return;
2880

    
2881
        if(add){
2882
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2883
            {
2884
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2885
                DWTELEM * line = sb->line[y];
2886
                for(x=0; x<w; x++)
2887
                {
2888
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2889
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2890
                    v >>= FRAC_BITS;
2891
                    if(v&(~255)) v= ~(v>>31);
2892
                    dst8[x + y*ref_stride]= v;
2893
                }
2894
            }
2895
        }else{
2896
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2897
            {
2898
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2899
                DWTELEM * line = sb->line[y];
2900
                for(x=0; x<w; x++)
2901
                {
2902
                    line[x] -= 128 << FRAC_BITS;
2903
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2904
                }
2905
            }
2906
        }
2907

    
2908
        return;
2909
    }
2910

    
2911
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2912
            START_TIMER
2913

    
2914
            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
2915
                       block_w*mb_x - block_w/2,
2916
                       block_w*mb_y - block_w/2,
2917
                       block_w, block_w,
2918
                       w, h,
2919
                       w, ref_stride, obmc_stride,
2920
                       mb_x - 1, mb_y - 1,
2921
                       add, plane_index);
2922

    
2923
            STOP_TIMER("add_yblock")
2924
        }
2925

    
2926
    STOP_TIMER("predict_slice")
2927
}
2928

    
2929
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2930
    Plane *p= &s->plane[plane_index];
2931
    const int mb_w= s->b_width  << s->block_max_depth;
2932
    const int mb_h= s->b_height << s->block_max_depth;
2933
    int x, y, mb_x;
2934
    int block_size = MB_SIZE >> s->block_max_depth;
2935
    int block_w    = plane_index ? block_size/2 : block_size;
2936
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2937
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2938
    int ref_stride= s->current_picture.linesize[plane_index];
2939
    uint8_t *ref  = s->last_picture.data[plane_index];
2940
    uint8_t *dst8= s->current_picture.data[plane_index];
2941
    int w= p->width;
2942
    int h= p->height;
2943
    START_TIMER
2944

    
2945
    if(s->keyframe || (s->avctx->debug&512)){
2946
        if(mb_y==mb_h)
2947
            return;
2948

    
2949
        if(add){
2950
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2951
                for(x=0; x<w; x++){
2952
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2953
                    v >>= FRAC_BITS;
2954
                    if(v&(~255)) v= ~(v>>31);
2955
                    dst8[x + y*ref_stride]= v;
2956
                }
2957
            }
2958
        }else{
2959
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2960
                for(x=0; x<w; x++){
2961
                    buf[x + y*w]-= 128<<FRAC_BITS;
2962
                }
2963
            }
2964
        }
2965

    
2966
        return;
2967
    }
2968

    
2969
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2970
            START_TIMER
2971

    
2972
            add_yblock(s, buf, dst8, ref, obmc,
2973
                       block_w*mb_x - block_w/2,
2974
                       block_w*mb_y - block_w/2,
2975
                       block_w, block_w,
2976
                       w, h,
2977
                       w, ref_stride, obmc_stride,
2978
                       mb_x - 1, mb_y - 1,
2979
                       add, 1, plane_index);
2980

    
2981
            STOP_TIMER("add_yblock")
2982
        }
2983

    
2984
    STOP_TIMER("predict_slice")
2985
}
2986

    
2987
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2988
    const int mb_h= s->b_height << s->block_max_depth;
2989
    int mb_y;
2990
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2991
        predict_slice(s, buf, plane_index, add, mb_y);
2992
}
2993

    
2994
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2995
    int i, x2, y2;
2996
    Plane *p= &s->plane[plane_index];
2997
    const int block_size = MB_SIZE >> s->block_max_depth;
2998
    const int block_w    = plane_index ? block_size/2 : block_size;
2999
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3000
    const int obmc_stride= plane_index ? block_size : 2*block_size;
3001
    const int ref_stride= s->current_picture.linesize[plane_index];
3002
    uint8_t *ref= s->   last_picture.data[plane_index];
3003
    uint8_t *src= s-> input_picture.data[plane_index];
3004
    DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3005
    const int b_stride = s->b_width << s->block_max_depth;
3006
    const int w= p->width;
3007
    const int h= p->height;
3008
    int index= mb_x + mb_y*b_stride;
3009
    BlockNode *b= &s->block[index];
3010
    BlockNode backup= *b;
3011
    int ab=0;
3012
    int aa=0;
3013

    
3014
    b->type|= BLOCK_INTRA;
3015
    b->color[plane_index]= 0;
3016
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
3017

    
3018
    for(i=0; i<4; i++){
3019
        int mb_x2= mb_x + (i &1) - 1;
3020
        int mb_y2= mb_y + (i>>1) - 1;
3021
        int x= block_w*mb_x2 + block_w/2;
3022
        int y= block_w*mb_y2 + block_w/2;
3023

    
3024
        add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc, 
3025
                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
3026

    
3027
        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3028
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3029
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3030
                int obmc_v= obmc[index];
3031
                int d;
3032
                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3033
                if(x<0) obmc_v += obmc[index + block_w];
3034
                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3035
                if(x+block_w>w) obmc_v += obmc[index - block_w];
3036
                //FIXME precalc this or simplify it somehow else
3037

    
3038
                d = -dst[index] + (1<<(FRAC_BITS-1));
3039
                dst[index] = d;
3040
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
3041
                aa += obmc_v * obmc_v; //FIXME precalclate this
3042
            }
3043
        }
3044
    }
3045
    *b= backup;
3046

    
3047
    return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
3048
}
3049

    
3050
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
3051
    const int b_stride = s->b_width << s->block_max_depth;
3052
    const int b_height = s->b_height<< s->block_max_depth;
3053
    int index= x + y*b_stride;
3054
    BlockNode *b     = &s->block[index];
3055
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
3056
    BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
3057
    BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
3058
    BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3059
    int dmx, dmy;
3060
//  int mx_context= av_log2(2*ABS(left->mx - top->mx));
3061
//  int my_context= av_log2(2*ABS(left->my - top->my));
3062

    
3063
    if(x<0 || x>=b_stride || y>=b_height)
3064
        return 0;
3065
    dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3066
    dmy= b->my - mid_pred(left->my, top->my, tr->my);
3067
/*
3068
1            0      0
3069
01X          1-2    1
3070
001XX        3-6    2-3
3071
0001XXX      7-14   4-7
3072
00001XXXX   15-30   8-15
3073
*/
3074
//FIXME try accurate rate
3075
//FIXME intra and inter predictors if surrounding blocks arent the same type
3076
    if(b->type & BLOCK_INTRA){
3077
        return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3078
                   + av_log2(2*ABS(left->color[1] - b->color[1]))
3079
                   + av_log2(2*ABS(left->color[2] - b->color[2])));
3080
    }else
3081
        return 2*(1 + av_log2(2*ABS(dmx))
3082
                    + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
3083
}
3084

    
3085
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3086
    Plane *p= &s->plane[plane_index];
3087
    const int block_size = MB_SIZE >> s->block_max_depth;
3088
    const int block_w    = plane_index ? block_size/2 : block_size;
3089
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3090
    const int obmc_stride= plane_index ? block_size : 2*block_size;
3091
    const int ref_stride= s->current_picture.linesize[plane_index];
3092
    uint8_t *ref= s->   last_picture.data[plane_index];
3093
    uint8_t *dst= s->current_picture.data[plane_index];
3094
    uint8_t *src= s->  input_picture.data[plane_index];
3095
    DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3096
    uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
3097
    uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
3098
    const int b_stride = s->b_width << s->block_max_depth;
3099
    const int b_height = s->b_height<< s->block_max_depth;
3100
    const int w= p->width;
3101
    const int h= p->height;
3102
    int distortion;
3103
    int rate= 0;
3104
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3105
    int sx= block_w*mb_x - block_w/2;
3106
    int sy= block_w*mb_y - block_w/2;
3107
    const int x0= FFMAX(0,-sx);
3108
    const int y0= FFMAX(0,-sy);
3109
    const int x1= FFMIN(block_w*2, w-sx);
3110
    const int y1= FFMIN(block_w*2, h-sy);
3111
    int i,x,y;
3112

    
3113
    pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3114

    
3115
    for(y=y0; y<y1; y++){
3116
        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3117
        const DWTELEM *pred1 = pred + y*obmc_stride;
3118
        uint8_t *cur1 = cur + y*ref_stride;
3119
        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3120
        for(x=x0; x<x1; x++){
3121
            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3122
            v = (v + pred1[x]) >> FRAC_BITS;
3123
            if(v&(~255)) v= ~(v>>31);
3124
            dst1[x] = v;
3125
        }
3126
    }
3127

    
3128
    //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
3129
    if(block_w==16){
3130
        distortion = 0;
3131
        for(i=0; i<4; i++){
3132
            int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3133
            distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3134
        }
3135
    }else{
3136
        assert(block_w==8);
3137
        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
3138
    }
3139

    
3140
    if(plane_index==0){
3141
        for(i=0; i<4; i++){
3142
/* ..RRr
3143
 * .RXx.
3144
 * rxx..
3145
 */
3146
            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3147
        }
3148
        if(mb_x == b_stride-2)
3149
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3150
    }
3151
    return distortion + rate*penalty_factor;
3152
}
3153

    
3154
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3155
    int i, y2;
3156
    Plane *p= &s->plane[plane_index];
3157
    const int block_size = MB_SIZE >> s->block_max_depth;
3158
    const int block_w    = plane_index ? block_size/2 : block_size;
3159
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3160
    const int obmc_stride= plane_index ? block_size : 2*block_size;
3161
    const int ref_stride= s->current_picture.linesize[plane_index];
3162
    uint8_t *ref= s->   last_picture.data[plane_index];
3163
    uint8_t *dst= s->current_picture.data[plane_index];
3164
    uint8_t *src= s-> input_picture.data[plane_index];
3165
    const static DWTELEM zero_dst[4096]; //FIXME
3166
    const int b_stride = s->b_width << s->block_max_depth;
3167
    const int b_height = s->b_height<< s->block_max_depth;
3168
    const int w= p->width;
3169
    const int h= p->height;
3170
    int distortion= 0;
3171
    int rate= 0;
3172
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3173

    
3174
    for(i=0; i<9; i++){
3175
        int mb_x2= mb_x + (i%3) - 1;
3176
        int mb_y2= mb_y + (i/3) - 1;
3177
        int x= block_w*mb_x2 + block_w/2;
3178
        int y= block_w*mb_y2 + block_w/2;
3179

    
3180
        add_yblock(s, zero_dst, dst, ref, obmc, 
3181
                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3182

    
3183
        //FIXME find a cleaner/simpler way to skip the outside stuff
3184
        for(y2= y; y2<0; y2++)
3185
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3186
        for(y2= h; y2<y+block_w; y2++)
3187
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3188
        if(x<0){
3189
            for(y2= y; y2<y+block_w; y2++)
3190
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3191
        }
3192
        if(x+block_w > w){
3193
            for(y2= y; y2<y+block_w; y2++)
3194
                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3195
        }
3196

    
3197
        assert(block_w== 8 || block_w==16);
3198
        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3199
    }
3200

    
3201
    if(plane_index==0){
3202
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3203
        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3204

    
3205
/* ..RRRr
3206
 * .RXXx.
3207
 * .RXXx.
3208
 * rxxx.
3209
 */
3210
        if(merged)
3211
            rate = get_block_bits(s, mb_x, mb_y, 2);
3212
        for(i=merged?4:0; i<9; i++){
3213
            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3214
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3215
        }
3216
    }
3217
    return distortion + rate*penalty_factor;
3218
}
3219

    
3220
static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3221
    const int b_stride= s->b_width << s->block_max_depth;
3222
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3223
    BlockNode backup= *block;
3224
    int rd, index, value;
3225

    
3226
    assert(mb_x>=0 && mb_y>=0);
3227
    assert(mb_x<b_stride);
3228

    
3229
    if(intra){
3230
        block->color[0] = p[0];
3231
        block->color[1] = p[1];
3232
        block->color[2] = p[2];
3233
        block->type |= BLOCK_INTRA;
3234
    }else{
3235
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3236
        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
3237
        if(s->me_cache[index] == value)
3238
            return 0;
3239
        s->me_cache[index]= value;
3240

    
3241
        block->mx= p[0];
3242
        block->my= p[1];
3243
        block->type &= ~BLOCK_INTRA;
3244
    }
3245

    
3246
    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3247

    
3248
//FIXME chroma
3249
    if(rd < *best_rd){
3250
        *best_rd= rd;
3251
        return 1;
3252
    }else{
3253
        *block= backup;
3254
        return 0;
3255
    }
3256
}
3257

    
3258
/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3259
static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, const uint8_t *obmc_edged, int *best_rd){
3260
    int p[2] = {p0, p1};
3261
    return check_block(s, mb_x, mb_y, p, intra, obmc_edged, best_rd);
3262
}
3263

    
3264
static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int *best_rd){
3265
    const int b_stride= s->b_width << s->block_max_depth;
3266
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3267
    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3268
    int rd, index, value;
3269

    
3270
    assert(mb_x>=0 && mb_y>=0);
3271
    assert(mb_x<b_stride);
3272
    assert(((mb_x|mb_y)&1) == 0);
3273

    
3274
    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3275
    value= s->me_cache_generation + (p0>>10) + (p1<<6);
3276
    if(s->me_cache[index] == value)
3277
        return 0;
3278
    s->me_cache[index]= value;
3279

    
3280
    block->mx= p0;
3281
    block->my= p1;
3282
    block->type &= ~BLOCK_INTRA;
3283
    block[1]= block[b_stride]= block[b_stride+1]= *block;
3284

    
3285
    rd= get_4block_rd(s, mb_x, mb_y, 0);
3286

    
3287
//FIXME chroma
3288
    if(rd < *best_rd){
3289
        *best_rd= rd;
3290
        return 1;
3291
    }else{
3292
        block[0]= backup[0];
3293
        block[1]= backup[1];
3294
        block[b_stride]= backup[2];
3295
        block[b_stride+1]= backup[3];
3296
        return 0;
3297
    }
3298
}
3299

    
3300
static void iterative_me(SnowContext *s){
3301
    int pass, mb_x, mb_y;
3302
    const int b_width = s->b_width  << s->block_max_depth;
3303
    const int b_height= s->b_height << s->block_max_depth;
3304
    const int b_stride= b_width;
3305
    int color[3];
3306

    
3307
    for(pass=0; pass<50; pass++){
3308
        int change= 0;
3309

    
3310
        for(mb_y= 0; mb_y<b_height; mb_y++){
3311
            for(mb_x= 0; mb_x<b_width; mb_x++){
3312
                int dia_change, i, j;
3313
                int best_rd= INT_MAX;
3314
                BlockNode backup;
3315
                const int index= mb_x + mb_y * b_stride;
3316
                BlockNode *block= &s->block[index];
3317
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : &null_block;
3318
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : &null_block;
3319
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : &null_block;
3320
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : &null_block;
3321
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : &null_block;
3322
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : &null_block;
3323
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : &null_block;
3324
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : &null_block;
3325
                const int b_w= (MB_SIZE >> s->block_max_depth);
3326
                uint8_t obmc_edged[b_w*2][b_w*2];
3327

    
3328
                if(pass && (block->type & BLOCK_OPT))
3329
                    continue;
3330
                block->type |= BLOCK_OPT;
3331

    
3332
                backup= *block;
3333

    
3334
                if(!s->me_cache_generation)
3335
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3336
                s->me_cache_generation += 1<<22;
3337

    
3338
                //FIXME precalc
3339
                {
3340
                    int x, y;
3341
                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3342
                    if(mb_x==0)
3343
                        for(y=0; y<b_w*2; y++)
3344
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3345
                    if(mb_x==b_stride-1)
3346
                        for(y=0; y<b_w*2; y++)
3347
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3348
                    if(mb_y==0){
3349
                        for(x=0; x<b_w*2; x++)
3350
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
3351
                        for(y=1; y<b_w; y++)
3352
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3353
                    }
3354
                    if(mb_y==b_height-1){
3355
                        for(x=0; x<b_w*2; x++)
3356
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3357
                        for(y=b_w; y<b_w*2-1; y++)
3358
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3359
                    }
3360
                }
3361

    
3362
                //skip stuff outside the picture
3363
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3364
                {
3365
                    uint8_t *src= s->  input_picture.data[0];
3366
                    uint8_t *dst= s->current_picture.data[0];
3367
                    const int stride= s->current_picture.linesize[0];
3368
                    const int block_w= MB_SIZE >> s->block_max_depth;
3369
                    const int sx= block_w*mb_x - block_w/2;
3370
                    const int sy= block_w*mb_y - block_w/2;
3371
                    const int w= s->plane[0].width;
3372
                    const int h= s->plane[0].height;
3373
                    int y;
3374

    
3375
                    for(y=sy; y<0; y++)
3376
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3377
                    for(y=h; y<sy+block_w*2; y++)
3378
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3379
                    if(sx<0){
3380
                        for(y=sy; y<sy+block_w*2; y++)
3381
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3382
                    }
3383
                    if(sx+block_w*2 > w){
3384
                        for(y=sy; y<sy+block_w*2; y++)
3385
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3386
                    }
3387
                }
3388

    
3389
                // intra(black) = neighbors' contribution to the current block
3390
                for(i=0; i<3; i++)
3391
                    color[i]= get_dc(s, mb_x, mb_y, i);
3392

    
3393
                // get previous score (cant be cached due to OBMC)
3394
                if(pass > 0 && (block->type&BLOCK_INTRA)){
3395
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
3396
                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3397
                }else
3398
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, *obmc_edged, &best_rd);
3399

    
3400
                check_block_inter(s, mb_x, mb_y, 0, 0, 0, *obmc_edged, &best_rd);
3401
                check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, *obmc_edged, &best_rd);
3402
                check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, *obmc_edged, &best_rd);
3403
                check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, *obmc_edged, &best_rd);
3404
                check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, *obmc_edged, &best_rd);
3405

    
3406
                /* fullpel ME */
3407
                //FIXME avoid subpel interpol / round to nearest integer
3408
                do{
3409
                    dia_change=0;
3410
                    for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3411
                        for(j=0; j<i; j++){
3412
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd);
3413
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
3414
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
3415
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd);
3416
                        }
3417
                    }
3418
                }while(dia_change);
3419
                /* subpel ME */
3420
                do{
3421
                    static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3422
                    dia_change=0;
3423
                    for(i=0; i<8; i++)
3424
                        dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, *obmc_edged, &best_rd);
3425
                }while(dia_change);
3426
                //FIXME or try the standard 2 pass qpel or similar
3427
#if 1
3428
                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3429
                //FIXME RD style color selection
3430
#endif
3431
                if(!same_block(block, &backup)){
3432
                    if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3433
                    if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3434
                    if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3435
                    if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3436
                    if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3437
                    if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3438
                    if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3439
                    if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
3440
                    change ++;
3441
                }
3442
            }
3443
        }
3444
        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3445
        if(!change)
3446
            break;
3447
    }
3448

    
3449
    if(s->block_max_depth == 1){
3450
        int change= 0;
3451
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
3452
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
3453
                int dia_change, i, j;
3454
                int best_rd, init_rd;
3455
                const int index= mb_x + mb_y * b_stride;
3456
                BlockNode *b[4];
3457

    
3458
                b[0]= &s->block[index];
3459
                b[1]= b[0]+1;
3460
                b[2]= b[0]+b_stride;
3461
                b[3]= b[2]+1;
3462
                if(same_block(b[0], b[1]) &&
3463
                   same_block(b[0], b[2]) &&
3464
                   same_block(b[0], b[3]))
3465
                    continue;
3466

    
3467
                if(!s->me_cache_generation)
3468
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3469
                s->me_cache_generation += 1<<22;
3470

    
3471
                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3472

    
3473
                check_4block_inter(s, mb_x, mb_y,
3474
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3475
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, &best_rd);
3476

    
3477
                for(i=0; i<4; i++)
3478
                    if(!(b[i]->type&BLOCK_INTRA))
3479
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, &best_rd);
3480

    
3481
                if(init_rd != best_rd)
3482
                    change++;
3483
            }
3484
        }
3485
        av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3486
    }
3487
}
3488

    
3489
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3490
    const int level= b->level;
3491
    const int w= b->width;
3492
    const int h= b->height;
3493
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3494
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3495
    int x,y, thres1, thres2;
3496
//    START_TIMER
3497

    
3498
    if(s->qlog == LOSSLESS_QLOG) return;
3499

    
3500
    bias= bias ? 0 : (3*qmul)>>3;
3501
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3502
    thres2= 2*thres1;
3503

    
3504
    if(!bias){
3505
        for(y=0; y<h; y++){
3506
            for(x=0; x<w; x++){
3507
                int i= src[x + y*stride];
3508

    
3509
                if((unsigned)(i+thres1) > thres2){
3510
                    if(i>=0){
3511
                        i<<= QEXPSHIFT;
3512
                        i/= qmul; //FIXME optimize
3513
                        src[x + y*stride]=  i;
3514
                    }else{
3515
                        i= -i;
3516
                        i<<= QEXPSHIFT;
3517
                        i/= qmul; //FIXME optimize
3518
                        src[x + y*stride]= -i;
3519
                    }
3520
                }else
3521
                    src[x + y*stride]= 0;
3522
            }
3523
        }
3524
    }else{
3525
        for(y=0; y<h; y++){
3526
            for(x=0; x<w; x++){
3527
                int i= src[x + y*stride];
3528

    
3529
                if((unsigned)(i+thres1) > thres2){
3530
                    if(i>=0){
3531
                        i<<= QEXPSHIFT;
3532
                        i= (i + bias) / qmul; //FIXME optimize
3533
                        src[x + y*stride]=  i;
3534
                    }else{
3535
                        i= -i;
3536
                        i<<= QEXPSHIFT;
3537
                        i= (i + bias) / qmul; //FIXME optimize
3538
                        src[x + y*stride]= -i;
3539
                    }
3540
                }else
3541
                    src[x + y*stride]= 0;
3542
            }
3543
        }
3544
    }
3545
    if(level+1 == s->spatial_decomposition_count){
3546
//        STOP_TIMER("quantize")
3547
    }
3548
}
3549

    
3550
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3551
    const int w= b->width;
3552
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3553
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3554
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3555
    int x,y;
3556
    START_TIMER
3557

    
3558
    if(s->qlog == LOSSLESS_QLOG) return;
3559

    
3560
    for(y=start_y; y<end_y; y++){
3561
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3562
        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3563
        for(x=0; x<w; x++){
3564
            int i= line[x];
3565
            if(i<0){
3566
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3567
            }else if(i>0){
3568
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3569
            }
3570
        }
3571
    }
3572
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3573
        STOP_TIMER("dquant")
3574
    }
3575
}
3576

    
3577
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3578
    const int w= b->width;
3579
    const int h= b->height;
3580
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3581
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3582
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3583
    int x,y;
3584
    START_TIMER
3585

    
3586
    if(s->qlog == LOSSLESS_QLOG) return;
3587

    
3588
    for(y=0; y<h; y++){
3589
        for(x=0; x<w; x++){
3590
            int i= src[x + y*stride];
3591
            if(i<0){
3592
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3593
            }else if(i>0){
3594
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3595
            }
3596
        }
3597
    }
3598
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3599
        STOP_TIMER("dquant")
3600
    }
3601
}
3602

    
3603
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3604
    const int w= b->width;
3605
    const int h= b->height;
3606
    int x,y;
3607

    
3608
    for(y=h-1; y>=0; y--){
3609
        for(x=w-1; x>=0; x--){
3610
            int i= x + y*stride;
3611

    
3612
            if(x){
3613
                if(use_median){
3614
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3615
                    else  src[i] -= src[i - 1];
3616
                }else{
3617
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3618
                    else  src[i] -= src[i - 1];
3619
                }
3620
            }else{
3621
                if(y) src[i] -= src[i - stride];
3622
            }
3623
        }
3624
    }
3625
}
3626

    
3627
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3628
    const int w= b->width;
3629
    int x,y;
3630

    
3631
//    START_TIMER
3632

    
3633
    DWTELEM * line;
3634
    DWTELEM * prev;
3635

    
3636
    if (start_y != 0)
3637
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3638

    
3639
    for(y=start_y; y<end_y; y++){
3640
        prev = line;
3641
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3642
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3643
        for(x=0; x<w; x++){
3644
            if(x){
3645
                if(use_median){
3646
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3647
                    else  line[x] += line[x - 1];
3648
                }else{
3649
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3650
                    else  line[x] += line[x - 1];
3651
                }
3652
            }else{
3653
                if(y) line[x] += prev[x];
3654
            }
3655
        }
3656
    }
3657

    
3658
//    STOP_TIMER("correlate")
3659
}
3660

    
3661
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3662
    const int w= b->width;
3663
    const int h= b->height;
3664
    int x,y;
3665

    
3666
    for(y=0; y<h; y++){
3667
        for(x=0; x<w; x++){
3668
            int i= x + y*stride;
3669

    
3670
            if(x){
3671
                if(use_median){
3672
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3673
                    else  src[i] += src[i - 1];
3674
                }else{
3675
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3676
                    else  src[i] += src[i - 1];
3677
                }
3678
            }else{
3679
                if(y) src[i] += src[i - stride];
3680
            }
3681
        }
3682
    }
3683
}
3684

    
3685
static void encode_header(SnowContext *s){
3686
    int plane_index, level, orientation;
3687
    uint8_t kstate[32];
3688

    
3689
    memset(kstate, MID_STATE, sizeof(kstate));
3690

    
3691
    put_rac(&s->c, kstate, s->keyframe);
3692
    if(s->keyframe || s->always_reset)
3693
        reset_contexts(s);
3694
    if(s->keyframe){
3695
        put_symbol(&s->c, s->header_state, s->version, 0);
3696
        put_rac(&s->c, s->header_state, s->always_reset);
3697
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3698
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3699
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3700
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3701
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3702
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3703
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3704
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3705

    
3706
        for(plane_index=0; plane_index<2; plane_index++){
3707
            for(level=0; level<s->spatial_decomposition_count; level++){
3708
                for(orientation=level ? 1:0; orientation<4; orientation++){
3709
                    if(orientation==2) continue;
3710
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3711
                }
3712
            }
3713
        }
3714
    }
3715
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3716
    put_symbol(&s->c, s->header_state, s->qlog, 1);
3717
    put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3718
    put_symbol(&s->c, s->header_state, s->qbias, 1);
3719
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3720
}
3721

    
3722
static int decode_header(SnowContext *s){
3723
    int plane_index, level, orientation;
3724
    uint8_t kstate[32];
3725

    
3726
    memset(kstate, MID_STATE, sizeof(kstate));
3727

    
3728
    s->keyframe= get_rac(&s->c, kstate);
3729
    if(s->keyframe || s->always_reset)
3730
        reset_contexts(s);
3731
    if(s->keyframe){
3732
        s->version= get_symbol(&s->c, s->header_state, 0);
3733
        if(s->version>0){
3734
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3735
            return -1;
3736
        }
3737
        s->always_reset= get_rac(&s->c, s->header_state);
3738
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3739
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3740
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3741
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3742
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3743
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3744
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3745
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3746

    
3747
        for(plane_index=0; plane_index<3; plane_index++){
3748
            for(level=0; level<s->spatial_decomposition_count; level++){
3749
                for(orientation=level ? 1:0; orientation<4; orientation++){
3750
                    int q;
3751
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3752
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3753
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3754
                    s->plane[plane_index].band[level][orientation].qlog= q;
3755
                }
3756
            }
3757
        }
3758
    }
3759

    
3760
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3761
    if(s->spatial_decomposition_type > 2){
3762
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3763
        return -1;
3764
    }
3765

    
3766
    s->qlog= get_symbol(&s->c, s->header_state, 1);
3767
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3768
    s->qbias= get_symbol(&s->c, s->header_state, 1);
3769
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3770
    if(s->block_max_depth > 1){
3771
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3772
        s->block_max_depth= 0;
3773
        return -1;
3774
    }
3775

    
3776
    return 0;
3777
}
3778

    
3779
static void init_qexp(){
3780
    int i;
3781
    double v=128;
3782

    
3783
    for(i=0; i<QROOT; i++){
3784
        qexp[i]= lrintf(v);
3785
        v *= pow(2, 1.0 / QROOT);
3786
    }
3787
}
3788

    
3789
static int common_init(AVCodecContext *avctx){
3790
    SnowContext *s = avctx->priv_data;
3791
    int width, height;
3792
    int level, orientation, plane_index, dec;
3793

    
3794
    s->avctx= avctx;
3795

    
3796
    dsputil_init(&s->dsp, avctx);
3797

    
3798
#define mcf(dx,dy)\
3799
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3800
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3801
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3802
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3803
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3804
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3805

    
3806
    mcf( 0, 0)
3807
    mcf( 4, 0)
3808
    mcf( 8, 0)
3809
    mcf(12, 0)
3810
    mcf( 0, 4)
3811
    mcf( 4, 4)
3812
    mcf( 8, 4)
3813
    mcf(12, 4)
3814
    mcf( 0, 8)
3815
    mcf( 4, 8)
3816
    mcf( 8, 8)
3817
    mcf(12, 8)
3818
    mcf( 0,12)
3819
    mcf( 4,12)
3820
    mcf( 8,12)
3821
    mcf(12,12)
3822

    
3823
#define mcfh(dx,dy)\
3824
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3825
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3826
        mc_block_hpel ## dx ## dy ## 16;\
3827
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3828
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3829
        mc_block_hpel ## dx ## dy ## 8;
3830

    
3831
    mcfh(0, 0)
3832
    mcfh(8, 0)
3833
    mcfh(0, 8)
3834
    mcfh(8, 8)
3835

    
3836
    if(!qexp[0])
3837
        init_qexp();
3838

    
3839
    dec= s->spatial_decomposition_count= 5;
3840
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3841

    
3842
    s->chroma_h_shift= 1; //FIXME XXX
3843
    s->chroma_v_shift= 1;
3844

    
3845
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3846

    
3847
    width= s->avctx->width;
3848
    height= s->avctx->height;
3849

    
3850
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3851

    
3852
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3853
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3854

    
3855
    for(plane_index=0; plane_index<3; plane_index++){
3856
        int w= s->avctx->width;
3857
        int h= s->avctx->height;
3858

    
3859
        if(plane_index){
3860
            w>>= s->chroma_h_shift;
3861
            h>>= s->chroma_v_shift;
3862
        }
3863
        s->plane[plane_index].width = w;
3864
        s->plane[plane_index].height= h;
3865
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3866
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3867
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3868
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3869

    
3870
                b->buf= s->spatial_dwt_buffer;
3871
                b->level= level;
3872
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3873
                b->width = (w + !(orientation&1))>>1;
3874
                b->height= (h + !(orientation>1))>>1;
3875

    
3876
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3877
                b->buf_x_offset = 0;
3878
                b->buf_y_offset = 0;
3879

    
3880
                if(orientation&1){
3881
                    b->buf += (w+1)>>1;
3882
                    b->buf_x_offset = (w+1)>>1;
3883
                }
3884
                if(orientation>1){
3885
                    b->buf += b->stride>>1;
3886
                    b->buf_y_offset = b->stride_line >> 1;
3887
                }
3888

    
3889
                if(level)
3890
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3891
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3892
            }
3893
            w= (w+1)>>1;
3894
            h= (h+1)>>1;
3895
        }
3896
    }
3897

    
3898
    reset_contexts(s);
3899
/*
3900
    width= s->width= avctx->width;
3901
    height= s->height= avctx->height;
3902

3903
    assert(width && height);
3904
*/
3905
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3906

    
3907
    return 0;
3908
}
3909

    
3910

    
3911
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3912
    int width = p->width;
3913
    int height= p->height;
3914
    int level, orientation, x, y;
3915

    
3916
    for(level=0; level<s->spatial_decomposition_count; level++){
3917
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3918
            SubBand *b= &p->band[level][orientation];
3919
            DWTELEM *buf= b->buf;
3920
            int64_t error=0;
3921

    
3922
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3923
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
3924
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3925
            for(y=0; y<height; y++){
3926
                for(x=0; x<width; x++){
3927
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
3928
                    error += d*d;
3929
                }
3930
            }
3931

    
3932
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3933
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3934
        }
3935
    }
3936
}
3937

    
3938
static int encode_init(AVCodecContext *avctx)
3939
{
3940
    SnowContext *s = avctx->priv_data;
3941
    int plane_index;
3942

    
3943
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3944
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3945
               "use vstrict=-2 / -strict -2 to use it anyway\n");
3946
        return -1;
3947
    }
3948

    
3949
    common_init(avctx);
3950
    alloc_blocks(s);
3951

    
3952
    s->version=0;
3953

    
3954
    s->m.avctx   = avctx;
3955
    s->m.flags   = avctx->flags;
3956
    s->m.bit_rate= avctx->bit_rate;
3957

    
3958
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3959
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3960
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3961
    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3962
    h263_encode_init(&s->m); //mv_penalty
3963

    
3964
    if(avctx->flags&CODEC_FLAG_PASS1){
3965
        if(!avctx->stats_out)
3966
            avctx->stats_out = av_mallocz(256);
3967
    }
3968
    if(avctx->flags&CODEC_FLAG_PASS2){
3969
        if(ff_rate_control_init(&s->m) < 0)
3970
            return -1;
3971
    }
3972

    
3973
    for(plane_index=0; plane_index<3; plane_index++){
3974
        calculate_vissual_weight(s, &s->plane[plane_index]);
3975
    }
3976

    
3977

    
3978
    avctx->coded_frame= &s->current_picture;
3979
    switch(avctx->pix_fmt){
3980
//    case PIX_FMT_YUV444P:
3981
//    case PIX_FMT_YUV422P:
3982
    case PIX_FMT_YUV420P:
3983
    case PIX_FMT_GRAY8:
3984
//    case PIX_FMT_YUV411P:
3985
//    case PIX_FMT_YUV410P:
3986
        s->colorspace_type= 0;
3987
        break;
3988
/*    case PIX_FMT_RGBA32:
3989
        s->colorspace= 1;
3990
        break;*/
3991
    default:
3992
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3993
        return -1;
3994
    }
3995
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3996
    s->chroma_h_shift= 1;
3997
    s->chroma_v_shift= 1;
3998

    
3999
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4000
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4001

    
4002
    s->avctx->get_buffer(s->avctx, &s->input_picture);
4003

    
4004
    return 0;
4005
}
4006

    
4007
static int frame_start(SnowContext *s){
4008
   AVFrame tmp;
4009
   int w= s->avctx->width; //FIXME round up to x16 ?
4010
   int h= s->avctx->height;
4011

    
4012
    if(s->current_picture.data[0]){
4013
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
4014
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4015
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4016
    }
4017

    
4018
    tmp= s->last_picture;
4019
    s->last_picture= s->current_picture;
4020
    s->current_picture= tmp;
4021

    
4022
    s->current_picture.reference= 1;
4023
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4024
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4025
        return -1;
4026
    }
4027

    
4028
    return 0;
4029
}
4030

    
4031
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4032
    SnowContext *s = avctx->priv_data;
4033
    RangeCoder * const c= &s->c;
4034
    AVFrame *pict = data;
4035
    const int width= s->avctx->width;
4036
    const int height= s->avctx->height;
4037
    int level, orientation, plane_index, i, y;
4038

    
4039
    ff_init_range_encoder(c, buf, buf_size);
4040
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4041

    
4042
    for(i=0; i<3; i++){
4043
        int shift= !!i;
4044
        for(y=0; y<(height>>shift); y++)
4045
            memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4046
                   &pict->data[i][y * pict->linesize[i]],
4047
                   width>>shift);
4048
    }
4049
    s->new_picture = *pict;
4050

    
4051
    if(avctx->flags&CODEC_FLAG_PASS2){
4052
        s->m.pict_type =
4053
        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4054
        s->keyframe= pict->pict_type==FF_I_TYPE;
4055
        s->m.picture_number= avctx->frame_number;
4056
        pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4057
    }else{
4058
        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4059
        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4060
    }
4061

    
4062
    if(pict->quality){
4063
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
4064
        //<64 >60
4065
        s->qlog += 61*QROOT/8;
4066
    }else{
4067
        s->qlog= LOSSLESS_QLOG;
4068
    }
4069

    
4070
    frame_start(s);
4071
    s->current_picture.key_frame= s->keyframe;
4072