Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ daba69b2

History | View | Annotate | Download (134 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22

    
23
#include "rangecoder.h"
24
#define MID_STATE 128
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QSHIFT 5
35
#define QROOT (1<<QSHIFT)
36
#define LOSSLESS_QLOG -128
37
#define FRAC_BITS 8
38

    
39
static const int8_t quant3[256]={
40
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56
};
57
static const int8_t quant3b[256]={
58
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74
};
75
static const int8_t quant3bA[256]={
76
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92
};
93
static const int8_t quant5[256]={
94
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110
};
111
static const int8_t quant7[256]={
112
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128
};
129
static const int8_t quant9[256]={
130
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146
};
147
static const int8_t quant11[256]={
148
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164
};
165
static const int8_t quant13[256]={
166
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182
};
183

    
184
#define LOG2_OBMC_MAX 6
185
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
186
#if 0 //64*cubic
187
static const uint8_t obmc32[1024]={
188
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220
//error:0.000022
221
};
222
static const uint8_t obmc16[256]={
223
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239
//error:0.000033
240
};
241
#elif 1 // 64*linear
242
static const uint8_t obmc32[1024]={
243
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275
 //error:0.000020
276
};
277
static const uint8_t obmc16[256]={
278
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294
//error:0.000015
295
};
296
#else //64*cos
297
static const uint8_t obmc32[1024]={
298
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330
//error:0.000022
331
};
332
static const uint8_t obmc16[256]={
333
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349
//error:0.000022
350
};
351
#endif
352

    
353
//linear *64
354
static const uint8_t obmc8[64]={
355
 1, 3, 5, 7, 7, 5, 3, 1,
356
 3, 9,15,21,21,15, 9, 3,
357
 5,15,25,35,35,25,15, 5,
358
 7,21,35,49,49,35,21, 7,
359
 7,21,35,49,49,35,21, 7,
360
 5,15,25,35,35,25,15, 5,
361
 3, 9,15,21,21,15, 9, 3,
362
 1, 3, 5, 7, 7, 5, 3, 1,
363
//error:0.000000
364
};
365

    
366
//linear *64
367
static const uint8_t obmc4[16]={
368
 4,12,12, 4,
369
12,36,36,12,
370
12,36,36,12,
371
 4,12,12, 4,
372
//error:0.000000
373
};
374

    
375
static const uint8_t *obmc_tab[4]={
376
    obmc32, obmc16, obmc8, obmc4
377
};
378

    
379
typedef struct BlockNode{
380
    int16_t mx;
381
    int16_t my;
382
    uint8_t color[3];
383
    uint8_t type;
384
//#define TYPE_SPLIT    1
385
#define BLOCK_INTRA   1
386
//#define TYPE_NOCOLOR  4
387
    uint8_t level; //FIXME merge into type?
388
}BlockNode;
389

    
390
#define LOG2_MB_SIZE 4
391
#define MB_SIZE (1<<LOG2_MB_SIZE)
392

    
393
typedef struct x_and_coeff{
394
    int16_t x;
395
    uint16_t coeff;
396
} x_and_coeff;
397

    
398
typedef struct SubBand{
399
    int level;
400
    int stride;
401
    int width;
402
    int height;
403
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
404
    DWTELEM *buf;
405
    int buf_x_offset;
406
    int buf_y_offset;
407
    int stride_line; ///< Stride measured in lines, not pixels.
408
    x_and_coeff * x_coeff;
409
    struct SubBand *parent;
410
    uint8_t state[/*7*2*/ 7 + 512][32];
411
}SubBand;
412

    
413
typedef struct Plane{
414
    int width;
415
    int height;
416
    SubBand band[MAX_DECOMPOSITIONS][4];
417
}Plane;
418

    
419
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
420
typedef struct {
421
    DWTELEM * * line; ///< For use by idwt and predict_slices.
422
    DWTELEM * * data_stack; ///< Used for internal purposes.
423
    int data_stack_top;
424
    int line_count;
425
    int line_width;
426
    int data_count;
427
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
428
} slice_buffer;
429

    
430
typedef struct SnowContext{
431
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
432

    
433
    AVCodecContext *avctx;
434
    RangeCoder c;
435
    DSPContext dsp;
436
    AVFrame input_picture;
437
    AVFrame current_picture;
438
    AVFrame last_picture;
439
    AVFrame mconly_picture;
440
//     uint8_t q_context[16];
441
    uint8_t header_state[32];
442
    uint8_t block_state[128 + 32*128];
443
    int keyframe;
444
    int always_reset;
445
    int version;
446
    int spatial_decomposition_type;
447
    int temporal_decomposition_type;
448
    int spatial_decomposition_count;
449
    int temporal_decomposition_count;
450
    DWTELEM *spatial_dwt_buffer;
451
    int colorspace_type;
452
    int chroma_h_shift;
453
    int chroma_v_shift;
454
    int spatial_scalability;
455
    int qlog;
456
    int lambda;
457
    int lambda2;
458
    int mv_scale;
459
    int qbias;
460
#define QBIAS_SHIFT 3
461
    int b_width;
462
    int b_height;
463
    int block_max_depth;
464
    Plane plane[MAX_PLANES];
465
    BlockNode *block;
466
    slice_buffer sb;
467

    
468
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
469
}SnowContext;
470

    
471
typedef struct {
472
    DWTELEM *b0;
473
    DWTELEM *b1;
474
    DWTELEM *b2;
475
    DWTELEM *b3;
476
    int y;
477
} dwt_compose_t;
478

    
479
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
480
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
481

    
482
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
483
{
484
    int i;
485

    
486
    buf->base_buffer = base_buffer;
487
    buf->line_count = line_count;
488
    buf->line_width = line_width;
489
    buf->data_count = max_allocated_lines;
490
    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
491
    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
492

    
493
    for (i = 0; i < max_allocated_lines; i++)
494
    {
495
      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
496
    }
497

    
498
    buf->data_stack_top = max_allocated_lines - 1;
499
}
500

    
501
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
502
{
503
    int offset;
504
    DWTELEM * buffer;
505

    
506
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
507

    
508
    assert(buf->data_stack_top >= 0);
509
//  assert(!buf->line[line]);
510
    if (buf->line[line])
511
        return buf->line[line];
512

    
513
    offset = buf->line_width * line;
514
    buffer = buf->data_stack[buf->data_stack_top];
515
    buf->data_stack_top--;
516
    buf->line[line] = buffer;
517

    
518
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
519

    
520
    return buffer;
521
}
522

    
523
static void slice_buffer_release(slice_buffer * buf, int line)
524
{
525
    int offset;
526
    DWTELEM * buffer;
527

    
528
    assert(line >= 0 && line < buf->line_count);
529
    assert(buf->line[line]);
530

    
531
    offset = buf->line_width * line;
532
    buffer = buf->line[line];
533
    buf->data_stack_top++;
534
    buf->data_stack[buf->data_stack_top] = buffer;
535
    buf->line[line] = NULL;
536

    
537
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
538
}
539

    
540
static void slice_buffer_flush(slice_buffer * buf)
541
{
542
    int i;
543
    for (i = 0; i < buf->line_count; i++)
544
    {
545
        if (buf->line[i])
546
        {
547
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
548
            slice_buffer_release(buf, i);
549
        }
550
    }
551
}
552

    
553
static void slice_buffer_destroy(slice_buffer * buf)
554
{
555
    int i;
556
    slice_buffer_flush(buf);
557

    
558
    for (i = buf->data_count - 1; i >= 0; i--)
559
    {
560
        assert(buf->data_stack[i]);
561
        av_free(buf->data_stack[i]);
562
    }
563
    assert(buf->data_stack);
564
    av_free(buf->data_stack);
565
    assert(buf->line);
566
    av_free(buf->line);
567
}
568

    
569
#ifdef        __sgi
570
// Avoid a name clash on SGI IRIX
571
#undef        qexp
572
#endif
573
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
574
static uint8_t qexp[QROOT];
575

    
576
static inline int mirror(int v, int m){
577
    if     (v<0) return -v;
578
    else if(v>m) return 2*m-v;
579
    else         return v;
580
}
581

    
582
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
583
    int i;
584

    
585
    if(v){
586
        const int a= ABS(v);
587
        const int e= av_log2(a);
588
#if 1
589
        const int el= FFMIN(e, 10);
590
        put_rac(c, state+0, 0);
591

    
592
        for(i=0; i<el; i++){
593
            put_rac(c, state+1+i, 1);  //1..10
594
        }
595
        for(; i<e; i++){
596
            put_rac(c, state+1+9, 1);  //1..10
597
        }
598
        put_rac(c, state+1+FFMIN(i,9), 0);
599

    
600
        for(i=e-1; i>=el; i--){
601
            put_rac(c, state+22+9, (a>>i)&1); //22..31
602
        }
603
        for(; i>=0; i--){
604
            put_rac(c, state+22+i, (a>>i)&1); //22..31
605
        }
606

    
607
        if(is_signed)
608
            put_rac(c, state+11 + el, v < 0); //11..21
609
#else
610

    
611
        put_rac(c, state+0, 0);
612
        if(e<=9){
613
            for(i=0; i<e; i++){
614
                put_rac(c, state+1+i, 1);  //1..10
615
            }
616
            put_rac(c, state+1+i, 0);
617

    
618
            for(i=e-1; i>=0; i--){
619
                put_rac(c, state+22+i, (a>>i)&1); //22..31
620
            }
621

    
622
            if(is_signed)
623
                put_rac(c, state+11 + e, v < 0); //11..21
624
        }else{
625
            for(i=0; i<e; i++){
626
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
627
            }
628
            put_rac(c, state+1+FFMIN(i,9), 0);
629

    
630
            for(i=e-1; i>=0; i--){
631
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
632
            }
633

    
634
            if(is_signed)
635
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
636
        }
637
#endif
638
    }else{
639
        put_rac(c, state+0, 1);
640
    }
641
}
642

    
643
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
644
    if(get_rac(c, state+0))
645
        return 0;
646
    else{
647
        int i, e, a;
648
        e= 0;
649
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
650
            e++;
651
        }
652

    
653
        a= 1;
654
        for(i=e-1; i>=0; i--){
655
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
656
        }
657

    
658
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
659
            return -a;
660
        else
661
            return a;
662
    }
663
}
664

    
665
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
666
    int i;
667
    int r= log2>=0 ? 1<<log2 : 1;
668

    
669
    assert(v>=0);
670
    assert(log2>=-4);
671

    
672
    while(v >= r){
673
        put_rac(c, state+4+log2, 1);
674
        v -= r;
675
        log2++;
676
        if(log2>0) r+=r;
677
    }
678
    put_rac(c, state+4+log2, 0);
679

    
680
    for(i=log2-1; i>=0; i--){
681
        put_rac(c, state+31-i, (v>>i)&1);
682
    }
683
}
684

    
685
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
686
    int i;
687
    int r= log2>=0 ? 1<<log2 : 1;
688
    int v=0;
689

    
690
    assert(log2>=-4);
691

    
692
    while(get_rac(c, state+4+log2)){
693
        v+= r;
694
        log2++;
695
        if(log2>0) r+=r;
696
    }
697

    
698
    for(i=log2-1; i>=0; i--){
699
        v+= get_rac(c, state+31-i)<<i;
700
    }
701

    
702
    return v;
703
}
704

    
705
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
706
    const int mirror_left= !highpass;
707
    const int mirror_right= (width&1) ^ highpass;
708
    const int w= (width>>1) - 1 + (highpass & width);
709
    int i;
710

    
711
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
712
    if(mirror_left){
713
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
714
        dst += dst_step;
715
        src += src_step;
716
    }
717

    
718
    for(i=0; i<w; i++){
719
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
720
    }
721

    
722
    if(mirror_right){
723
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
724
    }
725
}
726

    
727
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
728
    const int mirror_left= !highpass;
729
    const int mirror_right= (width&1) ^ highpass;
730
    const int w= (width>>1) - 1 + (highpass & width);
731
    int i;
732

    
733
    if(mirror_left){
734
        int r= 3*2*ref[0];
735
        r += r>>4;
736
        r += r>>8;
737
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
738
        dst += dst_step;
739
        src += src_step;
740
    }
741

    
742
    for(i=0; i<w; i++){
743
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
744
        r += r>>4;
745
        r += r>>8;
746
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
747
    }
748

    
749
    if(mirror_right){
750
        int r= 3*2*ref[w*ref_step];
751
        r += r>>4;
752
        r += r>>8;
753
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
754
    }
755
}
756

    
757
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
758
    const int mirror_left= !highpass;
759
    const int mirror_right= (width&1) ^ highpass;
760
    const int w= (width>>1) - 1 + (highpass & width);
761
    int i;
762

    
763
    assert(shift == 4);
764
#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
765
    if(mirror_left){
766
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
767
        dst += dst_step;
768
        src += src_step;
769
    }
770

    
771
    for(i=0; i<w; i++){
772
        dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
773
    }
774

    
775
    if(mirror_right){
776
        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
777
    }
778
}
779

    
780

    
781
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
782
    int x, i;
783

    
784
    for(x=start; x<width; x+=2){
785
        int64_t sum=0;
786

    
787
        for(i=0; i<n; i++){
788
            int x2= x + 2*i - n + 1;
789
            if     (x2<     0) x2= -x2;
790
            else if(x2>=width) x2= 2*width-x2-2;
791
            sum += coeffs[i]*(int64_t)dst[x2];
792
        }
793
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
794
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
795
    }
796
}
797

    
798
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
799
    int x, y, i;
800
    for(y=start; y<height; y+=2){
801
        for(x=0; x<width; x++){
802
            int64_t sum=0;
803

    
804
            for(i=0; i<n; i++){
805
                int y2= y + 2*i - n + 1;
806
                if     (y2<      0) y2= -y2;
807
                else if(y2>=height) y2= 2*height-y2-2;
808
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
809
            }
810
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
811
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
812
        }
813
    }
814
}
815

    
816
#define SCALEX 1
817
#define LX0 0
818
#define LX1 1
819

    
820
#if 0 // more accurate 9/7
821
#define N1 2
822
#define SHIFT1 14
823
#define COEFFS1 (int[]){-25987,-25987}
824
#define N2 2
825
#define SHIFT2 19
826
#define COEFFS2 (int[]){-27777,-27777}
827
#define N3 2
828
#define SHIFT3 15
829
#define COEFFS3 (int[]){28931,28931}
830
#define N4 2
831
#define SHIFT4 15
832
#define COEFFS4 (int[]){14533,14533}
833
#elif 1 // 13/7 CRF
834
#define N1 4
835
#define SHIFT1 4
836
#define COEFFS1 (int[]){1,-9,-9,1}
837
#define N2 4
838
#define SHIFT2 4
839
#define COEFFS2 (int[]){-1,5,5,-1}
840
#define N3 0
841
#define SHIFT3 1
842
#define COEFFS3 NULL
843
#define N4 0
844
#define SHIFT4 1
845
#define COEFFS4 NULL
846
#elif 1 // 3/5
847
#define LX0 1
848
#define LX1 0
849
#define SCALEX 0.5
850
#define N1 2
851
#define SHIFT1 1
852
#define COEFFS1 (int[]){1,1}
853
#define N2 2
854
#define SHIFT2 2
855
#define COEFFS2 (int[]){-1,-1}
856
#define N3 0
857
#define SHIFT3 0
858
#define COEFFS3 NULL
859
#define N4 0
860
#define SHIFT4 0
861
#define COEFFS4 NULL
862
#elif 1 // 11/5
863
#define N1 0
864
#define SHIFT1 1
865
#define COEFFS1 NULL
866
#define N2 2
867
#define SHIFT2 2
868
#define COEFFS2 (int[]){-1,-1}
869
#define N3 2
870
#define SHIFT3 0
871
#define COEFFS3 (int[]){-1,-1}
872
#define N4 4
873
#define SHIFT4 7
874
#define COEFFS4 (int[]){-5,29,29,-5}
875
#define SCALEX 4
876
#elif 1 // 9/7 CDF
877
#define N1 2
878
#define SHIFT1 7
879
#define COEFFS1 (int[]){-203,-203}
880
#define N2 2
881
#define SHIFT2 12
882
#define COEFFS2 (int[]){-217,-217}
883
#define N3 2
884
#define SHIFT3 7
885
#define COEFFS3 (int[]){113,113}
886
#define N4 2
887
#define SHIFT4 9
888
#define COEFFS4 (int[]){227,227}
889
#define SCALEX 1
890
#elif 1 // 7/5 CDF
891
#define N1 0
892
#define SHIFT1 1
893
#define COEFFS1 NULL
894
#define N2 2
895
#define SHIFT2 2
896
#define COEFFS2 (int[]){-1,-1}
897
#define N3 2
898
#define SHIFT3 0
899
#define COEFFS3 (int[]){-1,-1}
900
#define N4 2
901
#define SHIFT4 4
902
#define COEFFS4 (int[]){3,3}
903
#elif 1 // 9/7 MN
904
#define N1 4
905
#define SHIFT1 4
906
#define COEFFS1 (int[]){1,-9,-9,1}
907
#define N2 2
908
#define SHIFT2 2
909
#define COEFFS2 (int[]){1,1}
910
#define N3 0
911
#define SHIFT3 1
912
#define COEFFS3 NULL
913
#define N4 0
914
#define SHIFT4 1
915
#define COEFFS4 NULL
916
#else // 13/7 CRF
917
#define N1 4
918
#define SHIFT1 4
919
#define COEFFS1 (int[]){1,-9,-9,1}
920
#define N2 4
921
#define SHIFT2 4
922
#define COEFFS2 (int[]){-1,5,5,-1}
923
#define N3 0
924
#define SHIFT3 1
925
#define COEFFS3 NULL
926
#define N4 0
927
#define SHIFT4 1
928
#define COEFFS4 NULL
929
#endif
930
static void horizontal_decomposeX(DWTELEM *b, int width){
931
    DWTELEM temp[width];
932
    const int width2= width>>1;
933
    const int w2= (width+1)>>1;
934
    int x;
935

    
936
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
937
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
938
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
939
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
940

    
941
    for(x=0; x<width2; x++){
942
        temp[x   ]= b[2*x    ];
943
        temp[x+w2]= b[2*x + 1];
944
    }
945
    if(width&1)
946
        temp[x   ]= b[2*x    ];
947
    memcpy(b, temp, width*sizeof(int));
948
}
949

    
950
static void horizontal_composeX(DWTELEM *b, int width){
951
    DWTELEM temp[width];
952
    const int width2= width>>1;
953
    int x;
954
    const int w2= (width+1)>>1;
955

    
956
    memcpy(temp, b, width*sizeof(int));
957
    for(x=0; x<width2; x++){
958
        b[2*x    ]= temp[x   ];
959
        b[2*x + 1]= temp[x+w2];
960
    }
961
    if(width&1)
962
        b[2*x    ]= temp[x   ];
963

    
964
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
965
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
966
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
967
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
968
}
969

    
970
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
971
    int x, y;
972

    
973
    for(y=0; y<height; y++){
974
        for(x=0; x<width; x++){
975
            buffer[y*stride + x] *= SCALEX;
976
        }
977
    }
978

    
979
    for(y=0; y<height; y++){
980
        horizontal_decomposeX(buffer + y*stride, width);
981
    }
982

    
983
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
984
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
985
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
986
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
987
}
988

    
989
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
990
    int x, y;
991

    
992
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
993
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
994
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
995
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
996

    
997
    for(y=0; y<height; y++){
998
        horizontal_composeX(buffer + y*stride, width);
999
    }
1000

    
1001
    for(y=0; y<height; y++){
1002
        for(x=0; x<width; x++){
1003
            buffer[y*stride + x] /= SCALEX;
1004
        }
1005
    }
1006
}
1007

    
1008
static void horizontal_decompose53i(DWTELEM *b, int width){
1009
    DWTELEM temp[width];
1010
    const int width2= width>>1;
1011
    int x;
1012
    const int w2= (width+1)>>1;
1013

    
1014
    for(x=0; x<width2; x++){
1015
        temp[x   ]= b[2*x    ];
1016
        temp[x+w2]= b[2*x + 1];
1017
    }
1018
    if(width&1)
1019
        temp[x   ]= b[2*x    ];
1020
#if 0
1021
    {
1022
    int A1,A2,A3,A4;
1023
    A2= temp[1       ];
1024
    A4= temp[0       ];
1025
    A1= temp[0+width2];
1026
    A1 -= (A2 + A4)>>1;
1027
    A4 += (A1 + 1)>>1;
1028
    b[0+width2] = A1;
1029
    b[0       ] = A4;
1030
    for(x=1; x+1<width2; x+=2){
1031
        A3= temp[x+width2];
1032
        A4= temp[x+1     ];
1033
        A3 -= (A2 + A4)>>1;
1034
        A2 += (A1 + A3 + 2)>>2;
1035
        b[x+width2] = A3;
1036
        b[x       ] = A2;
1037

1038
        A1= temp[x+1+width2];
1039
        A2= temp[x+2       ];
1040
        A1 -= (A2 + A4)>>1;
1041
        A4 += (A1 + A3 + 2)>>2;
1042
        b[x+1+width2] = A1;
1043
        b[x+1       ] = A4;
1044
    }
1045
    A3= temp[width-1];
1046
    A3 -= A2;
1047
    A2 += (A1 + A3 + 2)>>2;
1048
    b[width -1] = A3;
1049
    b[width2-1] = A2;
1050
    }
1051
#else
1052
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1053
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
1054
#endif
1055
}
1056

    
1057
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1058
    int i;
1059

    
1060
    for(i=0; i<width; i++){
1061
        b1[i] -= (b0[i] + b2[i])>>1;
1062
    }
1063
}
1064

    
1065
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1066
    int i;
1067

    
1068
    for(i=0; i<width; i++){
1069
        b1[i] += (b0[i] + b2[i] + 2)>>2;
1070
    }
1071
}
1072

    
1073
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1074
    int y;
1075
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1076
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
1077

    
1078
    for(y=-2; y<height; y+=2){
1079
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1080
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1081

    
1082
{START_TIMER
1083
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
1084
        if(y+2 < height) horizontal_decompose53i(b3, width);
1085
STOP_TIMER("horizontal_decompose53i")}
1086

    
1087
{START_TIMER
1088
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1089
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1090
STOP_TIMER("vertical_decompose53i*")}
1091

    
1092
        b0=b2;
1093
        b1=b3;
1094
    }
1095
}
1096

    
1097
#define liftS lift
1098
#define lift5 lift
1099
#if 1
1100
#define W_AM 3
1101
#define W_AO 0
1102
#define W_AS 1
1103

    
1104
#undef liftS
1105
#define W_BM 1
1106
#define W_BO 8
1107
#define W_BS 4
1108

    
1109
#define W_CM 1
1110
#define W_CO 0
1111
#define W_CS 0
1112

    
1113
#define W_DM 3
1114
#define W_DO 4
1115
#define W_DS 3
1116
#elif 0
1117
#define W_AM 55
1118
#define W_AO 16
1119
#define W_AS 5
1120

    
1121
#define W_BM 3
1122
#define W_BO 32
1123
#define W_BS 6
1124

    
1125
#define W_CM 127
1126
#define W_CO 64
1127
#define W_CS 7
1128

    
1129
#define W_DM 7
1130
#define W_DO 8
1131
#define W_DS 4
1132
#elif 0
1133
#define W_AM 97
1134
#define W_AO 32
1135
#define W_AS 6
1136

    
1137
#define W_BM 63
1138
#define W_BO 512
1139
#define W_BS 10
1140

    
1141
#define W_CM 13
1142
#define W_CO 8
1143
#define W_CS 4
1144

    
1145
#define W_DM 15
1146
#define W_DO 16
1147
#define W_DS 5
1148

    
1149
#else
1150

    
1151
#define W_AM 203
1152
#define W_AO 64
1153
#define W_AS 7
1154

    
1155
#define W_BM 217
1156
#define W_BO 2048
1157
#define W_BS 12
1158

    
1159
#define W_CM 113
1160
#define W_CO 64
1161
#define W_CS 7
1162

    
1163
#define W_DM 227
1164
#define W_DO 128
1165
#define W_DS 9
1166
#endif
1167
static void horizontal_decompose97i(DWTELEM *b, int width){
1168
    DWTELEM temp[width];
1169
    const int w2= (width+1)>>1;
1170

    
1171
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1172
    liftS(temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1173
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1174
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1175
}
1176

    
1177

    
1178
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1179
    int i;
1180

    
1181
    for(i=0; i<width; i++){
1182
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1183
    }
1184
}
1185

    
1186
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1187
    int i;
1188

    
1189
    for(i=0; i<width; i++){
1190
#ifdef lift5
1191
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1192
#else
1193
        int r= 3*(b0[i] + b2[i]);
1194
        r+= r>>4;
1195
        r+= r>>8;
1196
        b1[i] += (r+W_CO)>>W_CS;
1197
#endif
1198
    }
1199
}
1200

    
1201
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1202
    int i;
1203

    
1204
    for(i=0; i<width; i++){
1205
#ifdef liftS
1206
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1207
#else
1208
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1209
#endif
1210
    }
1211
}
1212

    
1213
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1214
    int i;
1215

    
1216
    for(i=0; i<width; i++){
1217
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1218
    }
1219
}
1220

    
1221
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1222
    int y;
1223
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1224
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1225
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1226
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1227

    
1228
    for(y=-4; y<height; y+=2){
1229
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1230
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1231

    
1232
{START_TIMER
1233
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1234
        if(y+4 < height) horizontal_decompose97i(b5, width);
1235
if(width>400){
1236
STOP_TIMER("horizontal_decompose97i")
1237
}}
1238

    
1239
{START_TIMER
1240
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1241
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1242
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1243
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1244

    
1245
if(width>400){
1246
STOP_TIMER("vertical_decompose97i")
1247
}}
1248

    
1249
        b0=b2;
1250
        b1=b3;
1251
        b2=b4;
1252
        b3=b5;
1253
    }
1254
}
1255

    
1256
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1257
    int level;
1258

    
1259
    for(level=0; level<decomposition_count; level++){
1260
        switch(type){
1261
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1262
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1263
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1264
        }
1265
    }
1266
}
1267

    
1268
static void horizontal_compose53i(DWTELEM *b, int width){
1269
    DWTELEM temp[width];
1270
    const int width2= width>>1;
1271
    const int w2= (width+1)>>1;
1272
    int x;
1273

    
1274
#if 0
1275
    int A1,A2,A3,A4;
1276
    A2= temp[1       ];
1277
    A4= temp[0       ];
1278
    A1= temp[0+width2];
1279
    A1 -= (A2 + A4)>>1;
1280
    A4 += (A1 + 1)>>1;
1281
    b[0+width2] = A1;
1282
    b[0       ] = A4;
1283
    for(x=1; x+1<width2; x+=2){
1284
        A3= temp[x+width2];
1285
        A4= temp[x+1     ];
1286
        A3 -= (A2 + A4)>>1;
1287
        A2 += (A1 + A3 + 2)>>2;
1288
        b[x+width2] = A3;
1289
        b[x       ] = A2;
1290

1291
        A1= temp[x+1+width2];
1292
        A2= temp[x+2       ];
1293
        A1 -= (A2 + A4)>>1;
1294
        A4 += (A1 + A3 + 2)>>2;
1295
        b[x+1+width2] = A1;
1296
        b[x+1       ] = A4;
1297
    }
1298
    A3= temp[width-1];
1299
    A3 -= A2;
1300
    A2 += (A1 + A3 + 2)>>2;
1301
    b[width -1] = A3;
1302
    b[width2-1] = A2;
1303
#else
1304
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1305
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1306
#endif
1307
    for(x=0; x<width2; x++){
1308
        b[2*x    ]= temp[x   ];
1309
        b[2*x + 1]= temp[x+w2];
1310
    }
1311
    if(width&1)
1312
        b[2*x    ]= temp[x   ];
1313
}
1314

    
1315
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1316
    int i;
1317

    
1318
    for(i=0; i<width; i++){
1319
        b1[i] += (b0[i] + b2[i])>>1;
1320
    }
1321
}
1322

    
1323
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1324
    int i;
1325

    
1326
    for(i=0; i<width; i++){
1327
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1328
    }
1329
}
1330

    
1331
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1332
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1333
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1334
    cs->y = -1;
1335
}
1336

    
1337
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1338
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1339
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1340
    cs->y = -1;
1341
}
1342

    
1343
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1344
    int y= cs->y;
1345
    int mirror0 = mirror(y-1, height-1);
1346
    int mirror1 = mirror(y  , height-1);
1347
    int mirror2 = mirror(y+1, height-1);
1348
    int mirror3 = mirror(y+2, height-1);
1349

    
1350
    DWTELEM *b0= cs->b0;
1351
    DWTELEM *b1= cs->b1;
1352
    DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1353
    DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1354

    
1355
{START_TIMER
1356
        if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1357
        if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1358
STOP_TIMER("vertical_compose53i*")}
1359

    
1360
{START_TIMER
1361
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1362
        if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1363
STOP_TIMER("horizontal_compose53i")}
1364

    
1365
    cs->b0 = b2;
1366
    cs->b1 = b3;
1367
    cs->y += 2;
1368
}
1369

    
1370
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1371
    int y= cs->y;
1372
    DWTELEM *b0= cs->b0;
1373
    DWTELEM *b1= cs->b1;
1374
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1375
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1376

    
1377
{START_TIMER
1378
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1379
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1380
STOP_TIMER("vertical_compose53i*")}
1381

    
1382
{START_TIMER
1383
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1384
        if(b0 <= b2) horizontal_compose53i(b1, width);
1385
STOP_TIMER("horizontal_compose53i")}
1386

    
1387
    cs->b0 = b2;
1388
    cs->b1 = b3;
1389
    cs->y += 2;
1390
}
1391

    
1392
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1393
    dwt_compose_t cs;
1394
    spatial_compose53i_init(&cs, buffer, height, stride);
1395
    while(cs.y <= height)
1396
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1397
}
1398

    
1399

    
1400
static void horizontal_compose97i(DWTELEM *b, int width){
1401
    DWTELEM temp[width];
1402
    const int w2= (width+1)>>1;
1403

    
1404
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1405
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1406
    liftS(b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1407
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1408
}
1409

    
1410
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1411
    int i;
1412

    
1413
    for(i=0; i<width; i++){
1414
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1415
    }
1416
}
1417

    
1418
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1419
    int i;
1420

    
1421
    for(i=0; i<width; i++){
1422
#ifdef lift5
1423
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1424
#else
1425
        int r= 3*(b0[i] + b2[i]);
1426
        r+= r>>4;
1427
        r+= r>>8;
1428
        b1[i] -= (r+W_CO)>>W_CS;
1429
#endif
1430
    }
1431
}
1432

    
1433
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1434
    int i;
1435

    
1436
    for(i=0; i<width; i++){
1437
#ifdef liftS
1438
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1439
#else
1440
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1441
#endif
1442
    }
1443
}
1444

    
1445
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1446
    int i;
1447

    
1448
    for(i=0; i<width; i++){
1449
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1450
    }
1451
}
1452

    
1453
static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1454
    int i;
1455

    
1456
    for(i=0; i<width; i++){
1457
#ifndef lift5
1458
        int r;
1459
#endif
1460
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1461
#ifdef lift5
1462
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1463
#else
1464
        r= 3*(b2[i] + b4[i]);
1465
        r+= r>>4;
1466
        r+= r>>8;
1467
        b3[i] -= (r+W_CO)>>W_CS;
1468
#endif
1469
#ifdef liftS
1470
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1471
#else
1472
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1473
#endif
1474
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1475
    }
1476
}
1477

    
1478
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1479
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1480
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1481
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1482
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1483
    cs->y = -3;
1484
}
1485

    
1486
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1487
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1488
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1489
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1490
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1491
    cs->y = -3;
1492
}
1493

    
1494
static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1495
    int y = cs->y;
1496

    
1497
    int mirror0 = mirror(y - 1, height - 1);
1498
    int mirror1 = mirror(y + 0, height - 1);
1499
    int mirror2 = mirror(y + 1, height - 1);
1500
    int mirror3 = mirror(y + 2, height - 1);
1501
    int mirror4 = mirror(y + 3, height - 1);
1502
    int mirror5 = mirror(y + 4, height - 1);
1503
    DWTELEM *b0= cs->b0;
1504
    DWTELEM *b1= cs->b1;
1505
    DWTELEM *b2= cs->b2;
1506
    DWTELEM *b3= cs->b3;
1507
    DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1508
    DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1509

    
1510
{START_TIMER
1511
    if(y>0 && y+4<height){
1512
        vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1513
    }else{
1514
        if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1515
        if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1516
        if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1517
        if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1518
    }
1519
if(width>400){
1520
STOP_TIMER("vertical_compose97i")}}
1521

    
1522
{START_TIMER
1523
        if(y-1>=  0) horizontal_compose97i(b0, width);
1524
        if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1525
if(width>400 && mirror0 <= mirror2){
1526
STOP_TIMER("horizontal_compose97i")}}
1527

    
1528
    cs->b0=b2;
1529
    cs->b1=b3;
1530
    cs->b2=b4;
1531
    cs->b3=b5;
1532
    cs->y += 2;
1533
}
1534

    
1535
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1536
    int y = cs->y;
1537
    DWTELEM *b0= cs->b0;
1538
    DWTELEM *b1= cs->b1;
1539
    DWTELEM *b2= cs->b2;
1540
    DWTELEM *b3= cs->b3;
1541
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1542
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1543

    
1544
        if(stride == width && y+4 < height && 0){
1545
            int x;
1546
            for(x=0; x<width/2; x++)
1547
                b5[x] += 64*2;
1548
            for(; x<width; x++)
1549
                b5[x] += 169*2;
1550
        }
1551

    
1552
{START_TIMER
1553
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1554
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1555
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1556
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1557
if(width>400){
1558
STOP_TIMER("vertical_compose97i")}}
1559

    
1560
{START_TIMER
1561
        if(y-1>=  0) horizontal_compose97i(b0, width);
1562
        if(b0 <= b2) horizontal_compose97i(b1, width);
1563
if(width>400 && b0 <= b2){
1564
STOP_TIMER("horizontal_compose97i")}}
1565

    
1566
    cs->b0=b2;
1567
    cs->b1=b3;
1568
    cs->b2=b4;
1569
    cs->b3=b5;
1570
    cs->y += 2;
1571
}
1572

    
1573
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1574
    dwt_compose_t cs;
1575
    spatial_compose97i_init(&cs, buffer, height, stride);
1576
    while(cs.y <= height)
1577
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1578
}
1579

    
1580
void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1581
    int level;
1582
    for(level=decomposition_count-1; level>=0; level--){
1583
        switch(type){
1584
        case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1585
        case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1586
        /* not slicified yet */
1587
        case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1588
          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1589
        }
1590
    }
1591
}
1592

    
1593
void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1594
    int level;
1595
    for(level=decomposition_count-1; level>=0; level--){
1596
        switch(type){
1597
        case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1598
        case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1599
        /* not slicified yet */
1600
        case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1601
        }
1602
    }
1603
}
1604

    
1605
void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1606
    const int support = type==1 ? 3 : 5;
1607
    int level;
1608
    if(type==2) return;
1609

    
1610
    for(level=decomposition_count-1; level>=0; level--){
1611
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1612
            switch(type){
1613
            case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1614
                    break;
1615
            case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1616
                    break;
1617
            case 2: break;
1618
            }
1619
        }
1620
    }
1621
}
1622

    
1623
void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1624
    const int support = type==1 ? 3 : 5;
1625
    int level;
1626
    if(type==2) return;
1627

    
1628
    for(level=decomposition_count-1; level>=0; level--){
1629
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1630
            switch(type){
1631
            case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1632
                    break;
1633
            case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1634
                    break;
1635
            case 2: break;
1636
            }
1637
        }
1638
    }
1639
}
1640

    
1641
void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1642
    if(type==2){
1643
        int level;
1644
        for(level=decomposition_count-1; level>=0; level--)
1645
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
1646
    }else{
1647
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1648
        int y;
1649
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1650
        for(y=0; y<height; y+=4)
1651
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1652
    }
1653
}
1654

    
1655
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1656
    const int w= b->width;
1657
    const int h= b->height;
1658
    int x, y;
1659

    
1660
    if(1){
1661
        int run=0;
1662
        int runs[w*h];
1663
        int run_index=0;
1664
        int max_index;
1665

    
1666
        for(y=0; y<h; y++){
1667
            for(x=0; x<w; x++){
1668
                int v, p=0;
1669
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1670
                v= src[x + y*stride];
1671

    
1672
                if(y){
1673
                    t= src[x + (y-1)*stride];
1674
                    if(x){
1675
                        lt= src[x - 1 + (y-1)*stride];
1676
                    }
1677
                    if(x + 1 < w){
1678
                        rt= src[x + 1 + (y-1)*stride];
1679
                    }
1680
                }
1681
                if(x){
1682
                    l= src[x - 1 + y*stride];
1683
                    /*if(x > 1){
1684
                        if(orientation==1) ll= src[y + (x-2)*stride];
1685
                        else               ll= src[x - 2 + y*stride];
1686
                    }*/
1687
                }
1688
                if(parent){
1689
                    int px= x>>1;
1690
                    int py= y>>1;
1691
                    if(px<b->parent->width && py<b->parent->height)
1692
                        p= parent[px + py*2*stride];
1693
                }
1694
                if(!(/*ll|*/l|lt|t|rt|p)){
1695
                    if(v){
1696
                        runs[run_index++]= run;
1697
                        run=0;
1698
                    }else{
1699
                        run++;
1700
                    }
1701
                }
1702
            }
1703
        }
1704
        max_index= run_index;
1705
        runs[run_index++]= run;
1706
        run_index=0;
1707
        run= runs[run_index++];
1708

    
1709
        put_symbol2(&s->c, b->state[30], max_index, 0);
1710
        if(run_index <= max_index)
1711
            put_symbol2(&s->c, b->state[1], run, 3);
1712

    
1713
        for(y=0; y<h; y++){
1714
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1715
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1716
                return -1;
1717
            }
1718
            for(x=0; x<w; x++){
1719
                int v, p=0;
1720
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1721
                v= src[x + y*stride];
1722

    
1723
                if(y){
1724
                    t= src[x + (y-1)*stride];
1725
                    if(x){
1726
                        lt= src[x - 1 + (y-1)*stride];
1727
                    }
1728
                    if(x + 1 < w){
1729
                        rt= src[x + 1 + (y-1)*stride];
1730
                    }
1731
                }
1732
                if(x){
1733
                    l= src[x - 1 + y*stride];
1734
                    /*if(x > 1){
1735
                        if(orientation==1) ll= src[y + (x-2)*stride];
1736
                        else               ll= src[x - 2 + y*stride];
1737
                    }*/
1738
                }
1739
                if(parent){
1740
                    int px= x>>1;
1741
                    int py= y>>1;
1742
                    if(px<b->parent->width && py<b->parent->height)
1743
                        p= parent[px + py*2*stride];
1744
                }
1745
                if(/*ll|*/l|lt|t|rt|p){
1746
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1747

    
1748
                    put_rac(&s->c, &b->state[0][context], !!v);
1749
                }else{
1750
                    if(!run){
1751
                        run= runs[run_index++];
1752

    
1753
                        if(run_index <= max_index)
1754
                            put_symbol2(&s->c, b->state[1], run, 3);
1755
                        assert(v);
1756
                    }else{
1757
                        run--;
1758
                        assert(!v);
1759
                    }
1760
                }
1761
                if(v){
1762
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1763
                    int l2= 2*ABS(l) + (l<0);
1764
                    int t2= 2*ABS(t) + (t<0);
1765

    
1766
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1767
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1768
                }
1769
            }
1770
        }
1771
    }
1772
    return 0;
1773
}
1774

    
1775
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1776
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1777
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1778
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1779
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1780
}
1781

    
1782
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1783
    const int w= b->width;
1784
    const int h= b->height;
1785
    int x,y;
1786

    
1787
    if(1){
1788
        int run, runs;
1789
        x_and_coeff *xc= b->x_coeff;
1790
        x_and_coeff *prev_xc= NULL;
1791
        x_and_coeff *prev2_xc= xc;
1792
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1793
        x_and_coeff *prev_parent_xc= parent_xc;
1794

    
1795
        runs= get_symbol2(&s->c, b->state[30], 0);
1796
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1797
        else           run= INT_MAX;
1798

    
1799
        for(y=0; y<h; y++){
1800
            int v=0;
1801
            int lt=0, t=0, rt=0;
1802

    
1803
            if(y && prev_xc->x == 0){
1804
                rt= prev_xc->coeff;
1805
            }
1806
            for(x=0; x<w; x++){
1807
                int p=0;
1808
                const int l= v;
1809

    
1810
                lt= t; t= rt;
1811

    
1812
                if(y){
1813
                    if(prev_xc->x <= x)
1814
                        prev_xc++;
1815
                    if(prev_xc->x == x + 1)
1816
                        rt= prev_xc->coeff;
1817
                    else
1818
                        rt=0;
1819
                }
1820
                if(parent_xc){
1821
                    if(x>>1 > parent_xc->x){
1822
                        parent_xc++;
1823
                    }
1824
                    if(x>>1 == parent_xc->x){
1825
                        p= parent_xc->coeff;
1826
                    }
1827
                }
1828
                if(/*ll|*/l|lt|t|rt|p){
1829
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1830

    
1831
                    v=get_rac(&s->c, &b->state[0][context]);
1832
                    if(v){
1833
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1834
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1835

    
1836
                        xc->x=x;
1837
                        (xc++)->coeff= v;
1838
                    }
1839
                }else{
1840
                    if(!run){
1841
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1842
                        else           run= INT_MAX;
1843
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1844
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1845

    
1846
                        xc->x=x;
1847
                        (xc++)->coeff= v;
1848
                    }else{
1849
                        int max_run;
1850
                        run--;
1851
                        v=0;
1852

    
1853
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1854
                        else  max_run= FFMIN(run, w-x-1);
1855
                        if(parent_xc)
1856
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1857
                        x+= max_run;
1858
                        run-= max_run;
1859
                    }
1860
                }
1861
            }
1862
            (xc++)->x= w+1; //end marker
1863
            prev_xc= prev2_xc;
1864
            prev2_xc= xc;
1865

    
1866
            if(parent_xc){
1867
                if(y&1){
1868
                    while(parent_xc->x != parent->width+1)
1869
                        parent_xc++;
1870
                    parent_xc++;
1871
                    prev_parent_xc= parent_xc;
1872
                }else{
1873
                    parent_xc= prev_parent_xc;
1874
                }
1875
            }
1876
        }
1877

    
1878
        (xc++)->x= w+1; //end marker
1879
    }
1880
}
1881

    
1882
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1883
    const int w= b->width;
1884
    int y;
1885
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1886
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1887
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1888
    int new_index = 0;
1889

    
1890
    START_TIMER
1891

    
1892
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1893
        qadd= 0;
1894
        qmul= 1<<QEXPSHIFT;
1895
    }
1896

    
1897
    /* If we are on the second or later slice, restore our index. */
1898
    if (start_y != 0)
1899
        new_index = save_state[0];
1900

    
1901

    
1902
    for(y=start_y; y<h; y++){
1903
        int x = 0;
1904
        int v;
1905
        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1906
        memset(line, 0, b->width*sizeof(DWTELEM));
1907
        v = b->x_coeff[new_index].coeff;
1908
        x = b->x_coeff[new_index++].x;
1909
        while(x < w)
1910
        {
1911
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1912
            register int u= -(v&1);
1913
            line[x] = (t^u) - u;
1914

    
1915
            v = b->x_coeff[new_index].coeff;
1916
            x = b->x_coeff[new_index++].x;
1917
        }
1918
    }
1919
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1920
        STOP_TIMER("decode_subband")
1921
    }
1922

    
1923
    /* Save our variables for the next slice. */
1924
    save_state[0] = new_index;
1925

    
1926
    return;
1927
}
1928

    
1929
static void reset_contexts(SnowContext *s){
1930
    int plane_index, level, orientation;
1931

    
1932
    for(plane_index=0; plane_index<3; plane_index++){
1933
        for(level=0; level<s->spatial_decomposition_count; level++){
1934
            for(orientation=level ? 1:0; orientation<4; orientation++){
1935
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1936
            }
1937
        }
1938
    }
1939
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1940
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1941
}
1942

    
1943
static int alloc_blocks(SnowContext *s){
1944
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1945
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1946

    
1947
    s->b_width = w;
1948
    s->b_height= h;
1949

    
1950
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1951
    return 0;
1952
}
1953

    
1954
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1955
    uint8_t *bytestream= d->bytestream;
1956
    uint8_t *bytestream_start= d->bytestream_start;
1957
    *d= *s;
1958
    d->bytestream= bytestream;
1959
    d->bytestream_start= bytestream_start;
1960
}
1961

    
1962
//near copy & paste from dsputil, FIXME
1963
static int pix_sum(uint8_t * pix, int line_size, int w)
1964
{
1965
    int s, i, j;
1966

    
1967
    s = 0;
1968
    for (i = 0; i < w; i++) {
1969
        for (j = 0; j < w; j++) {
1970
            s += pix[0];
1971
            pix ++;
1972
        }
1973
        pix += line_size - w;
1974
    }
1975
    return s;
1976
}
1977

    
1978
//near copy & paste from dsputil, FIXME
1979
static int pix_norm1(uint8_t * pix, int line_size, int w)
1980
{
1981
    int s, i, j;
1982
    uint32_t *sq = squareTbl + 256;
1983

    
1984
    s = 0;
1985
    for (i = 0; i < w; i++) {
1986
        for (j = 0; j < w; j ++) {
1987
            s += sq[pix[0]];
1988
            pix ++;
1989
        }
1990
        pix += line_size - w;
1991
    }
1992
    return s;
1993
}
1994

    
1995
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1996
    const int w= s->b_width << s->block_max_depth;
1997
    const int rem_depth= s->block_max_depth - level;
1998
    const int index= (x + y*w) << rem_depth;
1999
    const int block_w= 1<<rem_depth;
2000
    BlockNode block;
2001
    int i,j;
2002

    
2003
    block.color[0]= l;
2004
    block.color[1]= cb;
2005
    block.color[2]= cr;
2006
    block.mx= mx;
2007
    block.my= my;
2008
    block.type= type;
2009
    block.level= level;
2010

    
2011
    for(j=0; j<block_w; j++){
2012
        for(i=0; i<block_w; i++){
2013
            s->block[index + i + j*w]= block;
2014
        }
2015
    }
2016
}
2017

    
2018
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2019
    const int offset[3]= {
2020
          y*c->  stride + x,
2021
        ((y*c->uvstride + x)>>1),
2022
        ((y*c->uvstride + x)>>1),
2023
    };
2024
    int i;
2025
    for(i=0; i<3; i++){
2026
        c->src[0][i]= src [i];
2027
        c->ref[0][i]= ref [i] + offset[i];
2028
    }
2029
    assert(!ref_index);
2030
}
2031

    
2032
//FIXME copy&paste
2033
#define P_LEFT P[1]
2034
#define P_TOP P[2]
2035
#define P_TOPRIGHT P[3]
2036
#define P_MEDIAN P[4]
2037
#define P_MV1 P[9]
2038
#define FLAG_QPEL   1 //must be 1
2039

    
2040
static int encode_q_branch(SnowContext *s, int level, int x, int y){
2041
    uint8_t p_buffer[1024];
2042
    uint8_t i_buffer[1024];
2043
    uint8_t p_state[sizeof(s->block_state)];
2044
    uint8_t i_state[sizeof(s->block_state)];
2045
    RangeCoder pc, ic;
2046
    uint8_t *pbbak= s->c.bytestream;
2047
    uint8_t *pbbak_start= s->c.bytestream_start;
2048
    int score, score2, iscore, i_len, p_len, block_s, sum;
2049
    const int w= s->b_width  << s->block_max_depth;
2050
    const int h= s->b_height << s->block_max_depth;
2051
    const int rem_depth= s->block_max_depth - level;
2052
    const int index= (x + y*w) << rem_depth;
2053
    const int block_w= 1<<(LOG2_MB_SIZE - level);
2054
    static BlockNode null_block= { //FIXME add border maybe
2055
        .color= {128,128,128},
2056
        .mx= 0,
2057
        .my= 0,
2058
        .type= 0,
2059
        .level= 0,
2060
    };
2061
    int trx= (x+1)<<rem_depth;
2062
    int try= (y+1)<<rem_depth;
2063
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2064
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2065
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2066
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2067
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2068
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2069
    int pl = left->color[0];
2070
    int pcb= left->color[1];
2071
    int pcr= left->color[2];
2072
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2073
    int pmy= mid_pred(left->my, top->my, tr->my);
2074
    int mx=0, my=0;
2075
    int l,cr,cb, i;
2076
    const int stride= s->current_picture.linesize[0];
2077
    const int uvstride= s->current_picture.linesize[1];
2078
    const int instride= s->input_picture.linesize[0];
2079
    const int uvinstride= s->input_picture.linesize[1];
2080
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
2081
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2082
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2083
    uint8_t current_mb[3][stride*block_w];
2084
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2085
    int P[10][2];
2086
    int16_t last_mv[3][2];
2087
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2088
    const int shift= 1+qpel;
2089
    MotionEstContext *c= &s->m.me;
2090
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2091
    int my_context= av_log2(2*ABS(left->my - top->my));
2092
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2093

    
2094
    assert(sizeof(s->block_state) >= 256);
2095
    if(s->keyframe){
2096
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2097
        return 0;
2098
    }
2099

    
2100
    //FIXME optimize
2101
    for(i=0; i<block_w; i++)
2102
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
2103
    for(i=0; i<block_w>>1; i++)
2104
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2105
    for(i=0; i<block_w>>1; i++)
2106
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2107

    
2108
//    clip predictors / edge ?
2109

    
2110
    P_LEFT[0]= left->mx;
2111
    P_LEFT[1]= left->my;
2112
    P_TOP [0]= top->mx;
2113
    P_TOP [1]= top->my;
2114
    P_TOPRIGHT[0]= tr->mx;
2115
    P_TOPRIGHT[1]= tr->my;
2116

    
2117
    last_mv[0][0]= s->block[index].mx;
2118
    last_mv[0][1]= s->block[index].my;
2119
    last_mv[1][0]= right->mx;
2120
    last_mv[1][1]= right->my;
2121
    last_mv[2][0]= bottom->mx;
2122
    last_mv[2][1]= bottom->my;
2123

    
2124
    s->m.mb_stride=2;
2125
    s->m.mb_x=
2126
    s->m.mb_y= 0;
2127
    s->m.me.skip= 0;
2128

    
2129
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2130

    
2131
    assert(s->m.me.  stride ==   stride);
2132
    assert(s->m.me.uvstride == uvstride);
2133

    
2134
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2135
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2136
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2137
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2138

    
2139
    c->xmin = - x*block_w - 16+2;
2140
    c->ymin = - y*block_w - 16+2;
2141
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2142
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2143

    
2144
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2145
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
2146
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2147
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2148
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2149
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2150
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2151

    
2152
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2153
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2154

    
2155
    if (!y) {
2156
        c->pred_x= P_LEFT[0];
2157
        c->pred_y= P_LEFT[1];
2158
    } else {
2159
        c->pred_x = P_MEDIAN[0];
2160
        c->pred_y = P_MEDIAN[1];
2161
    }
2162

    
2163
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2164
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2165

    
2166
    assert(mx >= c->xmin);
2167
    assert(mx <= c->xmax);
2168
    assert(my >= c->ymin);
2169
    assert(my <= c->ymax);
2170

    
2171
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2172
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2173
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2174

    
2175
  //  subpel search
2176
    pc= s->c;
2177
    pc.bytestream_start=
2178
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2179
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2180

    
2181
    if(level!=s->block_max_depth)
2182
        put_rac(&pc, &p_state[4 + s_context], 1);
2183
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2184
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2185
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2186
    p_len= pc.bytestream - pc.bytestream_start;
2187
    score += (s->lambda2*(p_len*8
2188
              + (pc.outstanding_count - s->c.outstanding_count)*8
2189
              + (-av_log2(pc.range)    + av_log2(s->c.range))
2190
             ))>>FF_LAMBDA_SHIFT;
2191

    
2192
    block_s= block_w*block_w;
2193
    sum = pix_sum(&current_mb[0][0], stride, block_w);
2194
    l= (sum + block_s/2)/block_s;
2195
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2196

    
2197
    block_s= block_w*block_w>>2;
2198
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2199
    cb= (sum + block_s/2)/block_s;
2200
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2201
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2202
    cr= (sum + block_s/2)/block_s;
2203
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2204

    
2205
    ic= s->c;
2206
    ic.bytestream_start=
2207
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2208
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2209
    if(level!=s->block_max_depth)
2210
        put_rac(&ic, &i_state[4 + s_context], 1);
2211
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2212
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2213
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2214
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2215
    i_len= ic.bytestream - ic.bytestream_start;
2216
    iscore += (s->lambda2*(i_len*8
2217
              + (ic.outstanding_count - s->c.outstanding_count)*8
2218
              + (-av_log2(ic.range)    + av_log2(s->c.range))
2219
             ))>>FF_LAMBDA_SHIFT;
2220

    
2221
//    assert(score==256*256*256*64-1);
2222
    assert(iscore < 255*255*256 + s->lambda2*10);
2223
    assert(iscore >= 0);
2224
    assert(l>=0 && l<=255);
2225
    assert(pl>=0 && pl<=255);
2226

    
2227
    if(level==0){
2228
        int varc= iscore >> 8;
2229
        int vard= score >> 8;
2230
        if (vard <= 64 || vard < varc)
2231
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2232
        else
2233
            c->scene_change_score+= s->m.qscale;
2234
    }
2235

    
2236
    if(level!=s->block_max_depth){
2237
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2238
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2239
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2240
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2241
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2242
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2243

    
2244
        if(score2 < score && score2 < iscore)
2245
            return score2;
2246
    }
2247

    
2248
    if(iscore < score){
2249
        memcpy(pbbak, i_buffer, i_len);
2250
        s->c= ic;
2251
        s->c.bytestream_start= pbbak_start;
2252
        s->c.bytestream= pbbak + i_len;
2253
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2254
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2255
        return iscore;
2256
    }else{
2257
        memcpy(pbbak, p_buffer, p_len);
2258
        s->c= pc;
2259
        s->c.bytestream_start= pbbak_start;
2260
        s->c.bytestream= pbbak + p_len;
2261
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2262
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2263
        return score;
2264
    }
2265
}
2266

    
2267
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2268
    const int w= s->b_width << s->block_max_depth;
2269
    const int rem_depth= s->block_max_depth - level;
2270
    const int index= (x + y*w) << rem_depth;
2271
    static BlockNode null_block= { //FIXME add border maybe
2272
        .color= {128,128,128},
2273
        .mx= 0,
2274
        .my= 0,
2275
        .type= 0,
2276
        .level= 0,
2277
    };
2278
    int trx= (x+1)<<rem_depth;
2279
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2280
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2281
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2282
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2283
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2284

    
2285
    if(s->keyframe){
2286
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2287
        return;
2288
    }
2289

    
2290
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2291
        int type;
2292
        int l = left->color[0];
2293
        int cb= left->color[1];
2294
        int cr= left->color[2];
2295
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2296
        int my= mid_pred(left->my, top->my, tr->my);
2297
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2298
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2299

    
2300
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2301

    
2302
        if(type){
2303
            l += get_symbol(&s->c, &s->block_state[32], 1);
2304
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2305
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2306
        }else{
2307
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2308
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2309
        }
2310
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2311
    }else{
2312
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2313
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2314
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2315
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2316
    }
2317
}
2318

    
2319
static void encode_blocks(SnowContext *s){
2320
    int x, y;
2321
    int w= s->b_width;
2322
    int h= s->b_height;
2323

    
2324
    for(y=0; y<h; y++){
2325
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2326
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2327
            return;
2328
        }
2329
        for(x=0; x<w; x++){
2330
            encode_q_branch(s, 0, x, y);
2331
        }
2332
    }
2333
}
2334

    
2335
static void decode_blocks(SnowContext *s){
2336
    int x, y;
2337
    int w= s->b_width;
2338
    int h= s->b_height;
2339

    
2340
    for(y=0; y<h; y++){
2341
        for(x=0; x<w; x++){
2342
            decode_q_branch(s, 0, x, y);
2343
        }
2344
    }
2345
}
2346

    
2347
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2348
    int x, y;
2349
START_TIMER
2350
    for(y=0; y < b_h+5; y++){
2351
        for(x=0; x < b_w; x++){
2352
            int a0= src[x    ];
2353
            int a1= src[x + 1];
2354
            int a2= src[x + 2];
2355
            int a3= src[x + 3];
2356
            int a4= src[x + 4];
2357
            int a5= src[x + 5];
2358
//            int am= 9*(a1+a2) - (a0+a3);
2359
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2360
//            int am= 18*(a2+a3) - 2*(a1+a4);
2361
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2362
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2363

    
2364
//            if(b_w==16) am= 8*(a1+a2);
2365

    
2366
            if(dx<8) am = (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2367
            else     am = (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2368

    
2369
            /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2370
            if(am&(~255)) am= ~(am>>31);
2371

    
2372
            tmp[x] = am;
2373

    
2374
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2375
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2376
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2377
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2378
        }
2379
        tmp += stride;
2380
        src += stride;
2381
    }
2382
    tmp -= (b_h+5)*stride;
2383

    
2384
    for(y=0; y < b_h; y++){
2385
        for(x=0; x < b_w; x++){
2386
            int a0= tmp[x + 0*stride];
2387
            int a1= tmp[x + 1*stride];
2388
            int a2= tmp[x + 2*stride];
2389
            int a3= tmp[x + 3*stride];
2390
            int a4= tmp[x + 4*stride];
2391
            int a5= tmp[x + 5*stride];
2392
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2393
//            int am= 18*(a2+a3) - 2*(a1+a4);
2394
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2395
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2396

    
2397
//            if(b_w==16) am= 8*(a1+a2);
2398

    
2399
            if(dy<8) am =  (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2400
            else     am = (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2401

    
2402
            if(am&(~255)) am= ~(am>>31);
2403

    
2404
            dst[x] = am;
2405
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2406
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2407
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2408
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2409
        }
2410
        dst += stride;
2411
        tmp += stride;
2412
    }
2413
STOP_TIMER("mc_block")
2414
}
2415

    
2416
#define mca(dx,dy,b_w)\
2417
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2418
    uint8_t tmp[stride*(b_w+5)];\
2419
    assert(h==b_w);\
2420
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2421
}
2422

    
2423
mca( 0, 0,16)
2424
mca( 8, 0,16)
2425
mca( 0, 8,16)
2426
mca( 8, 8,16)
2427
mca( 0, 0,8)
2428
mca( 8, 0,8)
2429
mca( 0, 8,8)
2430
mca( 8, 8,8)
2431

    
2432
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2433
    if(block->type){
2434
        int x, y;
2435
        const int color= block->color[plane_index];
2436
        for(y=0; y < b_h; y++){
2437
            for(x=0; x < b_w; x++){
2438
                dst[x + y*stride]= color;
2439
            }
2440
        }
2441
    }else{
2442
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2443
        int mx= block->mx*scale;
2444
        int my= block->my*scale;
2445
        const int dx= mx&15;
2446
        const int dy= my&15;
2447
        sx += (mx>>4) - 2;
2448
        sy += (my>>4) - 2;
2449
        src += sx + sy*stride;
2450
        if(   (unsigned)sx >= w - b_w - 4
2451
           || (unsigned)sy >= h - b_h - 4){
2452
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2453
            src= tmp + MB_SIZE;
2454
        }
2455
        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2456
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2457
        else
2458
            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2459
    }
2460
}
2461

    
2462
static always_inline int same_block(BlockNode *a, BlockNode *b){
2463
    return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2464
}
2465

    
2466
//FIXME name clenup (b_w, block_w, b_width stuff)
2467
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2468
    DWTELEM * dst = NULL;
2469
    const int b_width = s->b_width  << s->block_max_depth;
2470
    const int b_height= s->b_height << s->block_max_depth;
2471
    const int b_stride= b_width;
2472
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2473
    BlockNode *rt= lt+1;
2474
    BlockNode *lb= lt+b_stride;
2475
    BlockNode *rb= lb+1;
2476
    uint8_t *block[4];
2477
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2478
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2479
    uint8_t *ptmp;
2480
    int x,y;
2481

    
2482
    if(b_x<0){
2483
        lt= rt;
2484
        lb= rb;
2485
    }else if(b_x + 1 >= b_width){
2486
        rt= lt;
2487
        rb= lb;
2488
    }
2489
    if(b_y<0){
2490
        lt= lb;
2491
        rt= rb;
2492
    }else if(b_y + 1 >= b_height){
2493
        lb= lt;
2494
        rb= rt;
2495
    }
2496

    
2497
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2498
        obmc -= src_x;
2499
        b_w += src_x;
2500
        src_x=0;
2501
    }else if(src_x + b_w > w){
2502
        b_w = w - src_x;
2503
    }
2504
    if(src_y<0){
2505
        obmc -= src_y*obmc_stride;
2506
        b_h += src_y;
2507
        src_y=0;
2508
    }else if(src_y + b_h> h){
2509
        b_h = h - src_y;
2510
    }
2511

    
2512
    if(b_w<=0 || b_h<=0) return;
2513

    
2514
assert(src_stride > 2*MB_SIZE + 5);
2515
//    old_dst += src_x + src_y*dst_stride;
2516
    dst8+= src_x + src_y*src_stride;
2517
//    src += src_x + src_y*src_stride;
2518

    
2519
    ptmp= tmp + 3*tmp_step;
2520
    block[0]= ptmp;
2521
    ptmp+=tmp_step;
2522
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2523

    
2524
    if(same_block(lt, rt)){
2525
        block[1]= block[0];
2526
    }else{
2527
        block[1]= ptmp;
2528
        ptmp+=tmp_step;
2529
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2530
    }
2531

    
2532
    if(same_block(lt, lb)){
2533
        block[2]= block[0];
2534
    }else if(same_block(rt, lb)){
2535
        block[2]= block[1];
2536
    }else{
2537
        block[2]= ptmp;
2538
        ptmp+=tmp_step;
2539
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2540
    }
2541

    
2542
    if(same_block(lt, rb) ){
2543
        block[3]= block[0];
2544
    }else if(same_block(rt, rb)){
2545
        block[3]= block[1];
2546
    }else if(same_block(lb, rb)){
2547
        block[3]= block[2];
2548
    }else{
2549
        block[3]= ptmp;
2550
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2551
    }
2552
#if 0
2553
    for(y=0; y<b_h; y++){
2554
        for(x=0; x<b_w; x++){
2555
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2556
            if(add) dst[x + y*dst_stride] += v;
2557
            else    dst[x + y*dst_stride] -= v;
2558
        }
2559
    }
2560
    for(y=0; y<b_h; y++){
2561
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2562
        for(x=0; x<b_w; x++){
2563
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2564
            if(add) dst[x + y*dst_stride] += v;
2565
            else    dst[x + y*dst_stride] -= v;
2566
        }
2567
    }
2568
    for(y=0; y<b_h; y++){
2569
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2570
        for(x=0; x<b_w; x++){
2571
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2572
            if(add) dst[x + y*dst_stride] += v;
2573
            else    dst[x + y*dst_stride] -= v;
2574
        }
2575
    }
2576
    for(y=0; y<b_h; y++){
2577
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2578
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2579
        for(x=0; x<b_w; x++){
2580
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2581
            if(add) dst[x + y*dst_stride] += v;
2582
            else    dst[x + y*dst_stride] -= v;
2583
        }
2584
    }
2585
#else
2586
{
2587

    
2588
    START_TIMER
2589

    
2590
    for(y=0; y<b_h; y++){
2591
        //FIXME ugly missue of obmc_stride
2592
        uint8_t *obmc1= obmc + y*obmc_stride;
2593
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2594
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2595
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2596
        dst = slice_buffer_get_line(sb, src_y + y);
2597
        for(x=0; x<b_w; x++){
2598
            int v=   obmc1[x] * block[3][x + y*src_stride]
2599
                    +obmc2[x] * block[2][x + y*src_stride]
2600
                    +obmc3[x] * block[1][x + y*src_stride]
2601
                    +obmc4[x] * block[0][x + y*src_stride];
2602

    
2603
            v <<= 8 - LOG2_OBMC_MAX;
2604
            if(FRAC_BITS != 8){
2605
                v += 1<<(7 - FRAC_BITS);
2606
                v >>= 8 - FRAC_BITS;
2607
            }
2608
            if(add){
2609
//                v += old_dst[x + y*dst_stride];
2610
                v += dst[x + src_x];
2611
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2612
                if(v&(~255)) v= ~(v>>31);
2613
                dst8[x + y*src_stride] = v;
2614
            }else{
2615
//                old_dst[x + y*dst_stride] -= v;
2616
                dst[x + src_x] -= v;
2617
            }
2618
        }
2619
    }
2620
        STOP_TIMER("Inner add y block")
2621
}
2622
#endif
2623
}
2624

    
2625
//FIXME name clenup (b_w, block_w, b_width stuff)
2626
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2627
    const int b_width = s->b_width  << s->block_max_depth;
2628
    const int b_height= s->b_height << s->block_max_depth;
2629
    const int b_stride= b_width;
2630
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2631
    BlockNode *rt= lt+1;
2632
    BlockNode *lb= lt+b_stride;
2633
    BlockNode *rb= lb+1;
2634
    uint8_t *block[4];
2635
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2636
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2637
    uint8_t *ptmp;
2638
    int x,y;
2639

    
2640
    if(b_x<0){
2641
        lt= rt;
2642
        lb= rb;
2643
    }else if(b_x + 1 >= b_width){
2644
        rt= lt;
2645
        rb= lb;
2646
    }
2647
    if(b_y<0){
2648
        lt= lb;
2649
        rt= rb;
2650
    }else if(b_y + 1 >= b_height){
2651
        lb= lt;
2652
        rb= rt;
2653
    }
2654

    
2655
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2656
        obmc -= src_x;
2657
        b_w += src_x;
2658
        src_x=0;
2659
    }else if(src_x + b_w > w){
2660
        b_w = w - src_x;
2661
    }
2662
    if(src_y<0){
2663
        obmc -= src_y*obmc_stride;
2664
        b_h += src_y;
2665
        src_y=0;
2666
    }else if(src_y + b_h> h){
2667
        b_h = h - src_y;
2668
    }
2669

    
2670
    if(b_w<=0 || b_h<=0) return;
2671

    
2672
assert(src_stride > 2*MB_SIZE + 5);
2673
    dst += src_x + src_y*dst_stride;
2674
    dst8+= src_x + src_y*src_stride;
2675
//    src += src_x + src_y*src_stride;
2676

    
2677
    ptmp= tmp + 3*tmp_step;
2678
    block[0]= ptmp;
2679
    ptmp+=tmp_step;
2680
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2681

    
2682
    if(same_block(lt, rt)){
2683
        block[1]= block[0];
2684
    }else{
2685
        block[1]= ptmp;
2686
        ptmp+=tmp_step;
2687
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2688
    }
2689

    
2690
    if(same_block(lt, lb)){
2691
        block[2]= block[0];
2692
    }else if(same_block(rt, lb)){
2693
        block[2]= block[1];
2694
    }else{
2695
        block[2]= ptmp;
2696
        ptmp+=tmp_step;
2697
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2698
    }
2699

    
2700
    if(same_block(lt, rb) ){
2701
        block[3]= block[0];
2702
    }else if(same_block(rt, rb)){
2703
        block[3]= block[1];
2704
    }else if(same_block(lb, rb)){
2705
        block[3]= block[2];
2706
    }else{
2707
        block[3]= ptmp;
2708
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2709
    }
2710
#if 0
2711
    for(y=0; y<b_h; y++){
2712
        for(x=0; x<b_w; x++){
2713
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2714
            if(add) dst[x + y*dst_stride] += v;
2715
            else    dst[x + y*dst_stride] -= v;
2716
        }
2717
    }
2718
    for(y=0; y<b_h; y++){
2719
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2720
        for(x=0; x<b_w; x++){
2721
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2722
            if(add) dst[x + y*dst_stride] += v;
2723
            else    dst[x + y*dst_stride] -= v;
2724
        }
2725
    }
2726
    for(y=0; y<b_h; y++){
2727
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2728
        for(x=0; x<b_w; x++){
2729
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2730
            if(add) dst[x + y*dst_stride] += v;
2731
            else    dst[x + y*dst_stride] -= v;
2732
        }
2733
    }
2734
    for(y=0; y<b_h; y++){
2735
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2736
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2737
        for(x=0; x<b_w; x++){
2738
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2739
            if(add) dst[x + y*dst_stride] += v;
2740
            else    dst[x + y*dst_stride] -= v;
2741
        }
2742
    }
2743
#else
2744
    for(y=0; y<b_h; y++){
2745
        //FIXME ugly missue of obmc_stride
2746
        uint8_t *obmc1= obmc + y*obmc_stride;
2747
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2748
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2749
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2750
        for(x=0; x<b_w; x++){
2751
            int v=   obmc1[x] * block[3][x + y*src_stride]
2752
                    +obmc2[x] * block[2][x + y*src_stride]
2753
                    +obmc3[x] * block[1][x + y*src_stride]
2754
                    +obmc4[x] * block[0][x + y*src_stride];
2755

    
2756
            v <<= 8 - LOG2_OBMC_MAX;
2757
            if(FRAC_BITS != 8){
2758
                v += 1<<(7 - FRAC_BITS);
2759
                v >>= 8 - FRAC_BITS;
2760
            }
2761
            if(add){
2762
                v += dst[x + y*dst_stride];
2763
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2764
                if(v&(~255)) v= ~(v>>31);
2765
                dst8[x + y*src_stride] = v;
2766
            }else{
2767
                dst[x + y*dst_stride] -= v;
2768
            }
2769
        }
2770
    }
2771
#endif
2772
}
2773

    
2774
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2775
    Plane *p= &s->plane[plane_index];
2776
    const int mb_w= s->b_width  << s->block_max_depth;
2777
    const int mb_h= s->b_height << s->block_max_depth;
2778
    int x, y, mb_x;
2779
    int block_size = MB_SIZE >> s->block_max_depth;
2780
    int block_w    = plane_index ? block_size/2 : block_size;
2781
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2782
    int obmc_stride= plane_index ? block_size : 2*block_size;
2783
    int ref_stride= s->current_picture.linesize[plane_index];
2784
    uint8_t *ref  = s->last_picture.data[plane_index];
2785
    uint8_t *dst8= s->current_picture.data[plane_index];
2786
    int w= p->width;
2787
    int h= p->height;
2788
    START_TIMER
2789

    
2790
    if(s->keyframe || (s->avctx->debug&512)){
2791
        if(mb_y==mb_h)
2792
            return;
2793

    
2794
        if(add){
2795
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2796
            {
2797
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2798
                DWTELEM * line = sb->line[y];
2799
                for(x=0; x<w; x++)
2800
                {
2801
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2802
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2803
                    v >>= FRAC_BITS;
2804
                    if(v&(~255)) v= ~(v>>31);
2805
                    dst8[x + y*ref_stride]= v;
2806
                }
2807
            }
2808
        }else{
2809
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2810
            {
2811
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2812
                DWTELEM * line = sb->line[y];
2813
                for(x=0; x<w; x++)
2814
                {
2815
                    line[x] -= 128 << FRAC_BITS;
2816
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2817
                }
2818
            }
2819
        }
2820

    
2821
        return;
2822
    }
2823

    
2824
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2825
            START_TIMER
2826

    
2827
            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
2828
                       block_w*mb_x - block_w/2,
2829
                       block_w*mb_y - block_w/2,
2830
                       block_w, block_w,
2831
                       w, h,
2832
                       w, ref_stride, obmc_stride,
2833
                       mb_x - 1, mb_y - 1,
2834
                       add, plane_index);
2835

    
2836
            STOP_TIMER("add_yblock")
2837
        }
2838

    
2839
    STOP_TIMER("predict_slice")
2840
}
2841

    
2842
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2843
    Plane *p= &s->plane[plane_index];
2844
    const int mb_w= s->b_width  << s->block_max_depth;
2845
    const int mb_h= s->b_height << s->block_max_depth;
2846
    int x, y, mb_x;
2847
    int block_size = MB_SIZE >> s->block_max_depth;
2848
    int block_w    = plane_index ? block_size/2 : block_size;
2849
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2850
    int obmc_stride= plane_index ? block_size : 2*block_size;
2851
    int ref_stride= s->current_picture.linesize[plane_index];
2852
    uint8_t *ref  = s->last_picture.data[plane_index];
2853
    uint8_t *dst8= s->current_picture.data[plane_index];
2854
    int w= p->width;
2855
    int h= p->height;
2856
    START_TIMER
2857

    
2858
    if(s->keyframe || (s->avctx->debug&512)){
2859
        if(mb_y==mb_h)
2860
            return;
2861

    
2862
        if(add){
2863
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2864
                for(x=0; x<w; x++){
2865
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2866
                    v >>= FRAC_BITS;
2867
                    if(v&(~255)) v= ~(v>>31);
2868
                    dst8[x + y*ref_stride]= v;
2869
                }
2870
            }
2871
        }else{
2872
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2873
                for(x=0; x<w; x++){
2874
                    buf[x + y*w]-= 128<<FRAC_BITS;
2875
                }
2876
            }
2877
        }
2878

    
2879
        return;
2880
    }
2881

    
2882
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2883
            START_TIMER
2884

    
2885
            add_yblock(s, buf, dst8, ref, obmc,
2886
                       block_w*mb_x - block_w/2,
2887
                       block_w*mb_y - block_w/2,
2888
                       block_w, block_w,
2889
                       w, h,
2890
                       w, ref_stride, obmc_stride,
2891
                       mb_x - 1, mb_y - 1,
2892
                       add, plane_index);
2893

    
2894
            STOP_TIMER("add_yblock")
2895
        }
2896

    
2897
    STOP_TIMER("predict_slice")
2898
}
2899

    
2900
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2901
    const int mb_h= s->b_height << s->block_max_depth;
2902
    int mb_y;
2903
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2904
        predict_slice(s, buf, plane_index, add, mb_y);
2905
}
2906

    
2907
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2908
    const int level= b->level;
2909
    const int w= b->width;
2910
    const int h= b->height;
2911
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2912
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2913
    int x,y, thres1, thres2;
2914
//    START_TIMER
2915

    
2916
    if(s->qlog == LOSSLESS_QLOG) return;
2917

    
2918
    bias= bias ? 0 : (3*qmul)>>3;
2919
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2920
    thres2= 2*thres1;
2921

    
2922
    if(!bias){
2923
        for(y=0; y<h; y++){
2924
            for(x=0; x<w; x++){
2925
                int i= src[x + y*stride];
2926

    
2927
                if((unsigned)(i+thres1) > thres2){
2928
                    if(i>=0){
2929
                        i<<= QEXPSHIFT;
2930
                        i/= qmul; //FIXME optimize
2931
                        src[x + y*stride]=  i;
2932
                    }else{
2933
                        i= -i;
2934
                        i<<= QEXPSHIFT;
2935
                        i/= qmul; //FIXME optimize
2936
                        src[x + y*stride]= -i;
2937
                    }
2938
                }else
2939
                    src[x + y*stride]= 0;
2940
            }
2941
        }
2942
    }else{
2943
        for(y=0; y<h; y++){
2944
            for(x=0; x<w; x++){
2945
                int i= src[x + y*stride];
2946

    
2947
                if((unsigned)(i+thres1) > thres2){
2948
                    if(i>=0){
2949
                        i<<= QEXPSHIFT;
2950
                        i= (i + bias) / qmul; //FIXME optimize
2951
                        src[x + y*stride]=  i;
2952
                    }else{
2953
                        i= -i;
2954
                        i<<= QEXPSHIFT;
2955
                        i= (i + bias) / qmul; //FIXME optimize
2956
                        src[x + y*stride]= -i;
2957
                    }
2958
                }else
2959
                    src[x + y*stride]= 0;
2960
            }
2961
        }
2962
    }
2963
    if(level+1 == s->spatial_decomposition_count){
2964
//        STOP_TIMER("quantize")
2965
    }
2966
}
2967

    
2968
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
2969
    const int w= b->width;
2970
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2971
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2972
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2973
    int x,y;
2974
    START_TIMER
2975

    
2976
    if(s->qlog == LOSSLESS_QLOG) return;
2977

    
2978
    for(y=start_y; y<end_y; y++){
2979
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2980
        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2981
        for(x=0; x<w; x++){
2982
            int i= line[x];
2983
            if(i<0){
2984
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2985
            }else if(i>0){
2986
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2987
            }
2988
        }
2989
    }
2990
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2991
        STOP_TIMER("dquant")
2992
    }
2993
}
2994

    
2995
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2996
    const int w= b->width;
2997
    const int h= b->height;
2998
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2999
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3000
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3001
    int x,y;
3002
    START_TIMER
3003

    
3004
    if(s->qlog == LOSSLESS_QLOG) return;
3005

    
3006
    for(y=0; y<h; y++){
3007
        for(x=0; x<w; x++){
3008
            int i= src[x + y*stride];
3009
            if(i<0){
3010
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3011
            }else if(i>0){
3012
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3013
            }
3014
        }
3015
    }
3016
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3017
        STOP_TIMER("dquant")
3018
    }
3019
}
3020

    
3021
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3022
    const int w= b->width;
3023
    const int h= b->height;
3024
    int x,y;
3025

    
3026
    for(y=h-1; y>=0; y--){
3027
        for(x=w-1; x>=0; x--){
3028
            int i= x + y*stride;
3029

    
3030
            if(x){
3031
                if(use_median){
3032
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3033
                    else  src[i] -= src[i - 1];
3034
                }else{
3035
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3036
                    else  src[i] -= src[i - 1];
3037
                }
3038
            }else{
3039
                if(y) src[i] -= src[i - stride];
3040
            }
3041
        }
3042
    }
3043
}
3044

    
3045
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3046
    const int w= b->width;
3047
    int x,y;
3048

    
3049
//    START_TIMER
3050

    
3051
    DWTELEM * line;
3052
    DWTELEM * prev;
3053

    
3054
    if (start_y != 0)
3055
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3056

    
3057
    for(y=start_y; y<end_y; y++){
3058
        prev = line;
3059
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3060
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3061
        for(x=0; x<w; x++){
3062
            if(x){
3063
                if(use_median){
3064
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3065
                    else  line[x] += line[x - 1];
3066
                }else{
3067
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3068
                    else  line[x] += line[x - 1];
3069
                }
3070
            }else{
3071
                if(y) line[x] += prev[x];
3072
            }
3073
        }
3074
    }
3075

    
3076
//    STOP_TIMER("correlate")
3077
}
3078

    
3079
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3080
    const int w= b->width;
3081
    const int h= b->height;
3082
    int x,y;
3083

    
3084
    for(y=0; y<h; y++){
3085
        for(x=0; x<w; x++){
3086
            int i= x + y*stride;
3087

    
3088
            if(x){
3089
                if(use_median){
3090
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3091
                    else  src[i] += src[i - 1];
3092
                }else{
3093
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3094
                    else  src[i] += src[i - 1];
3095
                }
3096
            }else{
3097
                if(y) src[i] += src[i - stride];
3098
            }
3099
        }
3100
    }
3101
}
3102

    
3103
static void encode_header(SnowContext *s){
3104
    int plane_index, level, orientation;
3105
    uint8_t kstate[32];
3106

    
3107
    memset(kstate, MID_STATE, sizeof(kstate));
3108

    
3109
    put_rac(&s->c, kstate, s->keyframe);
3110
    if(s->keyframe || s->always_reset)
3111
        reset_contexts(s);
3112
    if(s->keyframe){
3113
        put_symbol(&s->c, s->header_state, s->version, 0);
3114
        put_rac(&s->c, s->header_state, s->always_reset);
3115
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3116
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3117
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3118
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3119
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3120
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3121
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3122
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3123

    
3124
        for(plane_index=0; plane_index<2; plane_index++){
3125
            for(level=0; level<s->spatial_decomposition_count; level++){
3126
                for(orientation=level ? 1:0; orientation<4; orientation++){
3127
                    if(orientation==2) continue;
3128
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3129
                }
3130
            }
3131
        }
3132
    }
3133
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3134
    put_symbol(&s->c, s->header_state, s->qlog, 1);
3135
    put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3136
    put_symbol(&s->c, s->header_state, s->qbias, 1);
3137
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3138
}
3139

    
3140
static int decode_header(SnowContext *s){
3141
    int plane_index, level, orientation;
3142
    uint8_t kstate[32];
3143

    
3144
    memset(kstate, MID_STATE, sizeof(kstate));
3145

    
3146
    s->keyframe= get_rac(&s->c, kstate);
3147
    if(s->keyframe || s->always_reset)
3148
        reset_contexts(s);
3149
    if(s->keyframe){
3150
        s->version= get_symbol(&s->c, s->header_state, 0);
3151
        if(s->version>0){
3152
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3153
            return -1;
3154
        }
3155
        s->always_reset= get_rac(&s->c, s->header_state);
3156
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3157
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3158
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3159
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3160
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3161
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3162
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3163
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3164

    
3165
        for(plane_index=0; plane_index<3; plane_index++){
3166
            for(level=0; level<s->spatial_decomposition_count; level++){
3167
                for(orientation=level ? 1:0; orientation<4; orientation++){
3168
                    int q;
3169
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3170
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3171
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3172
                    s->plane[plane_index].band[level][orientation].qlog= q;
3173
                }
3174
            }
3175
        }
3176
    }
3177

    
3178
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3179
    if(s->spatial_decomposition_type > 2){
3180
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3181
        return -1;
3182
    }
3183

    
3184
    s->qlog= get_symbol(&s->c, s->header_state, 1);
3185
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3186
    s->qbias= get_symbol(&s->c, s->header_state, 1);
3187
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3188
    if(s->block_max_depth > 1){
3189
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3190
        s->block_max_depth= 0;
3191
        return -1;
3192
    }
3193

    
3194
    return 0;
3195
}
3196

    
3197
static void init_qexp(){
3198
    int i;
3199
    double v=128;
3200

    
3201
    for(i=0; i<QROOT; i++){
3202
        qexp[i]= lrintf(v);
3203
        v *= pow(2, 1.0 / QROOT);
3204
    }
3205
}
3206

    
3207
static int common_init(AVCodecContext *avctx){
3208
    SnowContext *s = avctx->priv_data;
3209
    int width, height;
3210
    int level, orientation, plane_index, dec;
3211

    
3212
    s->avctx= avctx;
3213

    
3214
    dsputil_init(&s->dsp, avctx);
3215

    
3216
#define mcf(dx,dy)\
3217
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3218
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3219
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3220
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3221
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3222
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3223

    
3224
    mcf( 0, 0)
3225
    mcf( 4, 0)
3226
    mcf( 8, 0)
3227
    mcf(12, 0)
3228
    mcf( 0, 4)
3229
    mcf( 4, 4)
3230
    mcf( 8, 4)
3231
    mcf(12, 4)
3232
    mcf( 0, 8)
3233
    mcf( 4, 8)
3234
    mcf( 8, 8)
3235
    mcf(12, 8)
3236
    mcf( 0,12)
3237
    mcf( 4,12)
3238
    mcf( 8,12)
3239
    mcf(12,12)
3240

    
3241
#define mcfh(dx,dy)\
3242
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3243
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3244
        mc_block_hpel ## dx ## dy ## 16;\
3245
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3246
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3247
        mc_block_hpel ## dx ## dy ## 8;
3248

    
3249
    mcfh(0, 0)
3250
    mcfh(8, 0)
3251
    mcfh(0, 8)
3252
    mcfh(8, 8)
3253

    
3254
    if(!qexp[0])
3255
        init_qexp();
3256

    
3257
    dec= s->spatial_decomposition_count= 5;
3258
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3259

    
3260
    s->chroma_h_shift= 1; //FIXME XXX
3261
    s->chroma_v_shift= 1;
3262

    
3263
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3264

    
3265
    width= s->avctx->width;
3266
    height= s->avctx->height;
3267

    
3268
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3269

    
3270
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3271
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3272

    
3273
    for(plane_index=0; plane_index<3; plane_index++){
3274
        int w= s->avctx->width;
3275
        int h= s->avctx->height;
3276

    
3277
        if(plane_index){
3278
            w>>= s->chroma_h_shift;
3279
            h>>= s->chroma_v_shift;
3280
        }
3281
        s->plane[plane_index].width = w;
3282
        s->plane[plane_index].height= h;
3283
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3284
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3285
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3286
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3287

    
3288
                b->buf= s->spatial_dwt_buffer;
3289
                b->level= level;
3290
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3291
                b->width = (w + !(orientation&1))>>1;
3292
                b->height= (h + !(orientation>1))>>1;
3293

    
3294
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3295
                b->buf_x_offset = 0;
3296
                b->buf_y_offset = 0;
3297

    
3298
                if(orientation&1){
3299
                    b->buf += (w+1)>>1;
3300
                    b->buf_x_offset = (w+1)>>1;
3301
                }
3302
                if(orientation>1){
3303
                    b->buf += b->stride>>1;
3304
                    b->buf_y_offset = b->stride_line >> 1;
3305
                }
3306

    
3307
                if(level)
3308
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3309
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3310
            }
3311
            w= (w+1)>>1;
3312
            h= (h+1)>>1;
3313
        }
3314
    }
3315

    
3316
    reset_contexts(s);
3317
/*
3318
    width= s->width= avctx->width;
3319
    height= s->height= avctx->height;
3320

3321
    assert(width && height);
3322
*/
3323
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3324

    
3325
    return 0;
3326
}
3327

    
3328

    
3329
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3330
    int width = p->width;
3331
    int height= p->height;
3332
    int level, orientation, x, y;
3333

    
3334
    for(level=0; level<s->spatial_decomposition_count; level++){
3335
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3336
            SubBand *b= &p->band[level][orientation];
3337
            DWTELEM *buf= b->buf;
3338
            int64_t error=0;
3339

    
3340
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3341
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
3342
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3343
            for(y=0; y<height; y++){
3344
                for(x=0; x<width; x++){
3345
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
3346
                    error += d*d;
3347
                }
3348
            }
3349

    
3350
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3351
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3352
        }
3353
    }
3354
}
3355

    
3356
static int encode_init(AVCodecContext *avctx)
3357
{
3358
    SnowContext *s = avctx->priv_data;
3359
    int plane_index;
3360

    
3361
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3362
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3363
               "use vstrict=-2 / -strict -2 to use it anyway\n");
3364
        return -1;
3365
    }
3366

    
3367
    common_init(avctx);
3368
    alloc_blocks(s);
3369

    
3370
    s->version=0;
3371

    
3372
    s->m.avctx   = avctx;
3373
    s->m.flags   = avctx->flags;
3374
    s->m.bit_rate= avctx->bit_rate;
3375

    
3376
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3377
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3378
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3379
    h263_encode_init(&s->m); //mv_penalty
3380

    
3381
    if(avctx->flags&CODEC_FLAG_PASS1){
3382
        if(!avctx->stats_out)
3383
            avctx->stats_out = av_mallocz(256);
3384
    }
3385
    if(avctx->flags&CODEC_FLAG_PASS2){
3386
        if(ff_rate_control_init(&s->m) < 0)
3387
            return -1;
3388
    }
3389

    
3390
    for(plane_index=0; plane_index<3; plane_index++){
3391
        calculate_vissual_weight(s, &s->plane[plane_index]);
3392
    }
3393

    
3394

    
3395
    avctx->coded_frame= &s->current_picture;
3396
    switch(avctx->pix_fmt){
3397
//    case PIX_FMT_YUV444P:
3398
//    case PIX_FMT_YUV422P:
3399
    case PIX_FMT_YUV420P:
3400
    case PIX_FMT_GRAY8:
3401
//    case PIX_FMT_YUV411P:
3402
//    case PIX_FMT_YUV410P:
3403
        s->colorspace_type= 0;
3404
        break;
3405
/*    case PIX_FMT_RGBA32:
3406
        s->colorspace= 1;
3407
        break;*/
3408
    default:
3409
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3410
        return -1;
3411
    }
3412
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3413
    s->chroma_h_shift= 1;
3414
    s->chroma_v_shift= 1;
3415
    return 0;
3416
}
3417

    
3418
static int frame_start(SnowContext *s){
3419
   AVFrame tmp;
3420
   int w= s->avctx->width; //FIXME round up to x16 ?
3421
   int h= s->avctx->height;
3422

    
3423
    if(s->current_picture.data[0]){
3424
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
3425
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3426
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3427
    }
3428

    
3429
    tmp= s->last_picture;
3430
    s->last_picture= s->current_picture;
3431
    s->current_picture= tmp;
3432

    
3433
    s->current_picture.reference= 1;
3434
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3435
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3436
        return -1;
3437
    }
3438

    
3439
    return 0;
3440
}
3441

    
3442
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3443
    SnowContext *s = avctx->priv_data;
3444
    RangeCoder * const c= &s->c;
3445
    AVFrame *pict = data;
3446
    const int width= s->avctx->width;
3447
    const int height= s->avctx->height;
3448
    int level, orientation, plane_index;
3449

    
3450
    ff_init_range_encoder(c, buf, buf_size);
3451
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3452

    
3453
    s->input_picture = *pict;
3454

    
3455
    if(avctx->flags&CODEC_FLAG_PASS2){
3456
        s->m.pict_type =
3457
        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3458
        s->keyframe= pict->pict_type==FF_I_TYPE;
3459
        s->m.picture_number= avctx->frame_number;
3460
        pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3461
    }else{
3462
        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3463
        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3464
    }
3465

    
3466
    if(pict->quality){
3467
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3468
        //<64 >60
3469
        s->qlog += 61*QROOT/8;
3470
    }else{
3471
        s->qlog= LOSSLESS_QLOG;
3472
    }
3473

    
3474
    frame_start(s);
3475
    s->current_picture.key_frame= s->keyframe;
3476

    
3477
    s->m.current_picture_ptr= &s->m.current_picture;
3478
    if(pict->pict_type == P_TYPE){
3479
        int block_width = (width +15)>>4;
3480
        int block_height= (height+15)>>4;
3481
        int stride= s->current_picture.linesize[0];
3482

    
3483
        assert(s->current_picture.data[0]);
3484
        assert(s->last_picture.data[0]);
3485

    
3486
        s->m.avctx= s->avctx;
3487
        s->m.current_picture.data[0]= s->current_picture.data[0];
3488
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
3489
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3490
        s->m.   last_picture_ptr= &s->m.   last_picture;
3491
        s->m.linesize=
3492
        s->m.   last_picture.linesize[0]=
3493
        s->m.    new_picture.linesize[0]=
3494
        s->m.current_picture.linesize[0]= stride;
3495
        s->m.uvlinesize= s->current_picture.linesize[1];
3496
        s->m.width = width;
3497
        s->m.height= height;
3498
        s->m.mb_width = block_width;
3499
        s->m.mb_height= block_height;
3500
        s->m.mb_stride=   s->m.mb_width+1;
3501
        s->m.b8_stride= 2*s->m.mb_width+1;
3502
        s->m.f_code=1;
3503
        s->m.pict_type= pict->pict_type;
3504
        s->m.me_method= s->avctx->me_method;
3505
        s->m.me.scene_change_score=0;
3506
        s->m.flags= s->avctx->flags;
3507
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3508
        s->m.out_format= FMT_H263;
3509
        s->m.unrestricted_mv= 1;
3510

    
3511
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3512
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3513
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3514

    
3515
        s->m.dsp= s->dsp; //move
3516
        ff_init_me(&s->m);
3517
    }
3518

    
3519
redo_frame:
3520

    
3521
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3522

    
3523
    encode_header(s);
3524
    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3525
    encode_blocks(s);
3526
    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3527

    
3528
    for(plane_index=0; plane_index<3; plane_index++){
3529
        Plane *p= &s->plane[plane_index];
3530
        int w= p->width;
3531
        int h= p->height;
3532
        int x, y;
3533
//        int bits= put_bits_count(&s->c.pb);
3534

    
3535
        //FIXME optimize
3536
     if(pict->data[plane_index]) //FIXME gray hack
3537
        for(y=0; y<h; y++){
3538
            for(x=0; x<w; x++){
3539
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3540
            }
3541
        }
3542
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3543

    
3544
        if(   plane_index==0
3545
           && pict->pict_type == P_TYPE
3546
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3547
            ff_init_range_encoder(c, buf, buf_size);
3548
            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3549
            pict->pict_type= FF_I_TYPE;
3550
            s->keyframe=1;
3551
            reset_contexts(s);
3552
            goto redo_frame;
3553
        }
3554

    
3555
        if(s->qlog == LOSSLESS_QLOG){
3556
            for(y=0; y<h; y++){
3557
                for(x=0; x<w; x++){
3558
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3559
                }
3560
            }
3561
        }
3562

    
3563
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3564

    
3565
        for(level=0; level<s->spatial_decomposition_count; level++){
3566
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3567
                SubBand *b= &p->band[level][orientation];
3568

    
3569
                quantize(s, b, b->buf, b->stride, s->qbias);
3570
                if(orientation==0)
3571
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3572
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3573
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
3574
                if(orientation==0)
3575
                    correlate(s, b, b->buf, b->stride, 1, 0);
3576
            }
3577
        }
3578
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3579

    
3580
        for(level=0; level<s->spatial_decomposition_count; level++){
3581
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3582
                SubBand *b= &p->band[level][orientation];
3583

    
3584
                dequantize(s, b, b->buf, b->stride);
3585
            }
3586
        }
3587

    
3588
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3589
        if(s->qlog == LOSSLESS_QLOG){
3590
            for(y=0; y<h; y++){
3591
                for(x=0; x<w; x++){
3592
                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
3593
                }
3594
            }
3595
        }
3596
{START_TIMER
3597
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3598
STOP_TIMER("pred-conv")}
3599
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3600
            int64_t error= 0;
3601

    
3602
    if(pict->data[plane_index]) //FIXME gray hack
3603
            for(y=0; y<h; y++){
3604
                for(x=0; x<w; x++){
3605
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3606
                    error += d*d;
3607
                }
3608
            }
3609
            s->avctx->error[plane_index] += error;
3610
            s->current_picture.error[plane_index] = error;
3611
        }
3612
    }
3613

    
3614
    if(s->last_picture.data[0])
3615
        avctx->release_buffer(avctx, &s->last_picture);
3616

    
3617
    s->current_picture.coded_picture_number = avctx->frame_number;
3618
    s->current_picture.pict_type = pict->pict_type;
3619
    s->current_picture.quality = pict->quality;
3620
    if(avctx->flags&CODEC_FLAG_PASS1){
3621
        s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
3622
        s->m.current_picture.display_picture_number =
3623
        s->m.current_picture.coded_picture_number = avctx->frame_number;
3624
        s->m.pict_type = pict->pict_type;
3625
        s->m.current_picture.quality = pict->quality;
3626
        ff_write_pass1_stats(&s->m);
3627
    }
3628
    if(avctx->flags&CODEC_FLAG_PASS2){
3629
        s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3630
    }
3631

    
3632
    emms_c();
3633

    
3634
    return ff_rac_terminate(c);
3635
}
3636

    
3637
static void common_end(SnowContext *s){
3638
    int plane_index, level, orientation;
3639

    
3640
    av_freep(&s->spatial_dwt_buffer);
3641

    
3642
    av_freep(&s->m.me.scratchpad);
3643
    av_freep(&s->m.me.map);
3644
    av_freep(&s->m.me.score_map);
3645

    
3646
    av_freep(&s->block);
3647

    
3648
    for(plane_index=0; plane_index<3; plane_index++){
3649
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3650
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3651
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3652

    
3653
                av_freep(&b->x_coeff);
3654
            }
3655
        }
3656
    }
3657
}
3658

    
3659
static int encode_end(AVCodecContext *avctx)
3660
{
3661
    SnowContext *s = avctx->priv_data;
3662

    
3663
    common_end(s);
3664
    av_free(avctx->stats_out);
3665

    
3666
    return 0;
3667
}
3668

    
3669
static int decode_init(AVCodecContext *avctx)
3670
{
3671
    SnowContext *s = avctx->priv_data;
3672
    int block_size;
3673

    
3674
    avctx->pix_fmt= PIX_FMT_YUV420P;
3675

    
3676
    common_init(avctx);
3677

    
3678
    block_size = MB_SIZE >> s->block_max_depth;
3679
    slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 2)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
3680

    
3681
    return 0;
3682
}
3683

    
3684
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3685
    SnowContext *s = avctx->priv_data;
3686
    RangeCoder * const c= &s->c;
3687
    int bytes_read;
3688
    AVFrame *picture = data;
3689
    int level, orientation, plane_index;
3690

    
3691
    ff_init_range_decoder(c, buf, buf_size);
3692
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3693

    
3694
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3695
    decode_header(s);
3696
    if(!s->block) alloc_blocks(s);
3697

    
3698
    frame_start(s);
3699
    //keyframe flag dupliaction mess FIXME
3700
    if(avctx->debug&FF_DEBUG_PICT_INFO)
3701
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3702

    
3703
    decode_blocks(s);
3704

    
3705
    for(plane_index=0; plane_index<3; plane_index++){
3706
        Plane *p= &s->plane[plane_index];
3707
        int w= p->width;
3708
        int h= p->height;
3709
        int x, y;
3710
        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
3711

    
3712
if(s->avctx->debug&2048){
3713
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3714
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3715

    
3716
        for(y=0; y<h; y++){
3717
            for(x=0; x<w; x++){
3718
                int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
3719
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3720
            }
3721
        }
3722
}
3723

    
3724
{   START_TIMER
3725
    for(level=0; level<s->spatial_decomposition_count; level++){
3726
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3727
            SubBand *b= &p->band[level][orientation];
3728
            unpack_coeffs(s, b, b->parent, orientation);
3729
        }
3730
    }
3731
    STOP_TIMER("unpack coeffs");
3732
}
3733

    
3734
{START_TIMER
3735
    const int mb_h= s->b_height << s->block_max_depth;
3736
    const int block_size = MB_SIZE >> s->block_max_depth;
3737
    const int block_w    = plane_index ? block_size/2 : block_size;
3738
    int mb_y;
3739
    dwt_compose_t cs[MAX_DECOMPOSITIONS];
3740
    int yd=0, yq=0;
3741
    int y;
3742
    int end_y;
3743

    
3744
    ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
3745
    for(mb_y=0; mb_y<=mb_h; mb_y++){
3746

    
3747
        int slice_starty = block_w*mb_y;
3748
        int slice_h = block_w*(mb_y+1);
3749
        if (!(s->keyframe || s->avctx->debug&512)){
3750
            slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
3751
            slice_h -= (block_w >> 1);
3752
        }
3753

    
3754
        {
3755
        START_TIMER
3756
        for(level=0; level<s->spatial_decomposition_count; level++){
3757
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3758
                SubBand *b= &p->band[level][orientation];
3759
                int start_y;
3760
                int end_y;
3761
                int our_mb_start = mb_y;
3762
                int our_mb_end = (mb_y + 1);
3763
                start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2: 0);
3764
                end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2);
3765
                if (!(s->keyframe || s->avctx->debug&512)){
3766
                    start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
3767
                    end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
3768
                }
3769
                start_y = FFMIN(b->height, start_y);
3770
                end_y = FFMIN(b->height, end_y);
3771

    
3772
                if (start_y != end_y){
3773
                    if (orientation == 0){
3774
                        SubBand * correlate_band = &p->band[0][0];
3775
                        int correlate_end_y = FFMIN(b->height, end_y + 1);
3776
                        int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
3777
                        decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
3778
                        correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
3779
                        dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
3780
                    }
3781
                    else
3782
                        decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
3783
                }
3784
            }
3785
        }
3786
        STOP_TIMER("decode_subband_slice");
3787
        }
3788

    
3789
{   START_TIMER
3790
        for(; yd<slice_h; yd+=4){
3791
            ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
3792
        }
3793
    STOP_TIMER("idwt slice");}
3794

    
3795

    
3796
        if(s->qlog == LOSSLESS_QLOG){
3797
            for(; yq<slice_h && yq<h; yq++){
3798
                DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
3799
                for(x=0; x<w; x++){
3800
                    line[x] <<= FRAC_BITS;
3801
                }
3802
            }
3803
        }
3804

    
3805
        predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
3806

    
3807
        y = FFMIN(p->height, slice_starty);
3808
        end_y = FFMIN(p->height, slice_h);
3809
        while(y < end_y)
3810
            slice_buffer_release(&s->sb, y++);
3811
    }
3812

    
3813
    slice_buffer_flush(&s->sb);
3814

    
3815
STOP_TIMER("idwt + predict_slices")}
3816
    }
3817

    
3818
    emms_c();
3819

    
3820
    if(s->last_picture.data[0])
3821
        avctx->release_buffer(avctx, &s->last_picture);
3822

    
3823
if(!(s->avctx->debug&2048))
3824
    *picture= s->current_picture;
3825
else
3826
    *picture= s->mconly_picture;
3827

    
3828
    *data_size = sizeof(AVFrame);
3829

    
3830
    bytes_read= c->bytestream - c->bytestream_start;
3831
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
3832

    
3833
    return bytes_read;
3834
}
3835

    
3836
static int decode_end(AVCodecContext *avctx)
3837
{
3838
    SnowContext *s = avctx->priv_data;
3839

    
3840
    slice_buffer_destroy(&s->sb);
3841

    
3842
    common_end(s);
3843

    
3844
    return 0;
3845
}
3846

    
3847
AVCodec snow_decoder = {
3848
    "snow",
3849
    CODEC_TYPE_VIDEO,
3850
    CODEC_ID_SNOW,
3851
    sizeof(SnowContext),
3852
    decode_init,
3853
    NULL,
3854
    decode_end,
3855
    decode_frame,
3856
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3857
    NULL
3858
};
3859

    
3860
#ifdef CONFIG_ENCODERS
3861
AVCodec snow_encoder = {
3862
    "snow",
3863
    CODEC_TYPE_VIDEO,
3864
    CODEC_ID_SNOW,
3865
    sizeof(SnowContext),
3866
    encode_init,
3867
    encode_frame,
3868
    encode_end,
3869
};
3870
#endif
3871

    
3872

    
3873
#if 0
3874
#undef malloc
3875
#undef free
3876
#undef printf
3877

3878
int main(){
3879
    int width=256;
3880
    int height=256;
3881
    int buffer[2][width*height];
3882
    SnowContext s;
3883
    int i;
3884
    s.spatial_decomposition_count=6;
3885
    s.spatial_decomposition_type=1;
3886

3887
    printf("testing 5/3 DWT\n");
3888
    for(i=0; i<width*height; i++)
3889
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3890

3891
    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3892
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3893

3894
    for(i=0; i<width*height; i++)
3895
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3896

3897
    printf("testing 9/7 DWT\n");
3898
    s.spatial_decomposition_type=0;
3899
    for(i=0; i<width*height; i++)
3900
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3901

3902
    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3903
    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3904

3905
    for(i=0; i<width*height; i++)
3906
        if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3907

3908
#if 0
3909
    printf("testing AC coder\n");
3910
    memset(s.header_state, 0, sizeof(s.header_state));
3911
    ff_init_range_encoder(&s.c, buffer[0], 256*256);
3912
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3913

3914
    for(i=-256; i<256; i++){
3915
START_TIMER
3916
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3917
STOP_TIMER("put_symbol")
3918
    }
3919
    ff_rac_terminate(&s.c);
3920

3921
    memset(s.header_state, 0, sizeof(s.header_state));
3922
    ff_init_range_decoder(&s.c, buffer[0], 256*256);
3923
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3924

3925
    for(i=-256; i<256; i++){
3926
        int j;
3927
START_TIMER
3928
        j= get_symbol(&s.c, s.header_state, 1);
3929
STOP_TIMER("get_symbol")
3930
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3931
    }
3932
#endif
3933
{
3934
int level, orientation, x, y;
3935
int64_t errors[8][4];
3936
int64_t g=0;
3937

    
3938
    memset(errors, 0, sizeof(errors));
3939
    s.spatial_decomposition_count=3;
3940
    s.spatial_decomposition_type=0;
3941
    for(level=0; level<s.spatial_decomposition_count; level++){
3942
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3943
            int w= width  >> (s.spatial_decomposition_count-level);
3944
            int h= height >> (s.spatial_decomposition_count-level);
3945
            int stride= width  << (s.spatial_decomposition_count-level);
3946
            DWTELEM *buf= buffer[0];
3947
            int64_t error=0;
3948

    
3949
            if(orientation&1) buf+=w;
3950
            if(orientation>1) buf+=stride>>1;
3951

    
3952
            memset(buffer[0], 0, sizeof(int)*width*height);
3953
            buf[w/2 + h/2*stride]= 256*256;
3954
            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3955
            for(y=0; y<height; y++){
3956
                for(x=0; x<width; x++){
3957
                    int64_t d= buffer[0][x + y*width];
3958
                    error += d*d;
3959
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3960
                }
3961
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3962
            }
3963
            error= (int)(sqrt(error)+0.5);
3964
            errors[level][orientation]= error;
3965
            if(g) g=ff_gcd(g, error);
3966
            else g= error;
3967
        }
3968
    }
3969
    printf("static int const visual_weight[][4]={\n");
3970
    for(level=0; level<s.spatial_decomposition_count; level++){
3971
        printf("  {");
3972
        for(orientation=0; orientation<4; orientation++){
3973
            printf("%8lld,", errors[level][orientation]/g);
3974
        }
3975
        printf("},\n");
3976
    }
3977
    printf("};\n");
3978
    {
3979
            int level=2;
3980
            int orientation=3;
3981
            int w= width  >> (s.spatial_decomposition_count-level);
3982
            int h= height >> (s.spatial_decomposition_count-level);
3983
            int stride= width  << (s.spatial_decomposition_count-level);
3984
            DWTELEM *buf= buffer[0];
3985
            int64_t error=0;
3986

    
3987
            buf+=w;
3988
            buf+=stride>>1;
3989

    
3990
            memset(buffer[0], 0, sizeof(int)*width*height);
3991
#if 1
3992
            for(y=0; y<height; y++){
3993
                for(x=0; x<width; x++){
3994
                    int tab[4]={0,2,3,1};
3995
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3996
                }
3997
            }
3998
            ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
3999
#else
4000
            for(y=0; y<h; y++){
4001
                for(x=0; x<w; x++){
4002
                    buf[x + y*stride  ]=169;
4003
                    buf[x + y*stride-w]=64;
4004
                }
4005
            }
4006
            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4007
#endif
4008
            for(y=0; y<height; y++){
4009
                for(x=0; x<width; x++){
4010
                    int64_t d= buffer[0][x + y*width];
4011
                    error += d*d;
4012
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4013
                }
4014
                if(ABS(height/2-y)<9) printf("\n");
4015
            }
4016
    }
4017

    
4018
}
4019
    return 0;
4020
}
4021
#endif
4022