Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ a0d1931c

History | View | Annotate | Download (131 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22

    
23
#include "rangecoder.h"
24
#define MID_STATE 128
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QROOT 8 
35
#define LOSSLESS_QLOG -128
36
#define FRAC_BITS 8
37

    
38
static const int8_t quant3[256]={
39
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
55
};
56
static const int8_t quant3b[256]={
57
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73
};
74
static const int8_t quant5[256]={
75
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
81
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
82
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
83
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
88
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
89
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
90
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
91
};
92
static const int8_t quant7[256]={
93
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
96
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
99
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
100
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
101
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
104
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
105
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
106
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
107
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
109
};
110
static const int8_t quant9[256]={
111
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
117
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
118
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
119
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
123
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
124
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
125
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
126
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
127
};
128
static const int8_t quant11[256]={
129
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
135
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
136
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
137
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
140
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
141
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
142
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
143
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
145
};
146
static const int8_t quant13[256]={
147
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
148
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
153
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
154
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
155
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
157
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
158
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
159
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
160
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
162
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
163
};
164

    
165
#define LOG2_OBMC_MAX 6
166
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
167
#if 0 //64*cubic
168
static const uint8_t obmc32[1024]={
169
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
170
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
171
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
172
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
173
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
174
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
175
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
176
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
177
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
178
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
179
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
180
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
181
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
182
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
183
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
184
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
185
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
186
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
187
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
188
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
189
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
191
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
192
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
193
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
194
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
196
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
197
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
198
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
199
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
200
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201
//error:0.000022
202
};
203
static const uint8_t obmc16[256]={
204
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
205
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
206
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
207
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
208
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
209
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
210
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
211
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
212
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
213
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
214
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
215
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
216
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
218
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
219
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
220
//error:0.000033
221
};
222
#elif 1 // 64*linear
223
static const uint8_t obmc32[1024]={
224
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
225
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
226
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
227
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
228
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
229
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
230
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
231
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
232
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
233
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
234
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
235
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
236
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
237
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
238
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
239
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
240
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
241
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
242
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
243
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
244
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
245
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
246
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
247
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
248
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
249
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
251
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
252
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
253
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
254
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
255
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
256
 //error:0.000020
257
};
258
static const uint8_t obmc16[256]={
259
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
260
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
261
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
262
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
263
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
264
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
265
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
266
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
267
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
268
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
269
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
270
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
271
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
272
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
273
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
274
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
275
//error:0.000015
276
};
277
#else //64*cos
278
static const uint8_t obmc32[1024]={
279
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
281
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
282
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
283
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
284
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
285
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
286
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
287
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
288
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
289
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
290
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
291
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
292
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
293
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
294
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
295
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
296
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
297
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
298
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
299
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
301
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
302
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
303
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
304
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
306
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
307
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
308
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
309
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
310
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311
//error:0.000022
312
};
313
static const uint8_t obmc16[256]={
314
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
315
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
316
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
317
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
318
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
319
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
320
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
321
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
322
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
323
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
324
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
325
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
326
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
328
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
329
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
330
//error:0.000022
331
};
332
#endif
333

    
334
//linear *64
335
static const uint8_t obmc8[64]={
336
 1, 3, 5, 7, 7, 5, 3, 1,
337
 3, 9,15,21,21,15, 9, 3,
338
 5,15,25,35,35,25,15, 5,
339
 7,21,35,49,49,35,21, 7,
340
 7,21,35,49,49,35,21, 7,
341
 5,15,25,35,35,25,15, 5,
342
 3, 9,15,21,21,15, 9, 3,
343
 1, 3, 5, 7, 7, 5, 3, 1,
344
//error:0.000000
345
};
346

    
347
//linear *64
348
static const uint8_t obmc4[16]={
349
 4,12,12, 4,
350
12,36,36,12,
351
12,36,36,12,
352
 4,12,12, 4,
353
//error:0.000000
354
};
355

    
356
static const uint8_t *obmc_tab[4]={
357
    obmc32, obmc16, obmc8, obmc4
358
};
359

    
360
typedef struct BlockNode{
361
    int16_t mx;
362
    int16_t my;
363
    uint8_t color[3];
364
    uint8_t type;
365
//#define TYPE_SPLIT    1
366
#define BLOCK_INTRA   1
367
//#define TYPE_NOCOLOR  4
368
    uint8_t level; //FIXME merge into type?
369
}BlockNode;
370

    
371
#define LOG2_MB_SIZE 4
372
#define MB_SIZE (1<<LOG2_MB_SIZE)
373

    
374
typedef struct x_and_coeff{
375
    int16_t x;
376
    int16_t coeff;
377
} x_and_coeff;
378

    
379
typedef struct SubBand{
380
    int level;
381
    int stride;
382
    int width;
383
    int height;
384
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
385
    DWTELEM *buf;
386
    int buf_x_offset;
387
    int buf_y_offset;
388
    int stride_line; ///< Stride measured in lines, not pixels.
389
    x_and_coeff * x_coeff;
390
    struct SubBand *parent;
391
    uint8_t state[/*7*2*/ 7 + 512][32];
392
}SubBand;
393

    
394
typedef struct Plane{
395
    int width;
396
    int height;
397
    SubBand band[MAX_DECOMPOSITIONS][4];
398
}Plane;
399

    
400
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
401
typedef struct {
402
    DWTELEM * * line; ///< For use by idwt and predict_slices.
403
    DWTELEM * * data_stack; ///< Used for internal purposes.
404
    int data_stack_top;
405
    int line_count;
406
    int line_width;
407
    int data_count;
408
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
409
} slice_buffer;
410

    
411
typedef struct SnowContext{
412
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
413

    
414
    AVCodecContext *avctx;
415
    RangeCoder c;
416
    DSPContext dsp;
417
    AVFrame input_picture;
418
    AVFrame current_picture;
419
    AVFrame last_picture;
420
    AVFrame mconly_picture;
421
//     uint8_t q_context[16];
422
    uint8_t header_state[32];
423
    uint8_t block_state[128 + 32*128];
424
    int keyframe;
425
    int always_reset;
426
    int version;
427
    int spatial_decomposition_type;
428
    int temporal_decomposition_type;
429
    int spatial_decomposition_count;
430
    int temporal_decomposition_count;
431
    DWTELEM *spatial_dwt_buffer;
432
    int colorspace_type;
433
    int chroma_h_shift;
434
    int chroma_v_shift;
435
    int spatial_scalability;
436
    int qlog;
437
    int lambda;
438
    int lambda2;
439
    int mv_scale;
440
    int qbias;
441
#define QBIAS_SHIFT 3
442
    int b_width;
443
    int b_height;
444
    int block_max_depth;
445
    Plane plane[MAX_PLANES];
446
    BlockNode *block;
447
    slice_buffer sb;
448

    
449
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
450
}SnowContext;
451

    
452
typedef struct {
453
    DWTELEM *b0;
454
    DWTELEM *b1;
455
    DWTELEM *b2;
456
    DWTELEM *b3;
457
    int y;
458
} dwt_compose_t;
459

    
460
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
461
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
462

    
463
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
464
{
465
    int i;
466
  
467
    buf->base_buffer = base_buffer;
468
    buf->line_count = line_count;
469
    buf->line_width = line_width;
470
    buf->data_count = max_allocated_lines;
471
    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
472
    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
473
  
474
    for (i = 0; i < max_allocated_lines; i++)
475
    {
476
      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
477
    }
478
    
479
    buf->data_stack_top = max_allocated_lines - 1;
480
}
481

    
482
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
483
{
484
    int i;
485
    int offset;
486
    DWTELEM * buffer;
487
  
488
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);  
489
  
490
    assert(buf->data_stack_top >= 0);
491
//  assert(!buf->line[line]);
492
    if (buf->line[line])
493
        return buf->line[line];
494
    
495
    offset = buf->line_width * line;
496
    buffer = buf->data_stack[buf->data_stack_top];
497
    buf->data_stack_top--;
498
    buf->line[line] = buffer;
499
  
500
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
501
  
502
    return buffer;
503
}
504

    
505
static void slice_buffer_release(slice_buffer * buf, int line)
506
{
507
    int i;
508
    int offset;
509
    DWTELEM * buffer;
510

    
511
    assert(line >= 0 && line < buf->line_count);
512
    assert(buf->line[line]);
513

    
514
    offset = buf->line_width * line;
515
    buffer = buf->line[line];
516
    buf->data_stack_top++;
517
    buf->data_stack[buf->data_stack_top] = buffer;
518
    buf->line[line] = NULL;
519
  
520
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
521
}
522

    
523
static void slice_buffer_flush(slice_buffer * buf)
524
{
525
    int i;
526
    for (i = 0; i < buf->line_count; i++)
527
    {
528
        if (buf->line[i])
529
        {
530
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
531
            slice_buffer_release(buf, i);
532
        }
533
    }
534
}
535

    
536
static void slice_buffer_destroy(slice_buffer * buf)
537
{
538
    int i;
539
    slice_buffer_flush(buf);
540
  
541
    for (i = buf->data_count - 1; i >= 0; i--)
542
    {
543
        assert(buf->data_stack[i]);
544
        av_free(buf->data_stack[i]);
545
    }
546
    assert(buf->data_stack);
547
    av_free(buf->data_stack);
548
    assert(buf->line);
549
    av_free(buf->line);
550
}
551

    
552
#ifdef        __sgi
553
// Avoid a name clash on SGI IRIX
554
#undef        qexp
555
#endif
556
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
557
static const uint8_t qexp[8]={
558
    128, 140, 152, 166, 181, 197, 215, 235
559
//   64,  70,  76,  83,  91,  99, 108, 117
560
//   32,  35,  38,  41,  45,  49,  54,  59
561
//   16,  17,  19,  21,  23,  25,  27,  29
562
//    8,   9,  10,  10,  11,  12,  13,  15
563
};
564

    
565
static inline int mirror(int v, int m){
566
    if     (v<0) return -v;
567
    else if(v>m) return 2*m-v;
568
    else         return v;
569
}
570

    
571
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
572
    int i;
573

    
574
    if(v){
575
        const int a= ABS(v);
576
        const int e= av_log2(a);
577
#if 1
578
        const int el= FFMIN(e, 10);   
579
        put_rac(c, state+0, 0);
580

    
581
        for(i=0; i<el; i++){
582
            put_rac(c, state+1+i, 1);  //1..10
583
        }
584
        for(; i<e; i++){
585
            put_rac(c, state+1+9, 1);  //1..10
586
        }
587
        put_rac(c, state+1+FFMIN(i,9), 0);
588

    
589
        for(i=e-1; i>=el; i--){
590
            put_rac(c, state+22+9, (a>>i)&1); //22..31
591
        }
592
        for(; i>=0; i--){
593
            put_rac(c, state+22+i, (a>>i)&1); //22..31
594
        }
595

    
596
        if(is_signed)
597
            put_rac(c, state+11 + el, v < 0); //11..21
598
#else
599
        
600
        put_rac(c, state+0, 0);
601
        if(e<=9){
602
            for(i=0; i<e; i++){
603
                put_rac(c, state+1+i, 1);  //1..10
604
            }
605
            put_rac(c, state+1+i, 0);
606

    
607
            for(i=e-1; i>=0; i--){
608
                put_rac(c, state+22+i, (a>>i)&1); //22..31
609
            }
610

    
611
            if(is_signed)
612
                put_rac(c, state+11 + e, v < 0); //11..21
613
        }else{
614
            for(i=0; i<e; i++){
615
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
616
            }
617
            put_rac(c, state+1+FFMIN(i,9), 0);
618

    
619
            for(i=e-1; i>=0; i--){
620
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
621
            }
622

    
623
            if(is_signed)
624
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
625
        }
626
#endif
627
    }else{
628
        put_rac(c, state+0, 1);
629
    }
630
}
631

    
632
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
633
    if(get_rac(c, state+0))
634
        return 0;
635
    else{
636
        int i, e, a;
637
        e= 0;
638
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
639
            e++;
640
        }
641

    
642
        a= 1;
643
        for(i=e-1; i>=0; i--){
644
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
645
        }
646

    
647
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
648
            return -a;
649
        else
650
            return a;
651
    }
652
}
653

    
654
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
655
    int i;
656
    int r= log2>=0 ? 1<<log2 : 1;
657

    
658
    assert(v>=0);
659
    assert(log2>=-4);
660

    
661
    while(v >= r){
662
        put_rac(c, state+4+log2, 1);
663
        v -= r;
664
        log2++;
665
        if(log2>0) r+=r;
666
    }
667
    put_rac(c, state+4+log2, 0);
668
    
669
    for(i=log2-1; i>=0; i--){
670
        put_rac(c, state+31-i, (v>>i)&1);
671
    }
672
}
673

    
674
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
675
    int i;
676
    int r= log2>=0 ? 1<<log2 : 1;
677
    int v=0;
678

    
679
    assert(log2>=-4);
680

    
681
    while(get_rac(c, state+4+log2)){
682
        v+= r;
683
        log2++;
684
        if(log2>0) r+=r;
685
    }
686
    
687
    for(i=log2-1; i>=0; i--){
688
        v+= get_rac(c, state+31-i)<<i;
689
    }
690

    
691
    return v;
692
}
693

    
694
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
695
    const int mirror_left= !highpass;
696
    const int mirror_right= (width&1) ^ highpass;
697
    const int w= (width>>1) - 1 + (highpass & width);
698
    int i;
699

    
700
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
701
    if(mirror_left){
702
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
703
        dst += dst_step;
704
        src += src_step;
705
    }
706
    
707
    for(i=0; i<w; i++){
708
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
709
    }
710
    
711
    if(mirror_right){
712
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
713
    }
714
}
715

    
716
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
717
    const int mirror_left= !highpass;
718
    const int mirror_right= (width&1) ^ highpass;
719
    const int w= (width>>1) - 1 + (highpass & width);
720
    int i;
721

    
722
    if(mirror_left){
723
        int r= 3*2*ref[0];
724
        r += r>>4;
725
        r += r>>8;
726
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
727
        dst += dst_step;
728
        src += src_step;
729
    }
730
    
731
    for(i=0; i<w; i++){
732
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
733
        r += r>>4;
734
        r += r>>8;
735
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
736
    }
737
    
738
    if(mirror_right){
739
        int r= 3*2*ref[w*ref_step];
740
        r += r>>4;
741
        r += r>>8;
742
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
743
    }
744
}
745

    
746

    
747
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
748
    int x, i;
749
    
750
    for(x=start; x<width; x+=2){
751
        int64_t sum=0;
752

    
753
        for(i=0; i<n; i++){
754
            int x2= x + 2*i - n + 1;
755
            if     (x2<     0) x2= -x2;
756
            else if(x2>=width) x2= 2*width-x2-2;
757
            sum += coeffs[i]*(int64_t)dst[x2];
758
        }
759
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
760
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
761
    }
762
}
763

    
764
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
765
    int x, y, i;
766
    for(y=start; y<height; y+=2){
767
        for(x=0; x<width; x++){
768
            int64_t sum=0;
769
    
770
            for(i=0; i<n; i++){
771
                int y2= y + 2*i - n + 1;
772
                if     (y2<      0) y2= -y2;
773
                else if(y2>=height) y2= 2*height-y2-2;
774
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
775
            }
776
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
777
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
778
        }
779
    }
780
}
781

    
782
#define SCALEX 1
783
#define LX0 0
784
#define LX1 1
785

    
786
#if 0 // more accurate 9/7
787
#define N1 2
788
#define SHIFT1 14
789
#define COEFFS1 (int[]){-25987,-25987}
790
#define N2 2
791
#define SHIFT2 19
792
#define COEFFS2 (int[]){-27777,-27777}
793
#define N3 2
794
#define SHIFT3 15
795
#define COEFFS3 (int[]){28931,28931}
796
#define N4 2
797
#define SHIFT4 15
798
#define COEFFS4 (int[]){14533,14533}
799
#elif 1 // 13/7 CRF
800
#define N1 4
801
#define SHIFT1 4
802
#define COEFFS1 (int[]){1,-9,-9,1}
803
#define N2 4
804
#define SHIFT2 4
805
#define COEFFS2 (int[]){-1,5,5,-1}
806
#define N3 0
807
#define SHIFT3 1
808
#define COEFFS3 NULL
809
#define N4 0
810
#define SHIFT4 1
811
#define COEFFS4 NULL
812
#elif 1 // 3/5
813
#define LX0 1
814
#define LX1 0
815
#define SCALEX 0.5
816
#define N1 2
817
#define SHIFT1 1
818
#define COEFFS1 (int[]){1,1}
819
#define N2 2
820
#define SHIFT2 2
821
#define COEFFS2 (int[]){-1,-1}
822
#define N3 0
823
#define SHIFT3 0
824
#define COEFFS3 NULL
825
#define N4 0
826
#define SHIFT4 0
827
#define COEFFS4 NULL
828
#elif 1 // 11/5 
829
#define N1 0
830
#define SHIFT1 1
831
#define COEFFS1 NULL
832
#define N2 2
833
#define SHIFT2 2
834
#define COEFFS2 (int[]){-1,-1}
835
#define N3 2
836
#define SHIFT3 0
837
#define COEFFS3 (int[]){-1,-1}
838
#define N4 4
839
#define SHIFT4 7
840
#define COEFFS4 (int[]){-5,29,29,-5}
841
#define SCALEX 4
842
#elif 1 // 9/7 CDF
843
#define N1 2
844
#define SHIFT1 7
845
#define COEFFS1 (int[]){-203,-203}
846
#define N2 2
847
#define SHIFT2 12
848
#define COEFFS2 (int[]){-217,-217}
849
#define N3 2
850
#define SHIFT3 7
851
#define COEFFS3 (int[]){113,113}
852
#define N4 2
853
#define SHIFT4 9
854
#define COEFFS4 (int[]){227,227}
855
#define SCALEX 1
856
#elif 1 // 7/5 CDF
857
#define N1 0
858
#define SHIFT1 1
859
#define COEFFS1 NULL
860
#define N2 2
861
#define SHIFT2 2
862
#define COEFFS2 (int[]){-1,-1}
863
#define N3 2
864
#define SHIFT3 0
865
#define COEFFS3 (int[]){-1,-1}
866
#define N4 2
867
#define SHIFT4 4
868
#define COEFFS4 (int[]){3,3}
869
#elif 1 // 9/7 MN
870
#define N1 4
871
#define SHIFT1 4
872
#define COEFFS1 (int[]){1,-9,-9,1}
873
#define N2 2
874
#define SHIFT2 2
875
#define COEFFS2 (int[]){1,1}
876
#define N3 0
877
#define SHIFT3 1
878
#define COEFFS3 NULL
879
#define N4 0
880
#define SHIFT4 1
881
#define COEFFS4 NULL
882
#else // 13/7 CRF
883
#define N1 4
884
#define SHIFT1 4
885
#define COEFFS1 (int[]){1,-9,-9,1}
886
#define N2 4
887
#define SHIFT2 4
888
#define COEFFS2 (int[]){-1,5,5,-1}
889
#define N3 0
890
#define SHIFT3 1
891
#define COEFFS3 NULL
892
#define N4 0
893
#define SHIFT4 1
894
#define COEFFS4 NULL
895
#endif
896
static void horizontal_decomposeX(DWTELEM *b, int width){
897
    DWTELEM temp[width];
898
    const int width2= width>>1;
899
    const int w2= (width+1)>>1;
900
    int A1,A2,A3,A4, x;
901

    
902
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
903
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
904
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
905
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
906
    
907
    for(x=0; x<width2; x++){
908
        temp[x   ]= b[2*x    ];
909
        temp[x+w2]= b[2*x + 1];
910
    }
911
    if(width&1)
912
        temp[x   ]= b[2*x    ];
913
    memcpy(b, temp, width*sizeof(int));
914
}
915

    
916
static void horizontal_composeX(DWTELEM *b, int width){
917
    DWTELEM temp[width];
918
    const int width2= width>>1;
919
    int A1,A2,A3,A4, x;
920
    const int w2= (width+1)>>1;
921

    
922
    memcpy(temp, b, width*sizeof(int));
923
    for(x=0; x<width2; x++){
924
        b[2*x    ]= temp[x   ];
925
        b[2*x + 1]= temp[x+w2];
926
    }
927
    if(width&1)
928
        b[2*x    ]= temp[x   ];
929

    
930
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
931
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
932
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
933
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
934
}
935

    
936
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
937
    int x, y;
938
  
939
    for(y=0; y<height; y++){
940
        for(x=0; x<width; x++){
941
            buffer[y*stride + x] *= SCALEX;
942
        }
943
    }
944

    
945
    for(y=0; y<height; y++){
946
        horizontal_decomposeX(buffer + y*stride, width);
947
    }
948
    
949
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
950
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
951
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
952
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
953
}
954

    
955
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
956
    int x, y;
957
  
958
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
959
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
960
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
961
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
962

    
963
    for(y=0; y<height; y++){
964
        horizontal_composeX(buffer + y*stride, width);
965
    }
966

    
967
    for(y=0; y<height; y++){
968
        for(x=0; x<width; x++){
969
            buffer[y*stride + x] /= SCALEX;
970
        }
971
    }
972
}
973

    
974
static void horizontal_decompose53i(DWTELEM *b, int width){
975
    DWTELEM temp[width];
976
    const int width2= width>>1;
977
    int A1,A2,A3,A4, x;
978
    const int w2= (width+1)>>1;
979

    
980
    for(x=0; x<width2; x++){
981
        temp[x   ]= b[2*x    ];
982
        temp[x+w2]= b[2*x + 1];
983
    }
984
    if(width&1)
985
        temp[x   ]= b[2*x    ];
986
#if 0
987
    A2= temp[1       ];
988
    A4= temp[0       ];
989
    A1= temp[0+width2];
990
    A1 -= (A2 + A4)>>1;
991
    A4 += (A1 + 1)>>1;
992
    b[0+width2] = A1;
993
    b[0       ] = A4;
994
    for(x=1; x+1<width2; x+=2){
995
        A3= temp[x+width2];
996
        A4= temp[x+1     ];
997
        A3 -= (A2 + A4)>>1;
998
        A2 += (A1 + A3 + 2)>>2;
999
        b[x+width2] = A3;
1000
        b[x       ] = A2;
1001

1002
        A1= temp[x+1+width2];
1003
        A2= temp[x+2       ];
1004
        A1 -= (A2 + A4)>>1;
1005
        A4 += (A1 + A3 + 2)>>2;
1006
        b[x+1+width2] = A1;
1007
        b[x+1       ] = A4;
1008
    }
1009
    A3= temp[width-1];
1010
    A3 -= A2;
1011
    A2 += (A1 + A3 + 2)>>2;
1012
    b[width -1] = A3;
1013
    b[width2-1] = A2;
1014
#else        
1015
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1016
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
1017
#endif
1018
}
1019

    
1020
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1021
    int i;
1022
    
1023
    for(i=0; i<width; i++){
1024
        b1[i] -= (b0[i] + b2[i])>>1;
1025
    }
1026
}
1027

    
1028
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1029
    int i;
1030
    
1031
    for(i=0; i<width; i++){
1032
        b1[i] += (b0[i] + b2[i] + 2)>>2;
1033
    }
1034
}
1035

    
1036
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1037
    int y;
1038
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1039
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
1040
  
1041
    for(y=-2; y<height; y+=2){
1042
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1043
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1044

    
1045
{START_TIMER
1046
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
1047
        if(y+2 < height) horizontal_decompose53i(b3, width);
1048
STOP_TIMER("horizontal_decompose53i")}
1049
        
1050
{START_TIMER
1051
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1052
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1053
STOP_TIMER("vertical_decompose53i*")}
1054
        
1055
        b0=b2;
1056
        b1=b3;
1057
    }
1058
}
1059

    
1060
#define lift5 lift
1061
#if 1
1062
#define W_AM 3
1063
#define W_AO 0
1064
#define W_AS 1
1065

    
1066
#define W_BM 1
1067
#define W_BO 8
1068
#define W_BS 4
1069

    
1070
#undef lift5
1071
#define W_CM 9999
1072
#define W_CO 2
1073
#define W_CS 2
1074

    
1075
#define W_DM 15
1076
#define W_DO 16
1077
#define W_DS 5
1078
#elif 0
1079
#define W_AM 55
1080
#define W_AO 16
1081
#define W_AS 5
1082

    
1083
#define W_BM 3
1084
#define W_BO 32
1085
#define W_BS 6
1086

    
1087
#define W_CM 127
1088
#define W_CO 64
1089
#define W_CS 7
1090

    
1091
#define W_DM 7
1092
#define W_DO 8
1093
#define W_DS 4
1094
#elif 0
1095
#define W_AM 97
1096
#define W_AO 32
1097
#define W_AS 6
1098

    
1099
#define W_BM 63
1100
#define W_BO 512
1101
#define W_BS 10
1102

    
1103
#define W_CM 13
1104
#define W_CO 8
1105
#define W_CS 4
1106

    
1107
#define W_DM 15
1108
#define W_DO 16
1109
#define W_DS 5
1110

    
1111
#else
1112

    
1113
#define W_AM 203
1114
#define W_AO 64
1115
#define W_AS 7
1116

    
1117
#define W_BM 217
1118
#define W_BO 2048
1119
#define W_BS 12
1120

    
1121
#define W_CM 113
1122
#define W_CO 64
1123
#define W_CS 7
1124

    
1125
#define W_DM 227
1126
#define W_DO 128
1127
#define W_DS 9
1128
#endif
1129
static void horizontal_decompose97i(DWTELEM *b, int width){
1130
    DWTELEM temp[width];
1131
    const int w2= (width+1)>>1;
1132

    
1133
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1134
    lift (temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1135
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1136
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1137
}
1138

    
1139

    
1140
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1141
    int i;
1142
    
1143
    for(i=0; i<width; i++){
1144
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1145
    }
1146
}
1147

    
1148
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1149
    int i;
1150
    
1151
    for(i=0; i<width; i++){
1152
#ifdef lift5
1153
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1154
#else
1155
        int r= 3*(b0[i] + b2[i]);
1156
        r+= r>>4;
1157
        r+= r>>8;
1158
        b1[i] += (r+W_CO)>>W_CS;
1159
#endif
1160
    }
1161
}
1162

    
1163
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1164
    int i;
1165
    
1166
    for(i=0; i<width; i++){
1167
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1168
    }
1169
}
1170

    
1171
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1172
    int i;
1173
    
1174
    for(i=0; i<width; i++){
1175
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1176
    }
1177
}
1178

    
1179
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1180
    int y;
1181
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1182
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1183
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1184
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1185
  
1186
    for(y=-4; y<height; y+=2){
1187
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1188
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1189

    
1190
{START_TIMER
1191
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1192
        if(y+4 < height) horizontal_decompose97i(b5, width);
1193
if(width>400){
1194
STOP_TIMER("horizontal_decompose97i")
1195
}}
1196
        
1197
{START_TIMER
1198
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1199
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1200
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1201
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1202

    
1203
if(width>400){
1204
STOP_TIMER("vertical_decompose97i")
1205
}}
1206
        
1207
        b0=b2;
1208
        b1=b3;
1209
        b2=b4;
1210
        b3=b5;
1211
    }
1212
}
1213

    
1214
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1215
    int level;
1216
    
1217
    for(level=0; level<decomposition_count; level++){
1218
        switch(type){
1219
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1220
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1221
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1222
        }
1223
    }
1224
}
1225

    
1226
static void horizontal_compose53i(DWTELEM *b, int width){
1227
    DWTELEM temp[width];
1228
    const int width2= width>>1;
1229
    const int w2= (width+1)>>1;
1230
    int A1,A2,A3,A4, x;
1231

    
1232
#if 0
1233
    A2= temp[1       ];
1234
    A4= temp[0       ];
1235
    A1= temp[0+width2];
1236
    A1 -= (A2 + A4)>>1;
1237
    A4 += (A1 + 1)>>1;
1238
    b[0+width2] = A1;
1239
    b[0       ] = A4;
1240
    for(x=1; x+1<width2; x+=2){
1241
        A3= temp[x+width2];
1242
        A4= temp[x+1     ];
1243
        A3 -= (A2 + A4)>>1;
1244
        A2 += (A1 + A3 + 2)>>2;
1245
        b[x+width2] = A3;
1246
        b[x       ] = A2;
1247

1248
        A1= temp[x+1+width2];
1249
        A2= temp[x+2       ];
1250
        A1 -= (A2 + A4)>>1;
1251
        A4 += (A1 + A3 + 2)>>2;
1252
        b[x+1+width2] = A1;
1253
        b[x+1       ] = A4;
1254
    }
1255
    A3= temp[width-1];
1256
    A3 -= A2;
1257
    A2 += (A1 + A3 + 2)>>2;
1258
    b[width -1] = A3;
1259
    b[width2-1] = A2;
1260
#else   
1261
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1262
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1263
#endif
1264
    for(x=0; x<width2; x++){
1265
        b[2*x    ]= temp[x   ];
1266
        b[2*x + 1]= temp[x+w2];
1267
    }
1268
    if(width&1)
1269
        b[2*x    ]= temp[x   ];
1270
}
1271

    
1272
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1273
    int i;
1274
    
1275
    for(i=0; i<width; i++){
1276
        b1[i] += (b0[i] + b2[i])>>1;
1277
    }
1278
}
1279

    
1280
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1281
    int i;
1282
    
1283
    for(i=0; i<width; i++){
1284
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1285
    }
1286
}
1287

    
1288
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1289
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1290
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1291
    cs->y = -1;
1292
}
1293

    
1294
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1295
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1296
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1297
    cs->y = -1;
1298
}
1299

    
1300
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1301
    int y= cs->y;
1302
    int mirror0 = mirror(y-1, height-1);
1303
    int mirror1 = mirror(y  , height-1);
1304
    int mirror2 = mirror(y+1, height-1);
1305
    int mirror3 = mirror(y+2, height-1);
1306
    
1307
    DWTELEM *b0= cs->b0;
1308
    DWTELEM *b1= cs->b1;
1309
    DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1310
    DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1311

    
1312
{START_TIMER
1313
        if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1314
        if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1315
STOP_TIMER("vertical_compose53i*")}
1316

    
1317
{START_TIMER
1318
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1319
        if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1320
STOP_TIMER("horizontal_compose53i")}
1321

    
1322
    cs->b0 = b2;
1323
    cs->b1 = b3;
1324
    cs->y += 2;
1325
}
1326

    
1327
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1328
    int y= cs->y;
1329
    DWTELEM *b0= cs->b0;
1330
    DWTELEM *b1= cs->b1;
1331
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1332
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1333

    
1334
{START_TIMER
1335
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1336
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1337
STOP_TIMER("vertical_compose53i*")}
1338

    
1339
{START_TIMER
1340
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1341
        if(b0 <= b2) horizontal_compose53i(b1, width);
1342
STOP_TIMER("horizontal_compose53i")}
1343

    
1344
    cs->b0 = b2;
1345
    cs->b1 = b3;
1346
    cs->y += 2;
1347
}
1348

    
1349
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1350
    dwt_compose_t cs;
1351
    spatial_compose53i_init(&cs, buffer, height, stride);
1352
    while(cs.y <= height)
1353
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1354
}   
1355

    
1356
 
1357
static void horizontal_compose97i(DWTELEM *b, int width){
1358
    DWTELEM temp[width];
1359
    const int w2= (width+1)>>1;
1360

    
1361
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1362
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1363
    lift (b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1364
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1365
}
1366

    
1367
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1368
    int i;
1369
    
1370
    for(i=0; i<width; i++){
1371
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1372
    }
1373
}
1374

    
1375
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1376
    int i;
1377
    
1378
    for(i=0; i<width; i++){
1379
#ifdef lift5
1380
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1381
#else
1382
        int r= 3*(b0[i] + b2[i]);
1383
        r+= r>>4;
1384
        r+= r>>8;
1385
        b1[i] -= (r+W_CO)>>W_CS;
1386
#endif
1387
    }
1388
}
1389

    
1390
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1391
    int i;
1392
    
1393
    for(i=0; i<width; i++){
1394
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1395
    }
1396
}
1397

    
1398
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1399
    int i;
1400
    
1401
    for(i=0; i<width; i++){
1402
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1403
    }
1404
}
1405

    
1406
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1407
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1408
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1409
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1410
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1411
    cs->y = -3;
1412
}
1413

    
1414
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1415
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1416
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1417
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1418
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1419
    cs->y = -3;
1420
}
1421

    
1422
static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1423
    int y = cs->y;
1424
    
1425
    int mirror0 = mirror(y - 1, height - 1);
1426
    int mirror1 = mirror(y + 0, height - 1);
1427
    int mirror2 = mirror(y + 1, height - 1);
1428
    int mirror3 = mirror(y + 2, height - 1);
1429
    int mirror4 = mirror(y + 3, height - 1);
1430
    int mirror5 = mirror(y + 4, height - 1);
1431
    DWTELEM *b0= cs->b0;
1432
    DWTELEM *b1= cs->b1;
1433
    DWTELEM *b2= cs->b2;
1434
    DWTELEM *b3= cs->b3;
1435
    DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1436
    DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1437
        
1438
        if(stride_line == 1 && y+4 < height && 0){ 
1439
            int x;
1440
            for(x=0; x<width/2; x++)
1441
                b5[x] += 64*2;
1442
            for(; x<width; x++)
1443
                b5[x] += 169*2;
1444
        }
1445
        
1446
//        if(mirror3 <= mirror5 && mirror2 <= mirror4 && mirror1 <= mirror3 && mirror0 <= mirror2)
1447
//        {
1448
//{START_TIMER
1449
//            vertical_compose97_complete(b0, b1, b2, b3, b4, b5, width);
1450
//if(width>400){
1451
//STOP_TIMER("vertical_compose97i-NEW")}}
1452
//        }
1453
//        else
1454
//        {
1455
{START_TIMER
1456
            if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1457
            if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1458
            if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1459
            if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1460
if(width>400){
1461
STOP_TIMER("vertical_compose97i")}}
1462
//        }
1463

    
1464
{START_TIMER
1465
        if(y-1>=  0) horizontal_compose97i(b0, width);
1466
        if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1467
if(width>400 && mirror0 <= mirror2){
1468
STOP_TIMER("horizontal_compose97i")}}
1469

    
1470
    cs->b0=b2;
1471
    cs->b1=b3;
1472
    cs->b2=b4;
1473
    cs->b3=b5;
1474
    cs->y += 2;
1475
}
1476

    
1477
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1478
    int y = cs->y;
1479
    DWTELEM *b0= cs->b0;
1480
    DWTELEM *b1= cs->b1;
1481
    DWTELEM *b2= cs->b2;
1482
    DWTELEM *b3= cs->b3;
1483
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1484
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1485

    
1486
        if(stride == width && y+4 < height && 0){ 
1487
            int x;
1488
            for(x=0; x<width/2; x++)
1489
                b5[x] += 64*2;
1490
            for(; x<width; x++)
1491
                b5[x] += 169*2;
1492
        }
1493
        
1494
{START_TIMER
1495
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1496
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1497
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1498
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1499
if(width>400){
1500
STOP_TIMER("vertical_compose97i")}}
1501

    
1502
{START_TIMER
1503
        if(y-1>=  0) horizontal_compose97i(b0, width);
1504
        if(b0 <= b2) horizontal_compose97i(b1, width);
1505
if(width>400 && b0 <= b2){
1506
STOP_TIMER("horizontal_compose97i")}}
1507

    
1508
    cs->b0=b2;
1509
    cs->b1=b3;
1510
    cs->b2=b4;
1511
    cs->b3=b5;
1512
    cs->y += 2;
1513
}
1514

    
1515
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1516
    dwt_compose_t cs;
1517
    spatial_compose97i_init(&cs, buffer, height, stride);
1518
    while(cs.y <= height)
1519
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1520
}
1521

    
1522
void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1523
    int level;
1524
    for(level=decomposition_count-1; level>=0; level--){
1525
        switch(type){
1526
        case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1527
        case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1528
        /* not slicified yet */
1529
        case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1530
          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1531
        }
1532
    }
1533
}
1534

    
1535
void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1536
    int level;
1537
    for(level=decomposition_count-1; level>=0; level--){
1538
        switch(type){
1539
        case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1540
        case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1541
        /* not slicified yet */
1542
        case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1543
        }
1544
    }
1545
}
1546

    
1547
void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1548
    const int support = type==1 ? 3 : 5;
1549
    int level;
1550
    if(type==2) return;
1551

    
1552
    for(level=decomposition_count-1; level>=0; level--){
1553
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1554
            switch(type){
1555
            case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1556
                    break;
1557
            case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1558
                    break;
1559
            case 2: break;
1560
            }
1561
        }
1562
    }
1563
}
1564

    
1565
void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1566
    const int support = type==1 ? 3 : 5;
1567
    int level;
1568
    if(type==2) return;
1569

    
1570
    for(level=decomposition_count-1; level>=0; level--){
1571
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1572
            switch(type){
1573
            case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1574
                    break;
1575
            case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1576
                    break;
1577
            case 2: break;
1578
            }
1579
        }
1580
    }
1581
}
1582

    
1583
void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1584
    if(type==2){
1585
        int level;
1586
        for(level=decomposition_count-1; level>=0; level--)
1587
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
1588
    }else{
1589
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1590
        int y;
1591
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1592
        for(y=0; y<height; y+=4)
1593
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1594
    }
1595
}
1596

    
1597
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1598
    const int w= b->width;
1599
    const int h= b->height;
1600
    int x, y;
1601

    
1602
    if(1){
1603
        int run=0;
1604
        int runs[w*h];
1605
        int run_index=0;
1606
                
1607
        for(y=0; y<h; y++){
1608
            for(x=0; x<w; x++){
1609
                int v, p=0;
1610
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1611
                v= src[x + y*stride];
1612

    
1613
                if(y){
1614
                    t= src[x + (y-1)*stride];
1615
                    if(x){
1616
                        lt= src[x - 1 + (y-1)*stride];
1617
                    }
1618
                    if(x + 1 < w){
1619
                        rt= src[x + 1 + (y-1)*stride];
1620
                    }
1621
                }
1622
                if(x){
1623
                    l= src[x - 1 + y*stride];
1624
                    /*if(x > 1){
1625
                        if(orientation==1) ll= src[y + (x-2)*stride];
1626
                        else               ll= src[x - 2 + y*stride];
1627
                    }*/
1628
                }
1629
                if(parent){
1630
                    int px= x>>1;
1631
                    int py= y>>1;
1632
                    if(px<b->parent->width && py<b->parent->height) 
1633
                        p= parent[px + py*2*stride];
1634
                }
1635
                if(!(/*ll|*/l|lt|t|rt|p)){
1636
                    if(v){
1637
                        runs[run_index++]= run;
1638
                        run=0;
1639
                    }else{
1640
                        run++;
1641
                    }
1642
                }
1643
            }
1644
        }
1645
        runs[run_index++]= run;
1646
        run_index=0;
1647
        run= runs[run_index++];
1648

    
1649
        put_symbol2(&s->c, b->state[1], run, 3);
1650
        
1651
        for(y=0; y<h; y++){
1652
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1653
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1654
                return -1;
1655
            }
1656
            for(x=0; x<w; x++){
1657
                int v, p=0;
1658
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1659
                v= src[x + y*stride];
1660

    
1661
                if(y){
1662
                    t= src[x + (y-1)*stride];
1663
                    if(x){
1664
                        lt= src[x - 1 + (y-1)*stride];
1665
                    }
1666
                    if(x + 1 < w){
1667
                        rt= src[x + 1 + (y-1)*stride];
1668
                    }
1669
                }
1670
                if(x){
1671
                    l= src[x - 1 + y*stride];
1672
                    /*if(x > 1){
1673
                        if(orientation==1) ll= src[y + (x-2)*stride];
1674
                        else               ll= src[x - 2 + y*stride];
1675
                    }*/
1676
                }
1677
                if(parent){
1678
                    int px= x>>1;
1679
                    int py= y>>1;
1680
                    if(px<b->parent->width && py<b->parent->height) 
1681
                        p= parent[px + py*2*stride];
1682
                }
1683
                if(/*ll|*/l|lt|t|rt|p){
1684
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1685

    
1686
                    put_rac(&s->c, &b->state[0][context], !!v);
1687
                }else{
1688
                    if(!run){
1689
                        run= runs[run_index++];
1690

    
1691
                        put_symbol2(&s->c, b->state[1], run, 3);
1692
                        assert(v);
1693
                    }else{
1694
                        run--;
1695
                        assert(!v);
1696
                    }
1697
                }
1698
                if(v){
1699
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1700

    
1701
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1702
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1703
                }
1704
            }
1705
        }
1706
    }
1707
    return 0;
1708
}
1709

    
1710
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1711
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1712
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1713
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1714
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1715
}
1716

    
1717
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1718
    const int w= b->width;
1719
    const int h= b->height;
1720
    int x,y;
1721
    
1722
    if(1){
1723
        int run;
1724
        int index=0;
1725
        int prev_index=-1;
1726
        int prev2_index=0;
1727
        int parent_index= 0;
1728
        int prev_parent_index= 0;
1729

    
1730
        run= get_symbol2(&s->c, b->state[1], 3);
1731
        for(y=0; y<h; y++){
1732
            int v=0;
1733
            int lt=0, t=0, rt=0;
1734

    
1735
            if(y && b->x_coeff[prev_index].x == 0){
1736
                rt= b->x_coeff[prev_index].coeff;
1737
            }
1738
            for(x=0; x<w; x++){
1739
                int p=0;
1740
                const int l= v;
1741
                
1742
                lt= t; t= rt;
1743

    
1744
                if(y){
1745
                    if(b->x_coeff[prev_index].x <= x)
1746
                        prev_index++;
1747
                    if(b->x_coeff[prev_index].x == x + 1)
1748
                        rt= b->x_coeff[prev_index].coeff;
1749
                    else
1750
                        rt=0;
1751
                }
1752
                if(parent){
1753
                    if(x>>1 > parent->x_coeff[parent_index].x){
1754
                        parent_index++;
1755
                    }
1756
                    if(x>>1 == parent->x_coeff[parent_index].x){
1757
                        p= parent->x_coeff[parent_index].coeff;
1758
                    }
1759
                }
1760
                if(/*ll|*/l|lt|t|rt|p){
1761
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1762

    
1763
                    v=get_rac(&s->c, &b->state[0][context]);
1764
                }else{
1765
                    if(!run){
1766
                        run= get_symbol2(&s->c, b->state[1], 3);
1767
                        v=1;
1768
                    }else{
1769
                        run--;
1770
                        v=0;
1771

    
1772
                        if(y && parent){
1773
                            int max_run;
1774

    
1775
                            max_run= FFMIN(run, b->x_coeff[prev_index].x - x - 2);
1776
                            max_run= FFMIN(max_run, 2*parent->x_coeff[parent_index].x - x - 1);
1777
                            x+= max_run;
1778
                            run-= max_run;
1779
                        }
1780
                    }
1781
                }
1782
                if(v){
1783
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1784
                    v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
1785
                    if(get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
1786
                        v *= -1;
1787
                    b->x_coeff[index].x=x;
1788
                    b->x_coeff[index++].coeff= v;
1789
                }
1790
            }
1791
            b->x_coeff[index++].x= w+1; //end marker
1792
            prev_index= prev2_index;
1793
            prev2_index= index;
1794
            
1795
            if(parent){
1796
                if(y&1){
1797
                    while(parent->x_coeff[parent_index].x != parent->width+1)
1798
                        parent_index++;
1799
                    parent_index++;
1800
                    prev_parent_index= parent_index;
1801
                }else{
1802
                    parent_index= prev_parent_index;
1803
                }
1804
            }
1805
        }
1806

    
1807
        b->x_coeff[index++].x= w+1; //end marker
1808
    }
1809
}
1810

    
1811
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1812
    const int w= b->width;
1813
    int x,y;
1814
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
1815
    int qmul= qexp[qlog&7]<<(qlog>>3);
1816
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1817
    int new_index = 0;
1818
    
1819
    START_TIMER
1820

    
1821
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1822
        qadd= 0;
1823
        qmul= 1<<QEXPSHIFT;
1824
    }
1825

    
1826
    /* If we are on the second or later slice, restore our index. */
1827
    if (start_y != 0)
1828
        new_index = save_state[0];
1829

    
1830
        
1831
    for(y=start_y; y<h; y++){
1832
        int x = 0;
1833
        int v;
1834
        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1835
        memset(line, 0, b->width*sizeof(DWTELEM));
1836
        v = b->x_coeff[new_index].coeff;
1837
        x = b->x_coeff[new_index++].x;
1838
        while(x < w)
1839
        {
1840
            if (v < 0)
1841
                line[x] = -(( -v*qmul + qadd)>>(QEXPSHIFT));
1842
            else
1843
                line[x] = (( v*qmul + qadd)>>(QEXPSHIFT));
1844
            v = b->x_coeff[new_index].coeff;
1845
            x = b->x_coeff[new_index++].x;
1846
        }
1847
    }
1848
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1849
        STOP_TIMER("decode_subband")
1850
    }
1851
        
1852
    /* Save our variables for the next slice. */
1853
    save_state[0] = new_index;
1854
        
1855
    return;
1856
}
1857

    
1858
static void reset_contexts(SnowContext *s){
1859
    int plane_index, level, orientation;
1860

    
1861
    for(plane_index=0; plane_index<3; plane_index++){
1862
        for(level=0; level<s->spatial_decomposition_count; level++){
1863
            for(orientation=level ? 1:0; orientation<4; orientation++){
1864
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1865
            }
1866
        }
1867
    }
1868
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1869
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1870
}
1871

    
1872
static int alloc_blocks(SnowContext *s){
1873
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1874
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1875
    
1876
    s->b_width = w;
1877
    s->b_height= h;
1878
    
1879
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1880
    return 0;
1881
}
1882

    
1883
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1884
    uint8_t *bytestream= d->bytestream;
1885
    uint8_t *bytestream_start= d->bytestream_start;
1886
    *d= *s;
1887
    d->bytestream= bytestream;
1888
    d->bytestream_start= bytestream_start;
1889
}
1890

    
1891
//near copy & paste from dsputil, FIXME
1892
static int pix_sum(uint8_t * pix, int line_size, int w)
1893
{
1894
    int s, i, j;
1895

    
1896
    s = 0;
1897
    for (i = 0; i < w; i++) {
1898
        for (j = 0; j < w; j++) {
1899
            s += pix[0];
1900
            pix ++;
1901
        }
1902
        pix += line_size - w;
1903
    }
1904
    return s;
1905
}
1906

    
1907
//near copy & paste from dsputil, FIXME
1908
static int pix_norm1(uint8_t * pix, int line_size, int w)
1909
{
1910
    int s, i, j;
1911
    uint32_t *sq = squareTbl + 256;
1912

    
1913
    s = 0;
1914
    for (i = 0; i < w; i++) {
1915
        for (j = 0; j < w; j ++) {
1916
            s += sq[pix[0]];
1917
            pix ++;
1918
        }
1919
        pix += line_size - w;
1920
    }
1921
    return s;
1922
}
1923

    
1924
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1925
    const int w= s->b_width << s->block_max_depth;
1926
    const int rem_depth= s->block_max_depth - level;
1927
    const int index= (x + y*w) << rem_depth;
1928
    const int block_w= 1<<rem_depth;
1929
    BlockNode block;
1930
    int i,j;
1931
    
1932
    block.color[0]= l;
1933
    block.color[1]= cb;
1934
    block.color[2]= cr;
1935
    block.mx= mx;
1936
    block.my= my;
1937
    block.type= type;
1938
    block.level= level;
1939

    
1940
    for(j=0; j<block_w; j++){
1941
        for(i=0; i<block_w; i++){
1942
            s->block[index + i + j*w]= block;
1943
        }
1944
    }
1945
}
1946

    
1947
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1948
    const int offset[3]= {
1949
          y*c->  stride + x,
1950
        ((y*c->uvstride + x)>>1),
1951
        ((y*c->uvstride + x)>>1),
1952
    };
1953
    int i;
1954
    for(i=0; i<3; i++){
1955
        c->src[0][i]= src [i];
1956
        c->ref[0][i]= ref [i] + offset[i];
1957
    }
1958
    assert(!ref_index);
1959
}
1960

    
1961
//FIXME copy&paste
1962
#define P_LEFT P[1]
1963
#define P_TOP P[2]
1964
#define P_TOPRIGHT P[3]
1965
#define P_MEDIAN P[4]
1966
#define P_MV1 P[9]
1967
#define FLAG_QPEL   1 //must be 1
1968

    
1969
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1970
    uint8_t p_buffer[1024];
1971
    uint8_t i_buffer[1024];
1972
    uint8_t p_state[sizeof(s->block_state)];
1973
    uint8_t i_state[sizeof(s->block_state)];
1974
    RangeCoder pc, ic;
1975
    uint8_t *pbbak= s->c.bytestream;
1976
    uint8_t *pbbak_start= s->c.bytestream_start;
1977
    int score, score2, iscore, i_len, p_len, block_s, sum;
1978
    const int w= s->b_width  << s->block_max_depth;
1979
    const int h= s->b_height << s->block_max_depth;
1980
    const int rem_depth= s->block_max_depth - level;
1981
    const int index= (x + y*w) << rem_depth;
1982
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1983
    static BlockNode null_block= { //FIXME add border maybe
1984
        .color= {128,128,128},
1985
        .mx= 0,
1986
        .my= 0,
1987
        .type= 0,
1988
        .level= 0,
1989
    };
1990
    int trx= (x+1)<<rem_depth;
1991
    int try= (y+1)<<rem_depth;
1992
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
1993
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
1994
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1995
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1996
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1997
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1998
    int pl = left->color[0];
1999
    int pcb= left->color[1];
2000
    int pcr= left->color[2];
2001
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2002
    int pmy= mid_pred(left->my, top->my, tr->my);
2003
    int mx=0, my=0;
2004
    int l,cr,cb, i;
2005
    const int stride= s->current_picture.linesize[0];
2006
    const int uvstride= s->current_picture.linesize[1];
2007
    const int instride= s->input_picture.linesize[0];
2008
    const int uvinstride= s->input_picture.linesize[1];
2009
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
2010
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2011
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2012
    uint8_t current_mb[3][stride*block_w];
2013
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2014
    int P[10][2];
2015
    int16_t last_mv[3][2];
2016
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2017
    const int shift= 1+qpel;
2018
    MotionEstContext *c= &s->m.me;
2019
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2020
    int my_context= av_log2(2*ABS(left->my - top->my));
2021
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2022

    
2023
    assert(sizeof(s->block_state) >= 256);
2024
    if(s->keyframe){
2025
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2026
        return 0;
2027
    }
2028

    
2029
    //FIXME optimize
2030
    for(i=0; i<block_w; i++)
2031
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
2032
    for(i=0; i<block_w>>1; i++)
2033
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2034
    for(i=0; i<block_w>>1; i++)
2035
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2036

    
2037
//    clip predictors / edge ?
2038

    
2039
    P_LEFT[0]= left->mx;
2040
    P_LEFT[1]= left->my;
2041
    P_TOP [0]= top->mx;
2042
    P_TOP [1]= top->my;
2043
    P_TOPRIGHT[0]= tr->mx;
2044
    P_TOPRIGHT[1]= tr->my;
2045
    
2046
    last_mv[0][0]= s->block[index].mx;
2047
    last_mv[0][1]= s->block[index].my;
2048
    last_mv[1][0]= right->mx;
2049
    last_mv[1][1]= right->my;
2050
    last_mv[2][0]= bottom->mx;
2051
    last_mv[2][1]= bottom->my;
2052
    
2053
    s->m.mb_stride=2;
2054
    s->m.mb_x= 
2055
    s->m.mb_y= 0;
2056
    s->m.me.skip= 0;
2057

    
2058
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2059
    
2060
    assert(s->m.me.  stride ==   stride);
2061
    assert(s->m.me.uvstride == uvstride);
2062
    
2063
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2064
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2065
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2066
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2067
    
2068
    c->xmin = - x*block_w - 16+2;
2069
    c->ymin = - y*block_w - 16+2;
2070
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2071
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2072

    
2073
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2074
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
2075
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2076
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2077
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2078
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2079
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2080

    
2081
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2082
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2083

    
2084
    if (!y) {
2085
        c->pred_x= P_LEFT[0];
2086
        c->pred_y= P_LEFT[1];
2087
    } else {
2088
        c->pred_x = P_MEDIAN[0];
2089
        c->pred_y = P_MEDIAN[1];
2090
    }
2091

    
2092
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
2093
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2094

    
2095
    assert(mx >= c->xmin);
2096
    assert(mx <= c->xmax);
2097
    assert(my >= c->ymin);
2098
    assert(my <= c->ymax);
2099
    
2100
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2101
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2102
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2103
                             
2104
  //  subpel search
2105
    pc= s->c;
2106
    pc.bytestream_start=
2107
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2108
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2109

    
2110
    if(level!=s->block_max_depth)
2111
        put_rac(&pc, &p_state[4 + s_context], 1);
2112
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2113
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2114
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2115
    p_len= pc.bytestream - pc.bytestream_start;
2116
    score += (s->lambda2*(p_len*8
2117
              + (pc.outstanding_count - s->c.outstanding_count)*8
2118
              + (-av_log2(pc.range)    + av_log2(s->c.range))
2119
             ))>>FF_LAMBDA_SHIFT;
2120

    
2121
    block_s= block_w*block_w;
2122
    sum = pix_sum(&current_mb[0][0], stride, block_w);
2123
    l= (sum + block_s/2)/block_s;
2124
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2125
    
2126
    block_s= block_w*block_w>>2;
2127
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2128
    cb= (sum + block_s/2)/block_s;
2129
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2130
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2131
    cr= (sum + block_s/2)/block_s;
2132
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2133

    
2134
    ic= s->c;
2135
    ic.bytestream_start=
2136
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2137
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2138
    if(level!=s->block_max_depth)
2139
        put_rac(&ic, &i_state[4 + s_context], 1);
2140
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2141
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2142
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2143
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2144
    i_len= ic.bytestream - ic.bytestream_start;
2145
    iscore += (s->lambda2*(i_len*8
2146
              + (ic.outstanding_count - s->c.outstanding_count)*8
2147
              + (-av_log2(ic.range)    + av_log2(s->c.range))
2148
             ))>>FF_LAMBDA_SHIFT;
2149

    
2150
//    assert(score==256*256*256*64-1);
2151
    assert(iscore < 255*255*256 + s->lambda2*10);
2152
    assert(iscore >= 0);
2153
    assert(l>=0 && l<=255);
2154
    assert(pl>=0 && pl<=255);
2155

    
2156
    if(level==0){
2157
        int varc= iscore >> 8;
2158
        int vard= score >> 8;
2159
        if (vard <= 64 || vard < varc)
2160
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2161
        else
2162
            c->scene_change_score+= s->m.qscale;
2163
    }
2164
        
2165
    if(level!=s->block_max_depth){
2166
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2167
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2168
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2169
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2170
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2171
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2172
    
2173
        if(score2 < score && score2 < iscore)
2174
            return score2;
2175
    }
2176
    
2177
    if(iscore < score){
2178
        memcpy(pbbak, i_buffer, i_len);
2179
        s->c= ic;
2180
        s->c.bytestream_start= pbbak_start;
2181
        s->c.bytestream= pbbak + i_len;
2182
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2183
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2184
        return iscore;
2185
    }else{
2186
        memcpy(pbbak, p_buffer, p_len);
2187
        s->c= pc;
2188
        s->c.bytestream_start= pbbak_start;
2189
        s->c.bytestream= pbbak + p_len;
2190
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2191
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2192
        return score;
2193
    }
2194
}
2195

    
2196
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2197
    const int w= s->b_width << s->block_max_depth;
2198
    const int rem_depth= s->block_max_depth - level;
2199
    const int index= (x + y*w) << rem_depth;
2200
    static BlockNode null_block= { //FIXME add border maybe
2201
        .color= {128,128,128},
2202
        .mx= 0,
2203
        .my= 0,
2204
        .type= 0,
2205
        .level= 0,
2206
    };
2207
    int trx= (x+1)<<rem_depth;
2208
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2209
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2210
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2211
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2212
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2213
    
2214
    if(s->keyframe){
2215
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2216
        return;
2217
    }
2218

    
2219
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2220
        int type;
2221
        int l = left->color[0];
2222
        int cb= left->color[1];
2223
        int cr= left->color[2];
2224
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2225
        int my= mid_pred(left->my, top->my, tr->my);
2226
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2227
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2228
        
2229
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2230

    
2231
        if(type){
2232
            l += get_symbol(&s->c, &s->block_state[32], 1);
2233
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2234
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2235
        }else{
2236
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2237
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2238
        }
2239
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2240
    }else{
2241
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2242
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2243
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2244
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2245
    }
2246
}
2247

    
2248
static void encode_blocks(SnowContext *s){
2249
    int x, y;
2250
    int w= s->b_width;
2251
    int h= s->b_height;
2252

    
2253
    for(y=0; y<h; y++){
2254
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2255
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2256
            return;
2257
        }
2258
        for(x=0; x<w; x++){
2259
            encode_q_branch(s, 0, x, y);
2260
        }
2261
    }
2262
}
2263

    
2264
static void decode_blocks(SnowContext *s){
2265
    int x, y;
2266
    int w= s->b_width;
2267
    int h= s->b_height;
2268

    
2269
    for(y=0; y<h; y++){
2270
        for(x=0; x<w; x++){
2271
            decode_q_branch(s, 0, x, y);
2272
        }
2273
    }
2274
}
2275

    
2276
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2277
    int x, y;
2278
START_TIMER
2279
    for(y=0; y < b_h+5; y++){
2280
        for(x=0; x < b_w; x++){
2281
            int a0= src[x    ];
2282
            int a1= src[x + 1];
2283
            int a2= src[x + 2];
2284
            int a3= src[x + 3];
2285
            int a4= src[x + 4];
2286
            int a5= src[x + 5];
2287
//            int am= 9*(a1+a2) - (a0+a3);
2288
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2289
//            int am= 18*(a2+a3) - 2*(a1+a4);
2290
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2291
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2292

    
2293
//            if(b_w==16) am= 8*(a1+a2);
2294

    
2295
            if(dx<8) tmp[x]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2296
            else     tmp[x]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2297

    
2298
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2299
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2300
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2301
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2302
        }
2303
        tmp += stride;
2304
        src += stride;
2305
    }
2306
    tmp -= (b_h+5)*stride;
2307
    
2308
    for(y=0; y < b_h; y++){
2309
        for(x=0; x < b_w; x++){
2310
            int a0= tmp[x + 0*stride];
2311
            int a1= tmp[x + 1*stride];
2312
            int a2= tmp[x + 2*stride];
2313
            int a3= tmp[x + 3*stride];
2314
            int a4= tmp[x + 4*stride];
2315
            int a5= tmp[x + 5*stride];
2316
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2317
//            int am= 18*(a2+a3) - 2*(a1+a4);
2318
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2319
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2320
            
2321
//            if(b_w==16) am= 8*(a1+a2);
2322

    
2323
            if(dy<8) dst[x]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2324
            else     dst[x]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2325

    
2326
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2327
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2328
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2329
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2330
        }
2331
        dst += stride;
2332
        tmp += stride;
2333
    }
2334
STOP_TIMER("mc_block")
2335
}
2336

    
2337
#define mca(dx,dy,b_w)\
2338
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2339
    uint8_t tmp[stride*(b_w+5)];\
2340
    assert(h==b_w);\
2341
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2342
}
2343

    
2344
mca( 0, 0,16)
2345
mca( 8, 0,16)
2346
mca( 0, 8,16)
2347
mca( 8, 8,16)
2348
mca( 0, 0,8)
2349
mca( 8, 0,8)
2350
mca( 0, 8,8)
2351
mca( 8, 8,8)
2352

    
2353
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2354
    if(block->type){
2355
        int x, y;
2356
        const int color= block->color[plane_index];
2357
        for(y=0; y < b_h; y++){
2358
            for(x=0; x < b_w; x++){
2359
                dst[x + y*stride]= color;
2360
            }
2361
        }
2362
    }else{
2363
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2364
        int mx= block->mx*scale;
2365
        int my= block->my*scale;
2366
        const int dx= mx&15;
2367
        const int dy= my&15;
2368
        sx += (mx>>4) - 2;
2369
        sy += (my>>4) - 2;
2370
        src += sx + sy*stride;
2371
        if(   (unsigned)sx >= w - b_w - 4
2372
           || (unsigned)sy >= h - b_h - 4){
2373
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2374
            src= tmp + MB_SIZE;
2375
        }
2376
        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2377
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2378
        else
2379
            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2380
    }
2381
}
2382

    
2383
static always_inline int same_block(BlockNode *a, BlockNode *b){
2384
    return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2385
}
2386

    
2387
//FIXME name clenup (b_w, block_w, b_width stuff)
2388
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2389
    DWTELEM * dst = NULL;
2390
    const int b_width = s->b_width  << s->block_max_depth;
2391
    const int b_height= s->b_height << s->block_max_depth;
2392
    const int b_stride= b_width;
2393
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2394
    BlockNode *rt= lt+1;
2395
    BlockNode *lb= lt+b_stride;
2396
    BlockNode *rb= lb+1;
2397
    uint8_t *block[4]; 
2398
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2399
    int x,y;
2400

    
2401
    if(b_x<0){
2402
        lt= rt;
2403
        lb= rb;
2404
    }else if(b_x + 1 >= b_width){
2405
        rt= lt;
2406
        rb= lb;
2407
    }
2408
    if(b_y<0){
2409
        lt= lb;
2410
        rt= rb;
2411
    }else if(b_y + 1 >= b_height){
2412
        lb= lt;
2413
        rb= rt;
2414
    }
2415
        
2416
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2417
        obmc -= src_x;
2418
        b_w += src_x;
2419
        src_x=0;
2420
    }else if(src_x + b_w > w){
2421
        b_w = w - src_x;
2422
    }
2423
    if(src_y<0){
2424
        obmc -= src_y*obmc_stride;
2425
        b_h += src_y;
2426
        src_y=0;
2427
    }else if(src_y + b_h> h){
2428
        b_h = h - src_y;
2429
    }
2430
    
2431
    if(b_w<=0 || b_h<=0) return;
2432

    
2433
assert(src_stride > 7*MB_SIZE);
2434
//    old_dst += src_x + src_y*dst_stride;
2435
    dst8+= src_x + src_y*src_stride;
2436
//    src += src_x + src_y*src_stride;
2437

    
2438
    block[0]= tmp+3*MB_SIZE;
2439
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2440

    
2441
    if(same_block(lt, rt)){
2442
        block[1]= block[0];
2443
    }else{
2444
        block[1]= tmp + 4*MB_SIZE;
2445
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2446
    }
2447
        
2448
    if(same_block(lt, lb)){
2449
        block[2]= block[0];
2450
    }else if(same_block(rt, lb)){
2451
        block[2]= block[1];
2452
    }else{
2453
        block[2]= tmp+5*MB_SIZE;
2454
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2455
    }
2456

    
2457
    if(same_block(lt, rb) ){
2458
        block[3]= block[0];
2459
    }else if(same_block(rt, rb)){
2460
        block[3]= block[1];
2461
    }else if(same_block(lb, rb)){
2462
        block[3]= block[2];
2463
    }else{
2464
        block[3]= tmp+6*MB_SIZE;
2465
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2466
    }
2467
#if 0
2468
    for(y=0; y<b_h; y++){
2469
        for(x=0; x<b_w; x++){
2470
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2471
            if(add) dst[x + y*dst_stride] += v;
2472
            else    dst[x + y*dst_stride] -= v;
2473
        }
2474
    }
2475
    for(y=0; y<b_h; y++){
2476
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2477
        for(x=0; x<b_w; x++){
2478
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2479
            if(add) dst[x + y*dst_stride] += v;
2480
            else    dst[x + y*dst_stride] -= v;
2481
        }
2482
    }
2483
    for(y=0; y<b_h; y++){
2484
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2485
        for(x=0; x<b_w; x++){
2486
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2487
            if(add) dst[x + y*dst_stride] += v;
2488
            else    dst[x + y*dst_stride] -= v;
2489
        }
2490
    }
2491
    for(y=0; y<b_h; y++){
2492
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2493
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2494
        for(x=0; x<b_w; x++){
2495
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2496
            if(add) dst[x + y*dst_stride] += v;
2497
            else    dst[x + y*dst_stride] -= v;
2498
        }
2499
    }
2500
#else
2501
{
2502

    
2503
    START_TIMER
2504
    
2505
    int block_index = 0;
2506
    for(y=0; y<b_h; y++){
2507
        //FIXME ugly missue of obmc_stride
2508
        uint8_t *obmc1= obmc + y*obmc_stride;
2509
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2510
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2511
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2512
        dst = slice_buffer_get_line(sb, src_y + y);
2513
        for(x=0; x<b_w; x++){
2514
            int v=   obmc1[x] * block[3][x + y*src_stride]
2515
                    +obmc2[x] * block[2][x + y*src_stride]
2516
                    +obmc3[x] * block[1][x + y*src_stride]
2517
                    +obmc4[x] * block[0][x + y*src_stride];
2518

    
2519
            v <<= 8 - LOG2_OBMC_MAX;
2520
            if(FRAC_BITS != 8){
2521
                v += 1<<(7 - FRAC_BITS);
2522
                v >>= 8 - FRAC_BITS;
2523
            }
2524
            if(add){
2525
//                v += old_dst[x + y*dst_stride];
2526
                v += dst[x + src_x];
2527
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2528
                if(v&(~255)) v= ~(v>>31);
2529
                dst8[x + y*src_stride] = v;
2530
            }else{
2531
//                old_dst[x + y*dst_stride] -= v;
2532
                dst[x + src_x] -= v;
2533
            }
2534
        }
2535
    }
2536
        STOP_TIMER("Inner add y block")
2537
}
2538
#endif
2539
}
2540

    
2541
//FIXME name clenup (b_w, block_w, b_width stuff)
2542
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2543
    const int b_width = s->b_width  << s->block_max_depth;
2544
    const int b_height= s->b_height << s->block_max_depth;
2545
    const int b_stride= b_width;
2546
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2547
    BlockNode *rt= lt+1;
2548
    BlockNode *lb= lt+b_stride;
2549
    BlockNode *rb= lb+1;
2550
    uint8_t *block[4]; 
2551
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2552
    int x,y;
2553

    
2554
    if(b_x<0){
2555
        lt= rt;
2556
        lb= rb;
2557
    }else if(b_x + 1 >= b_width){
2558
        rt= lt;
2559
        rb= lb;
2560
    }
2561
    if(b_y<0){
2562
        lt= lb;
2563
        rt= rb;
2564
    }else if(b_y + 1 >= b_height){
2565
        lb= lt;
2566
        rb= rt;
2567
    }
2568
        
2569
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2570
        obmc -= src_x;
2571
        b_w += src_x;
2572
        src_x=0;
2573
    }else if(src_x + b_w > w){
2574
        b_w = w - src_x;
2575
    }
2576
    if(src_y<0){
2577
        obmc -= src_y*obmc_stride;
2578
        b_h += src_y;
2579
        src_y=0;
2580
    }else if(src_y + b_h> h){
2581
        b_h = h - src_y;
2582
    }
2583
    
2584
    if(b_w<=0 || b_h<=0) return;
2585

    
2586
assert(src_stride > 7*MB_SIZE);
2587
    dst += src_x + src_y*dst_stride;
2588
    dst8+= src_x + src_y*src_stride;
2589
//    src += src_x + src_y*src_stride;
2590

    
2591
    block[0]= tmp+3*MB_SIZE;
2592
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2593

    
2594
    if(same_block(lt, rt)){
2595
        block[1]= block[0];
2596
    }else{
2597
        block[1]= tmp + 4*MB_SIZE;
2598
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2599
    }
2600
        
2601
    if(same_block(lt, lb)){
2602
        block[2]= block[0];
2603
    }else if(same_block(rt, lb)){
2604
        block[2]= block[1];
2605
    }else{
2606
        block[2]= tmp+5*MB_SIZE;
2607
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2608
    }
2609

    
2610
    if(same_block(lt, rb) ){
2611
        block[3]= block[0];
2612
    }else if(same_block(rt, rb)){
2613
        block[3]= block[1];
2614
    }else if(same_block(lb, rb)){
2615
        block[3]= block[2];
2616
    }else{
2617
        block[3]= tmp+6*MB_SIZE;
2618
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2619
    }
2620
#if 0
2621
    for(y=0; y<b_h; y++){
2622
        for(x=0; x<b_w; x++){
2623
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2624
            if(add) dst[x + y*dst_stride] += v;
2625
            else    dst[x + y*dst_stride] -= v;
2626
        }
2627
    }
2628
    for(y=0; y<b_h; y++){
2629
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2630
        for(x=0; x<b_w; x++){
2631
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2632
            if(add) dst[x + y*dst_stride] += v;
2633
            else    dst[x + y*dst_stride] -= v;
2634
        }
2635
    }
2636
    for(y=0; y<b_h; y++){
2637
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2638
        for(x=0; x<b_w; x++){
2639
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2640
            if(add) dst[x + y*dst_stride] += v;
2641
            else    dst[x + y*dst_stride] -= v;
2642
        }
2643
    }
2644
    for(y=0; y<b_h; y++){
2645
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2646
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2647
        for(x=0; x<b_w; x++){
2648
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2649
            if(add) dst[x + y*dst_stride] += v;
2650
            else    dst[x + y*dst_stride] -= v;
2651
        }
2652
    }
2653
#else
2654
    for(y=0; y<b_h; y++){
2655
        //FIXME ugly missue of obmc_stride
2656
        uint8_t *obmc1= obmc + y*obmc_stride;
2657
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2658
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2659
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2660
        for(x=0; x<b_w; x++){
2661
            int v=   obmc1[x] * block[3][x + y*src_stride]
2662
                    +obmc2[x] * block[2][x + y*src_stride]
2663
                    +obmc3[x] * block[1][x + y*src_stride]
2664
                    +obmc4[x] * block[0][x + y*src_stride];
2665
            
2666
            v <<= 8 - LOG2_OBMC_MAX;
2667
            if(FRAC_BITS != 8){
2668
                v += 1<<(7 - FRAC_BITS);
2669
                v >>= 8 - FRAC_BITS;
2670
            }
2671
            if(add){
2672
                v += dst[x + y*dst_stride];
2673
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2674
                if(v&(~255)) v= ~(v>>31);
2675
                dst8[x + y*src_stride] = v;
2676
            }else{
2677
                dst[x + y*dst_stride] -= v;
2678
            }
2679
        }
2680
    }
2681
#endif
2682
}
2683

    
2684
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2685
    Plane *p= &s->plane[plane_index];
2686
    const int mb_w= s->b_width  << s->block_max_depth;
2687
    const int mb_h= s->b_height << s->block_max_depth;
2688
    int x, y, mb_x;
2689
    int block_size = MB_SIZE >> s->block_max_depth;
2690
    int block_w    = plane_index ? block_size/2 : block_size;
2691
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2692
    int obmc_stride= plane_index ? block_size : 2*block_size;
2693
    int ref_stride= s->current_picture.linesize[plane_index];
2694
    uint8_t *ref  = s->last_picture.data[plane_index];
2695
    uint8_t *dst8= s->current_picture.data[plane_index];
2696
    int w= p->width;
2697
    int h= p->height;
2698
    START_TIMER
2699
    
2700
    if(s->keyframe || (s->avctx->debug&512)){
2701
        if(mb_y==mb_h)
2702
            return;
2703

    
2704
        if(add){
2705
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++)
2706
            {
2707
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2708
                DWTELEM * line = sb->line[y];
2709
                for(x=0; x<w; x++)
2710
                {
2711
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2712
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2713
                    v >>= FRAC_BITS;
2714
                    if(v&(~255)) v= ~(v>>31);
2715
                    dst8[x + y*ref_stride]= v;
2716
                }
2717
            }
2718
        }else{
2719
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++)
2720
            {
2721
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2722
                DWTELEM * line = sb->line[y];
2723
                for(x=0; x<w; x++)
2724
                {
2725
                    line[x] -= 128 << FRAC_BITS;
2726
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2727
                }
2728
            }
2729
        }
2730

    
2731
        return;
2732
    }
2733
    
2734
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2735
            START_TIMER
2736

    
2737
            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, 
2738
                       block_w*mb_x - block_w/2,
2739
                       block_w*mb_y - block_w/2,
2740
                       block_w, block_w,
2741
                       w, h,
2742
                       w, ref_stride, obmc_stride,
2743
                       mb_x - 1, mb_y - 1,
2744
                       add, plane_index);
2745
            
2746
            STOP_TIMER("add_yblock")
2747
        }
2748
    
2749
    STOP_TIMER("predict_slice")
2750
}
2751

    
2752
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2753
    Plane *p= &s->plane[plane_index];
2754
    const int mb_w= s->b_width  << s->block_max_depth;
2755
    const int mb_h= s->b_height << s->block_max_depth;
2756
    int x, y, mb_x;
2757
    int block_size = MB_SIZE >> s->block_max_depth;
2758
    int block_w    = plane_index ? block_size/2 : block_size;
2759
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2760
    int obmc_stride= plane_index ? block_size : 2*block_size;
2761
    int ref_stride= s->current_picture.linesize[plane_index];
2762
    uint8_t *ref  = s->last_picture.data[plane_index];
2763
    uint8_t *dst8= s->current_picture.data[plane_index];
2764
    int w= p->width;
2765
    int h= p->height;
2766
    START_TIMER
2767
    
2768
    if(s->keyframe || (s->avctx->debug&512)){
2769
        if(mb_y==mb_h)
2770
            return;
2771

    
2772
        if(add){
2773
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
2774
                for(x=0; x<w; x++){
2775
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2776
                    v >>= FRAC_BITS;
2777
                    if(v&(~255)) v= ~(v>>31);
2778
                    dst8[x + y*ref_stride]= v;
2779
                }
2780
            }
2781
        }else{
2782
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
2783
                for(x=0; x<w; x++){
2784
                    buf[x + y*w]-= 128<<FRAC_BITS;
2785
                }
2786
            }
2787
        }
2788

    
2789
        return;
2790
    }
2791
    
2792
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2793
            START_TIMER
2794

    
2795
            add_yblock(s, buf, dst8, ref, obmc, 
2796
                       block_w*mb_x - block_w/2,
2797
                       block_w*mb_y - block_w/2,
2798
                       block_w, block_w,
2799
                       w, h,
2800
                       w, ref_stride, obmc_stride,
2801
                       mb_x - 1, mb_y - 1,
2802
                       add, plane_index);
2803
            
2804
            STOP_TIMER("add_yblock")
2805
        }
2806
    
2807
    STOP_TIMER("predict_slice")
2808
}
2809

    
2810
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2811
    const int mb_h= s->b_height << s->block_max_depth;
2812
    int mb_y;
2813
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2814
        predict_slice(s, buf, plane_index, add, mb_y);
2815
}
2816

    
2817
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2818
    const int level= b->level;
2819
    const int w= b->width;
2820
    const int h= b->height;
2821
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2822
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2823
    int x,y, thres1, thres2;
2824
    START_TIMER
2825

    
2826
    assert(QROOT==8);
2827

    
2828
    if(s->qlog == LOSSLESS_QLOG) return;
2829
 
2830
    bias= bias ? 0 : (3*qmul)>>3;
2831
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2832
    thres2= 2*thres1;
2833
    
2834
    if(!bias){
2835
        for(y=0; y<h; y++){
2836
            for(x=0; x<w; x++){
2837
                int i= src[x + y*stride];
2838
                
2839
                if((unsigned)(i+thres1) > thres2){
2840
                    if(i>=0){
2841
                        i<<= QEXPSHIFT;
2842
                        i/= qmul; //FIXME optimize
2843
                        src[x + y*stride]=  i;
2844
                    }else{
2845
                        i= -i;
2846
                        i<<= QEXPSHIFT;
2847
                        i/= qmul; //FIXME optimize
2848
                        src[x + y*stride]= -i;
2849
                    }
2850
                }else
2851
                    src[x + y*stride]= 0;
2852
            }
2853
        }
2854
    }else{
2855
        for(y=0; y<h; y++){
2856
            for(x=0; x<w; x++){
2857
                int i= src[x + y*stride]; 
2858
                
2859
                if((unsigned)(i+thres1) > thres2){
2860
                    if(i>=0){
2861
                        i<<= QEXPSHIFT;
2862
                        i= (i + bias) / qmul; //FIXME optimize
2863
                        src[x + y*stride]=  i;
2864
                    }else{
2865
                        i= -i;
2866
                        i<<= QEXPSHIFT;
2867
                        i= (i + bias) / qmul; //FIXME optimize
2868
                        src[x + y*stride]= -i;
2869
                    }
2870
                }else
2871
                    src[x + y*stride]= 0;
2872
            }
2873
        }
2874
    }
2875
    if(level+1 == s->spatial_decomposition_count){
2876
//        STOP_TIMER("quantize")
2877
    }
2878
}
2879

    
2880
static void dequantize_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride){
2881
    const int w= b->width;
2882
    const int h= b->height;
2883
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2884
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2885
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2886
    int x,y;
2887
    START_TIMER
2888
    
2889
    if(s->qlog == LOSSLESS_QLOG) return;
2890
    
2891
    assert(QROOT==8);
2892
    
2893
    for(y=0; y<h; y++){
2894
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2895
        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2896
        for(x=0; x<w; x++){
2897
            int i= line[x];
2898
            if(i<0){
2899
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2900
            }else if(i>0){
2901
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2902
            }
2903
        }
2904
    }
2905
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2906
        STOP_TIMER("dquant")
2907
    }
2908
}
2909

    
2910
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2911
    const int w= b->width;
2912
    const int h= b->height;
2913
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2914
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2915
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2916
    int x,y;
2917
    START_TIMER
2918
    
2919
    if(s->qlog == LOSSLESS_QLOG) return;
2920
    
2921
    assert(QROOT==8);
2922

    
2923
    for(y=0; y<h; y++){
2924
        for(x=0; x<w; x++){
2925
            int i= src[x + y*stride];
2926
            if(i<0){
2927
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2928
            }else if(i>0){
2929
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2930
            }
2931
        }
2932
    }
2933
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2934
        STOP_TIMER("dquant")
2935
    }
2936
}
2937

    
2938
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2939
    const int w= b->width;
2940
    const int h= b->height;
2941
    int x,y;
2942
    
2943
    for(y=h-1; y>=0; y--){
2944
        for(x=w-1; x>=0; x--){
2945
            int i= x + y*stride;
2946
            
2947
            if(x){
2948
                if(use_median){
2949
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2950
                    else  src[i] -= src[i - 1];
2951
                }else{
2952
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2953
                    else  src[i] -= src[i - 1];
2954
                }
2955
            }else{
2956
                if(y) src[i] -= src[i - stride];
2957
            }
2958
        }
2959
    }
2960
}
2961

    
2962
static void correlate_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2963
    const int w= b->width;
2964
    const int h= b->height;
2965
    int x,y;
2966
    
2967
//    START_TIMER
2968
    
2969
    DWTELEM * line;
2970
    DWTELEM * prev;
2971
    
2972
    for(y=0; y<h; y++){
2973
        prev = line;
2974
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2975
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2976
        for(x=0; x<w; x++){
2977
            if(x){
2978
                if(use_median){
2979
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
2980
                    else  line[x] += line[x - 1];
2981
                }else{
2982
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
2983
                    else  line[x] += line[x - 1];
2984
                }
2985
            }else{
2986
                if(y) line[x] += prev[x];
2987
            }
2988
        }
2989
    }
2990
    
2991
//    STOP_TIMER("correlate")
2992
}
2993

    
2994
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2995
    const int w= b->width;
2996
    const int h= b->height;
2997
    int x,y;
2998
    
2999
    for(y=0; y<h; y++){
3000
        for(x=0; x<w; x++){
3001
            int i= x + y*stride;
3002
            
3003
            if(x){
3004
                if(use_median){
3005
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3006
                    else  src[i] += src[i - 1];
3007
                }else{
3008
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3009
                    else  src[i] += src[i - 1];
3010
                }
3011
            }else{
3012
                if(y) src[i] += src[i - stride];
3013
            }
3014
        }
3015
    }
3016
}
3017

    
3018
static void encode_header(SnowContext *s){
3019
    int plane_index, level, orientation;
3020
    uint8_t kstate[32]; 
3021
    
3022
    memset(kstate, MID_STATE, sizeof(kstate));   
3023

    
3024
    put_rac(&s->c, kstate, s->keyframe);
3025
    if(s->keyframe || s->always_reset)
3026
        reset_contexts(s);
3027
    if(s->keyframe){
3028
        put_symbol(&s->c, s->header_state, s->version, 0);
3029
        put_rac(&s->c, s->header_state, s->always_reset);
3030
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3031
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3032
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3033
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3034
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3035
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3036
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3037
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3038

    
3039
        for(plane_index=0; plane_index<2; plane_index++){
3040
            for(level=0; level<s->spatial_decomposition_count; level++){
3041
                for(orientation=level ? 1:0; orientation<4; orientation++){
3042
                    if(orientation==2) continue;
3043
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3044
                }
3045
            }
3046
        }
3047
    }
3048
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3049
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
3050
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
3051
    put_symbol(&s->c, s->header_state, s->qbias, 1);
3052
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3053
}
3054

    
3055
static int decode_header(SnowContext *s){
3056
    int plane_index, level, orientation;
3057
    uint8_t kstate[32];
3058

    
3059
    memset(kstate, MID_STATE, sizeof(kstate));   
3060

    
3061
    s->keyframe= get_rac(&s->c, kstate);
3062
    if(s->keyframe || s->always_reset)
3063
        reset_contexts(s);
3064
    if(s->keyframe){
3065
        s->version= get_symbol(&s->c, s->header_state, 0);
3066
        if(s->version>0){
3067
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3068
            return -1;
3069
        }
3070
        s->always_reset= get_rac(&s->c, s->header_state);
3071
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3072
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3073
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3074
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3075
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3076
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3077
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3078
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3079

    
3080
        for(plane_index=0; plane_index<3; plane_index++){
3081
            for(level=0; level<s->spatial_decomposition_count; level++){
3082
                for(orientation=level ? 1:0; orientation<4; orientation++){
3083
                    int q;
3084
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3085
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3086
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3087
                    s->plane[plane_index].band[level][orientation].qlog= q;
3088
                }
3089
            }
3090
        }
3091
    }
3092
    
3093
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3094
    if(s->spatial_decomposition_type > 2){
3095
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3096
        return -1;
3097
    }
3098
    
3099
    s->qlog= get_symbol(&s->c, s->header_state, 1);
3100
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3101
    s->qbias= get_symbol(&s->c, s->header_state, 1);
3102
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3103

    
3104
    return 0;
3105
}
3106

    
3107
static int common_init(AVCodecContext *avctx){
3108
    SnowContext *s = avctx->priv_data;
3109
    int width, height;
3110
    int level, orientation, plane_index, dec;
3111

    
3112
    s->avctx= avctx;
3113
        
3114
    dsputil_init(&s->dsp, avctx);
3115

    
3116
#define mcf(dx,dy)\
3117
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3118
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3119
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3120
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3121
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3122
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3123

    
3124
    mcf( 0, 0)
3125
    mcf( 4, 0)
3126
    mcf( 8, 0)
3127
    mcf(12, 0)
3128
    mcf( 0, 4)
3129
    mcf( 4, 4)
3130
    mcf( 8, 4)
3131
    mcf(12, 4)
3132
    mcf( 0, 8)
3133
    mcf( 4, 8)
3134
    mcf( 8, 8)
3135
    mcf(12, 8)
3136
    mcf( 0,12)
3137
    mcf( 4,12)
3138
    mcf( 8,12)
3139
    mcf(12,12)
3140

    
3141
#define mcfh(dx,dy)\
3142
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3143
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3144
        mc_block_hpel ## dx ## dy ## 16;\
3145
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3146
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3147
        mc_block_hpel ## dx ## dy ## 8;
3148

    
3149
    mcfh(0, 0)
3150
    mcfh(8, 0)
3151
    mcfh(0, 8)
3152
    mcfh(8, 8)
3153
        
3154
    dec= s->spatial_decomposition_count= 5;
3155
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3156
    
3157
    s->chroma_h_shift= 1; //FIXME XXX
3158
    s->chroma_v_shift= 1;
3159
    
3160
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3161
    
3162
    width= s->avctx->width;
3163
    height= s->avctx->height;
3164

    
3165
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3166
    
3167
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3168
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3169
    
3170
    for(plane_index=0; plane_index<3; plane_index++){    
3171
        int w= s->avctx->width;
3172
        int h= s->avctx->height;
3173

    
3174
        if(plane_index){
3175
            w>>= s->chroma_h_shift;
3176
            h>>= s->chroma_v_shift;
3177
        }
3178
        s->plane[plane_index].width = w;
3179
        s->plane[plane_index].height= h;
3180
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3181
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3182
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3183
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3184
                
3185
                b->buf= s->spatial_dwt_buffer;
3186
                b->level= level;
3187
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3188
                b->width = (w + !(orientation&1))>>1;
3189
                b->height= (h + !(orientation>1))>>1;
3190
                
3191
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3192
                b->buf_x_offset = 0;
3193
                b->buf_y_offset = 0;
3194
                
3195
                if(orientation&1){
3196
                    b->buf += (w+1)>>1;
3197
                    b->buf_x_offset = (w+1)>>1;
3198
                }
3199
                if(orientation>1){
3200
                    b->buf += b->stride>>1;
3201
                    b->buf_y_offset = b->stride_line >> 1;
3202
                }
3203
                
3204
                if(level)
3205
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3206
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3207
            }
3208
            w= (w+1)>>1;
3209
            h= (h+1)>>1;
3210
        }
3211
    }
3212
    
3213
    reset_contexts(s);
3214
/*    
3215
    width= s->width= avctx->width;
3216
    height= s->height= avctx->height;
3217
    
3218
    assert(width && height);
3219
*/
3220
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3221
    
3222
    return 0;
3223
}
3224

    
3225

    
3226
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3227
    int width = p->width;
3228
    int height= p->height;
3229
    int level, orientation, x, y;
3230

    
3231
    for(level=0; level<s->spatial_decomposition_count; level++){
3232
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3233
            SubBand *b= &p->band[level][orientation];
3234
            DWTELEM *buf= b->buf;
3235
            int64_t error=0;
3236
            
3237
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3238
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
3239
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3240
            for(y=0; y<height; y++){
3241
                for(x=0; x<width; x++){
3242
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
3243
                    error += d*d;
3244
                }
3245
            }
3246

    
3247
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3248
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3249
        }
3250
    }
3251
}
3252

    
3253
static int encode_init(AVCodecContext *avctx)
3254
{
3255
    SnowContext *s = avctx->priv_data;
3256
    int plane_index;
3257

    
3258
    if(avctx->strict_std_compliance >= 0){
3259
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
3260
               "use vstrict=-1 / -strict -1 to use it anyway\n");
3261
        return -1;
3262
    }
3263
 
3264
    common_init(avctx);
3265
    alloc_blocks(s);
3266
 
3267
    s->version=0;
3268
    
3269
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3270
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3271
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3272
    h263_encode_init(&s->m); //mv_penalty
3273

    
3274
    for(plane_index=0; plane_index<3; plane_index++){
3275
        calculate_vissual_weight(s, &s->plane[plane_index]);
3276
    }
3277
    
3278
    
3279
    avctx->coded_frame= &s->current_picture;
3280
    switch(avctx->pix_fmt){
3281
//    case PIX_FMT_YUV444P:
3282
//    case PIX_FMT_YUV422P:
3283
    case PIX_FMT_YUV420P:
3284
    case PIX_FMT_GRAY8:
3285
//    case PIX_FMT_YUV411P:
3286
//    case PIX_FMT_YUV410P:
3287
        s->colorspace_type= 0;
3288
        break;
3289
/*    case PIX_FMT_RGBA32:
3290
        s->colorspace= 1;
3291
        break;*/
3292
    default:
3293
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3294
        return -1;
3295
    }
3296
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3297
    s->chroma_h_shift= 1;
3298
    s->chroma_v_shift= 1;
3299
    return 0;
3300
}
3301

    
3302
static int frame_start(SnowContext *s){
3303
   AVFrame tmp;
3304
   int w= s->avctx->width; //FIXME round up to x16 ?
3305
   int h= s->avctx->height;
3306

    
3307
    if(s->current_picture.data[0]){
3308
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
3309
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3310
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3311
    }
3312

    
3313
    tmp= s->last_picture;
3314
    s->last_picture= s->current_picture;
3315
    s->current_picture= tmp;
3316
    
3317
    s->current_picture.reference= 1;
3318
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3319
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3320
        return -1;
3321
    }
3322
    
3323
    return 0;
3324
}
3325

    
3326
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3327
    SnowContext *s = avctx->priv_data;
3328
    RangeCoder * const c= &s->c;
3329
    AVFrame *pict = data;
3330
    const int width= s->avctx->width;
3331
    const int height= s->avctx->height;
3332
    int level, orientation, plane_index;
3333

    
3334
    ff_init_range_encoder(c, buf, buf_size);
3335
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3336
    
3337
    s->input_picture = *pict;
3338

    
3339
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3340
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3341
    
3342
    if(pict->quality){
3343
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3344
        //<64 >60
3345
        s->qlog += 61;
3346
    }else{
3347
        s->qlog= LOSSLESS_QLOG;
3348
    }
3349

    
3350
    frame_start(s);
3351
    s->current_picture.key_frame= s->keyframe;
3352

    
3353
    if(pict->pict_type == P_TYPE){
3354
        int block_width = (width +15)>>4;
3355
        int block_height= (height+15)>>4;
3356
        int stride= s->current_picture.linesize[0];
3357
        
3358
        assert(s->current_picture.data[0]);
3359
        assert(s->last_picture.data[0]);
3360
     
3361
        s->m.avctx= s->avctx;
3362
        s->m.current_picture.data[0]= s->current_picture.data[0];
3363
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
3364
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3365
        s->m.current_picture_ptr= &s->m.current_picture;
3366
        s->m.   last_picture_ptr= &s->m.   last_picture;
3367
        s->m.linesize=
3368
        s->m.   last_picture.linesize[0]=
3369
        s->m.    new_picture.linesize[0]=
3370
        s->m.current_picture.linesize[0]= stride;
3371
        s->m.uvlinesize= s->current_picture.linesize[1];
3372
        s->m.width = width;
3373
        s->m.height= height;
3374
        s->m.mb_width = block_width;
3375
        s->m.mb_height= block_height;
3376
        s->m.mb_stride=   s->m.mb_width+1;
3377
        s->m.b8_stride= 2*s->m.mb_width+1;
3378
        s->m.f_code=1;
3379
        s->m.pict_type= pict->pict_type;
3380
        s->m.me_method= s->avctx->me_method;
3381
        s->m.me.scene_change_score=0;
3382
        s->m.flags= s->avctx->flags;
3383
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3384
        s->m.out_format= FMT_H263;
3385
        s->m.unrestricted_mv= 1;
3386

    
3387
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3388
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3389
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3390

    
3391
        s->m.dsp= s->dsp; //move
3392
        ff_init_me(&s->m);
3393
    }
3394
    
3395
redo_frame:
3396
        
3397
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3398

    
3399
    encode_header(s);
3400
    encode_blocks(s);
3401
      
3402
    for(plane_index=0; plane_index<3; plane_index++){
3403
        Plane *p= &s->plane[plane_index];
3404
        int w= p->width;
3405
        int h= p->height;
3406
        int x, y;
3407
//        int bits= put_bits_count(&s->c.pb);
3408

    
3409
        //FIXME optimize
3410
     if(pict->data[plane_index]) //FIXME gray hack
3411
        for(y=0; y<h; y++){
3412
            for(x=0; x<w; x++){
3413
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3414
            }
3415
        }
3416
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3417
        
3418
        if(   plane_index==0 
3419
           && pict->pict_type == P_TYPE 
3420
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3421
            ff_init_range_encoder(c, buf, buf_size);
3422
            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3423
            pict->pict_type= FF_I_TYPE;
3424
            s->keyframe=1;
3425
            reset_contexts(s);
3426
            goto redo_frame;
3427
        }
3428
        
3429
        if(s->qlog == LOSSLESS_QLOG){
3430
            for(y=0; y<h; y++){
3431
                for(x=0; x<w; x++){
3432
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1)))>>FRAC_BITS;
3433
                }
3434
            }
3435
        }
3436
 
3437
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3438

    
3439
        for(level=0; level<s->spatial_decomposition_count; level++){
3440
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3441
                SubBand *b= &p->band[level][orientation];
3442
                
3443
                quantize(s, b, b->buf, b->stride, s->qbias);
3444
                if(orientation==0)
3445
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3446
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3447
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
3448
                if(orientation==0)
3449
                    correlate(s, b, b->buf, b->stride, 1, 0);
3450
            }
3451
        }
3452
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3453

    
3454
        for(level=0; level<s->spatial_decomposition_count; level++){
3455
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3456
                SubBand *b= &p->band[level][orientation];
3457

    
3458
                dequantize(s, b, b->buf, b->stride);
3459
            }
3460
        }
3461

    
3462
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3463
        if(s->qlog == LOSSLESS_QLOG){
3464
            for(y=0; y<h; y++){
3465
                for(x=0; x<w; x++){
3466
                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
3467
                }
3468
            }
3469
        }
3470
{START_TIMER
3471
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3472
STOP_TIMER("pred-conv")}
3473
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3474
            int64_t error= 0;
3475
            
3476
    if(pict->data[plane_index]) //FIXME gray hack
3477
            for(y=0; y<h; y++){
3478
                for(x=0; x<w; x++){
3479
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3480
                    error += d*d;
3481
                }
3482
            }
3483
            s->avctx->error[plane_index] += error;
3484
            s->current_picture.error[plane_index] = error;
3485
        }
3486
    }
3487

    
3488
    if(s->last_picture.data[0])
3489
        avctx->release_buffer(avctx, &s->last_picture);
3490

    
3491
    emms_c();
3492
    
3493
    return ff_rac_terminate(c);
3494
}
3495

    
3496
static void common_end(SnowContext *s){
3497
    int plane_index, level, orientation;
3498

    
3499
    av_freep(&s->spatial_dwt_buffer);
3500

    
3501
    av_freep(&s->m.me.scratchpad);    
3502
    av_freep(&s->m.me.map);
3503
    av_freep(&s->m.me.score_map);
3504
 
3505
    av_freep(&s->block);
3506

    
3507
    for(plane_index=0; plane_index<3; plane_index++){    
3508
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3509
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3510
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3511
                
3512
                av_freep(&b->x_coeff);
3513
            }
3514
        }
3515
    }
3516
}
3517

    
3518
static int encode_end(AVCodecContext *avctx)
3519
{
3520
    SnowContext *s = avctx->priv_data;
3521

    
3522
    common_end(s);
3523

    
3524
    return 0;
3525
}
3526

    
3527
static int decode_init(AVCodecContext *avctx)
3528
{
3529
    SnowContext *s = avctx->priv_data;
3530
    int block_size;
3531

    
3532
    common_init(avctx);
3533
    
3534
    block_size = MB_SIZE >> s->block_max_depth;
3535
    /* FIXME block_size * 2 is determined empirically. block_size * 1.5 is definitely needed, but I (Robert) cannot figure out why more than that is needed. Perhaps there is a bug, or perhaps I overlooked some demands that are placed on the buffer. */
3536
    /* FIXME The formula is WRONG. For height > 480, the buffer will overflow. */
3537
    /* FIXME For now, I will use a full frame of lines. Fortunately, this should not materially effect cache performance because lines are allocated using a stack, so if in fact only 50 out of 496 lines are needed at a time, the other 446 will sit allocated but never accessed. */
3538
//    slice_buffer_init(s->plane[0].sb, s->plane[0].height, (block_size * 2) + (s->spatial_decomposition_count * s->spatial_decomposition_count), s->plane[0].width, s->spatial_dwt_buffer);
3539
    slice_buffer_init(&s->sb, s->plane[0].height, s->plane[0].height, s->plane[0].width, s->spatial_dwt_buffer);
3540
    
3541
    return 0;
3542
}
3543

    
3544
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3545
    SnowContext *s = avctx->priv_data;
3546
    RangeCoder * const c= &s->c;
3547
    int bytes_read;
3548
    AVFrame *picture = data;
3549
    int level, orientation, plane_index;
3550

    
3551
    ff_init_range_decoder(c, buf, buf_size);
3552
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3553

    
3554
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3555
    decode_header(s);
3556
    if(!s->block) alloc_blocks(s);
3557

    
3558
    frame_start(s);
3559
    //keyframe flag dupliaction mess FIXME
3560
    if(avctx->debug&FF_DEBUG_PICT_INFO)
3561
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3562
    
3563
    decode_blocks(s);
3564

    
3565
    for(plane_index=0; plane_index<3; plane_index++){
3566
        Plane *p= &s->plane[plane_index];
3567
        int w= p->width;
3568
        int h= p->height;
3569
        int x, y;
3570
        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
3571
        SubBand * correlate_band;
3572
        
3573
if(s->avctx->debug&2048){
3574
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3575
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3576

    
3577
        for(y=0; y<h; y++){
3578
            for(x=0; x<w; x++){
3579
                int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
3580
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3581
            }
3582
        }
3583
}
3584

    
3585
{   START_TIMER
3586
    for(level=0; level<s->spatial_decomposition_count; level++){
3587
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3588
            SubBand *b= &p->band[level][orientation];
3589
            unpack_coeffs(s, b, b->parent, orientation);
3590
        }
3591
    }
3592
    STOP_TIMER("unpack coeffs");
3593
}
3594
        
3595
        /* Handle level 0, orientation 0 specially. It is particularly resistant to slicing but fortunately quite small, so process it in one pass. */
3596
        correlate_band = &p->band[0][0];
3597
        decode_subband_slice_buffered(s, correlate_band, &s->sb, 0, correlate_band->height, decode_state[0][0]);
3598
        correlate_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0);
3599
        dequantize_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride);
3600

    
3601
{START_TIMER
3602
    const int mb_h= s->b_height << s->block_max_depth;
3603
    const int block_size = MB_SIZE >> s->block_max_depth;
3604
    const int block_w    = plane_index ? block_size/2 : block_size;
3605
    int mb_y;
3606
    dwt_compose_t cs[MAX_DECOMPOSITIONS];
3607
    int yd=0, yq=0;
3608
    int y;
3609
    int end_y;
3610

    
3611
    ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
3612
    for(mb_y=0; mb_y<=mb_h; mb_y++){
3613
        
3614
        const int slice_starty = block_w*mb_y;
3615
        const int slice_h = block_w*(mb_y+1);
3616

    
3617
        {        
3618
        START_TIMER
3619
        for(level=0; level<s->spatial_decomposition_count; level++){
3620
            for(orientation=level ? 1 : 1; orientation<4; orientation++){
3621
                SubBand *b= &p->band[level][orientation];
3622
                int start_y;
3623
                int end_y;
3624
                int our_mb_start = mb_y;
3625
                int our_mb_end = (mb_y + 1);
3626
                start_y = FFMIN(b->height, (mb_y ? ((block_w * our_mb_start - 4) >> (s->spatial_decomposition_count - level)) + 5 : 0));
3627
                end_y = FFMIN(b->height, (((block_w * our_mb_end - 4) >> (s->spatial_decomposition_count - level)) + 5));
3628
                    
3629
                if (start_y != end_y)
3630
                    decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
3631
            }
3632
        }
3633
        STOP_TIMER("decode_subband_slice");
3634
        }
3635
        
3636
{   START_TIMER
3637
        for(; yd<slice_h; yd+=4){
3638
            ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
3639
        }
3640
    STOP_TIMER("idwt slice");}
3641

    
3642
        
3643
        if(s->qlog == LOSSLESS_QLOG){
3644
            for(; yq<slice_h && yq<h; yq++){
3645
                DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
3646
                for(x=0; x<w; x++){
3647
                    line[x] <<= FRAC_BITS;
3648
                }
3649
            }
3650
        }
3651

    
3652
        predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
3653
        
3654
        /* Nasty hack based empirically on how predict_slice_buffered() hits the buffer. */
3655
        /* FIXME If possible, make predict_slice fit into the slice. As of now, it works on some previous lines (up to slice_height / 2) if the condition on the next line is false. */
3656
        if (s->keyframe || (s->avctx->debug&512)){
3657
            y = FFMIN(p->height, slice_starty);
3658
            end_y = FFMIN(p->height, slice_h);
3659
        }
3660
        else{
3661
            y = FFMAX(0, FFMIN(p->height, slice_starty - (block_w >> 1)));
3662
            end_y = FFMAX(0, FFMIN(p->height, slice_h - (block_w >> 1)));
3663
        }
3664
        while(y < end_y)
3665
            slice_buffer_release(&s->sb, y++);
3666
    }
3667
    
3668
    slice_buffer_flush(&s->sb);
3669
    
3670
STOP_TIMER("idwt + predict_slices")}
3671
    }
3672
            
3673
    emms_c();
3674

    
3675
    if(s->last_picture.data[0])
3676
        avctx->release_buffer(avctx, &s->last_picture);
3677

    
3678
if(!(s->avctx->debug&2048))        
3679
    *picture= s->current_picture;
3680
else
3681
    *picture= s->mconly_picture;
3682
    
3683
    *data_size = sizeof(AVFrame);
3684
    
3685
    bytes_read= c->bytestream - c->bytestream_start;
3686
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
3687

    
3688
    return bytes_read;
3689
}
3690

    
3691
static int decode_end(AVCodecContext *avctx)
3692
{
3693
    SnowContext *s = avctx->priv_data;
3694

    
3695
    slice_buffer_destroy(&s->sb);
3696
    
3697
    common_end(s);
3698

    
3699
    return 0;
3700
}
3701

    
3702
AVCodec snow_decoder = {
3703
    "snow",
3704
    CODEC_TYPE_VIDEO,
3705
    CODEC_ID_SNOW,
3706
    sizeof(SnowContext),
3707
    decode_init,
3708
    NULL,
3709
    decode_end,
3710
    decode_frame,
3711
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3712
    NULL
3713
};
3714

    
3715
#ifdef CONFIG_ENCODERS
3716
AVCodec snow_encoder = {
3717
    "snow",
3718
    CODEC_TYPE_VIDEO,
3719
    CODEC_ID_SNOW,
3720
    sizeof(SnowContext),
3721
    encode_init,
3722
    encode_frame,
3723
    encode_end,
3724
};
3725
#endif
3726

    
3727

    
3728
#if 0
3729
#undef malloc
3730
#undef free
3731
#undef printf
3732

3733
int main(){
3734
    int width=256;
3735
    int height=256;
3736
    int buffer[2][width*height];
3737
    SnowContext s;
3738
    int i;
3739
    s.spatial_decomposition_count=6;
3740
    s.spatial_decomposition_type=1;
3741
    
3742
    printf("testing 5/3 DWT\n");
3743
    for(i=0; i<width*height; i++)
3744
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3745
    
3746
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3747
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3748
    
3749
    for(i=0; i<width*height; i++)
3750
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3751

3752
    printf("testing 9/7 DWT\n");
3753
    s.spatial_decomposition_type=0;
3754
    for(i=0; i<width*height; i++)
3755
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3756
    
3757
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3758
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3759
    
3760
    for(i=0; i<width*height; i++)
3761
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3762
        
3763
    printf("testing AC coder\n");
3764
    memset(s.header_state, 0, sizeof(s.header_state));
3765
    ff_init_range_encoder(&s.c, buffer[0], 256*256);
3766
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3767
        
3768
    for(i=-256; i<256; i++){
3769
START_TIMER
3770
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3771
STOP_TIMER("put_symbol")
3772
    }
3773
    ff_rac_terminate(&s.c);
3774

3775
    memset(s.header_state, 0, sizeof(s.header_state));
3776
    ff_init_range_decoder(&s.c, buffer[0], 256*256);
3777
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3778
    
3779
    for(i=-256; i<256; i++){
3780
        int j;
3781
START_TIMER
3782
        j= get_symbol(&s.c, s.header_state, 1);
3783
STOP_TIMER("get_symbol")
3784
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3785
    }
3786
{
3787
int level, orientation, x, y;
3788
int64_t errors[8][4];
3789
int64_t g=0;
3790

3791
    memset(errors, 0, sizeof(errors));
3792
    s.spatial_decomposition_count=3;
3793
    s.spatial_decomposition_type=0;
3794
    for(level=0; level<s.spatial_decomposition_count; level++){
3795
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3796
            int w= width  >> (s.spatial_decomposition_count-level);
3797
            int h= height >> (s.spatial_decomposition_count-level);
3798
            int stride= width  << (s.spatial_decomposition_count-level);
3799
            DWTELEM *buf= buffer[0];
3800
            int64_t error=0;
3801

3802
            if(orientation&1) buf+=w;
3803
            if(orientation>1) buf+=stride>>1;
3804
            
3805
            memset(buffer[0], 0, sizeof(int)*width*height);
3806
            buf[w/2 + h/2*stride]= 256*256;
3807
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3808
            for(y=0; y<height; y++){
3809
                for(x=0; x<width; x++){
3810
                    int64_t d= buffer[0][x + y*width];
3811
                    error += d*d;
3812
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3813
                }
3814
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3815
            }
3816
            error= (int)(sqrt(error)+0.5);
3817
            errors[level][orientation]= error;
3818
            if(g) g=ff_gcd(g, error);
3819
            else g= error;
3820
        }
3821
    }
3822
    printf("static int const visual_weight[][4]={\n");
3823
    for(level=0; level<s.spatial_decomposition_count; level++){
3824
        printf("  {");
3825
        for(orientation=0; orientation<4; orientation++){
3826
            printf("%8lld,", errors[level][orientation]/g);
3827
        }
3828
        printf("},\n");
3829
    }
3830
    printf("};\n");
3831
    {
3832
            int level=2;
3833
            int orientation=3;
3834
            int w= width  >> (s.spatial_decomposition_count-level);
3835
            int h= height >> (s.spatial_decomposition_count-level);
3836
            int stride= width  << (s.spatial_decomposition_count-level);
3837
            DWTELEM *buf= buffer[0];
3838
            int64_t error=0;
3839

3840
            buf+=w;
3841
            buf+=stride>>1;
3842
            
3843
            memset(buffer[0], 0, sizeof(int)*width*height);
3844
#if 1
3845
            for(y=0; y<height; y++){
3846
                for(x=0; x<width; x++){
3847
                    int tab[4]={0,2,3,1};
3848
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3849
                }
3850
            }
3851
            ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3852
#else
3853
            for(y=0; y<h; y++){
3854
                for(x=0; x<w; x++){
3855
                    buf[x + y*stride  ]=169;
3856
                    buf[x + y*stride-w]=64;
3857
                }
3858
            }
3859
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3860
#endif
3861
            for(y=0; y<height; y++){
3862
                for(x=0; x<width; x++){
3863
                    int64_t d= buffer[0][x + y*width];
3864
                    error += d*d;
3865
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3866
                }
3867
                if(ABS(height/2-y)<9) printf("\n");
3868
            }
3869
    }
3870

    
3871
}
3872
    return 0;
3873
}
3874
#endif
3875