Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 0cea8a03

History | View | Annotate | Download (118 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22
#include "cabac.h"
23

    
24
#include "mpegvideo.h"
25

    
26
#undef NDEBUG
27
#include <assert.h>
28

    
29
#define MAX_DECOMPOSITIONS 8
30
#define MAX_PLANES 4
31
#define DWTELEM int
32
#define QROOT 8 
33
#define LOSSLESS_QLOG -128
34

    
35
static const int8_t quant3[256]={
36
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
52
};
53
static const int8_t quant3b[256]={
54
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
};
71
static const int8_t quant5[256]={
72
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
88
};
89
static const int8_t quant7[256]={
90
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
93
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
106
};
107
static const int8_t quant9[256]={
108
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
124
};
125
static const int8_t quant11[256]={
126
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
142
};
143
static const int8_t quant13[256]={
144
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
145
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
160
};
161

    
162
#define OBMC_MAX 64
163
#if 0 //64*cubic
164
static const uint8_t obmc32[1024]={
165
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
167
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
168
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
169
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
170
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
171
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
172
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
173
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
174
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
175
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
176
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
177
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
178
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
179
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
180
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
182
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
183
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
184
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
185
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
186
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
187
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
190
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
191
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
194
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
195
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
196
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197
//error:0.000022
198
};
199
static const uint8_t obmc16[256]={
200
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
201
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
202
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
203
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
204
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
205
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
206
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
207
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
209
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
210
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
211
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
212
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
213
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
214
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216
//error:0.000033
217
};
218
#elif 1 // 64*linear
219
static const uint8_t obmc32[1024]={
220
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
221
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
222
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
223
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
224
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
225
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
226
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
227
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
228
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
229
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
230
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
231
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
232
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
233
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
234
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
235
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
237
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
238
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
239
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
240
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
241
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
242
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
243
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
244
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
245
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
246
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
249
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
250
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
251
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
252
 //error:0.000020
253
};
254
static const uint8_t obmc16[256]={
255
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
256
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
257
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
258
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
259
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
260
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
261
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
262
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
264
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
265
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
266
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
267
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
268
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
269
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
270
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
271
//error:0.000015
272
};
273
#else //64*cos
274
static const uint8_t obmc32[1024]={
275
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
277
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
278
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
279
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
280
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
281
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
282
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
283
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
284
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
285
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
286
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
287
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
288
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
289
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
290
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
292
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
293
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
294
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
295
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
296
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
297
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
300
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
301
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
304
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
305
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
306
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307
//error:0.000022
308
};
309
static const uint8_t obmc16[256]={
310
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
311
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
312
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
313
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
314
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
315
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
316
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
317
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
319
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
320
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
321
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
322
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
323
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
324
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326
//error:0.000022
327
};
328
#endif
329

    
330
//linear *64
331
static const uint8_t obmc8[64]={
332
 1, 3, 5, 7, 7, 5, 3, 1,
333
 3, 9,15,21,21,15, 9, 3,
334
 5,15,25,35,35,25,15, 5,
335
 7,21,35,49,49,35,21, 7,
336
 7,21,35,49,49,35,21, 7,
337
 5,15,25,35,35,25,15, 5,
338
 3, 9,15,21,21,15, 9, 3,
339
 1, 3, 5, 7, 7, 5, 3, 1,
340
//error:0.000000
341
};
342

    
343
//linear *64
344
static const uint8_t obmc4[16]={
345
 4,12,12, 4,
346
12,36,36,12,
347
12,36,36,12,
348
 4,12,12, 4,
349
//error:0.000000
350
};
351

    
352
static const uint8_t *obmc_tab[4]={
353
    obmc32, obmc16, obmc8, obmc4
354
};
355

    
356
typedef struct BlockNode{
357
    int16_t mx;
358
    int16_t my;
359
    uint8_t color[3];
360
    uint8_t type;
361
//#define TYPE_SPLIT    1
362
#define BLOCK_INTRA   1
363
//#define TYPE_NOCOLOR  4
364
    uint8_t level; //FIXME merge into type?
365
}BlockNode;
366

    
367
#define LOG2_MB_SIZE 4
368
#define MB_SIZE (1<<LOG2_MB_SIZE)
369

    
370
typedef struct SubBand{
371
    int level;
372
    int stride;
373
    int width;
374
    int height;
375
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
376
    DWTELEM *buf;
377
    int16_t *x;
378
    DWTELEM *coeff;
379
    struct SubBand *parent;
380
    uint8_t state[/*7*2*/ 7 + 512][32];
381
}SubBand;
382

    
383
typedef struct Plane{
384
    int width;
385
    int height;
386
    SubBand band[MAX_DECOMPOSITIONS][4];
387
}Plane;
388

    
389
typedef struct SnowContext{
390
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
391

    
392
    AVCodecContext *avctx;
393
    CABACContext c;
394
    DSPContext dsp;
395
    AVFrame input_picture;
396
    AVFrame current_picture;
397
    AVFrame last_picture;
398
    AVFrame mconly_picture;
399
//     uint8_t q_context[16];
400
    uint8_t header_state[32];
401
    uint8_t block_state[128 + 32*128];
402
    int keyframe;
403
    int version;
404
    int spatial_decomposition_type;
405
    int temporal_decomposition_type;
406
    int spatial_decomposition_count;
407
    int temporal_decomposition_count;
408
    DWTELEM *spatial_dwt_buffer;
409
    DWTELEM *pred_buffer;
410
    int colorspace_type;
411
    int chroma_h_shift;
412
    int chroma_v_shift;
413
    int spatial_scalability;
414
    int qlog;
415
    int lambda;
416
    int lambda2;
417
    int mv_scale;
418
    int qbias;
419
#define QBIAS_SHIFT 3
420
    int b_width;
421
    int b_height;
422
    int block_max_depth;
423
    Plane plane[MAX_PLANES];
424
    BlockNode *block;
425

    
426
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
427
}SnowContext;
428

    
429
#define QEXPSHIFT 7 //FIXME try to change this to 0
430
static const uint8_t qexp[8]={
431
    128, 140, 152, 166, 181, 197, 215, 235
432
//   64,  70,  76,  83,  91,  99, 108, 117
433
//   32,  35,  38,  41,  45,  49,  54,  59
434
//   16,  17,  19,  21,  23,  25,  27,  29
435
//    8,   9,  10,  10,  11,  12,  13,  15
436
};
437

    
438
static inline int mirror(int v, int m){
439
    if     (v<0) return -v;
440
    else if(v>m) return 2*m-v;
441
    else         return v;
442
}
443

    
444
static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
445
    int i;
446

    
447
    if(v){
448
        const int a= ABS(v);
449
        const int e= av_log2(a);
450
#if 1
451
        const int el= FFMIN(e, 10);   
452
        put_cabac(c, state+0, 0);
453

    
454
        for(i=0; i<el; i++){
455
            put_cabac(c, state+1+i, 1);  //1..10
456
        }
457
        for(; i<e; i++){
458
            put_cabac(c, state+1+9, 1);  //1..10
459
        }
460
        put_cabac(c, state+1+FFMIN(i,9), 0);
461

    
462
        for(i=e-1; i>=el; i--){
463
            put_cabac(c, state+22+9, (a>>i)&1); //22..31
464
        }
465
        for(; i>=0; i--){
466
            put_cabac(c, state+22+i, (a>>i)&1); //22..31
467
        }
468

    
469
        if(is_signed)
470
            put_cabac(c, state+11 + el, v < 0); //11..21
471
#else
472
        
473
        put_cabac(c, state+0, 0);
474
        if(e<=9){
475
            for(i=0; i<e; i++){
476
                put_cabac(c, state+1+i, 1);  //1..10
477
            }
478
            put_cabac(c, state+1+i, 0);
479

    
480
            for(i=e-1; i>=0; i--){
481
                put_cabac(c, state+22+i, (a>>i)&1); //22..31
482
            }
483

    
484
            if(is_signed)
485
                put_cabac(c, state+11 + e, v < 0); //11..21
486
        }else{
487
            for(i=0; i<e; i++){
488
                put_cabac(c, state+1+FFMIN(i,9), 1);  //1..10
489
            }
490
            put_cabac(c, state+1+FFMIN(i,9), 0);
491

    
492
            for(i=e-1; i>=0; i--){
493
                put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
494
            }
495

    
496
            if(is_signed)
497
                put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
498
        }
499
#endif
500
    }else{
501
        put_cabac(c, state+0, 1);
502
    }
503
}
504

    
505
static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
506
    if(get_cabac(c, state+0))
507
        return 0;
508
    else{
509
        int i, e, a, el;
510
 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
511
        for(e=0; e<10; e++){ 
512
            if(get_cabac(c, state + 1 + e)==0) // 1..10
513
                break;
514
        }
515
        el= e;
516
 
517
        if(e==10){
518
            while(get_cabac(c, state + 1 + 9)) //10
519
                e++;
520
        }
521
        a= 1;
522
        for(i=e-1; i>=el; i--){
523
            a += a + get_cabac(c, state+22+9); //31
524
        }
525
        for(; i>=0; i--){
526
            a += a + get_cabac(c, state+22+i); //22..31
527
        }
528

    
529
        if(is_signed && get_cabac(c, state+11 + el)) //11..21
530
            return -a;
531
        else
532
            return a;
533
    }
534
}
535

    
536
static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
537
    int i;
538
    int r= log2>=0 ? 1<<log2 : 1;
539

    
540
    assert(v>=0);
541
    assert(log2>=-4);
542

    
543
    while(v >= r){
544
        put_cabac(c, state+4+log2, 1);
545
        v -= r;
546
        log2++;
547
        if(log2>0) r+=r;
548
    }
549
    put_cabac(c, state+4+log2, 0);
550
    
551
    for(i=log2-1; i>=0; i--){
552
        put_cabac(c, state+31-i, (v>>i)&1);
553
    }
554
}
555

    
556
static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
557
    int i;
558
    int r= log2>=0 ? 1<<log2 : 1;
559
    int v=0;
560

    
561
    assert(log2>=-4);
562

    
563
    while(get_cabac(c, state+4+log2)){
564
        v+= r;
565
        log2++;
566
        if(log2>0) r+=r;
567
    }
568
    
569
    for(i=log2-1; i>=0; i--){
570
        v+= get_cabac(c, state+31-i)<<i;
571
    }
572

    
573
    return v;
574
}
575

    
576
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
577
    const int mirror_left= !highpass;
578
    const int mirror_right= (width&1) ^ highpass;
579
    const int w= (width>>1) - 1 + (highpass & width);
580
    int i;
581

    
582
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
583
    if(mirror_left){
584
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
585
        dst += dst_step;
586
        src += src_step;
587
    }
588
    
589
    for(i=0; i<w; i++){
590
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
591
    }
592
    
593
    if(mirror_right){
594
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
595
    }
596
}
597

    
598
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
599
    const int mirror_left= !highpass;
600
    const int mirror_right= (width&1) ^ highpass;
601
    const int w= (width>>1) - 1 + (highpass & width);
602
    int i;
603

    
604
    if(mirror_left){
605
        int r= 3*2*ref[0];
606
        r += r>>4;
607
        r += r>>8;
608
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
609
        dst += dst_step;
610
        src += src_step;
611
    }
612
    
613
    for(i=0; i<w; i++){
614
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
615
        r += r>>4;
616
        r += r>>8;
617
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
618
    }
619
    
620
    if(mirror_right){
621
        int r= 3*2*ref[w*ref_step];
622
        r += r>>4;
623
        r += r>>8;
624
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
625
    }
626
}
627

    
628

    
629
static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
630
    int x, i;
631
    
632
    for(x=start; x<width; x+=2){
633
        int64_t sum=0;
634

    
635
        for(i=0; i<n; i++){
636
            int x2= x + 2*i - n + 1;
637
            if     (x2<     0) x2= -x2;
638
            else if(x2>=width) x2= 2*width-x2-2;
639
            sum += coeffs[i]*(int64_t)dst[x2];
640
        }
641
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
642
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
643
    }
644
}
645

    
646
static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
647
    int x, y, i;
648
    for(y=start; y<height; y+=2){
649
        for(x=0; x<width; x++){
650
            int64_t sum=0;
651
    
652
            for(i=0; i<n; i++){
653
                int y2= y + 2*i - n + 1;
654
                if     (y2<      0) y2= -y2;
655
                else if(y2>=height) y2= 2*height-y2-2;
656
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
657
            }
658
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
659
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
660
        }
661
    }
662
}
663

    
664
#define SCALEX 1
665
#define LX0 0
666
#define LX1 1
667

    
668
#if 0 // more accurate 9/7
669
#define N1 2
670
#define SHIFT1 14
671
#define COEFFS1 (int[]){-25987,-25987}
672
#define N2 2
673
#define SHIFT2 19
674
#define COEFFS2 (int[]){-27777,-27777}
675
#define N3 2
676
#define SHIFT3 15
677
#define COEFFS3 (int[]){28931,28931}
678
#define N4 2
679
#define SHIFT4 15
680
#define COEFFS4 (int[]){14533,14533}
681
#elif 1 // 13/7 CRF
682
#define N1 4
683
#define SHIFT1 4
684
#define COEFFS1 (int[]){1,-9,-9,1}
685
#define N2 4
686
#define SHIFT2 4
687
#define COEFFS2 (int[]){-1,5,5,-1}
688
#define N3 0
689
#define SHIFT3 1
690
#define COEFFS3 NULL
691
#define N4 0
692
#define SHIFT4 1
693
#define COEFFS4 NULL
694
#elif 1 // 3/5
695
#define LX0 1
696
#define LX1 0
697
#define SCALEX 0.5
698
#define N1 2
699
#define SHIFT1 1
700
#define COEFFS1 (int[]){1,1}
701
#define N2 2
702
#define SHIFT2 2
703
#define COEFFS2 (int[]){-1,-1}
704
#define N3 0
705
#define SHIFT3 0
706
#define COEFFS3 NULL
707
#define N4 0
708
#define SHIFT4 0
709
#define COEFFS4 NULL
710
#elif 1 // 11/5 
711
#define N1 0
712
#define SHIFT1 1
713
#define COEFFS1 NULL
714
#define N2 2
715
#define SHIFT2 2
716
#define COEFFS2 (int[]){-1,-1}
717
#define N3 2
718
#define SHIFT3 0
719
#define COEFFS3 (int[]){-1,-1}
720
#define N4 4
721
#define SHIFT4 7
722
#define COEFFS4 (int[]){-5,29,29,-5}
723
#define SCALEX 4
724
#elif 1 // 9/7 CDF
725
#define N1 2
726
#define SHIFT1 7
727
#define COEFFS1 (int[]){-203,-203}
728
#define N2 2
729
#define SHIFT2 12
730
#define COEFFS2 (int[]){-217,-217}
731
#define N3 2
732
#define SHIFT3 7
733
#define COEFFS3 (int[]){113,113}
734
#define N4 2
735
#define SHIFT4 9
736
#define COEFFS4 (int[]){227,227}
737
#define SCALEX 1
738
#elif 1 // 7/5 CDF
739
#define N1 0
740
#define SHIFT1 1
741
#define COEFFS1 NULL
742
#define N2 2
743
#define SHIFT2 2
744
#define COEFFS2 (int[]){-1,-1}
745
#define N3 2
746
#define SHIFT3 0
747
#define COEFFS3 (int[]){-1,-1}
748
#define N4 2
749
#define SHIFT4 4
750
#define COEFFS4 (int[]){3,3}
751
#elif 1 // 9/7 MN
752
#define N1 4
753
#define SHIFT1 4
754
#define COEFFS1 (int[]){1,-9,-9,1}
755
#define N2 2
756
#define SHIFT2 2
757
#define COEFFS2 (int[]){1,1}
758
#define N3 0
759
#define SHIFT3 1
760
#define COEFFS3 NULL
761
#define N4 0
762
#define SHIFT4 1
763
#define COEFFS4 NULL
764
#else // 13/7 CRF
765
#define N1 4
766
#define SHIFT1 4
767
#define COEFFS1 (int[]){1,-9,-9,1}
768
#define N2 4
769
#define SHIFT2 4
770
#define COEFFS2 (int[]){-1,5,5,-1}
771
#define N3 0
772
#define SHIFT3 1
773
#define COEFFS3 NULL
774
#define N4 0
775
#define SHIFT4 1
776
#define COEFFS4 NULL
777
#endif
778
static void horizontal_decomposeX(int *b, int width){
779
    int temp[width];
780
    const int width2= width>>1;
781
    const int w2= (width+1)>>1;
782
    int A1,A2,A3,A4, x;
783

    
784
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
785
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
786
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
787
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
788
    
789
    for(x=0; x<width2; x++){
790
        temp[x   ]= b[2*x    ];
791
        temp[x+w2]= b[2*x + 1];
792
    }
793
    if(width&1)
794
        temp[x   ]= b[2*x    ];
795
    memcpy(b, temp, width*sizeof(int));
796
}
797

    
798
static void horizontal_composeX(int *b, int width){
799
    int temp[width];
800
    const int width2= width>>1;
801
    int A1,A2,A3,A4, x;
802
    const int w2= (width+1)>>1;
803

    
804
    memcpy(temp, b, width*sizeof(int));
805
    for(x=0; x<width2; x++){
806
        b[2*x    ]= temp[x   ];
807
        b[2*x + 1]= temp[x+w2];
808
    }
809
    if(width&1)
810
        b[2*x    ]= temp[x   ];
811

    
812
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
813
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
814
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
815
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
816
}
817

    
818
static void spatial_decomposeX(int *buffer, int width, int height, int stride){
819
    int x, y;
820
  
821
    for(y=0; y<height; y++){
822
        for(x=0; x<width; x++){
823
            buffer[y*stride + x] *= SCALEX;
824
        }
825
    }
826

    
827
    for(y=0; y<height; y++){
828
        horizontal_decomposeX(buffer + y*stride, width);
829
    }
830
    
831
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
832
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
833
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
834
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
835
}
836

    
837
static void spatial_composeX(int *buffer, int width, int height, int stride){
838
    int x, y;
839
  
840
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
841
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
842
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
843
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
844

    
845
    for(y=0; y<height; y++){
846
        horizontal_composeX(buffer + y*stride, width);
847
    }
848

    
849
    for(y=0; y<height; y++){
850
        for(x=0; x<width; x++){
851
            buffer[y*stride + x] /= SCALEX;
852
        }
853
    }
854
}
855

    
856
static void horizontal_decompose53i(int *b, int width){
857
    int temp[width];
858
    const int width2= width>>1;
859
    int A1,A2,A3,A4, x;
860
    const int w2= (width+1)>>1;
861

    
862
    for(x=0; x<width2; x++){
863
        temp[x   ]= b[2*x    ];
864
        temp[x+w2]= b[2*x + 1];
865
    }
866
    if(width&1)
867
        temp[x   ]= b[2*x    ];
868
#if 0
869
    A2= temp[1       ];
870
    A4= temp[0       ];
871
    A1= temp[0+width2];
872
    A1 -= (A2 + A4)>>1;
873
    A4 += (A1 + 1)>>1;
874
    b[0+width2] = A1;
875
    b[0       ] = A4;
876
    for(x=1; x+1<width2; x+=2){
877
        A3= temp[x+width2];
878
        A4= temp[x+1     ];
879
        A3 -= (A2 + A4)>>1;
880
        A2 += (A1 + A3 + 2)>>2;
881
        b[x+width2] = A3;
882
        b[x       ] = A2;
883

884
        A1= temp[x+1+width2];
885
        A2= temp[x+2       ];
886
        A1 -= (A2 + A4)>>1;
887
        A4 += (A1 + A3 + 2)>>2;
888
        b[x+1+width2] = A1;
889
        b[x+1       ] = A4;
890
    }
891
    A3= temp[width-1];
892
    A3 -= A2;
893
    A2 += (A1 + A3 + 2)>>2;
894
    b[width -1] = A3;
895
    b[width2-1] = A2;
896
#else        
897
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
898
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
899
#endif
900
}
901

    
902
static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
903
    int i;
904
    
905
    for(i=0; i<width; i++){
906
        b1[i] -= (b0[i] + b2[i])>>1;
907
    }
908
}
909

    
910
static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
911
    int i;
912
    
913
    for(i=0; i<width; i++){
914
        b1[i] += (b0[i] + b2[i] + 2)>>2;
915
    }
916
}
917

    
918
static void spatial_decompose53i(int *buffer, int width, int height, int stride){
919
    int x, y;
920
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
921
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
922
  
923
    for(y=-2; y<height; y+=2){
924
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
925
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
926

    
927
{START_TIMER
928
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
929
        if(y+2 < height) horizontal_decompose53i(b3, width);
930
STOP_TIMER("horizontal_decompose53i")}
931
        
932
{START_TIMER
933
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
934
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
935
STOP_TIMER("vertical_decompose53i*")}
936
        
937
        b0=b2;
938
        b1=b3;
939
    }
940
}
941

    
942
#define lift5 lift
943
#if 1
944
#define W_AM 3
945
#define W_AO 0
946
#define W_AS 1
947

    
948
#define W_BM 1
949
#define W_BO 8
950
#define W_BS 4
951

    
952
#undef lift5
953
#define W_CM 9999
954
#define W_CO 2
955
#define W_CS 2
956

    
957
#define W_DM 15
958
#define W_DO 16
959
#define W_DS 5
960
#elif 0
961
#define W_AM 55
962
#define W_AO 16
963
#define W_AS 5
964

    
965
#define W_BM 3
966
#define W_BO 32
967
#define W_BS 6
968

    
969
#define W_CM 127
970
#define W_CO 64
971
#define W_CS 7
972

    
973
#define W_DM 7
974
#define W_DO 8
975
#define W_DS 4
976
#elif 0
977
#define W_AM 97
978
#define W_AO 32
979
#define W_AS 6
980

    
981
#define W_BM 63
982
#define W_BO 512
983
#define W_BS 10
984

    
985
#define W_CM 13
986
#define W_CO 8
987
#define W_CS 4
988

    
989
#define W_DM 15
990
#define W_DO 16
991
#define W_DS 5
992

    
993
#else
994

    
995
#define W_AM 203
996
#define W_AO 64
997
#define W_AS 7
998

    
999
#define W_BM 217
1000
#define W_BO 2048
1001
#define W_BS 12
1002

    
1003
#define W_CM 113
1004
#define W_CO 64
1005
#define W_CS 7
1006

    
1007
#define W_DM 227
1008
#define W_DO 128
1009
#define W_DS 9
1010
#endif
1011
static void horizontal_decompose97i(int *b, int width){
1012
    int temp[width];
1013
    const int w2= (width+1)>>1;
1014

    
1015
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1016
    lift (temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1017
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1018
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1019
}
1020

    
1021

    
1022
static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
1023
    int i;
1024
    
1025
    for(i=0; i<width; i++){
1026
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1027
    }
1028
}
1029

    
1030
static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
1031
    int i;
1032
    
1033
    for(i=0; i<width; i++){
1034
#ifdef lift5
1035
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1036
#else
1037
        int r= 3*(b0[i] + b2[i]);
1038
        r+= r>>4;
1039
        r+= r>>8;
1040
        b1[i] += (r+W_CO)>>W_CS;
1041
#endif
1042
    }
1043
}
1044

    
1045
static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
1046
    int i;
1047
    
1048
    for(i=0; i<width; i++){
1049
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1050
    }
1051
}
1052

    
1053
static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
1054
    int i;
1055
    
1056
    for(i=0; i<width; i++){
1057
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1058
    }
1059
}
1060

    
1061
static void spatial_decompose97i(int *buffer, int width, int height, int stride){
1062
    int x, y;
1063
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1064
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1065
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1066
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1067
  
1068
    for(y=-4; y<height; y+=2){
1069
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1070
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1071

    
1072
{START_TIMER
1073
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1074
        if(y+4 < height) horizontal_decompose97i(b5, width);
1075
if(width>400){
1076
STOP_TIMER("horizontal_decompose97i")
1077
}}
1078
        
1079
{START_TIMER
1080
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1081
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1082
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1083
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1084

    
1085
if(width>400){
1086
STOP_TIMER("vertical_decompose97i")
1087
}}
1088
        
1089
        b0=b2;
1090
        b1=b3;
1091
        b2=b4;
1092
        b3=b5;
1093
    }
1094
}
1095

    
1096
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
1097
    int level;
1098
    
1099
    for(level=0; level<decomposition_count; level++){
1100
        switch(type){
1101
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1102
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1103
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1104
        }
1105
    }
1106
}
1107

    
1108
static void horizontal_compose53i(int *b, int width){
1109
    int temp[width];
1110
    const int width2= width>>1;
1111
    const int w2= (width+1)>>1;
1112
    int A1,A2,A3,A4, x;
1113

    
1114
#if 0
1115
    A2= temp[1       ];
1116
    A4= temp[0       ];
1117
    A1= temp[0+width2];
1118
    A1 -= (A2 + A4)>>1;
1119
    A4 += (A1 + 1)>>1;
1120
    b[0+width2] = A1;
1121
    b[0       ] = A4;
1122
    for(x=1; x+1<width2; x+=2){
1123
        A3= temp[x+width2];
1124
        A4= temp[x+1     ];
1125
        A3 -= (A2 + A4)>>1;
1126
        A2 += (A1 + A3 + 2)>>2;
1127
        b[x+width2] = A3;
1128
        b[x       ] = A2;
1129

1130
        A1= temp[x+1+width2];
1131
        A2= temp[x+2       ];
1132
        A1 -= (A2 + A4)>>1;
1133
        A4 += (A1 + A3 + 2)>>2;
1134
        b[x+1+width2] = A1;
1135
        b[x+1       ] = A4;
1136
    }
1137
    A3= temp[width-1];
1138
    A3 -= A2;
1139
    A2 += (A1 + A3 + 2)>>2;
1140
    b[width -1] = A3;
1141
    b[width2-1] = A2;
1142
#else   
1143
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1144
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1145
#endif
1146
    for(x=0; x<width2; x++){
1147
        b[2*x    ]= temp[x   ];
1148
        b[2*x + 1]= temp[x+w2];
1149
    }
1150
    if(width&1)
1151
        b[2*x    ]= temp[x   ];
1152
}
1153

    
1154
static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
1155
    int i;
1156
    
1157
    for(i=0; i<width; i++){
1158
        b1[i] += (b0[i] + b2[i])>>1;
1159
    }
1160
}
1161

    
1162
static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
1163
    int i;
1164
    
1165
    for(i=0; i<width; i++){
1166
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1167
    }
1168
}
1169

    
1170
static void spatial_compose53i(int *buffer, int width, int height, int stride){
1171
    int x, y;
1172
    DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1173
    DWTELEM *b1= buffer + mirror(-1  , height-1)*stride;
1174
  
1175
    for(y=-1; y<=height; y+=2){
1176
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1177
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1178

    
1179
{START_TIMER
1180
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1181
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1182
STOP_TIMER("vertical_compose53i*")}
1183

    
1184
{START_TIMER
1185
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1186
        if(b0 <= b2) horizontal_compose53i(b1, width);
1187
STOP_TIMER("horizontal_compose53i")}
1188

    
1189
        b0=b2;
1190
        b1=b3;
1191
    }
1192
}   
1193

    
1194
 
1195
static void horizontal_compose97i(int *b, int width){
1196
    int temp[width];
1197
    const int w2= (width+1)>>1;
1198

    
1199
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1200
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1201
    lift (b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1202
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1203
}
1204

    
1205
static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1206
    int i;
1207
    
1208
    for(i=0; i<width; i++){
1209
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1210
    }
1211
}
1212

    
1213
static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1214
    int i;
1215
    
1216
    for(i=0; i<width; i++){
1217
#ifdef lift5
1218
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1219
#else
1220
        int r= 3*(b0[i] + b2[i]);
1221
        r+= r>>4;
1222
        r+= r>>8;
1223
        b1[i] -= (r+W_CO)>>W_CS;
1224
#endif
1225
    }
1226
}
1227

    
1228
static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1229
    int i;
1230
    
1231
    for(i=0; i<width; i++){
1232
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1233
    }
1234
}
1235

    
1236
static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1237
    int i;
1238
    
1239
    for(i=0; i<width; i++){
1240
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1241
    }
1242
}
1243

    
1244
static void spatial_compose97i(int *buffer, int width, int height, int stride){
1245
    int x, y;
1246
    DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1247
    DWTELEM *b1= buffer + mirror(-3  , height-1)*stride;
1248
    DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1249
    DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1250

    
1251
    for(y=-3; y<=height; y+=2){
1252
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1253
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1254

    
1255
        if(stride == width && y+4 < height && 0){ 
1256
            int x;
1257
            for(x=0; x<width/2; x++)
1258
                b5[x] += 64*2;
1259
            for(; x<width; x++)
1260
                b5[x] += 169*2;
1261
        }
1262
        
1263
{START_TIMER
1264
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1265
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1266
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1267
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1268
if(width>400){
1269
STOP_TIMER("vertical_compose97i")}}
1270

    
1271
{START_TIMER
1272
        if(y-1>=  0) horizontal_compose97i(b0, width);
1273
        if(b0 <= b2) horizontal_compose97i(b1, width);
1274
if(width>400 && b0 <= b2){
1275
STOP_TIMER("horizontal_compose97i")}}
1276
        
1277
        b0=b2;
1278
        b1=b3;
1279
        b2=b4;
1280
        b3=b5;
1281
    }
1282
}
1283

    
1284
void ff_spatial_idwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
1285
    int level;
1286

    
1287
    for(level=decomposition_count-1; level>=0; level--){
1288
        switch(type){
1289
        case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1290
        case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1291
        case 2: spatial_composeX  (buffer, width>>level, height>>level, stride<<level); break;
1292
        }
1293
    }
1294
}
1295

    
1296
static const int hilbert[16][2]={
1297
    {0,0}, {1,0}, {1,1}, {0,1},
1298
    {0,2}, {0,3}, {1,3}, {1,2},
1299
    {2,2}, {2,3}, {3,3}, {3,2},
1300
    {3,1}, {2,1}, {2,0}, {3,0},
1301
};
1302
#if 0
1303
-o o-
1304
 | |
1305
 o-o
1306
 
1307
-o-o o-o-
1308
   | | 
1309
 o-o o-o
1310
 |     |
1311
 o o-o o
1312
 | | | |
1313
 o-o o-o
1314
 
1315
 0112122312232334122323342334
1316
 0123456789ABCDEF0123456789AB
1317
 RLLRMRRLLRRMRLLMLRRLMLLRRLLM
1318
 
1319
 4  B  F 14 1B
1320
 4 11 15 20 27
1321
 
1322
-o o-o-o o-o-o o-
1323
 | |   | |   | |
1324
 o-o o-o o-o o-o
1325
     |     |
1326
 o-o o-o o-o o-o
1327
 | |   | |   | |
1328
 o o-o-o o-o-o o
1329
 |             |
1330
 o-o o-o-o-o o-o
1331
   | |     | | 
1332
 o-o o-o o-o o-o
1333
 |     | |     |
1334
 o o-o o o o-o o
1335
 | | | | | | | |
1336
 o-o o-o o-o o-o
1337

1338
#endif
1339

    
1340
#define SVI(a, i, x, y) \
1341
{\
1342
    a[i][0]= x;\
1343
    a[i][1]= y;\
1344
    i++;\
1345
}
1346

    
1347
static int sig_cmp(const void *a, const void *b){
1348
    const int16_t* da = (const int16_t *) a;
1349
    const int16_t* db = (const int16_t *) b;
1350
    
1351
    if(da[1] != db[1]) return da[1] - db[1];
1352
    else               return da[0] - db[0];
1353
}
1354

    
1355
static int deint(unsigned int a){
1356
    a &= 0x55555555;         //0 1 2 3 4 5 6 7 8 9 A B C D E F
1357
    a +=     a & 0x11111111; // 01  23  45  67  89  AB  CD  EF
1358
    a +=  3*(a & 0x0F0F0F0F);//   0123    4567    89AB    CDEF
1359
    a += 15*(a & 0x00FF00FF);//       01234567        89ABCDEF
1360
    a +=255*(a & 0x0000FFFF);//               0123456789ABCDEF
1361
    return a>>15;
1362
}
1363

    
1364
static void encode_subband_z0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1365
    const int level= b->level;
1366
    const int w= b->width;
1367
    const int h= b->height;
1368
    int x, y, pos;
1369

    
1370
    if(1){
1371
        int run=0;
1372
        int runs[w*h];
1373
        int run_index=0;
1374
        int count=0;
1375
        
1376
        for(pos=0; ; pos++){
1377
            int x= deint(pos   );
1378
            int y= deint(pos>>1);
1379
            int v, p=0, pr=0, pd=0;
1380
            int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;
1381

    
1382
            if(x>=w || y>=h){
1383
                if(x>=w && y>=h)
1384
                    break;
1385
                continue;
1386
            }
1387
            count++;
1388
                
1389
            v= src[x + y*stride];
1390

    
1391
            if(y){
1392
                t= src[x + (y-1)*stride];
1393
                if(x){
1394
                    lt= src[x - 1 + (y-1)*stride];
1395
                }
1396
                if(x + 1 < w){
1397
                    /*rt= src[x + 1 + (y-1)*stride]*/;
1398
                }
1399
            }
1400
            if(x){
1401
                l= src[x - 1 + y*stride];
1402
                /*if(x > 1){
1403
                    if(orientation==1) ll= src[y + (x-2)*stride];
1404
                    else               ll= src[x - 2 + y*stride];
1405
                }*/
1406
            }
1407
            if(parent){
1408
                int px= x>>1;
1409
                int py= y>>1;
1410
                if(px<b->parent->width && py<b->parent->height){
1411
                    p= parent[px + py*2*stride];
1412
                    /*if(px+1<b->parent->width) 
1413
                        pr= parent[px + 1 + py*2*stride];
1414
                    if(py+1<b->parent->height) 
1415
                        pd= parent[px + (py+1)*2*stride];*/
1416
                }
1417
            }
1418
            if(!(/*ll|*/l|lt|t|/*rt|*/p)){
1419
                if(v){
1420
                    runs[run_index++]= run;
1421
                    run=0;
1422
                }else{
1423
                    run++;
1424
                }
1425
            }
1426
        }
1427
        assert(count==w*h);
1428
        runs[run_index++]= run;
1429
        run_index=0;
1430
        run= runs[run_index++];
1431

    
1432
        put_symbol(&s->c, b->state[1], run, 0);
1433
        
1434
        for(pos=0; ; pos++){
1435
            int x= deint(pos   );
1436
            int y= deint(pos>>1);
1437
            int v, p=0, pr=0, pd=0;
1438
            int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;
1439

    
1440
            if(x>=w || y>=h){
1441
                if(x>=w && y>=h)
1442
                    break;
1443
                continue;
1444
            }
1445
            v= src[x + y*stride];
1446

    
1447
            if(y){
1448
                t= src[x + (y-1)*stride];
1449
                if(x){
1450
                    lt= src[x - 1 + (y-1)*stride];
1451
                }
1452
                if(x + 1 < w){
1453
//                    rt= src[x + 1 + (y-1)*stride];
1454
                }
1455
            }
1456
            if(x){
1457
                l= src[x - 1 + y*stride];
1458
                /*if(x > 1){
1459
                    if(orientation==1) ll= src[y + (x-2)*stride];
1460
                    else               ll= src[x - 2 + y*stride];
1461
                }*/
1462
            }
1463

    
1464
            if(parent){
1465
                int px= x>>1;
1466
                int py= y>>1;
1467
                if(px<b->parent->width && py<b->parent->height){
1468
                    p= parent[px + py*2*stride];
1469
/*                        if(px+1<b->parent->width) 
1470
                        pr= parent[px + 1 + py*2*stride];
1471
                    if(py+1<b->parent->height) 
1472
                        pd= parent[px + (py+1)*2*stride];*/
1473
                }
1474
            }
1475
            if(/*ll|*/l|lt|t|/*rt|*/p){
1476
                int context= av_log2(/*ABS(ll) + */2*(3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p)));
1477

    
1478
                put_cabac(&s->c, &b->state[0][context], !!v);
1479
            }else{
1480
                if(!run){
1481
                    run= runs[run_index++];
1482
                    put_symbol(&s->c, b->state[1], run, 0);
1483
                    assert(v);
1484
                }else{
1485
                    run--;
1486
                    assert(!v);
1487
                }
1488
            }
1489
            if(v){
1490
                int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p));
1491

    
1492
                put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
1493
                put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1494
            }
1495
        }
1496
    }
1497
}
1498

    
1499
static void encode_subband_bp(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1500
    const int level= b->level;
1501
    const int w= b->width;
1502
    const int h= b->height;
1503
    int x, y;
1504

    
1505
#if 0
1506
    int plane;
1507
    for(plane=24; plane>=0; plane--){
1508
        int run=0;
1509
        int runs[w*h];
1510
        int run_index=0;
1511
                
1512
        for(y=0; y<h; y++){
1513
            for(x=0; x<w; x++){
1514
                int v, lv, p=0;
1515
                int d=0, r=0, rd=0, ld=0;
1516
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1517
                v= src[x + y*stride];
1518

1519
                if(y){
1520
                    t= src[x + (y-1)*stride];
1521
                    if(x){
1522
                        lt= src[x - 1 + (y-1)*stride];
1523
                    }
1524
                    if(x + 1 < w){
1525
                        rt= src[x + 1 + (y-1)*stride];
1526
                    }
1527
                }
1528
                if(x){
1529
                    l= src[x - 1 + y*stride];
1530
                    /*if(x > 1){
1531
                        if(orientation==1) ll= src[y + (x-2)*stride];
1532
                        else               ll= src[x - 2 + y*stride];
1533
                    }*/
1534
                }
1535
                if(y+1<h){
1536
                    d= src[x + (y+1)*stride];
1537
                    if(x)         ld= src[x - 1 + (y+1)*stride];
1538
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
1539
                }
1540
                if(x + 1 < w)
1541
                    r= src[x + 1 + y*stride];
1542
                if(parent){
1543
                    int px= x>>1;
1544
                    int py= y>>1;
1545
                    if(px<b->parent->width && py<b->parent->height) 
1546
                        p= parent[px + py*2*stride];
1547
                }
1548
#define HIDE(c, plane) c= c>=0 ? c&((-1)<<(plane)) : -((-c)&((-1)<<(plane)));
1549
                lv=v;
1550
                HIDE( v, plane)
1551
                HIDE(lv, plane+1)
1552
                HIDE( p, plane)
1553
                HIDE( l, plane)
1554
                HIDE(lt, plane)
1555
                HIDE( t, plane)
1556
                HIDE(rt, plane)
1557
                HIDE( r, plane+1)
1558
                HIDE(rd, plane+1)
1559
                HIDE( d, plane+1)
1560
                HIDE(ld, plane+1)
1561
                if(!(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv)){
1562
                    if(v){
1563
                        runs[run_index++]= run;
1564
                        run=0;
1565
                    }else{
1566
                        run++;
1567
                    }
1568
                }
1569
            }
1570
        }
1571
        runs[run_index++]= run;
1572
        run_index=0;
1573
        run= runs[run_index++];
1574

1575
        put_symbol(&s->c, b->state[1], run, 0);
1576
        
1577
        for(y=0; y<h; y++){
1578
            for(x=0; x<w; x++){
1579
                int v, p=0, lv;
1580
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1581
                int d=0, r=0, rd=0, ld=0;
1582
                v= src[x + y*stride];
1583

1584
                if(y){
1585
                    t= src[x + (y-1)*stride];
1586
                    if(x){
1587
                        lt= src[x - 1 + (y-1)*stride];
1588
                    }
1589
                    if(x + 1 < w){
1590
                        rt= src[x + 1 + (y-1)*stride];
1591
                    }
1592
                }
1593
                if(x){
1594
                    l= src[x - 1 + y*stride];
1595
                    /*if(x > 1){
1596
                        if(orientation==1) ll= src[y + (x-2)*stride];
1597
                        else               ll= src[x - 2 + y*stride];
1598
                    }*/
1599
                }
1600
                if(y+1<h){
1601
                    d= src[x + (y+1)*stride];
1602
                    if(x)         ld= src[x - 1 + (y+1)*stride];
1603
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
1604
                }
1605
                if(x + 1 < w)
1606
                    r= src[x + 1 + y*stride];
1607

1608
                if(parent){
1609
                    int px= x>>1;
1610
                    int py= y>>1;
1611
                    if(px<b->parent->width && py<b->parent->height) 
1612
                        p= parent[px + py*2*stride];
1613
                }
1614
                lv=v;
1615
                HIDE( v, plane)
1616
                HIDE(lv, plane+1)
1617
                HIDE( p, plane)
1618
                HIDE( l, plane)
1619
                HIDE(lt, plane)
1620
                HIDE( t, plane)
1621
                HIDE(rt, plane)
1622
                HIDE( r, plane+1)
1623
                HIDE(rd, plane+1)
1624
                HIDE( d, plane+1)
1625
                HIDE(ld, plane+1)
1626
                if(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv){
1627
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)
1628
                                                      +3*ABS(r) + ABS(rd) + 2*ABS(d) + ABS(ld));
1629

1630
                    if(lv) put_cabac(&s->c, &b->state[99][context + 8*(av_log2(ABS(lv))-plane)], !!(v-lv));
1631
                    else   put_cabac(&s->c, &b->state[ 0][context], !!v);
1632
                }else{
1633
                    assert(!lv);
1634
                    if(!run){
1635
                        run= runs[run_index++];
1636
                        put_symbol(&s->c, b->state[1], run, 0);
1637
                        assert(v);
1638
                    }else{
1639
                        run--;
1640
                        assert(!v);
1641
                    }
1642
                }
1643
                if(v && !lv){
1644
                    int context=    clip(quant3b[l&0xFF] + quant3b[r&0xFF], -1,1)
1645
                                + 3*clip(quant3b[t&0xFF] + quant3b[d&0xFF], -1,1);
1646
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + context], v<0);
1647
                }
1648
            }
1649
        }
1650
    }
1651
    return;    
1652
#endif
1653
}
1654

    
1655
static void encode_subband_X(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1656
    const int level= b->level;
1657
    const int w= b->width;
1658
    const int h= b->height;
1659
    int x, y;
1660

    
1661
#if 0
1662
    if(orientation==3 && parent && 0){
1663
        int16_t candidate[w*h][2];
1664
        uint8_t state[w*h];
1665
        int16_t boarder[3][w*h*4][2];
1666
        int16_t significant[w*h][2];
1667
        int candidate_count=0;
1668
        int boarder_count[3]={0,0,0};
1669
        int significant_count=0;
1670
        int rle_pos=0;
1671
        int v, last_v;
1672
        int primary= orientation==1;
1673
        
1674
        memset(candidate, 0, sizeof(candidate));
1675
        memset(state, 0, sizeof(state));
1676
        memset(boarder, 0, sizeof(boarder));
1677
        
1678
        for(y=0; y<h; y++){
1679
            for(x=0; x<w; x++){
1680
                if(parent[(x>>1) + (y>>1)*2*stride])
1681
                    SVI(candidate, candidate_count, x, y)
1682
            }
1683
        }
1684

1685
        for(;;){
1686
            while(candidate_count && !boarder_count[0] && !boarder_count[1] && !boarder_count[2]){
1687
                candidate_count--;
1688
                x= candidate[ candidate_count][0];
1689
                y= candidate[ candidate_count][1];
1690
                if(state[x + y*w])
1691
                    continue;
1692
                state[x + y*w]= 1;
1693
                v= !!src[x + y*stride];
1694
                put_cabac(&s->c, &b->state[0][0], v);
1695
                if(v){
1696
                    SVI(significant, significant_count, x,y)
1697
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1698
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1699
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1700
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1701
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1702
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1703
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1704
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1705
                }
1706
            }
1707
            while(!boarder_count[0] && !boarder_count[1] && !boarder_count[2] && rle_pos < w*h){
1708
                int run=0;
1709
                for(; rle_pos < w*h;){
1710
                    x= rle_pos % w; //FIXME speed
1711
                    y= rle_pos / w;
1712
                    rle_pos++;
1713
                    if(state[x + y*w])
1714
                        continue;
1715
                    state[x + y*w]= 1;
1716
                    v= !!src[x + y*stride];
1717
                    if(v){
1718
                        put_symbol(&s->c, b->state[1], run, 0);
1719
                        SVI(significant, significant_count, x,y)
1720
                        if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1721
                        if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1722
                        if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1723
                        if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1724
                        if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1725
                        if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1726
                        if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1727
                        if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1728
                        break;
1729
//FIXME                note only right & down can be boarders
1730
                    }
1731
                    run++;
1732
                }
1733
            }
1734
            if(!boarder_count[0] && !boarder_count[1] && !boarder_count[2])
1735
                break;
1736
            
1737
            while(boarder_count[0] || boarder_count[1] || boarder_count[2]){
1738
                int index;
1739
                
1740
                if     (boarder_count[  primary]) index=  primary;
1741
                else if(boarder_count[1-primary]) index=1-primary;
1742
                else                              index=2;
1743
                
1744
                boarder_count[index]--;
1745
                x= boarder[index][ boarder_count[index] ][0];
1746
                y= boarder[index][ boarder_count[index] ][1];
1747
                if(state[x + y*w]) //FIXME maybe check earlier
1748
                    continue;
1749
                state[x + y*w]= 1;
1750
                v= !!src[x + y*stride];
1751
                put_cabac(&s->c, &b->state[0][index+1], v);
1752
                if(v){
1753
                    SVI(significant, significant_count, x,y)
1754
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1755
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1756
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1757
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1758
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1759
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1760
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1761
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1762
                }
1763
            }
1764
        }
1765
        //FIXME sort significant coeffs maybe
1766
        if(1){
1767
            qsort(significant, significant_count, sizeof(int16_t[2]), sig_cmp);
1768
        }
1769
        
1770
        last_v=1;
1771
        while(significant_count){
1772
            int context= 3 + quant7[last_v&0xFF]; //use significance of suroundings
1773
            significant_count--;
1774
            x= significant[significant_count][0];//FIXME try opposit direction
1775
            y= significant[significant_count][1];
1776
            v= src[x + y*stride];
1777
            put_symbol(&s->c, b->state[context + 2], v, 1); //FIXME try to avoid first bit, try this with the old code too!!
1778
            last_v= v;
1779
        }
1780
    }
1781
#endif
1782
}
1783

    
1784
static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1785
    const int level= b->level;
1786
    const int w= b->width;
1787
    const int h= b->height;
1788
    int x, y;
1789

    
1790
    if(1){
1791
        int run=0;
1792
        int runs[w*h];
1793
        int run_index=0;
1794
                
1795
        for(y=0; y<h; y++){
1796
            for(x=0; x<w; x++){
1797
                int v, p=0;
1798
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1799
                v= src[x + y*stride];
1800

    
1801
                if(y){
1802
                    t= src[x + (y-1)*stride];
1803
                    if(x){
1804
                        lt= src[x - 1 + (y-1)*stride];
1805
                    }
1806
                    if(x + 1 < w){
1807
                        rt= src[x + 1 + (y-1)*stride];
1808
                    }
1809
                }
1810
                if(x){
1811
                    l= src[x - 1 + y*stride];
1812
                    /*if(x > 1){
1813
                        if(orientation==1) ll= src[y + (x-2)*stride];
1814
                        else               ll= src[x - 2 + y*stride];
1815
                    }*/
1816
                }
1817
                if(parent){
1818
                    int px= x>>1;
1819
                    int py= y>>1;
1820
                    if(px<b->parent->width && py<b->parent->height) 
1821
                        p= parent[px + py*2*stride];
1822
                }
1823
                if(!(/*ll|*/l|lt|t|rt|p)){
1824
                    if(v){
1825
                        runs[run_index++]= run;
1826
                        run=0;
1827
                    }else{
1828
                        run++;
1829
                    }
1830
                }
1831
            }
1832
        }
1833
        runs[run_index++]= run;
1834
        run_index=0;
1835
        run= runs[run_index++];
1836

    
1837
        put_symbol2(&s->c, b->state[1], run, 3);
1838
        
1839
        for(y=0; y<h; y++){
1840
            for(x=0; x<w; x++){
1841
                int v, p=0;
1842
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1843
                v= src[x + y*stride];
1844

    
1845
                if(y){
1846
                    t= src[x + (y-1)*stride];
1847
                    if(x){
1848
                        lt= src[x - 1 + (y-1)*stride];
1849
                    }
1850
                    if(x + 1 < w){
1851
                        rt= src[x + 1 + (y-1)*stride];
1852
                    }
1853
                }
1854
                if(x){
1855
                    l= src[x - 1 + y*stride];
1856
                    /*if(x > 1){
1857
                        if(orientation==1) ll= src[y + (x-2)*stride];
1858
                        else               ll= src[x - 2 + y*stride];
1859
                    }*/
1860
                }
1861
                if(parent){
1862
                    int px= x>>1;
1863
                    int py= y>>1;
1864
                    if(px<b->parent->width && py<b->parent->height) 
1865
                        p= parent[px + py*2*stride];
1866
                }
1867
                if(/*ll|*/l|lt|t|rt|p){
1868
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1869

    
1870
                    put_cabac(&s->c, &b->state[0][context], !!v);
1871
                }else{
1872
                    if(!run){
1873
                        run= runs[run_index++];
1874

    
1875
                        put_symbol2(&s->c, b->state[1], run, 3);
1876
                        assert(v);
1877
                    }else{
1878
                        run--;
1879
                        assert(!v);
1880
                    }
1881
                }
1882
                if(v){
1883
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1884

    
1885
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1886
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1887
                }
1888
            }
1889
        }
1890
    }
1891
}
1892

    
1893
static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1894
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1895
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1896
    encode_subband_c0run(s, b, src, parent, stride, orientation);
1897
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1898
}
1899

    
1900
static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1901
    const int level= b->level;
1902
    const int w= b->width;
1903
    const int h= b->height;
1904
    int x,y;
1905

    
1906
    START_TIMER
1907

    
1908
    if(1){
1909
        int run;
1910
        int index=0;
1911
        int prev_index=-1;
1912
        int prev2_index=0;
1913
        int parent_index= 0;
1914
        int prev_parent_index= 0;
1915
        
1916
        for(y=0; y<b->height; y++)
1917
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1918

    
1919
        run= get_symbol2(&s->c, b->state[1], 3);
1920
        for(y=0; y<h; y++){
1921
            int v=0;
1922
            int lt=0, t=0, rt=0;
1923

    
1924
            if(y){
1925
                rt= src[(y-1)*stride];
1926
            }
1927
            for(x=0; x<w; x++){
1928
                int p=0;
1929
                const int l= v;
1930
                
1931
                lt= t; t= rt;
1932

    
1933
                if(y && x + 1 < w){
1934
                    rt= src[x + 1 + (y-1)*stride];
1935
                }else
1936
                    rt= 0;
1937
                if(parent){
1938
                    int px= x>>1;
1939
                    int py= y>>1;
1940
                    if(px<b->parent->width && py<b->parent->height) 
1941
                        p= parent[px + py*2*stride];
1942

    
1943
                    if(x>>1 > b->parent->x[parent_index]){
1944
                        parent_index++;
1945
                    }
1946
                }
1947
                if(/*ll|*/l|lt|t|rt|p){
1948
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1949

    
1950
                    v=get_cabac(&s->c, &b->state[0][context]);
1951
                }else{
1952
                    if(!run){
1953
                        run= get_symbol2(&s->c, b->state[1], 3);
1954
                        v=1;
1955
                    }else{
1956
                        run--;
1957
                        v=0;
1958

    
1959
                        if(y && parent){
1960
                            int max_run;
1961
                            while(b->x[prev_index] < x)
1962
                                prev_index++;
1963

    
1964
                            max_run= FFMIN(run, b->x[prev_index] - x - 2);
1965
                            max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1966
                            x+= max_run;
1967
                            run-= max_run;
1968
                        }
1969
                    }
1970
                }
1971
                if(v){
1972
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1973
                    v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
1974
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
1975
                        v= -v;
1976
                    src[x + y*stride]= v;
1977
                    b->x[index++]=x; //FIXME interleave x/coeff
1978
//                    b->coeff[index++]= v;
1979
                }
1980
            }
1981
            b->x[index++]= w+1; //end marker
1982
            prev_index= prev2_index;
1983
            prev2_index= index;
1984
            
1985
            if(parent){
1986
                while(b->parent->x[parent_index] != b->parent->width+1)
1987
                    parent_index++;
1988
                parent_index++;
1989
                if(y&1){
1990
                    prev_parent_index= parent_index;
1991
                }else{
1992
                    parent_index= prev_parent_index;
1993
                }
1994
            }
1995
        }
1996
        b->x[index++]= w+1; //end marker
1997
        if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
1998
            STOP_TIMER("decode_subband")
1999
        }
2000
        
2001
        return;
2002
    }
2003
}
2004

    
2005
static void reset_contexts(SnowContext *s){
2006
    int plane_index, level, orientation;
2007

    
2008
    for(plane_index=0; plane_index<2; plane_index++){
2009
        for(level=0; level<s->spatial_decomposition_count; level++){
2010
            for(orientation=level ? 1:0; orientation<4; orientation++){
2011
                memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
2012
            }
2013
        }
2014
    }
2015
    memset(s->header_state, 0, sizeof(s->header_state));
2016
    memset(s->block_state, 0, sizeof(s->block_state));
2017
}
2018

    
2019
static int alloc_blocks(SnowContext *s){
2020
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
2021
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
2022
    
2023
    s->b_width = w;
2024
    s->b_height= h;
2025
    
2026
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
2027
    return 0;
2028
}
2029

    
2030
static inline void copy_cabac_state(CABACContext *d, CABACContext *s){
2031
    PutBitContext bak= d->pb;
2032
    *d= *s;
2033
    d->pb= bak;
2034
}
2035

    
2036
//near copy & paste from dsputil, FIXME
2037
static int pix_sum(uint8_t * pix, int line_size, int w)
2038
{
2039
    int s, i, j;
2040

    
2041
    s = 0;
2042
    for (i = 0; i < w; i++) {
2043
        for (j = 0; j < w; j++) {
2044
            s += pix[0];
2045
            pix ++;
2046
        }
2047
        pix += line_size - w;
2048
    }
2049
    return s;
2050
}
2051

    
2052
//near copy & paste from dsputil, FIXME
2053
static int pix_norm1(uint8_t * pix, int line_size, int w)
2054
{
2055
    int s, i, j;
2056
    uint32_t *sq = squareTbl + 256;
2057

    
2058
    s = 0;
2059
    for (i = 0; i < w; i++) {
2060
        for (j = 0; j < w; j ++) {
2061
            s += sq[pix[0]];
2062
            pix ++;
2063
        }
2064
        pix += line_size - w;
2065
    }
2066
    return s;
2067
}
2068

    
2069
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
2070
    const int w= s->b_width << s->block_max_depth;
2071
    const int rem_depth= s->block_max_depth - level;
2072
    const int index= (x + y*w) << rem_depth;
2073
    const int block_w= 1<<rem_depth;
2074
    BlockNode block;
2075
    int i,j;
2076
    
2077
    block.color[0]= l;
2078
    block.color[1]= cb;
2079
    block.color[2]= cr;
2080
    block.mx= mx;
2081
    block.my= my;
2082
    block.type= type;
2083
    block.level= level;
2084

    
2085
    for(j=0; j<block_w; j++){
2086
        for(i=0; i<block_w; i++){
2087
            s->block[index + i + j*w]= block;
2088
        }
2089
    }
2090
}
2091

    
2092
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2093
    const int offset[3]= {
2094
          y*c->  stride + x,
2095
        ((y*c->uvstride + x)>>1),
2096
        ((y*c->uvstride + x)>>1),
2097
    };
2098
    int i;
2099
    for(i=0; i<3; i++){
2100
        c->src[0][i]= src [i];
2101
        c->ref[0][i]= ref [i] + offset[i];
2102
    }
2103
    assert(!ref_index);
2104
}
2105

    
2106
//FIXME copy&paste
2107
#define P_LEFT P[1]
2108
#define P_TOP P[2]
2109
#define P_TOPRIGHT P[3]
2110
#define P_MEDIAN P[4]
2111
#define P_MV1 P[9]
2112
#define FLAG_QPEL   1 //must be 1
2113

    
2114
static int encode_q_branch(SnowContext *s, int level, int x, int y){
2115
    uint8_t p_buffer[1024];
2116
    uint8_t i_buffer[1024];
2117
    uint8_t p_state[sizeof(s->block_state)];
2118
    uint8_t i_state[sizeof(s->block_state)];
2119
    CABACContext pc, ic;
2120
    PutBitContext pbbak= s->c.pb;
2121
    int score, score2, iscore, i_len, p_len, block_s, sum;
2122
    const int w= s->b_width  << s->block_max_depth;
2123
    const int h= s->b_height << s->block_max_depth;
2124
    const int rem_depth= s->block_max_depth - level;
2125
    const int index= (x + y*w) << rem_depth;
2126
    const int block_w= 1<<(LOG2_MB_SIZE - level);
2127
    static BlockNode null_block= { //FIXME add border maybe
2128
        .color= {128,128,128},
2129
        .mx= 0,
2130
        .my= 0,
2131
        .type= 0,
2132
        .level= 0,
2133
    };
2134
    int trx= (x+1)<<rem_depth;
2135
    int try= (y+1)<<rem_depth;
2136
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2137
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2138
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2139
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2140
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2141
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2142
    int pl = left->color[0];
2143
    int pcb= left->color[1];
2144
    int pcr= left->color[2];
2145
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2146
    int pmy= mid_pred(left->my, top->my, tr->my);
2147
    int mx=0, my=0;
2148
    int l,cr,cb, i;
2149
    const int stride= s->current_picture.linesize[0];
2150
    const int uvstride= s->current_picture.linesize[1];
2151
    const int instride= s->input_picture.linesize[0];
2152
    const int uvinstride= s->input_picture.linesize[1];
2153
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
2154
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2155
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2156
    uint8_t current_mb[3][stride*block_w];
2157
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2158
    int P[10][2];
2159
    int16_t last_mv[3][2];
2160
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2161
    const int shift= 1+qpel;
2162
    MotionEstContext *c= &s->m.me;
2163
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2164
    int my_context= av_log2(2*ABS(left->my - top->my));
2165
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2166

    
2167
    assert(sizeof(s->block_state) >= 256);
2168
    if(s->keyframe){
2169
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2170
        return 0;
2171
    }
2172

    
2173
    //FIXME optimize
2174
    for(i=0; i<block_w; i++)
2175
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
2176
    for(i=0; i<block_w>>1; i++)
2177
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2178
    for(i=0; i<block_w>>1; i++)
2179
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2180

    
2181
//    clip predictors / edge ?
2182

    
2183
    P_LEFT[0]= left->mx;
2184
    P_LEFT[1]= left->my;
2185
    P_TOP [0]= top->mx;
2186
    P_TOP [1]= top->my;
2187
    P_TOPRIGHT[0]= tr->mx;
2188
    P_TOPRIGHT[1]= tr->my;
2189
    
2190
    last_mv[0][0]= s->block[index].mx;
2191
    last_mv[0][1]= s->block[index].my;
2192
    last_mv[1][0]= right->mx;
2193
    last_mv[1][1]= right->my;
2194
    last_mv[2][0]= bottom->mx;
2195
    last_mv[2][1]= bottom->my;
2196
    
2197
    s->m.mb_stride=2;
2198
    s->m.mb_x= 
2199
    s->m.mb_y= 0;
2200
    s->m.me.skip= 0;
2201

    
2202
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2203
    
2204
    assert(s->m.me.  stride ==   stride);
2205
    assert(s->m.me.uvstride == uvstride);
2206
    
2207
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2208
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2209
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2210
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2211
    
2212
    c->xmin = - x*block_w - 16;
2213
    c->ymin = - y*block_w - 16;
2214
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16;
2215
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16;
2216

    
2217
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2218
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
2219
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2220
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2221
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2222
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2223
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2224

    
2225
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2226
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2227

    
2228
    if (!y) {
2229
        c->pred_x= P_LEFT[0];
2230
        c->pred_y= P_LEFT[1];
2231
    } else {
2232
        c->pred_x = P_MEDIAN[0];
2233
        c->pred_y = P_MEDIAN[1];
2234
    }
2235

    
2236
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
2237
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2238

    
2239
    assert(mx >= c->xmin);
2240
    assert(mx <= c->xmax);
2241
    assert(my >= c->ymin);
2242
    assert(my <= c->ymax);
2243
    
2244
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2245
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2246
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2247
                             
2248
  //  subpel search
2249
    pc= s->c;
2250
    init_put_bits(&pc.pb, p_buffer, sizeof(p_buffer));
2251
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2252

    
2253
    if(level!=s->block_max_depth)
2254
        put_cabac(&pc, &p_state[4 + s_context], 1);
2255
    put_cabac(&pc, &p_state[1 + left->type + top->type], 0);
2256
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2257
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2258
    p_len= put_bits_count(&pc.pb);
2259
    score += (s->lambda2*(p_len + pc.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
2260

    
2261
    block_s= block_w*block_w;
2262
    sum = pix_sum(&current_mb[0][0], stride, block_w);
2263
    l= (sum + block_s/2)/block_s;
2264
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2265
    
2266
    block_s= block_w*block_w>>2;
2267
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2268
    cb= (sum + block_s/2)/block_s;
2269
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2270
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2271
    cr= (sum + block_s/2)/block_s;
2272
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2273

    
2274
    ic= s->c;
2275
    init_put_bits(&ic.pb, i_buffer, sizeof(i_buffer));
2276
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2277
    if(level!=s->block_max_depth)
2278
        put_cabac(&ic, &i_state[4 + s_context], 1);
2279
    put_cabac(&ic, &i_state[1 + left->type + top->type], 1);
2280
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2281
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2282
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2283
    i_len= put_bits_count(&ic.pb);
2284
    iscore += (s->lambda2*(i_len + ic.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
2285

    
2286
//    assert(score==256*256*256*64-1);
2287
    assert(iscore < 255*255*256 + s->lambda2*10);
2288
    assert(iscore >= 0);
2289
    assert(l>=0 && l<=255);
2290
    assert(pl>=0 && pl<=255);
2291

    
2292
    if(level==0){
2293
        int varc= iscore >> 8;
2294
        int vard= score >> 8;
2295
        if (vard <= 64 || vard < varc)
2296
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2297
        else
2298
            c->scene_change_score+= s->m.qscale;
2299
    }
2300
        
2301
    if(level!=s->block_max_depth){
2302
        put_cabac(&s->c, &s->block_state[4 + s_context], 0);
2303
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2304
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2305
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2306
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2307
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2308
    
2309
        if(score2 < score && score2 < iscore)
2310
            return score2;
2311
    }
2312
    
2313
    if(iscore < score){
2314
        flush_put_bits(&ic.pb);
2315
        ff_copy_bits(&pbbak, i_buffer, i_len);
2316
        s->c= ic;
2317
        s->c.pb= pbbak;
2318
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2319
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2320
        return iscore;
2321
    }else{
2322
        flush_put_bits(&pc.pb);
2323
        ff_copy_bits(&pbbak, p_buffer, p_len);
2324
        s->c= pc;
2325
        s->c.pb= pbbak;
2326
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2327
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2328
        return score;
2329
    }
2330
}
2331

    
2332
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2333
    const int w= s->b_width << s->block_max_depth;
2334
    const int h= s->b_height<< s->block_max_depth;
2335
    const int rem_depth= s->block_max_depth - level;
2336
    const int index= (x + y*w) << rem_depth;
2337
    static BlockNode null_block= { //FIXME add border maybe
2338
        .color= {128,128,128},
2339
        .mx= 0,
2340
        .my= 0,
2341
        .type= 0,
2342
        .level= 0,
2343
    };
2344
    int trx= (x+1)<<rem_depth;
2345
    int try= (y+1)<<rem_depth;
2346
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2347
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2348
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2349
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2350
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2351
    
2352
    if(s->keyframe){
2353
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2354
        return;
2355
    }
2356

    
2357
    if(level==s->block_max_depth || get_cabac(&s->c, &s->block_state[4 + s_context])){
2358
        int type;
2359
        int l = left->color[0];
2360
        int cb= left->color[1];
2361
        int cr= left->color[2];
2362
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2363
        int my= mid_pred(left->my, top->my, tr->my);
2364
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2365
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2366
        
2367
        type= get_cabac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2368

    
2369
        if(type){
2370
            l += get_symbol(&s->c, &s->block_state[32], 1);
2371
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2372
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2373
        }else{
2374
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2375
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2376
        }
2377
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2378
    }else{
2379
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2380
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2381
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2382
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2383
    }
2384
}
2385

    
2386
static void encode_blocks(SnowContext *s){
2387
    int x, y;
2388
    int w= s->b_width;
2389
    int h= s->b_height;
2390

    
2391
    for(y=0; y<h; y++){
2392
        for(x=0; x<w; x++){
2393
            encode_q_branch(s, 0, x, y);
2394
        }
2395
    }
2396
}
2397

    
2398
static void decode_blocks(SnowContext *s){
2399
    int x, y;
2400
    int w= s->b_width;
2401
    int h= s->b_height;
2402

    
2403
    for(y=0; y<h; y++){
2404
        for(x=0; x<w; x++){
2405
            decode_q_branch(s, 0, x, y);
2406
        }
2407
    }
2408
}
2409

    
2410
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2411
    int x, y;
2412

    
2413
    for(y=0; y < b_h+5; y++){
2414
        for(x=0; x < b_w; x++){
2415
            int a0= src[x     + y*stride];
2416
            int a1= src[x + 1 + y*stride];
2417
            int a2= src[x + 2 + y*stride];
2418
            int a3= src[x + 3 + y*stride];
2419
            int a4= src[x + 4 + y*stride];
2420
            int a5= src[x + 5 + y*stride];
2421
//            int am= 9*(a1+a2) - (a0+a3);
2422
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2423
//            int am= 18*(a2+a3) - 2*(a1+a4);
2424
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2425
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2426

    
2427
//            if(b_w==16) am= 8*(a1+a2);
2428

    
2429
            if(dx<8) tmp[x + y*stride]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2430
            else     tmp[x + y*stride]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2431

    
2432
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2433
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2434
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2435
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2436
        }
2437
    }
2438
    for(y=0; y < b_h; y++){
2439
        for(x=0; x < b_w; x++){
2440
            int a0= tmp[x +  y     *stride];
2441
            int a1= tmp[x + (y + 1)*stride];
2442
            int a2= tmp[x + (y + 2)*stride];
2443
            int a3= tmp[x + (y + 3)*stride];
2444
            int a4= tmp[x + (y + 4)*stride];
2445
            int a5= tmp[x + (y + 5)*stride];
2446
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2447
//            int am= 18*(a2+a3) - 2*(a1+a4);
2448
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2449
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2450
            
2451
//            if(b_w==16) am= 8*(a1+a2);
2452

    
2453
            if(dy<8) dst[x + y*stride]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2454
            else     dst[x + y*stride]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2455

    
2456
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2457
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2458
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2459
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2460
        }
2461
    }
2462
}
2463

    
2464
#define mcb(dx,dy,b_w)\
2465
static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
2466
    uint8_t tmp[stride*(b_w+5)];\
2467
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2468
}
2469

    
2470
mcb( 0, 0,16)
2471
mcb( 4, 0,16)
2472
mcb( 8, 0,16)
2473
mcb(12, 0,16)
2474
mcb( 0, 4,16)
2475
mcb( 4, 4,16)
2476
mcb( 8, 4,16)
2477
mcb(12, 4,16)
2478
mcb( 0, 8,16)
2479
mcb( 4, 8,16)
2480
mcb( 8, 8,16)
2481
mcb(12, 8,16)
2482
mcb( 0,12,16)
2483
mcb( 4,12,16)
2484
mcb( 8,12,16)
2485
mcb(12,12,16)
2486

    
2487
#define mca(dx,dy,b_w)\
2488
static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
2489
    uint8_t tmp[stride*(b_w+5)];\
2490
    assert(h==b_w);\
2491
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2492
}
2493

    
2494
mca( 0, 0,16)
2495
mca( 8, 0,16)
2496
mca( 0, 8,16)
2497
mca( 8, 8,16)
2498

    
2499
static void add_xblock(SnowContext *s, DWTELEM *dst, uint8_t *src, uint8_t *obmc, int s_x, int s_y, int b_w, int b_h, int mv_x, int mv_y, int w, int h, int dst_stride, int src_stride, int obmc_stride, int mb_type, int add, int color){
2500
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME move to context to gurantee alignment
2501
    int x,y;
2502

    
2503
    if(s_x<0){
2504
        obmc -= s_x;
2505
        b_w += s_x;
2506
        s_x=0;
2507
    }else if(s_x + b_w > w){
2508
        b_w = w - s_x;
2509
    }
2510
    if(s_y<0){
2511
        obmc -= s_y*obmc_stride;
2512
        b_h += s_y;
2513
        s_y=0;
2514
    }else if(s_y + b_h> h){
2515
        b_h = h - s_y;
2516
    }
2517

    
2518
    if(b_w<=0 || b_h<=0) return;
2519
    
2520
    dst += s_x + s_y*dst_stride;
2521

    
2522
    if(mb_type==BLOCK_INTRA){
2523
        for(y=0; y < b_h; y++){
2524
            for(x=0; x < b_w; x++){
2525
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * color * (256/OBMC_MAX);
2526
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * color * (256/OBMC_MAX);
2527
            }
2528
        }
2529
    }else{
2530
        int dx= mv_x&15;
2531
        int dy= mv_y&15;
2532
//        int dxy= (mv_x&1) + 2*(mv_y&1);
2533

    
2534
        s_x += (mv_x>>4) - 2;
2535
        s_y += (mv_y>>4) - 2;
2536
        src += s_x + s_y*src_stride;
2537
        //use dsputil
2538
    
2539
        if(   (unsigned)s_x >= w - b_w - 4
2540
           || (unsigned)s_y >= h - b_h - 4){
2541
            ff_emulated_edge_mc(tmp + 32, src, src_stride, b_w+5, b_h+5, s_x, s_y, w, h);
2542
            src= tmp + 32;
2543
        }
2544

    
2545
        assert(mb_type==0);
2546
        mc_block(tmp, src, tmp + 64+8, src_stride, b_w, b_h, dx, dy);
2547
        for(y=0; y < b_h; y++){
2548
            for(x=0; x < b_w; x++){
2549
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2550
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2551
            }
2552
        }
2553
    }
2554
}
2555

    
2556
static void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2557
    Plane *p= &s->plane[plane_index];
2558
    const int mb_w= s->b_width  << s->block_max_depth;
2559
    const int mb_h= s->b_height << s->block_max_depth;
2560
    const int mb_stride= mb_w;
2561
    int x, y, mb_x, mb_y;
2562
    int scale      = plane_index ?  s->mv_scale : 2*s->mv_scale;
2563
    int block_size = MB_SIZE >> s->block_max_depth;
2564
    int block_w    = plane_index ? block_size/2 : block_size;
2565
    uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2566
    int obmc_stride= plane_index ? block_size : 2*block_size;
2567
    int ref_stride= s->last_picture.linesize[plane_index];
2568
    uint8_t *ref  = s->last_picture.data[plane_index];
2569
    int w= p->width;
2570
    int h= p->height;
2571
    
2572
if(s->avctx->debug&512){
2573
    for(y=0; y<h; y++){
2574
        for(x=0; x<w; x++){
2575
            if(add) buf[x + y*w]+= 128*256;
2576
            else    buf[x + y*w]-= 128*256;
2577
        }
2578
    }
2579
    
2580
    return;
2581
}
2582
    for(mb_y=-1; mb_y<=mb_h; mb_y++){
2583
        for(mb_x=-1; mb_x<=mb_w; mb_x++){
2584
            int index= clip(mb_x, 0, mb_w-1) + clip(mb_y, 0, mb_h-1)*mb_stride;
2585

    
2586
            add_xblock(s, buf, ref, obmc, 
2587
                       block_w*mb_x - block_w/2, 
2588
                       block_w*mb_y - block_w/2,
2589
                       2*block_w, 2*block_w,
2590
                       s->block[index].mx*scale, s->block[index].my*scale,
2591
                       w, h,
2592
                       w, ref_stride, obmc_stride, 
2593
                       s->block[index].type, add, s->block[index].color[plane_index]);
2594

    
2595
        }
2596
    }
2597
}
2598

    
2599
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2600
    const int level= b->level;
2601
    const int w= b->width;
2602
    const int h= b->height;
2603
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2604
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2605
    int x,y, thres1, thres2;
2606
    START_TIMER
2607

    
2608
    assert(QROOT==8);
2609

    
2610
    if(s->qlog == LOSSLESS_QLOG) return;
2611
 
2612
    bias= bias ? 0 : (3*qmul)>>3;
2613
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2614
    thres2= 2*thres1;
2615
    
2616
    if(!bias){
2617
        for(y=0; y<h; y++){
2618
            for(x=0; x<w; x++){
2619
                int i= src[x + y*stride];
2620
                
2621
                if((unsigned)(i+thres1) > thres2){
2622
                    if(i>=0){
2623
                        i<<= QEXPSHIFT;
2624
                        i/= qmul; //FIXME optimize
2625
                        src[x + y*stride]=  i;
2626
                    }else{
2627
                        i= -i;
2628
                        i<<= QEXPSHIFT;
2629
                        i/= qmul; //FIXME optimize
2630
                        src[x + y*stride]= -i;
2631
                    }
2632
                }else
2633
                    src[x + y*stride]= 0;
2634
            }
2635
        }
2636
    }else{
2637
        for(y=0; y<h; y++){
2638
            for(x=0; x<w; x++){
2639
                int i= src[x + y*stride]; 
2640
                
2641
                if((unsigned)(i+thres1) > thres2){
2642
                    if(i>=0){
2643
                        i<<= QEXPSHIFT;
2644
                        i= (i + bias) / qmul; //FIXME optimize
2645
                        src[x + y*stride]=  i;
2646
                    }else{
2647
                        i= -i;
2648
                        i<<= QEXPSHIFT;
2649
                        i= (i + bias) / qmul; //FIXME optimize
2650
                        src[x + y*stride]= -i;
2651
                    }
2652
                }else
2653
                    src[x + y*stride]= 0;
2654
            }
2655
        }
2656
    }
2657
    if(level+1 == s->spatial_decomposition_count){
2658
//        STOP_TIMER("quantize")
2659
    }
2660
}
2661

    
2662
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2663
    const int level= b->level;
2664
    const int w= b->width;
2665
    const int h= b->height;
2666
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2667
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2668
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2669
    int x,y;
2670
    
2671
    if(s->qlog == LOSSLESS_QLOG) return;
2672
    
2673
    assert(QROOT==8);
2674

    
2675
    for(y=0; y<h; y++){
2676
        for(x=0; x<w; x++){
2677
            int i= src[x + y*stride];
2678
            if(i<0){
2679
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2680
            }else if(i>0){
2681
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2682
            }
2683
        }
2684
    }
2685
}
2686

    
2687
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2688
    const int w= b->width;
2689
    const int h= b->height;
2690
    int x,y;
2691
    
2692
    for(y=h-1; y>=0; y--){
2693
        for(x=w-1; x>=0; x--){
2694
            int i= x + y*stride;
2695
            
2696
            if(x){
2697
                if(use_median){
2698
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2699
                    else  src[i] -= src[i - 1];
2700
                }else{
2701
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2702
                    else  src[i] -= src[i - 1];
2703
                }
2704
            }else{
2705
                if(y) src[i] -= src[i - stride];
2706
            }
2707
        }
2708
    }
2709
}
2710

    
2711
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2712
    const int w= b->width;
2713
    const int h= b->height;
2714
    int x,y;
2715
    
2716
    for(y=0; y<h; y++){
2717
        for(x=0; x<w; x++){
2718
            int i= x + y*stride;
2719
            
2720
            if(x){
2721
                if(use_median){
2722
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2723
                    else  src[i] += src[i - 1];
2724
                }else{
2725
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2726
                    else  src[i] += src[i - 1];
2727
                }
2728
            }else{
2729
                if(y) src[i] += src[i - stride];
2730
            }
2731
        }
2732
    }
2733
}
2734

    
2735
static void encode_header(SnowContext *s){
2736
    int plane_index, level, orientation;
2737

    
2738
    put_cabac(&s->c, s->header_state, s->keyframe); // state clearing stuff?
2739
    if(s->keyframe){
2740
        put_symbol(&s->c, s->header_state, s->version, 0);
2741
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2742
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2743
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2744
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
2745
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2746
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
2747
        put_cabac(&s->c, s->header_state, s->spatial_scalability);
2748
//        put_cabac(&s->c, s->header_state, s->rate_scalability);
2749

    
2750
        for(plane_index=0; plane_index<2; plane_index++){
2751
            for(level=0; level<s->spatial_decomposition_count; level++){
2752
                for(orientation=level ? 1:0; orientation<4; orientation++){
2753
                    if(orientation==2) continue;
2754
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2755
                }
2756
            }
2757
        }
2758
    }
2759
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2760
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
2761
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
2762
    put_symbol(&s->c, s->header_state, s->qbias, 1);
2763
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
2764
}
2765

    
2766
static int decode_header(SnowContext *s){
2767
    int plane_index, level, orientation;
2768

    
2769
    s->keyframe= get_cabac(&s->c, s->header_state);
2770
    if(s->keyframe){
2771
        s->version= get_symbol(&s->c, s->header_state, 0);
2772
        if(s->version>0){
2773
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2774
            return -1;
2775
        }
2776
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2777
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2778
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2779
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2780
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2781
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2782
        s->spatial_scalability= get_cabac(&s->c, s->header_state);
2783
//        s->rate_scalability= get_cabac(&s->c, s->header_state);
2784

    
2785
        for(plane_index=0; plane_index<3; plane_index++){
2786
            for(level=0; level<s->spatial_decomposition_count; level++){
2787
                for(orientation=level ? 1:0; orientation<4; orientation++){
2788
                    int q;
2789
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2790
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2791
                    else                    q= get_symbol(&s->c, s->header_state, 1);
2792
                    s->plane[plane_index].band[level][orientation].qlog= q;
2793
                }
2794
            }
2795
        }
2796
    }
2797
    
2798
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2799
    if(s->spatial_decomposition_type > 2){
2800
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2801
        return -1;
2802
    }
2803
    
2804
    s->qlog= get_symbol(&s->c, s->header_state, 1);
2805
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2806
    s->qbias= get_symbol(&s->c, s->header_state, 1);
2807
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
2808

    
2809
    return 0;
2810
}
2811

    
2812
static int common_init(AVCodecContext *avctx){
2813
    SnowContext *s = avctx->priv_data;
2814
    int width, height;
2815
    int level, orientation, plane_index, dec;
2816

    
2817
    s->avctx= avctx;
2818
        
2819
    dsputil_init(&s->dsp, avctx);
2820

    
2821
#define mcf(dx,dy)\
2822
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
2823
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2824
        mc_block ## dx ## dy;
2825

    
2826
    mcf( 0, 0)
2827
    mcf( 4, 0)
2828
    mcf( 8, 0)
2829
    mcf(12, 0)
2830
    mcf( 0, 4)
2831
    mcf( 4, 4)
2832
    mcf( 8, 4)
2833
    mcf(12, 4)
2834
    mcf( 0, 8)
2835
    mcf( 4, 8)
2836
    mcf( 8, 8)
2837
    mcf(12, 8)
2838
    mcf( 0,12)
2839
    mcf( 4,12)
2840
    mcf( 8,12)
2841
    mcf(12,12)
2842

    
2843
#define mcfh(dx,dy)\
2844
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
2845
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2846
        mc_block_hpel ## dx ## dy;
2847

    
2848
    mcfh(0, 0)
2849
    mcfh(8, 0)
2850
    mcfh(0, 8)
2851
    mcfh(8, 8)
2852
        
2853
    dec= s->spatial_decomposition_count= 5;
2854
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2855
    
2856
    s->chroma_h_shift= 1; //FIXME XXX
2857
    s->chroma_v_shift= 1;
2858
    
2859
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2860
    
2861
    width= s->avctx->width;
2862
    height= s->avctx->height;
2863

    
2864
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
2865
    s->pred_buffer= av_mallocz(width*height*sizeof(DWTELEM));
2866
    
2867
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2868
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
2869
    
2870
    for(plane_index=0; plane_index<3; plane_index++){    
2871
        int w= s->avctx->width;
2872
        int h= s->avctx->height;
2873

    
2874
        if(plane_index){
2875
            w>>= s->chroma_h_shift;
2876
            h>>= s->chroma_v_shift;
2877
        }
2878
        s->plane[plane_index].width = w;
2879
        s->plane[plane_index].height= h;
2880
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
2881
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2882
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2883
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2884
                
2885
                b->buf= s->spatial_dwt_buffer;
2886
                b->level= level;
2887
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2888
                b->width = (w + !(orientation&1))>>1;
2889
                b->height= (h + !(orientation>1))>>1;
2890
                
2891
                if(orientation&1) b->buf += (w+1)>>1;
2892
                if(orientation>1) b->buf += b->stride>>1;
2893
                
2894
                if(level)
2895
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
2896
                b->x    = av_mallocz(((b->width+1) * b->height+1)*sizeof(int16_t));
2897
                b->coeff= av_mallocz(((b->width+1) * b->height+1)*sizeof(DWTELEM));
2898
            }
2899
            w= (w+1)>>1;
2900
            h= (h+1)>>1;
2901
        }
2902
    }
2903
    
2904
    reset_contexts(s);
2905
/*    
2906
    width= s->width= avctx->width;
2907
    height= s->height= avctx->height;
2908
    
2909
    assert(width && height);
2910
*/
2911
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2912
    
2913
    return 0;
2914
}
2915

    
2916

    
2917
static void calculate_vissual_weight(SnowContext *s, Plane *p){
2918
    int width = p->width;
2919
    int height= p->height;
2920
    int i, level, orientation, x, y;
2921

    
2922
    for(level=0; level<s->spatial_decomposition_count; level++){
2923
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2924
            SubBand *b= &p->band[level][orientation];
2925
            DWTELEM *buf= b->buf;
2926
            int64_t error=0;
2927
            
2928
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2929
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
2930
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
2931
            for(y=0; y<height; y++){
2932
                for(x=0; x<width; x++){
2933
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
2934
                    error += d*d;
2935
                }
2936
            }
2937

    
2938
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
2939
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
2940
        }
2941
    }
2942
}
2943

    
2944
static int encode_init(AVCodecContext *avctx)
2945
{
2946
    SnowContext *s = avctx->priv_data;
2947
    int i;
2948
    int level, orientation, plane_index;
2949

    
2950
    if(avctx->strict_std_compliance >= 0){
2951
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2952
               "use vstrict=-1 to use it anyway\n");
2953
        return -1;
2954
    }
2955
 
2956
    common_init(avctx);
2957
    alloc_blocks(s);
2958
 
2959
    s->version=0;
2960
    
2961
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2962
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2963
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2964
    h263_encode_init(&s->m); //mv_penalty
2965

    
2966
    for(plane_index=0; plane_index<3; plane_index++){
2967
        calculate_vissual_weight(s, &s->plane[plane_index]);
2968
    }
2969
    
2970
    
2971
    avctx->coded_frame= &s->current_picture;
2972
    switch(avctx->pix_fmt){
2973
//    case PIX_FMT_YUV444P:
2974
//    case PIX_FMT_YUV422P:
2975
    case PIX_FMT_YUV420P:
2976
    case PIX_FMT_GRAY8:
2977
//    case PIX_FMT_YUV411P:
2978
//    case PIX_FMT_YUV410P:
2979
        s->colorspace_type= 0;
2980
        break;
2981
/*    case PIX_FMT_RGBA32:
2982
        s->colorspace= 1;
2983
        break;*/
2984
    default:
2985
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2986
        return -1;
2987
    }
2988
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2989
    s->chroma_h_shift= 1;
2990
    s->chroma_v_shift= 1;
2991
    return 0;
2992
}
2993

    
2994
static int frame_start(SnowContext *s){
2995
   AVFrame tmp;
2996
   int w= s->avctx->width; //FIXME round up to x16 ?
2997
   int h= s->avctx->height;
2998

    
2999
   if(s->keyframe)
3000
        reset_contexts(s);
3001
 
3002
    if(s->current_picture.data[0]){
3003
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
3004
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3005
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3006
    }
3007

    
3008
    tmp= s->last_picture;
3009
    s->last_picture= s->current_picture;
3010
    s->current_picture= tmp;
3011
    
3012
    s->current_picture.reference= 1;
3013
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3014
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3015
        return -1;
3016
    }
3017
    
3018
    return 0;
3019
}
3020

    
3021
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3022
    SnowContext *s = avctx->priv_data;
3023
    CABACContext * const c= &s->c;
3024
    AVFrame *pict = data;
3025
    const int width= s->avctx->width;
3026
    const int height= s->avctx->height;
3027
    int used_count= 0;
3028
    int log2_threshold, level, orientation, plane_index, i;
3029

    
3030
    ff_init_cabac_encoder(c, buf, buf_size);
3031
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3032
    
3033
    s->input_picture = *pict;
3034

    
3035
    memset(s->header_state, 0, sizeof(s->header_state));
3036

    
3037
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3038
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3039
    
3040
    if(pict->quality){
3041
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3042
        //<64 >60
3043
        s->qlog += 61;
3044
    }else{
3045
        s->qlog= LOSSLESS_QLOG;
3046
    }
3047

    
3048
    frame_start(s);
3049

    
3050
    if(pict->pict_type == P_TYPE){
3051
        int block_width = (width +15)>>4;
3052
        int block_height= (height+15)>>4;
3053
        int stride= s->current_picture.linesize[0];
3054
        uint8_t *src_plane= s->input_picture.data[0];
3055
        int src_stride= s->input_picture.linesize[0];
3056
        int x,y;
3057
        
3058
        assert(s->current_picture.data[0]);
3059
        assert(s->last_picture.data[0]);
3060
     
3061
        s->m.avctx= s->avctx;
3062
        s->m.current_picture.data[0]= s->current_picture.data[0];
3063
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
3064
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3065
        s->m.current_picture_ptr= &s->m.current_picture;
3066
        s->m.   last_picture_ptr= &s->m.   last_picture;
3067
        s->m.linesize=
3068
        s->m.   last_picture.linesize[0]=
3069
        s->m.    new_picture.linesize[0]=
3070
        s->m.current_picture.linesize[0]= stride;
3071
        s->m.uvlinesize= s->current_picture.linesize[1];
3072
        s->m.width = width;
3073
        s->m.height= height;
3074
        s->m.mb_width = block_width;
3075
        s->m.mb_height= block_height;
3076
        s->m.mb_stride=   s->m.mb_width+1;
3077
        s->m.b8_stride= 2*s->m.mb_width+1;
3078
        s->m.f_code=1;
3079
        s->m.pict_type= pict->pict_type;
3080
        s->m.me_method= s->avctx->me_method;
3081
        s->m.me.scene_change_score=0;
3082
        s->m.flags= s->avctx->flags;
3083
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3084
        s->m.out_format= FMT_H263;
3085
        s->m.unrestricted_mv= 1;
3086

    
3087
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3088
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3089
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3090

    
3091
        s->m.dsp= s->dsp; //move
3092
        ff_init_me(&s->m);
3093
    }
3094
    
3095
redo_frame:
3096
        
3097
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3098

    
3099
    encode_header(s);
3100
    encode_blocks(s);
3101
      
3102
    for(plane_index=0; plane_index<3; plane_index++){
3103
        Plane *p= &s->plane[plane_index];
3104
        int w= p->width;
3105
        int h= p->height;
3106
        int x, y;
3107
        int bits= put_bits_count(&s->c.pb);
3108

    
3109
        //FIXME optimize
3110
     if(pict->data[plane_index]) //FIXME gray hack
3111
        for(y=0; y<h; y++){
3112
            for(x=0; x<w; x++){
3113
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
3114
            }
3115
        }
3116
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3117
        
3118
        if(   plane_index==0 
3119
           && pict->pict_type == P_TYPE 
3120
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3121
            ff_init_cabac_encoder(c, buf, buf_size);
3122
            ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3123
            pict->pict_type= FF_I_TYPE;
3124
            s->keyframe=1;
3125
            reset_contexts(s);
3126
            goto redo_frame;
3127
        }
3128
        
3129
        if(s->qlog == LOSSLESS_QLOG){
3130
            for(y=0; y<h; y++){
3131
                for(x=0; x<w; x++){
3132
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + 127)>>8;
3133
                }
3134
            }
3135
        }
3136
 
3137
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3138

    
3139
        for(level=0; level<s->spatial_decomposition_count; level++){
3140
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3141
                SubBand *b= &p->band[level][orientation];
3142
                
3143
                quantize(s, b, b->buf, b->stride, s->qbias);
3144
                if(orientation==0)
3145
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3146
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3147
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
3148
                if(orientation==0)
3149
                    correlate(s, b, b->buf, b->stride, 1, 0);
3150
            }
3151
        }
3152
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3153

    
3154
        for(level=0; level<s->spatial_decomposition_count; level++){
3155
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3156
                SubBand *b= &p->band[level][orientation];
3157

    
3158
                dequantize(s, b, b->buf, b->stride);
3159
            }
3160
        }
3161

    
3162
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3163
        if(s->qlog == LOSSLESS_QLOG){
3164
            for(y=0; y<h; y++){
3165
                for(x=0; x<w; x++){
3166
                    s->spatial_dwt_buffer[y*w + x]<<=8;
3167
                }
3168
            }
3169
        }
3170
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3171
        //FIXME optimize
3172
        for(y=0; y<h; y++){
3173
            for(x=0; x<w; x++){
3174
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
3175
                if(v&(~255)) v= ~(v>>31);
3176
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
3177
            }
3178
        }
3179
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3180
            int64_t error= 0;
3181
            
3182
    if(pict->data[plane_index]) //FIXME gray hack
3183
            for(y=0; y<h; y++){
3184
                for(x=0; x<w; x++){
3185
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3186
                    error += d*d;
3187
                }
3188
            }
3189
            s->avctx->error[plane_index] += error;
3190
            s->avctx->error[3] += error;
3191
        }
3192
    }
3193

    
3194
    if(s->last_picture.data[0])
3195
        avctx->release_buffer(avctx, &s->last_picture);
3196

    
3197
    emms_c();
3198
    
3199
    return put_cabac_terminate(c, 1);
3200
}
3201

    
3202
static void common_end(SnowContext *s){
3203
    int plane_index, level, orientation;
3204

    
3205
    av_freep(&s->spatial_dwt_buffer);
3206

    
3207
    av_freep(&s->m.me.scratchpad);    
3208
    av_freep(&s->m.me.map);
3209
    av_freep(&s->m.me.score_map);
3210
 
3211
    av_freep(&s->block);
3212

    
3213
    for(plane_index=0; plane_index<3; plane_index++){    
3214
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3215
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3216
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3217
                
3218
                av_freep(&b->x);
3219
                av_freep(&b->coeff);
3220
            }
3221
        }
3222
    }
3223
}
3224

    
3225
static int encode_end(AVCodecContext *avctx)
3226
{
3227
    SnowContext *s = avctx->priv_data;
3228

    
3229
    common_end(s);
3230

    
3231
    return 0;
3232
}
3233

    
3234
static int decode_init(AVCodecContext *avctx)
3235
{
3236
//    SnowContext *s = avctx->priv_data;
3237

    
3238
    common_init(avctx);
3239
    
3240
    return 0;
3241
}
3242

    
3243
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3244
    SnowContext *s = avctx->priv_data;
3245
    CABACContext * const c= &s->c;
3246
    const int width= s->avctx->width;
3247
    const int height= s->avctx->height;
3248
    int bytes_read;
3249
    AVFrame *picture = data;
3250
    int log2_threshold, level, orientation, plane_index;
3251
    
3252

    
3253
    /* no supplementary picture */
3254
    if (buf_size == 0)
3255
        return 0;
3256

    
3257
    ff_init_cabac_decoder(c, buf, buf_size);
3258
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3259

    
3260
    memset(s->header_state, 0, sizeof(s->header_state));
3261

    
3262
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3263
    decode_header(s);
3264
    if(!s->block) alloc_blocks(s);
3265

    
3266
    frame_start(s);
3267
    //keyframe flag dupliaction mess FIXME
3268
    if(avctx->debug&FF_DEBUG_PICT_INFO)
3269
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3270
    
3271
    decode_blocks(s);
3272

    
3273
    for(plane_index=0; plane_index<3; plane_index++){
3274
        Plane *p= &s->plane[plane_index];
3275
        int w= p->width;
3276
        int h= p->height;
3277
        int x, y;
3278
        
3279
if(s->avctx->debug&2048){
3280
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3281
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3282

    
3283
        for(y=0; y<h; y++){
3284
            for(x=0; x<w; x++){
3285
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
3286
                if(v&(~255)) v= ~(v>>31);
3287
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3288
            }
3289
        }
3290
}
3291
        for(level=0; level<s->spatial_decomposition_count; level++){
3292
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3293
                SubBand *b= &p->band[level][orientation];
3294

    
3295
                decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3296
                if(orientation==0)
3297
                    correlate(s, b, b->buf, b->stride, 1, 0);
3298
            }
3299
        }
3300
if(!(s->avctx->debug&1024))
3301
        for(level=0; level<s->spatial_decomposition_count; level++){
3302
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3303
                SubBand *b= &p->band[level][orientation];
3304

    
3305
                dequantize(s, b, b->buf, b->stride);
3306
            }
3307
        }
3308

    
3309
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3310
        if(s->qlog == LOSSLESS_QLOG){
3311
            for(y=0; y<h; y++){
3312
                for(x=0; x<w; x++){
3313
                    s->spatial_dwt_buffer[y*w + x]<<=8;
3314
                }
3315
            }
3316
        }
3317
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3318

    
3319
        //FIXME optimize
3320
        for(y=0; y<h; y++){
3321
            for(x=0; x<w; x++){
3322
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
3323
                if(v&(~255)) v= ~(v>>31);
3324
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
3325
            }
3326
        }
3327
    }
3328
            
3329
    emms_c();
3330

    
3331
    if(s->last_picture.data[0])
3332
        avctx->release_buffer(avctx, &s->last_picture);
3333

    
3334
if(!(s->avctx->debug&2048))        
3335
    *picture= s->current_picture;
3336
else
3337
    *picture= s->mconly_picture;
3338
    
3339
    *data_size = sizeof(AVFrame);
3340
    
3341
    bytes_read= get_cabac_terminate(c);
3342
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
3343

    
3344
    return bytes_read;
3345
}
3346

    
3347
static int decode_end(AVCodecContext *avctx)
3348
{
3349
    SnowContext *s = avctx->priv_data;
3350

    
3351
    common_end(s);
3352

    
3353
    return 0;
3354
}
3355

    
3356
AVCodec snow_decoder = {
3357
    "snow",
3358
    CODEC_TYPE_VIDEO,
3359
    CODEC_ID_SNOW,
3360
    sizeof(SnowContext),
3361
    decode_init,
3362
    NULL,
3363
    decode_end,
3364
    decode_frame,
3365
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3366
    NULL
3367
};
3368

    
3369
AVCodec snow_encoder = {
3370
    "snow",
3371
    CODEC_TYPE_VIDEO,
3372
    CODEC_ID_SNOW,
3373
    sizeof(SnowContext),
3374
    encode_init,
3375
    encode_frame,
3376
    encode_end,
3377
};
3378

    
3379

    
3380
#if 0
3381
#undef malloc
3382
#undef free
3383
#undef printf
3384

3385
int main(){
3386
    int width=256;
3387
    int height=256;
3388
    int buffer[2][width*height];
3389
    SnowContext s;
3390
    int i;
3391
    s.spatial_decomposition_count=6;
3392
    s.spatial_decomposition_type=1;
3393
    
3394
    printf("testing 5/3 DWT\n");
3395
    for(i=0; i<width*height; i++)
3396
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3397
    
3398
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3399
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3400
    
3401
    for(i=0; i<width*height; i++)
3402
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3403

3404
    printf("testing 9/7 DWT\n");
3405
    s.spatial_decomposition_type=0;
3406
    for(i=0; i<width*height; i++)
3407
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3408
    
3409
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3410
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3411
    
3412
    for(i=0; i<width*height; i++)
3413
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3414
        
3415
    printf("testing AC coder\n");
3416
    memset(s.header_state, 0, sizeof(s.header_state));
3417
    ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
3418
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3419
        
3420
    for(i=-256; i<256; i++){
3421
START_TIMER
3422
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3423
STOP_TIMER("put_symbol")
3424
    }
3425
    put_cabac_terminate(&s.c, 1);
3426

3427
    memset(s.header_state, 0, sizeof(s.header_state));
3428
    ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
3429
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3430
    
3431
    for(i=-256; i<256; i++){
3432
        int j;
3433
START_TIMER
3434
        j= get_symbol(&s.c, s.header_state, 1);
3435
STOP_TIMER("get_symbol")
3436
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3437
    }
3438
{
3439
int level, orientation, x, y;
3440
int64_t errors[8][4];
3441
int64_t g=0;
3442

3443
    memset(errors, 0, sizeof(errors));
3444
    s.spatial_decomposition_count=3;
3445
    s.spatial_decomposition_type=0;
3446
    for(level=0; level<s.spatial_decomposition_count; level++){
3447
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3448
            int w= width  >> (s.spatial_decomposition_count-level);
3449
            int h= height >> (s.spatial_decomposition_count-level);
3450
            int stride= width  << (s.spatial_decomposition_count-level);
3451
            DWTELEM *buf= buffer[0];
3452
            int64_t error=0;
3453

3454
            if(orientation&1) buf+=w;
3455
            if(orientation>1) buf+=stride>>1;
3456
            
3457
            memset(buffer[0], 0, sizeof(int)*width*height);
3458
            buf[w/2 + h/2*stride]= 256*256;
3459
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3460
            for(y=0; y<height; y++){
3461
                for(x=0; x<width; x++){
3462
                    int64_t d= buffer[0][x + y*width];
3463
                    error += d*d;
3464
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3465
                }
3466
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3467
            }
3468
            error= (int)(sqrt(error)+0.5);
3469
            errors[level][orientation]= error;
3470
            if(g) g=ff_gcd(g, error);
3471
            else g= error;
3472
        }
3473
    }
3474
    printf("static int const visual_weight[][4]={\n");
3475
    for(level=0; level<s.spatial_decomposition_count; level++){
3476
        printf("  {");
3477
        for(orientation=0; orientation<4; orientation++){
3478
            printf("%8lld,", errors[level][orientation]/g);
3479
        }
3480
        printf("},\n");
3481
    }
3482
    printf("};\n");
3483
    {
3484
            int level=2;
3485
            int orientation=3;
3486
            int w= width  >> (s.spatial_decomposition_count-level);
3487
            int h= height >> (s.spatial_decomposition_count-level);
3488
            int stride= width  << (s.spatial_decomposition_count-level);
3489
            DWTELEM *buf= buffer[0];
3490
            int64_t error=0;
3491

3492
            buf+=w;
3493
            buf+=stride>>1;
3494
            
3495
            memset(buffer[0], 0, sizeof(int)*width*height);
3496
#if 1
3497
            for(y=0; y<height; y++){
3498
                for(x=0; x<width; x++){
3499
                    int tab[4]={0,2,3,1};
3500
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3501
                }
3502
            }
3503
            ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3504
#else
3505
            for(y=0; y<h; y++){
3506
                for(x=0; x<w; x++){
3507
                    buf[x + y*stride  ]=169;
3508
                    buf[x + y*stride-w]=64;
3509
                }
3510
            }
3511
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3512
#endif
3513
            for(y=0; y<height; y++){
3514
                for(x=0; x<width; x++){
3515
                    int64_t d= buffer[0][x + y*width];
3516
                    error += d*d;
3517
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3518
                }
3519
                if(ABS(height/2-y)<9) printf("\n");
3520
            }
3521
    }
3522

    
3523
}
3524
    return 0;
3525
}
3526
#endif
3527