Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 93fbdb5a

History | View | Annotate | Download (112 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22
#include "cabac.h"
23

    
24
#include "mpegvideo.h"
25

    
26
#undef NDEBUG
27
#include <assert.h>
28

    
29
#define MAX_DECOMPOSITIONS 8
30
#define MAX_PLANES 4
31
#define DWTELEM int
32
#define QROOT 8 
33
#define LOSSLESS_QLOG -128
34

    
35
static const int8_t quant3[256]={
36
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
52
};
53
static const int8_t quant3b[256]={
54
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
};
71
static const int8_t quant5[256]={
72
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
88
};
89
static const int8_t quant7[256]={
90
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
93
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
106
};
107
static const int8_t quant9[256]={
108
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
124
};
125
static const int8_t quant11[256]={
126
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
142
};
143
static const int8_t quant13[256]={
144
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
145
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
160
};
161

    
162
#define OBMC_MAX 64
163
#if 0 //64*cubic
164
static const uint8_t obmc32[1024]={
165
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
167
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
168
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
169
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
170
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
171
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
172
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
173
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
174
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
175
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
176
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
177
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
178
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
179
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
180
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
182
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
183
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
184
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
185
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
186
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
187
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
190
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
191
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
194
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
195
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
196
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197
//error:0.000022
198
};
199
static const uint8_t obmc16[256]={
200
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
201
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
202
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
203
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
204
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
205
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
206
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
207
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
209
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
210
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
211
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
212
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
213
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
214
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216
//error:0.000033
217
};
218
#elif 1 // 64*linear
219
static const uint8_t obmc32[1024]={
220
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
221
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
222
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
223
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
224
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
225
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
226
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
227
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
228
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
229
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
230
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
231
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
232
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
233
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
234
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
235
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
237
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
238
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
239
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
240
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
241
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
242
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
243
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
244
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
245
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
246
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
249
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
250
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
251
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
252
 //error:0.000020
253
};
254
static const uint8_t obmc16[256]={
255
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
256
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
257
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
258
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
259
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
260
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
261
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
262
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
264
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
265
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
266
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
267
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
268
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
269
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
270
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
271
//error:0.000015
272
};
273
#else //64*cos
274
static const uint8_t obmc32[1024]={
275
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
277
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
278
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
279
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
280
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
281
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
282
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
283
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
284
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
285
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
286
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
287
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
288
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
289
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
290
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
292
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
293
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
294
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
295
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
296
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
297
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
300
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
301
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
304
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
305
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
306
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307
//error:0.000022
308
};
309
static const uint8_t obmc16[256]={
310
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
311
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
312
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
313
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
314
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
315
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
316
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
317
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
319
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
320
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
321
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
322
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
323
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
324
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326
//error:0.000022
327
};
328
#endif
329

    
330
typedef struct SubBand{
331
    int level;
332
    int stride;
333
    int width;
334
    int height;
335
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
336
    DWTELEM *buf;
337
    struct SubBand *parent;
338
    uint8_t state[/*7*2*/ 7 + 512][32];
339
}SubBand;
340

    
341
typedef struct Plane{
342
    int width;
343
    int height;
344
    SubBand band[MAX_DECOMPOSITIONS][4];
345
}Plane;
346

    
347
typedef struct SnowContext{
348
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
349

    
350
    AVCodecContext *avctx;
351
    CABACContext c;
352
    DSPContext dsp;
353
    AVFrame input_picture;
354
    AVFrame current_picture;
355
    AVFrame last_picture;
356
    AVFrame mconly_picture;
357
//     uint8_t q_context[16];
358
    uint8_t header_state[32];
359
    int keyframe;
360
    int version;
361
    int spatial_decomposition_type;
362
    int temporal_decomposition_type;
363
    int spatial_decomposition_count;
364
    int temporal_decomposition_count;
365
    DWTELEM *spatial_dwt_buffer;
366
    DWTELEM *pred_buffer;
367
    int colorspace_type;
368
    int chroma_h_shift;
369
    int chroma_v_shift;
370
    int spatial_scalability;
371
    int qlog;
372
    int mv_scale;
373
    int qbias;
374
#define QBIAS_SHIFT 3
375
    int b_width; //FIXME remove?
376
    int b_height; //FIXME remove?
377
    Plane plane[MAX_PLANES];
378
    SubBand mb_band;
379
    SubBand mv_band[2];
380
    
381
    uint16_t *mb_type;
382
    uint8_t *mb_mean;
383
    uint32_t *dummy;
384
    int16_t (*motion_val8)[2];
385
    int16_t (*motion_val16)[2];
386
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
387
}SnowContext;
388

    
389
#define QEXPSHIFT 7 //FIXME try to change this to 0
390
static const uint8_t qexp[8]={
391
    128, 140, 152, 166, 181, 197, 215, 235
392
//   64,  70,  76,  83,  91,  99, 108, 117
393
//   32,  35,  38,  41,  45,  49,  54,  59
394
//   16,  17,  19,  21,  23,  25,  27,  29
395
//    8,   9,  10,  10,  11,  12,  13,  15
396
};
397

    
398
static inline int mirror(int v, int m){
399
    if     (v<0) return -v;
400
    else if(v>m) return 2*m-v;
401
    else         return v;
402
}
403

    
404
static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
405
    int i;
406

    
407
    if(v){
408
        const int a= ABS(v);
409
        const int e= av_log2(a);
410
#if 1
411
        const int el= FFMIN(e, 10);   
412
        put_cabac(c, state+0, 0);
413

    
414
        for(i=0; i<el; i++){
415
            put_cabac(c, state+1+i, 1);  //1..10
416
        }
417
        for(; i<e; i++){
418
            put_cabac(c, state+1+9, 1);  //1..10
419
        }
420
        put_cabac(c, state+1+FFMIN(i,9), 0);
421

    
422
        for(i=e-1; i>=el; i--){
423
            put_cabac(c, state+22+9, (a>>i)&1); //22..31
424
        }
425
        for(; i>=0; i--){
426
            put_cabac(c, state+22+i, (a>>i)&1); //22..31
427
        }
428

    
429
        if(is_signed)
430
            put_cabac(c, state+11 + el, v < 0); //11..21
431
#else
432
        
433
        put_cabac(c, state+0, 0);
434
        if(e<=9){
435
            for(i=0; i<e; i++){
436
                put_cabac(c, state+1+i, 1);  //1..10
437
            }
438
            put_cabac(c, state+1+i, 0);
439

    
440
            for(i=e-1; i>=0; i--){
441
                put_cabac(c, state+22+i, (a>>i)&1); //22..31
442
            }
443

    
444
            if(is_signed)
445
                put_cabac(c, state+11 + e, v < 0); //11..21
446
        }else{
447
            for(i=0; i<e; i++){
448
                put_cabac(c, state+1+FFMIN(i,9), 1);  //1..10
449
            }
450
            put_cabac(c, state+1+FFMIN(i,9), 0);
451

    
452
            for(i=e-1; i>=0; i--){
453
                put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
454
            }
455

    
456
            if(is_signed)
457
                put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
458
        }
459
#endif
460
    }else{
461
        put_cabac(c, state+0, 1);
462
    }
463
}
464

    
465
static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
466
    if(get_cabac(c, state+0))
467
        return 0;
468
    else{
469
        int i, e, a, el;
470
 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
471
        for(e=0; e<10; e++){ 
472
            if(get_cabac(c, state + 1 + e)==0) // 1..10
473
                break;
474
        }
475
        el= e;
476
 
477
        if(e==10){
478
            while(get_cabac(c, state + 1 + 9)) //10
479
                e++;
480
        }
481
        a= 1;
482
        for(i=e-1; i>=el; i--){
483
            a += a + get_cabac(c, state+22+9); //31
484
        }
485
        for(; i>=0; i--){
486
            a += a + get_cabac(c, state+22+i); //22..31
487
        }
488

    
489
        if(is_signed && get_cabac(c, state+11 + el)) //11..21
490
            return -a;
491
        else
492
            return a;
493
    }
494
}
495

    
496
static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
497
    int i;
498
    int r= log2>=0 ? 1<<log2 : 1;
499

    
500
    assert(v>=0);
501
    assert(log2>=-4);
502

    
503
    while(v >= r){
504
        put_cabac(c, state+4+log2, 1);
505
        v -= r;
506
        log2++;
507
        if(log2>0) r+=r;
508
    }
509
    put_cabac(c, state+4+log2, 0);
510
    
511
    for(i=log2-1; i>=0; i--){
512
        put_cabac(c, state+31-i, (v>>i)&1);
513
    }
514
}
515

    
516
static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
517
    int i;
518
    int r= log2>=0 ? 1<<log2 : 1;
519
    int v=0;
520

    
521
    assert(log2>=-4);
522

    
523
    while(get_cabac(c, state+4+log2)){
524
        v+= r;
525
        log2++;
526
        if(log2>0) r+=r;
527
    }
528
    
529
    for(i=log2-1; i>=0; i--){
530
        v+= get_cabac(c, state+31-i)<<i;
531
    }
532

    
533
    return v;
534
}
535

    
536
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
537
    const int mirror_left= !highpass;
538
    const int mirror_right= (width&1) ^ highpass;
539
    const int w= (width>>1) - 1 + (highpass & width);
540
    int i;
541

    
542
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
543
    if(mirror_left){
544
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
545
        dst += dst_step;
546
        src += src_step;
547
    }
548
    
549
    for(i=0; i<w; i++){
550
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
551
    }
552
    
553
    if(mirror_right){
554
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
555
    }
556
}
557

    
558
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
559
    const int mirror_left= !highpass;
560
    const int mirror_right= (width&1) ^ highpass;
561
    const int w= (width>>1) - 1 + (highpass & width);
562
    int i;
563

    
564
    if(mirror_left){
565
        int r= 3*2*ref[0];
566
        r += r>>4;
567
        r += r>>8;
568
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
569
        dst += dst_step;
570
        src += src_step;
571
    }
572
    
573
    for(i=0; i<w; i++){
574
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
575
        r += r>>4;
576
        r += r>>8;
577
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
578
    }
579
    
580
    if(mirror_right){
581
        int r= 3*2*ref[w*ref_step];
582
        r += r>>4;
583
        r += r>>8;
584
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
585
    }
586
}
587

    
588

    
589
static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
590
    int x, i;
591
    
592
    for(x=start; x<width; x+=2){
593
        int64_t sum=0;
594

    
595
        for(i=0; i<n; i++){
596
            int x2= x + 2*i - n + 1;
597
            if     (x2<     0) x2= -x2;
598
            else if(x2>=width) x2= 2*width-x2-2;
599
            sum += coeffs[i]*(int64_t)dst[x2];
600
        }
601
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
602
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
603
    }
604
}
605

    
606
static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
607
    int x, y, i;
608
    for(y=start; y<height; y+=2){
609
        for(x=0; x<width; x++){
610
            int64_t sum=0;
611
    
612
            for(i=0; i<n; i++){
613
                int y2= y + 2*i - n + 1;
614
                if     (y2<      0) y2= -y2;
615
                else if(y2>=height) y2= 2*height-y2-2;
616
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
617
            }
618
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
619
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
620
        }
621
    }
622
}
623

    
624
#define SCALEX 1
625
#define LX0 0
626
#define LX1 1
627

    
628
#if 0 // more accurate 9/7
629
#define N1 2
630
#define SHIFT1 14
631
#define COEFFS1 (int[]){-25987,-25987}
632
#define N2 2
633
#define SHIFT2 19
634
#define COEFFS2 (int[]){-27777,-27777}
635
#define N3 2
636
#define SHIFT3 15
637
#define COEFFS3 (int[]){28931,28931}
638
#define N4 2
639
#define SHIFT4 15
640
#define COEFFS4 (int[]){14533,14533}
641
#elif 1 // 13/7 CRF
642
#define N1 4
643
#define SHIFT1 4
644
#define COEFFS1 (int[]){1,-9,-9,1}
645
#define N2 4
646
#define SHIFT2 4
647
#define COEFFS2 (int[]){-1,5,5,-1}
648
#define N3 0
649
#define SHIFT3 1
650
#define COEFFS3 NULL
651
#define N4 0
652
#define SHIFT4 1
653
#define COEFFS4 NULL
654
#elif 1 // 3/5
655
#define LX0 1
656
#define LX1 0
657
#define SCALEX 0.5
658
#define N1 2
659
#define SHIFT1 1
660
#define COEFFS1 (int[]){1,1}
661
#define N2 2
662
#define SHIFT2 2
663
#define COEFFS2 (int[]){-1,-1}
664
#define N3 0
665
#define SHIFT3 0
666
#define COEFFS3 NULL
667
#define N4 0
668
#define SHIFT4 0
669
#define COEFFS4 NULL
670
#elif 1 // 11/5 
671
#define N1 0
672
#define SHIFT1 1
673
#define COEFFS1 NULL
674
#define N2 2
675
#define SHIFT2 2
676
#define COEFFS2 (int[]){-1,-1}
677
#define N3 2
678
#define SHIFT3 0
679
#define COEFFS3 (int[]){-1,-1}
680
#define N4 4
681
#define SHIFT4 7
682
#define COEFFS4 (int[]){-5,29,29,-5}
683
#define SCALEX 4
684
#elif 1 // 9/7 CDF
685
#define N1 2
686
#define SHIFT1 7
687
#define COEFFS1 (int[]){-203,-203}
688
#define N2 2
689
#define SHIFT2 12
690
#define COEFFS2 (int[]){-217,-217}
691
#define N3 2
692
#define SHIFT3 7
693
#define COEFFS3 (int[]){113,113}
694
#define N4 2
695
#define SHIFT4 9
696
#define COEFFS4 (int[]){227,227}
697
#define SCALEX 1
698
#elif 1 // 7/5 CDF
699
#define N1 0
700
#define SHIFT1 1
701
#define COEFFS1 NULL
702
#define N2 2
703
#define SHIFT2 2
704
#define COEFFS2 (int[]){-1,-1}
705
#define N3 2
706
#define SHIFT3 0
707
#define COEFFS3 (int[]){-1,-1}
708
#define N4 2
709
#define SHIFT4 4
710
#define COEFFS4 (int[]){3,3}
711
#elif 1 // 9/7 MN
712
#define N1 4
713
#define SHIFT1 4
714
#define COEFFS1 (int[]){1,-9,-9,1}
715
#define N2 2
716
#define SHIFT2 2
717
#define COEFFS2 (int[]){1,1}
718
#define N3 0
719
#define SHIFT3 1
720
#define COEFFS3 NULL
721
#define N4 0
722
#define SHIFT4 1
723
#define COEFFS4 NULL
724
#else // 13/7 CRF
725
#define N1 4
726
#define SHIFT1 4
727
#define COEFFS1 (int[]){1,-9,-9,1}
728
#define N2 4
729
#define SHIFT2 4
730
#define COEFFS2 (int[]){-1,5,5,-1}
731
#define N3 0
732
#define SHIFT3 1
733
#define COEFFS3 NULL
734
#define N4 0
735
#define SHIFT4 1
736
#define COEFFS4 NULL
737
#endif
738
static void horizontal_decomposeX(int *b, int width){
739
    int temp[width];
740
    const int width2= width>>1;
741
    const int w2= (width+1)>>1;
742
    int A1,A2,A3,A4, x;
743

    
744
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
745
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
746
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
747
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
748
    
749
    for(x=0; x<width2; x++){
750
        temp[x   ]= b[2*x    ];
751
        temp[x+w2]= b[2*x + 1];
752
    }
753
    if(width&1)
754
        temp[x   ]= b[2*x    ];
755
    memcpy(b, temp, width*sizeof(int));
756
}
757

    
758
static void horizontal_composeX(int *b, int width){
759
    int temp[width];
760
    const int width2= width>>1;
761
    int A1,A2,A3,A4, x;
762
    const int w2= (width+1)>>1;
763

    
764
    memcpy(temp, b, width*sizeof(int));
765
    for(x=0; x<width2; x++){
766
        b[2*x    ]= temp[x   ];
767
        b[2*x + 1]= temp[x+w2];
768
    }
769
    if(width&1)
770
        b[2*x    ]= temp[x   ];
771

    
772
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
773
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
774
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
775
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
776
}
777

    
778
static void spatial_decomposeX(int *buffer, int width, int height, int stride){
779
    int x, y;
780
  
781
    for(y=0; y<height; y++){
782
        for(x=0; x<width; x++){
783
            buffer[y*stride + x] *= SCALEX;
784
        }
785
    }
786

    
787
    for(y=0; y<height; y++){
788
        horizontal_decomposeX(buffer + y*stride, width);
789
    }
790
    
791
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
792
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
793
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
794
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
795
}
796

    
797
static void spatial_composeX(int *buffer, int width, int height, int stride){
798
    int x, y;
799
  
800
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
801
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
802
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
803
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
804

    
805
    for(y=0; y<height; y++){
806
        horizontal_composeX(buffer + y*stride, width);
807
    }
808

    
809
    for(y=0; y<height; y++){
810
        for(x=0; x<width; x++){
811
            buffer[y*stride + x] /= SCALEX;
812
        }
813
    }
814
}
815

    
816
static void horizontal_decompose53i(int *b, int width){
817
    int temp[width];
818
    const int width2= width>>1;
819
    int A1,A2,A3,A4, x;
820
    const int w2= (width+1)>>1;
821

    
822
    for(x=0; x<width2; x++){
823
        temp[x   ]= b[2*x    ];
824
        temp[x+w2]= b[2*x + 1];
825
    }
826
    if(width&1)
827
        temp[x   ]= b[2*x    ];
828
#if 0
829
    A2= temp[1       ];
830
    A4= temp[0       ];
831
    A1= temp[0+width2];
832
    A1 -= (A2 + A4)>>1;
833
    A4 += (A1 + 1)>>1;
834
    b[0+width2] = A1;
835
    b[0       ] = A4;
836
    for(x=1; x+1<width2; x+=2){
837
        A3= temp[x+width2];
838
        A4= temp[x+1     ];
839
        A3 -= (A2 + A4)>>1;
840
        A2 += (A1 + A3 + 2)>>2;
841
        b[x+width2] = A3;
842
        b[x       ] = A2;
843

844
        A1= temp[x+1+width2];
845
        A2= temp[x+2       ];
846
        A1 -= (A2 + A4)>>1;
847
        A4 += (A1 + A3 + 2)>>2;
848
        b[x+1+width2] = A1;
849
        b[x+1       ] = A4;
850
    }
851
    A3= temp[width-1];
852
    A3 -= A2;
853
    A2 += (A1 + A3 + 2)>>2;
854
    b[width -1] = A3;
855
    b[width2-1] = A2;
856
#else        
857
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
858
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
859
#endif
860
}
861

    
862
static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
863
    int i;
864
    
865
    for(i=0; i<width; i++){
866
        b1[i] -= (b0[i] + b2[i])>>1;
867
    }
868
}
869

    
870
static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
871
    int i;
872
    
873
    for(i=0; i<width; i++){
874
        b1[i] += (b0[i] + b2[i] + 2)>>2;
875
    }
876
}
877

    
878
static void spatial_decompose53i(int *buffer, int width, int height, int stride){
879
    int x, y;
880
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
881
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
882
  
883
    for(y=-2; y<height; y+=2){
884
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
885
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
886

    
887
{START_TIMER
888
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
889
        if(y+2 < height) horizontal_decompose53i(b3, width);
890
STOP_TIMER("horizontal_decompose53i")}
891
        
892
{START_TIMER
893
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
894
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
895
STOP_TIMER("vertical_decompose53i*")}
896
        
897
        b0=b2;
898
        b1=b3;
899
    }
900
}
901

    
902
#define lift5 lift
903
#if 1
904
#define W_AM 3
905
#define W_AO 0
906
#define W_AS 1
907

    
908
#define W_BM 1
909
#define W_BO 8
910
#define W_BS 4
911

    
912
#undef lift5
913
#define W_CM 9999
914
#define W_CO 2
915
#define W_CS 2
916

    
917
#define W_DM 15
918
#define W_DO 16
919
#define W_DS 5
920
#elif 0
921
#define W_AM 55
922
#define W_AO 16
923
#define W_AS 5
924

    
925
#define W_BM 3
926
#define W_BO 32
927
#define W_BS 6
928

    
929
#define W_CM 127
930
#define W_CO 64
931
#define W_CS 7
932

    
933
#define W_DM 7
934
#define W_DO 8
935
#define W_DS 4
936
#elif 0
937
#define W_AM 97
938
#define W_AO 32
939
#define W_AS 6
940

    
941
#define W_BM 63
942
#define W_BO 512
943
#define W_BS 10
944

    
945
#define W_CM 13
946
#define W_CO 8
947
#define W_CS 4
948

    
949
#define W_DM 15
950
#define W_DO 16
951
#define W_DS 5
952

    
953
#else
954

    
955
#define W_AM 203
956
#define W_AO 64
957
#define W_AS 7
958

    
959
#define W_BM 217
960
#define W_BO 2048
961
#define W_BS 12
962

    
963
#define W_CM 113
964
#define W_CO 64
965
#define W_CS 7
966

    
967
#define W_DM 227
968
#define W_DO 128
969
#define W_DS 9
970
#endif
971
static void horizontal_decompose97i(int *b, int width){
972
    int temp[width];
973
    const int w2= (width+1)>>1;
974

    
975
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
976
    lift (temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
977
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
978
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
979
}
980

    
981

    
982
static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
983
    int i;
984
    
985
    for(i=0; i<width; i++){
986
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
987
    }
988
}
989

    
990
static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
991
    int i;
992
    
993
    for(i=0; i<width; i++){
994
#ifdef lift5
995
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
996
#else
997
        int r= 3*(b0[i] + b2[i]);
998
        r+= r>>4;
999
        r+= r>>8;
1000
        b1[i] += (r+W_CO)>>W_CS;
1001
#endif
1002
    }
1003
}
1004

    
1005
static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
1006
    int i;
1007
    
1008
    for(i=0; i<width; i++){
1009
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1010
    }
1011
}
1012

    
1013
static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
1014
    int i;
1015
    
1016
    for(i=0; i<width; i++){
1017
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1018
    }
1019
}
1020

    
1021
static void spatial_decompose97i(int *buffer, int width, int height, int stride){
1022
    int x, y;
1023
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1024
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1025
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1026
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1027
  
1028
    for(y=-4; y<height; y+=2){
1029
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1030
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1031

    
1032
{START_TIMER
1033
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1034
        if(y+4 < height) horizontal_decompose97i(b5, width);
1035
if(width>400){
1036
STOP_TIMER("horizontal_decompose97i")
1037
}}
1038
        
1039
{START_TIMER
1040
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1041
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1042
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1043
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1044

    
1045
if(width>400){
1046
STOP_TIMER("vertical_decompose97i")
1047
}}
1048
        
1049
        b0=b2;
1050
        b1=b3;
1051
        b2=b4;
1052
        b3=b5;
1053
    }
1054
}
1055

    
1056
static void spatial_dwt(SnowContext *s, int *buffer, int width, int height, int stride){
1057
    int level;
1058
    
1059
    for(level=0; level<s->spatial_decomposition_count; level++){
1060
        switch(s->spatial_decomposition_type){
1061
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1062
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1063
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1064
        }
1065
    }
1066
}
1067

    
1068
static void horizontal_compose53i(int *b, int width){
1069
    int temp[width];
1070
    const int width2= width>>1;
1071
    const int w2= (width+1)>>1;
1072
    int A1,A2,A3,A4, x;
1073

    
1074
#if 0
1075
    A2= temp[1       ];
1076
    A4= temp[0       ];
1077
    A1= temp[0+width2];
1078
    A1 -= (A2 + A4)>>1;
1079
    A4 += (A1 + 1)>>1;
1080
    b[0+width2] = A1;
1081
    b[0       ] = A4;
1082
    for(x=1; x+1<width2; x+=2){
1083
        A3= temp[x+width2];
1084
        A4= temp[x+1     ];
1085
        A3 -= (A2 + A4)>>1;
1086
        A2 += (A1 + A3 + 2)>>2;
1087
        b[x+width2] = A3;
1088
        b[x       ] = A2;
1089

1090
        A1= temp[x+1+width2];
1091
        A2= temp[x+2       ];
1092
        A1 -= (A2 + A4)>>1;
1093
        A4 += (A1 + A3 + 2)>>2;
1094
        b[x+1+width2] = A1;
1095
        b[x+1       ] = A4;
1096
    }
1097
    A3= temp[width-1];
1098
    A3 -= A2;
1099
    A2 += (A1 + A3 + 2)>>2;
1100
    b[width -1] = A3;
1101
    b[width2-1] = A2;
1102
#else   
1103
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1104
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1105
#endif
1106
    for(x=0; x<width2; x++){
1107
        b[2*x    ]= temp[x   ];
1108
        b[2*x + 1]= temp[x+w2];
1109
    }
1110
    if(width&1)
1111
        b[2*x    ]= temp[x   ];
1112
}
1113

    
1114
static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
1115
    int i;
1116
    
1117
    for(i=0; i<width; i++){
1118
        b1[i] += (b0[i] + b2[i])>>1;
1119
    }
1120
}
1121

    
1122
static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
1123
    int i;
1124
    
1125
    for(i=0; i<width; i++){
1126
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1127
    }
1128
}
1129

    
1130
static void spatial_compose53i(int *buffer, int width, int height, int stride){
1131
    int x, y;
1132
    DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1133
    DWTELEM *b1= buffer + mirror(-1  , height-1)*stride;
1134
  
1135
    for(y=-1; y<=height; y+=2){
1136
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1137
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1138

    
1139
{START_TIMER
1140
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1141
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1142
STOP_TIMER("vertical_compose53i*")}
1143

    
1144
{START_TIMER
1145
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1146
        if(b0 <= b2) horizontal_compose53i(b1, width);
1147
STOP_TIMER("horizontal_compose53i")}
1148

    
1149
        b0=b2;
1150
        b1=b3;
1151
    }
1152
}   
1153

    
1154
 
1155
static void horizontal_compose97i(int *b, int width){
1156
    int temp[width];
1157
    const int w2= (width+1)>>1;
1158

    
1159
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1160
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1161
    lift (b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1162
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1163
}
1164

    
1165
static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1166
    int i;
1167
    
1168
    for(i=0; i<width; i++){
1169
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1170
    }
1171
}
1172

    
1173
static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1174
    int i;
1175
    
1176
    for(i=0; i<width; i++){
1177
#ifdef lift5
1178
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1179
#else
1180
        int r= 3*(b0[i] + b2[i]);
1181
        r+= r>>4;
1182
        r+= r>>8;
1183
        b1[i] -= (r+W_CO)>>W_CS;
1184
#endif
1185
    }
1186
}
1187

    
1188
static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1189
    int i;
1190
    
1191
    for(i=0; i<width; i++){
1192
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1193
    }
1194
}
1195

    
1196
static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1197
    int i;
1198
    
1199
    for(i=0; i<width; i++){
1200
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1201
    }
1202
}
1203

    
1204
static void spatial_compose97i(int *buffer, int width, int height, int stride){
1205
    int x, y;
1206
    DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1207
    DWTELEM *b1= buffer + mirror(-3  , height-1)*stride;
1208
    DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1209
    DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1210

    
1211
    for(y=-3; y<=height; y+=2){
1212
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1213
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1214

    
1215
        if(stride == width && y+4 < height && 0){ 
1216
            int x;
1217
            for(x=0; x<width/2; x++)
1218
                b5[x] += 64*2;
1219
            for(; x<width; x++)
1220
                b5[x] += 169*2;
1221
        }
1222
        
1223
{START_TIMER
1224
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1225
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1226
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1227
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1228
if(width>400){
1229
STOP_TIMER("vertical_compose97i")}}
1230

    
1231
{START_TIMER
1232
        if(y-1>=  0) horizontal_compose97i(b0, width);
1233
        if(b0 <= b2) horizontal_compose97i(b1, width);
1234
if(width>400 && b0 <= b2){
1235
STOP_TIMER("horizontal_compose97i")}}
1236
        
1237
        b0=b2;
1238
        b1=b3;
1239
        b2=b4;
1240
        b3=b5;
1241
    }
1242
}
1243

    
1244
static void spatial_idwt(SnowContext *s, int *buffer, int width, int height, int stride){
1245
    int level;
1246

    
1247
    for(level=s->spatial_decomposition_count-1; level>=0; level--){
1248
        switch(s->spatial_decomposition_type){
1249
        case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1250
        case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1251
        case 2: spatial_composeX  (buffer, width>>level, height>>level, stride<<level); break;
1252
        }
1253
    }
1254
}
1255

    
1256
static const int hilbert[16][2]={
1257
    {0,0}, {1,0}, {1,1}, {0,1},
1258
    {0,2}, {0,3}, {1,3}, {1,2},
1259
    {2,2}, {2,3}, {3,3}, {3,2},
1260
    {3,1}, {2,1}, {2,0}, {3,0},
1261
};
1262
#if 0
1263
-o o-
1264
 | |
1265
 o-o
1266
 
1267
-o-o o-o-
1268
   | | 
1269
 o-o o-o
1270
 |     |
1271
 o o-o o
1272
 | | | |
1273
 o-o o-o
1274
 
1275
 0112122312232334122323342334
1276
 0123456789ABCDEF0123456789AB
1277
 RLLRMRRLLRRMRLLMLRRLMLLRRLLM
1278
 
1279
 4  B  F 14 1B
1280
 4 11 15 20 27
1281
 
1282
-o o-o-o o-o-o o-
1283
 | |   | |   | |
1284
 o-o o-o o-o o-o
1285
     |     |
1286
 o-o o-o o-o o-o
1287
 | |   | |   | |
1288
 o o-o-o o-o-o o
1289
 |             |
1290
 o-o o-o-o-o o-o
1291
   | |     | | 
1292
 o-o o-o o-o o-o
1293
 |     | |     |
1294
 o o-o o o o-o o
1295
 | | | | | | | |
1296
 o-o o-o o-o o-o
1297

1298
#endif
1299

    
1300
#define SVI(a, i, x, y) \
1301
{\
1302
    a[i][0]= x;\
1303
    a[i][1]= y;\
1304
    i++;\
1305
}
1306

    
1307
static int sig_cmp(const void *a, const void *b){
1308
    const int16_t* da = (const int16_t *) a;
1309
    const int16_t* db = (const int16_t *) b;
1310
    
1311
    if(da[1] != db[1]) return da[1] - db[1];
1312
    else               return da[0] - db[0];
1313
}
1314

    
1315
static int deint(unsigned int a){
1316
    a &= 0x55555555;         //0 1 2 3 4 5 6 7 8 9 A B C D E F
1317
    a +=     a & 0x11111111; // 01  23  45  67  89  AB  CD  EF
1318
    a +=  3*(a & 0x0F0F0F0F);//   0123    4567    89AB    CDEF
1319
    a += 15*(a & 0x00FF00FF);//       01234567        89ABCDEF
1320
    a +=255*(a & 0x0000FFFF);//               0123456789ABCDEF
1321
    return a>>15;
1322
}
1323

    
1324
static void encode_subband_z0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1325
    const int level= b->level;
1326
    const int w= b->width;
1327
    const int h= b->height;
1328
    int x, y, pos;
1329

    
1330
    if(1){
1331
        int run=0;
1332
        int runs[w*h];
1333
        int run_index=0;
1334
        int count=0;
1335
        
1336
        for(pos=0; ; pos++){
1337
            int x= deint(pos   );
1338
            int y= deint(pos>>1);
1339
            int v, p=0, pr=0, pd=0;
1340
            int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;
1341

    
1342
            if(x>=w || y>=h){
1343
                if(x>=w && y>=h)
1344
                    break;
1345
                continue;
1346
            }
1347
            count++;
1348
                
1349
            v= src[x + y*stride];
1350

    
1351
            if(y){
1352
                t= src[x + (y-1)*stride];
1353
                if(x){
1354
                    lt= src[x - 1 + (y-1)*stride];
1355
                }
1356
                if(x + 1 < w){
1357
                    /*rt= src[x + 1 + (y-1)*stride]*/;
1358
                }
1359
            }
1360
            if(x){
1361
                l= src[x - 1 + y*stride];
1362
                /*if(x > 1){
1363
                    if(orientation==1) ll= src[y + (x-2)*stride];
1364
                    else               ll= src[x - 2 + y*stride];
1365
                }*/
1366
            }
1367
            if(parent){
1368
                int px= x>>1;
1369
                int py= y>>1;
1370
                if(px<b->parent->width && py<b->parent->height){
1371
                    p= parent[px + py*2*stride];
1372
                    /*if(px+1<b->parent->width) 
1373
                        pr= parent[px + 1 + py*2*stride];
1374
                    if(py+1<b->parent->height) 
1375
                        pd= parent[px + (py+1)*2*stride];*/
1376
                }
1377
            }
1378
            if(!(/*ll|*/l|lt|t|/*rt|*/p)){
1379
                if(v){
1380
                    runs[run_index++]= run;
1381
                    run=0;
1382
                }else{
1383
                    run++;
1384
                }
1385
            }
1386
        }
1387
        assert(count==w*h);
1388
        runs[run_index++]= run;
1389
        run_index=0;
1390
        run= runs[run_index++];
1391

    
1392
        put_symbol(&s->c, b->state[1], run, 0);
1393
        
1394
        for(pos=0; ; pos++){
1395
            int x= deint(pos   );
1396
            int y= deint(pos>>1);
1397
            int v, p=0, pr=0, pd=0;
1398
            int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;
1399

    
1400
            if(x>=w || y>=h){
1401
                if(x>=w && y>=h)
1402
                    break;
1403
                continue;
1404
            }
1405
            v= src[x + y*stride];
1406

    
1407
            if(y){
1408
                t= src[x + (y-1)*stride];
1409
                if(x){
1410
                    lt= src[x - 1 + (y-1)*stride];
1411
                }
1412
                if(x + 1 < w){
1413
//                    rt= src[x + 1 + (y-1)*stride];
1414
                }
1415
            }
1416
            if(x){
1417
                l= src[x - 1 + y*stride];
1418
                /*if(x > 1){
1419
                    if(orientation==1) ll= src[y + (x-2)*stride];
1420
                    else               ll= src[x - 2 + y*stride];
1421
                }*/
1422
            }
1423

    
1424
            if(parent){
1425
                int px= x>>1;
1426
                int py= y>>1;
1427
                if(px<b->parent->width && py<b->parent->height){
1428
                    p= parent[px + py*2*stride];
1429
/*                        if(px+1<b->parent->width) 
1430
                        pr= parent[px + 1 + py*2*stride];
1431
                    if(py+1<b->parent->height) 
1432
                        pd= parent[px + (py+1)*2*stride];*/
1433
                }
1434
            }
1435
            if(/*ll|*/l|lt|t|/*rt|*/p){
1436
                int context= av_log2(/*ABS(ll) + */2*(3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p)));
1437

    
1438
                put_cabac(&s->c, &b->state[0][context], !!v);
1439
            }else{
1440
                if(!run){
1441
                    run= runs[run_index++];
1442
                    put_symbol(&s->c, b->state[1], run, 0);
1443
                    assert(v);
1444
                }else{
1445
                    run--;
1446
                    assert(!v);
1447
                }
1448
            }
1449
            if(v){
1450
                int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p));
1451

    
1452
                put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
1453
                put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1454
            }
1455
        }
1456
    }
1457
}
1458

    
1459
static void encode_subband_bp(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1460
    const int level= b->level;
1461
    const int w= b->width;
1462
    const int h= b->height;
1463
    int x, y;
1464

    
1465
#if 0
1466
    int plane;
1467
    for(plane=24; plane>=0; plane--){
1468
        int run=0;
1469
        int runs[w*h];
1470
        int run_index=0;
1471
                
1472
        for(y=0; y<h; y++){
1473
            for(x=0; x<w; x++){
1474
                int v, lv, p=0;
1475
                int d=0, r=0, rd=0, ld=0;
1476
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1477
                v= src[x + y*stride];
1478

1479
                if(y){
1480
                    t= src[x + (y-1)*stride];
1481
                    if(x){
1482
                        lt= src[x - 1 + (y-1)*stride];
1483
                    }
1484
                    if(x + 1 < w){
1485
                        rt= src[x + 1 + (y-1)*stride];
1486
                    }
1487
                }
1488
                if(x){
1489
                    l= src[x - 1 + y*stride];
1490
                    /*if(x > 1){
1491
                        if(orientation==1) ll= src[y + (x-2)*stride];
1492
                        else               ll= src[x - 2 + y*stride];
1493
                    }*/
1494
                }
1495
                if(y+1<h){
1496
                    d= src[x + (y+1)*stride];
1497
                    if(x)         ld= src[x - 1 + (y+1)*stride];
1498
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
1499
                }
1500
                if(x + 1 < w)
1501
                    r= src[x + 1 + y*stride];
1502
                if(parent){
1503
                    int px= x>>1;
1504
                    int py= y>>1;
1505
                    if(px<b->parent->width && py<b->parent->height) 
1506
                        p= parent[px + py*2*stride];
1507
                }
1508
#define HIDE(c, plane) c= c>=0 ? c&((-1)<<(plane)) : -((-c)&((-1)<<(plane)));
1509
                lv=v;
1510
                HIDE( v, plane)
1511
                HIDE(lv, plane+1)
1512
                HIDE( p, plane)
1513
                HIDE( l, plane)
1514
                HIDE(lt, plane)
1515
                HIDE( t, plane)
1516
                HIDE(rt, plane)
1517
                HIDE( r, plane+1)
1518
                HIDE(rd, plane+1)
1519
                HIDE( d, plane+1)
1520
                HIDE(ld, plane+1)
1521
                if(!(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv)){
1522
                    if(v){
1523
                        runs[run_index++]= run;
1524
                        run=0;
1525
                    }else{
1526
                        run++;
1527
                    }
1528
                }
1529
            }
1530
        }
1531
        runs[run_index++]= run;
1532
        run_index=0;
1533
        run= runs[run_index++];
1534

1535
        put_symbol(&s->c, b->state[1], run, 0);
1536
        
1537
        for(y=0; y<h; y++){
1538
            for(x=0; x<w; x++){
1539
                int v, p=0, lv;
1540
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1541
                int d=0, r=0, rd=0, ld=0;
1542
                v= src[x + y*stride];
1543

1544
                if(y){
1545
                    t= src[x + (y-1)*stride];
1546
                    if(x){
1547
                        lt= src[x - 1 + (y-1)*stride];
1548
                    }
1549
                    if(x + 1 < w){
1550
                        rt= src[x + 1 + (y-1)*stride];
1551
                    }
1552
                }
1553
                if(x){
1554
                    l= src[x - 1 + y*stride];
1555
                    /*if(x > 1){
1556
                        if(orientation==1) ll= src[y + (x-2)*stride];
1557
                        else               ll= src[x - 2 + y*stride];
1558
                    }*/
1559
                }
1560
                if(y+1<h){
1561
                    d= src[x + (y+1)*stride];
1562
                    if(x)         ld= src[x - 1 + (y+1)*stride];
1563
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
1564
                }
1565
                if(x + 1 < w)
1566
                    r= src[x + 1 + y*stride];
1567

1568
                if(parent){
1569
                    int px= x>>1;
1570
                    int py= y>>1;
1571
                    if(px<b->parent->width && py<b->parent->height) 
1572
                        p= parent[px + py*2*stride];
1573
                }
1574
                lv=v;
1575
                HIDE( v, plane)
1576
                HIDE(lv, plane+1)
1577
                HIDE( p, plane)
1578
                HIDE( l, plane)
1579
                HIDE(lt, plane)
1580
                HIDE( t, plane)
1581
                HIDE(rt, plane)
1582
                HIDE( r, plane+1)
1583
                HIDE(rd, plane+1)
1584
                HIDE( d, plane+1)
1585
                HIDE(ld, plane+1)
1586
                if(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv){
1587
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)
1588
                                                      +3*ABS(r) + ABS(rd) + 2*ABS(d) + ABS(ld));
1589

1590
                    if(lv) put_cabac(&s->c, &b->state[99][context + 8*(av_log2(ABS(lv))-plane)], !!(v-lv));
1591
                    else   put_cabac(&s->c, &b->state[ 0][context], !!v);
1592
                }else{
1593
                    assert(!lv);
1594
                    if(!run){
1595
                        run= runs[run_index++];
1596
                        put_symbol(&s->c, b->state[1], run, 0);
1597
                        assert(v);
1598
                    }else{
1599
                        run--;
1600
                        assert(!v);
1601
                    }
1602
                }
1603
                if(v && !lv){
1604
                    int context=    clip(quant3b[l&0xFF] + quant3b[r&0xFF], -1,1)
1605
                                + 3*clip(quant3b[t&0xFF] + quant3b[d&0xFF], -1,1);
1606
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + context], v<0);
1607
                }
1608
            }
1609
        }
1610
    }
1611
    return;    
1612
#endif
1613
}
1614

    
1615
static void encode_subband_X(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1616
    const int level= b->level;
1617
    const int w= b->width;
1618
    const int h= b->height;
1619
    int x, y;
1620

    
1621
#if 0
1622
    if(orientation==3 && parent && 0){
1623
        int16_t candidate[w*h][2];
1624
        uint8_t state[w*h];
1625
        int16_t boarder[3][w*h*4][2];
1626
        int16_t significant[w*h][2];
1627
        int candidate_count=0;
1628
        int boarder_count[3]={0,0,0};
1629
        int significant_count=0;
1630
        int rle_pos=0;
1631
        int v, last_v;
1632
        int primary= orientation==1;
1633
        
1634
        memset(candidate, 0, sizeof(candidate));
1635
        memset(state, 0, sizeof(state));
1636
        memset(boarder, 0, sizeof(boarder));
1637
        
1638
        for(y=0; y<h; y++){
1639
            for(x=0; x<w; x++){
1640
                if(parent[(x>>1) + (y>>1)*2*stride])
1641
                    SVI(candidate, candidate_count, x, y)
1642
            }
1643
        }
1644

1645
        for(;;){
1646
            while(candidate_count && !boarder_count[0] && !boarder_count[1] && !boarder_count[2]){
1647
                candidate_count--;
1648
                x= candidate[ candidate_count][0];
1649
                y= candidate[ candidate_count][1];
1650
                if(state[x + y*w])
1651
                    continue;
1652
                state[x + y*w]= 1;
1653
                v= !!src[x + y*stride];
1654
                put_cabac(&s->c, &b->state[0][0], v);
1655
                if(v){
1656
                    SVI(significant, significant_count, x,y)
1657
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1658
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1659
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1660
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1661
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1662
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1663
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1664
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1665
                }
1666
            }
1667
            while(!boarder_count[0] && !boarder_count[1] && !boarder_count[2] && rle_pos < w*h){
1668
                int run=0;
1669
                for(; rle_pos < w*h;){
1670
                    x= rle_pos % w; //FIXME speed
1671
                    y= rle_pos / w;
1672
                    rle_pos++;
1673
                    if(state[x + y*w])
1674
                        continue;
1675
                    state[x + y*w]= 1;
1676
                    v= !!src[x + y*stride];
1677
                    if(v){
1678
                        put_symbol(&s->c, b->state[1], run, 0);
1679
                        SVI(significant, significant_count, x,y)
1680
                        if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1681
                        if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1682
                        if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1683
                        if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1684
                        if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1685
                        if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1686
                        if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1687
                        if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1688
                        break;
1689
//FIXME                note only right & down can be boarders
1690
                    }
1691
                    run++;
1692
                }
1693
            }
1694
            if(!boarder_count[0] && !boarder_count[1] && !boarder_count[2])
1695
                break;
1696
            
1697
            while(boarder_count[0] || boarder_count[1] || boarder_count[2]){
1698
                int index;
1699
                
1700
                if     (boarder_count[  primary]) index=  primary;
1701
                else if(boarder_count[1-primary]) index=1-primary;
1702
                else                              index=2;
1703
                
1704
                boarder_count[index]--;
1705
                x= boarder[index][ boarder_count[index] ][0];
1706
                y= boarder[index][ boarder_count[index] ][1];
1707
                if(state[x + y*w]) //FIXME maybe check earlier
1708
                    continue;
1709
                state[x + y*w]= 1;
1710
                v= !!src[x + y*stride];
1711
                put_cabac(&s->c, &b->state[0][index+1], v);
1712
                if(v){
1713
                    SVI(significant, significant_count, x,y)
1714
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1715
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1716
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1717
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1718
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1719
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1720
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1721
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1722
                }
1723
            }
1724
        }
1725
        //FIXME sort significant coeffs maybe
1726
        if(1){
1727
            qsort(significant, significant_count, sizeof(int16_t[2]), sig_cmp);
1728
        }
1729
        
1730
        last_v=1;
1731
        while(significant_count){
1732
            int context= 3 + quant7[last_v&0xFF]; //use significance of suroundings
1733
            significant_count--;
1734
            x= significant[significant_count][0];//FIXME try opposit direction
1735
            y= significant[significant_count][1];
1736
            v= src[x + y*stride];
1737
            put_symbol(&s->c, b->state[context + 2], v, 1); //FIXME try to avoid first bit, try this with the old code too!!
1738
            last_v= v;
1739
        }
1740
    }
1741
#endif
1742
}
1743

    
1744
static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1745
    const int level= b->level;
1746
    const int w= b->width;
1747
    const int h= b->height;
1748
    int x, y;
1749

    
1750
    if(1){
1751
        int run=0;
1752
        int runs[w*h];
1753
        int run_index=0;
1754
                
1755
        for(y=0; y<h; y++){
1756
            for(x=0; x<w; x++){
1757
                int v, p=0;
1758
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1759
                v= src[x + y*stride];
1760

    
1761
                if(y){
1762
                    t= src[x + (y-1)*stride];
1763
                    if(x){
1764
                        lt= src[x - 1 + (y-1)*stride];
1765
                    }
1766
                    if(x + 1 < w){
1767
                        rt= src[x + 1 + (y-1)*stride];
1768
                    }
1769
                }
1770
                if(x){
1771
                    l= src[x - 1 + y*stride];
1772
                    /*if(x > 1){
1773
                        if(orientation==1) ll= src[y + (x-2)*stride];
1774
                        else               ll= src[x - 2 + y*stride];
1775
                    }*/
1776
                }
1777
                if(parent){
1778
                    int px= x>>1;
1779
                    int py= y>>1;
1780
                    if(px<b->parent->width && py<b->parent->height) 
1781
                        p= parent[px + py*2*stride];
1782
                }
1783
                if(!(/*ll|*/l|lt|t|rt|p)){
1784
                    if(v){
1785
                        runs[run_index++]= run;
1786
                        run=0;
1787
                    }else{
1788
                        run++;
1789
                    }
1790
                }
1791
            }
1792
        }
1793
        runs[run_index++]= run;
1794
        run_index=0;
1795
        run= runs[run_index++];
1796

    
1797
        put_symbol2(&s->c, b->state[1], run, 3);
1798
        
1799
        for(y=0; y<h; y++){
1800
            for(x=0; x<w; x++){
1801
                int v, p=0;
1802
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1803
                v= src[x + y*stride];
1804

    
1805
                if(y){
1806
                    t= src[x + (y-1)*stride];
1807
                    if(x){
1808
                        lt= src[x - 1 + (y-1)*stride];
1809
                    }
1810
                    if(x + 1 < w){
1811
                        rt= src[x + 1 + (y-1)*stride];
1812
                    }
1813
                }
1814
                if(x){
1815
                    l= src[x - 1 + y*stride];
1816
                    /*if(x > 1){
1817
                        if(orientation==1) ll= src[y + (x-2)*stride];
1818
                        else               ll= src[x - 2 + y*stride];
1819
                    }*/
1820
                }
1821
                if(parent){
1822
                    int px= x>>1;
1823
                    int py= y>>1;
1824
                    if(px<b->parent->width && py<b->parent->height) 
1825
                        p= parent[px + py*2*stride];
1826
                }
1827
                if(/*ll|*/l|lt|t|rt|p){
1828
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1829

    
1830
                    put_cabac(&s->c, &b->state[0][context], !!v);
1831
                }else{
1832
                    if(!run){
1833
                        run= runs[run_index++];
1834

    
1835
                        put_symbol2(&s->c, b->state[1], run, 3);
1836
                        assert(v);
1837
                    }else{
1838
                        run--;
1839
                        assert(!v);
1840
                    }
1841
                }
1842
                if(v){
1843
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1844

    
1845
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1846
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1847
                }
1848
            }
1849
        }
1850
    }
1851
}
1852

    
1853
static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1854
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1855
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1856
    encode_subband_c0run(s, b, src, parent, stride, orientation);
1857
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1858
}
1859

    
1860
static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1861
    const int level= b->level;
1862
    const int w= b->width;
1863
    const int h= b->height;
1864
    int x,y;
1865

    
1866
    START_TIMER
1867
#if 0    
1868
    for(y=0; y<b->height; y++)
1869
        memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1870

1871
    int plane;
1872
    for(plane=24; plane>=0; plane--){
1873
        int run;
1874

1875
        run= get_symbol(&s->c, b->state[1], 0);
1876
                
1877
#define HIDE(c, plane) c= c>=0 ? c&((-1)<<(plane)) : -((-c)&((-1)<<(plane)));
1878
        
1879
        for(y=0; y<h; y++){
1880
            for(x=0; x<w; x++){
1881
                int v, p=0, lv;
1882
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1883
                int d=0, r=0, rd=0, ld=0;
1884
                lv= src[x + y*stride];
1885

1886
                if(y){
1887
                    t= src[x + (y-1)*stride];
1888
                    if(x){
1889
                        lt= src[x - 1 + (y-1)*stride];
1890
                    }
1891
                    if(x + 1 < w){
1892
                        rt= src[x + 1 + (y-1)*stride];
1893
                    }
1894
                }
1895
                if(x){
1896
                    l= src[x - 1 + y*stride];
1897
                    /*if(x > 1){
1898
                        if(orientation==1) ll= src[y + (x-2)*stride];
1899
                        else               ll= src[x - 2 + y*stride];
1900
                    }*/
1901
                }
1902
                if(y+1<h){
1903
                    d= src[x + (y+1)*stride];
1904
                    if(x)         ld= src[x - 1 + (y+1)*stride];
1905
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
1906
                }
1907
                if(x + 1 < w)
1908
                    r= src[x + 1 + y*stride];
1909

1910
                if(parent){
1911
                    int px= x>>1;
1912
                    int py= y>>1;
1913
                    if(px<b->parent->width && py<b->parent->height) 
1914
                        p= parent[px + py*2*stride];
1915
                }
1916
                HIDE( p, plane)
1917
                if(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv){
1918
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)
1919
                                                      +3*ABS(r) + ABS(rd) + 2*ABS(d) + ABS(ld));
1920

1921
                    if(lv){
1922
                        assert(context + 8*av_log2(ABS(lv)) < 512 - 100);
1923
                        if(get_cabac(&s->c, &b->state[99][context + 8*(av_log2(ABS(lv))-plane)])){
1924
                            if(lv<0) v= lv - (1<<plane);
1925
                            else     v= lv + (1<<plane);
1926
                        }else
1927
                            v=lv;
1928
                    }else{
1929
                        v= get_cabac(&s->c, &b->state[ 0][context]) << plane;
1930
                    }
1931
                }else{
1932
                    assert(!lv);
1933
                    if(!run){
1934
                        run= get_symbol(&s->c, b->state[1], 0);
1935
                        v= 1<<plane;
1936
                    }else{
1937
                        run--;
1938
                        v=0;
1939
                    }
1940
                }
1941
                if(v && !lv){
1942
                    int context=    clip(quant3b[l&0xFF] + quant3b[r&0xFF], -1,1)
1943
                                + 3*clip(quant3b[t&0xFF] + quant3b[d&0xFF], -1,1);
1944
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + context]))
1945
                        v= -v;
1946
                }
1947
                src[x + y*stride]= v;
1948
            }
1949
        }
1950
    }
1951
    return;    
1952
#endif
1953
    if(1){
1954
        int run;
1955
                
1956
        for(y=0; y<b->height; y++)
1957
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1958

    
1959
        run= get_symbol2(&s->c, b->state[1], 3);
1960
        for(y=0; y<h; y++){
1961
            for(x=0; x<w; x++){
1962
                int v, p=0;
1963
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1964

    
1965
                if(y){
1966
                    t= src[x + (y-1)*stride];
1967
                    if(x){
1968
                        lt= src[x - 1 + (y-1)*stride];
1969
                    }
1970
                    if(x + 1 < w){
1971
                        rt= src[x + 1 + (y-1)*stride];
1972
                    }
1973
                }
1974
                if(x){
1975
                    l= src[x - 1 + y*stride];
1976
                    /*if(x > 1){
1977
                        if(orientation==1) ll= src[y + (x-2)*stride];
1978
                        else               ll= src[x - 2 + y*stride];
1979
                    }*/
1980
                }
1981
                if(parent){
1982
                    int px= x>>1;
1983
                    int py= y>>1;
1984
                    if(px<b->parent->width && py<b->parent->height) 
1985
                        p= parent[px + py*2*stride];
1986
                }
1987
                if(/*ll|*/l|lt|t|rt|p){
1988
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1989

    
1990
                    v=get_cabac(&s->c, &b->state[0][context]);
1991
                }else{
1992
                    if(!run){
1993
                        run= get_symbol2(&s->c, b->state[1], 3);
1994
                        //FIXME optimize this here
1995
                        //FIXME try to store a more naive run
1996
                        v=1;
1997
                    }else{
1998
                        run--;
1999
                        v=0;
2000
                    }
2001
                }
2002
                if(v){
2003
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
2004
                    v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
2005
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
2006
                        v= -v;
2007
                    src[x + y*stride]= v;
2008
                }
2009
            }
2010
        }
2011
        if(level+1 == s->spatial_decomposition_count){
2012
            STOP_TIMER("decode_subband")
2013
        }
2014
        
2015
        return;
2016
    }
2017
}
2018

    
2019
static void reset_contexts(SnowContext *s){
2020
    int plane_index, level, orientation;
2021

    
2022
    for(plane_index=0; plane_index<2; plane_index++){
2023
        for(level=0; level<s->spatial_decomposition_count; level++){
2024
            for(orientation=level ? 1:0; orientation<4; orientation++){
2025
                memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
2026
            }
2027
        }
2028
    }
2029
    memset(s->mb_band.state, 0, sizeof(s->mb_band.state));
2030
    memset(s->mv_band[0].state, 0, sizeof(s->mv_band[0].state));
2031
    memset(s->mv_band[1].state, 0, sizeof(s->mv_band[1].state));
2032
    memset(s->header_state, 0, sizeof(s->header_state));
2033
}
2034

    
2035
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2036
    int x, y;
2037

    
2038
    for(y=0; y < b_h+5; y++){
2039
        for(x=0; x < b_w; x++){
2040
            int a0= src[x     + y*stride];
2041
            int a1= src[x + 1 + y*stride];
2042
            int a2= src[x + 2 + y*stride];
2043
            int a3= src[x + 3 + y*stride];
2044
            int a4= src[x + 4 + y*stride];
2045
            int a5= src[x + 5 + y*stride];
2046
//            int am= 9*(a1+a2) - (a0+a3);
2047
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2048
//            int am= 18*(a2+a3) - 2*(a1+a4);
2049
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2050
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2051

    
2052
//            if(b_w==16) am= 8*(a1+a2);
2053

    
2054
            if(dx<8) tmp[x + y*stride]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2055
            else     tmp[x + y*stride]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2056

    
2057
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2058
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2059
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2060
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2061
        }
2062
    }
2063
    for(y=0; y < b_h; y++){
2064
        for(x=0; x < b_w; x++){
2065
            int a0= tmp[x +  y     *stride];
2066
            int a1= tmp[x + (y + 1)*stride];
2067
            int a2= tmp[x + (y + 2)*stride];
2068
            int a3= tmp[x + (y + 3)*stride];
2069
            int a4= tmp[x + (y + 4)*stride];
2070
            int a5= tmp[x + (y + 5)*stride];
2071
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2072
//            int am= 18*(a2+a3) - 2*(a1+a4);
2073
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2074
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2075
            
2076
//            if(b_w==16) am= 8*(a1+a2);
2077

    
2078
            if(dy<8) dst[x + y*stride]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2079
            else     dst[x + y*stride]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2080

    
2081
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2082
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2083
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2084
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2085
        }
2086
    }
2087
}
2088

    
2089
#define mcb(dx,dy,b_w)\
2090
static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
2091
    uint8_t tmp[stride*(b_w+5)];\
2092
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2093
}
2094

    
2095
mcb( 0, 0,16)
2096
mcb( 4, 0,16)
2097
mcb( 8, 0,16)
2098
mcb(12, 0,16)
2099
mcb( 0, 4,16)
2100
mcb( 4, 4,16)
2101
mcb( 8, 4,16)
2102
mcb(12, 4,16)
2103
mcb( 0, 8,16)
2104
mcb( 4, 8,16)
2105
mcb( 8, 8,16)
2106
mcb(12, 8,16)
2107
mcb( 0,12,16)
2108
mcb( 4,12,16)
2109
mcb( 8,12,16)
2110
mcb(12,12,16)
2111

    
2112
#define mca(dx,dy,b_w)\
2113
static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
2114
    uint8_t tmp[stride*(b_w+5)];\
2115
    assert(h==b_w);\
2116
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2117
}
2118

    
2119
mca( 0, 0,16)
2120
mca( 8, 0,16)
2121
mca( 0, 8,16)
2122
mca( 8, 8,16)
2123

    
2124
static void add_xblock(DWTELEM *dst, uint8_t *src, uint8_t *obmc, int s_x, int s_y, int b_w, int b_h, int mv_x, int mv_y, int w, int h, int dst_stride, int src_stride, int obmc_stride, int mb_type, int add){
2125
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME move to context to gurantee alignment
2126
    int x,y;
2127

    
2128
    if(s_x<0){
2129
        obmc -= s_x;
2130
        b_w += s_x;
2131
        s_x=0;
2132
    }else if(s_x + b_w > w){
2133
        b_w = w - s_x;
2134
    }
2135
    if(s_y<0){
2136
        obmc -= s_y*obmc_stride;
2137
        b_h += s_y;
2138
        s_y=0;
2139
    }else if(s_y + b_h> h){
2140
        b_h = h - s_y;
2141
    }
2142

    
2143
    if(b_w<=0 || b_h<=0) return;
2144
    
2145
    dst += s_x + s_y*dst_stride;
2146
    
2147
    if(mb_type==1){
2148
        src += s_x + s_y*src_stride;
2149
        for(y=0; y < b_h; y++){
2150
            for(x=0; x < b_w; x++){
2151
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
2152
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
2153
            }
2154
        }
2155
    }else{
2156
        int dx= mv_x&15;
2157
        int dy= mv_y&15;
2158
//        int dxy= (mv_x&1) + 2*(mv_y&1);
2159

    
2160
        s_x += (mv_x>>4) - 2;
2161
        s_y += (mv_y>>4) - 2;
2162
        src += s_x + s_y*src_stride;
2163
        //use dsputil
2164
    
2165
        if(   (unsigned)s_x >= w - b_w - 4
2166
           || (unsigned)s_y >= h - b_h - 4){
2167
            ff_emulated_edge_mc(tmp + 32, src, src_stride, b_w+5, b_h+5, s_x, s_y, w, h);
2168
            src= tmp + 32;
2169
        }
2170

    
2171
        if(mb_type==0){
2172
            mc_block(tmp, src, tmp + 64+8, src_stride, b_w, b_h, dx, dy);
2173
        }else{
2174
            int sum=0;
2175
            for(y=0; y < b_h; y++){
2176
                for(x=0; x < b_w; x++){
2177
                    sum += src[x+  y*src_stride];
2178
                }
2179
            }
2180
            sum= (sum + b_h*b_w/2) / (b_h*b_w);
2181
            for(y=0; y < b_h; y++){
2182
                for(x=0; x < b_w; x++){
2183
                    tmp[x + y*src_stride]= sum; 
2184
                }
2185
            }
2186
        }
2187

    
2188
        for(y=0; y < b_h; y++){
2189
            for(x=0; x < b_w; x++){
2190
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2191
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2192
            }
2193
        }
2194
    }
2195
}
2196

    
2197
static void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2198
    Plane *p= &s->plane[plane_index];
2199
    const int mb_w= s->mb_band.width;
2200
    const int mb_h= s->mb_band.height;
2201
    const int mb_stride= s->mb_band.stride;
2202
    int x, y, mb_x, mb_y;
2203
    int scale      = plane_index ?  s->mv_scale : 2*s->mv_scale;
2204
    int block_w    = plane_index ?  8 : 16;
2205
    uint8_t *obmc  = plane_index ? obmc16 : obmc32;
2206
    int obmc_stride= plane_index ? 16 : 32;
2207
    int ref_stride= s->last_picture.linesize[plane_index];
2208
    uint8_t *ref  = s->last_picture.data[plane_index];
2209
    int w= p->width;
2210
    int h= p->height;
2211
    
2212
if(s->avctx->debug&512){
2213
    for(y=0; y<h; y++){
2214
        for(x=0; x<w; x++){
2215
            if(add) buf[x + y*w]+= 128*256;
2216
            else    buf[x + y*w]-= 128*256;
2217
        }
2218
    }
2219
    
2220
    return;
2221
}
2222
    for(mb_y=-1; mb_y<=mb_h; mb_y++){
2223
        for(mb_x=-1; mb_x<=mb_w; mb_x++){
2224
            int index= clip(mb_x, 0, mb_w-1) + clip(mb_y, 0, mb_h-1)*mb_stride;
2225

    
2226
            add_xblock(buf, ref, obmc, 
2227
                       block_w*mb_x - block_w/2, 
2228
                       block_w*mb_y - block_w/2,
2229
                       2*block_w, 2*block_w,
2230
                       s->mv_band[0].buf[index]*scale, s->mv_band[1].buf[index]*scale,
2231
                       w, h,
2232
                       w, ref_stride, obmc_stride, 
2233
                       s->mb_band.buf[index], add);
2234

    
2235
        }
2236
    }
2237
}
2238

    
2239
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2240
    const int level= b->level;
2241
    const int w= b->width;
2242
    const int h= b->height;
2243
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2244
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2245
    int x,y, thres1, thres2;
2246
    START_TIMER
2247

    
2248
    assert(QROOT==8);
2249

    
2250
    if(s->qlog == LOSSLESS_QLOG) return;
2251
 
2252
    bias= bias ? 0 : (3*qmul)>>3;
2253
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2254
    thres2= 2*thres1;
2255
    
2256
    if(!bias){
2257
        for(y=0; y<h; y++){
2258
            for(x=0; x<w; x++){
2259
                int i= src[x + y*stride];
2260
                
2261
                if((unsigned)(i+thres1) > thres2){
2262
                    if(i>=0){
2263
                        i<<= QEXPSHIFT;
2264
                        i/= qmul; //FIXME optimize
2265
                        src[x + y*stride]=  i;
2266
                    }else{
2267
                        i= -i;
2268
                        i<<= QEXPSHIFT;
2269
                        i/= qmul; //FIXME optimize
2270
                        src[x + y*stride]= -i;
2271
                    }
2272
                }else
2273
                    src[x + y*stride]= 0;
2274
            }
2275
        }
2276
    }else{
2277
        for(y=0; y<h; y++){
2278
            for(x=0; x<w; x++){
2279
                int i= src[x + y*stride]; 
2280
                
2281
                if((unsigned)(i+thres1) > thres2){
2282
                    if(i>=0){
2283
                        i<<= QEXPSHIFT;
2284
                        i= (i + bias) / qmul; //FIXME optimize
2285
                        src[x + y*stride]=  i;
2286
                    }else{
2287
                        i= -i;
2288
                        i<<= QEXPSHIFT;
2289
                        i= (i + bias) / qmul; //FIXME optimize
2290
                        src[x + y*stride]= -i;
2291
                    }
2292
                }else
2293
                    src[x + y*stride]= 0;
2294
            }
2295
        }
2296
    }
2297
    if(level+1 == s->spatial_decomposition_count){
2298
//        STOP_TIMER("quantize")
2299
    }
2300
}
2301

    
2302
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2303
    const int level= b->level;
2304
    const int w= b->width;
2305
    const int h= b->height;
2306
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2307
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2308
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2309
    int x,y;
2310
    
2311
    if(s->qlog == LOSSLESS_QLOG) return;
2312
    
2313
    assert(QROOT==8);
2314

    
2315
    for(y=0; y<h; y++){
2316
        for(x=0; x<w; x++){
2317
            int i= src[x + y*stride];
2318
            if(i<0){
2319
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2320
            }else if(i>0){
2321
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2322
            }
2323
        }
2324
    }
2325
}
2326

    
2327
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2328
    const int w= b->width;
2329
    const int h= b->height;
2330
    int x,y;
2331
    
2332
    for(y=h-1; y>=0; y--){
2333
        for(x=w-1; x>=0; x--){
2334
            int i= x + y*stride;
2335
            
2336
            if(x){
2337
                if(use_median){
2338
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2339
                    else  src[i] -= src[i - 1];
2340
                }else{
2341
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2342
                    else  src[i] -= src[i - 1];
2343
                }
2344
            }else{
2345
                if(y) src[i] -= src[i - stride];
2346
            }
2347
        }
2348
    }
2349
}
2350

    
2351
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2352
    const int w= b->width;
2353
    const int h= b->height;
2354
    int x,y;
2355
    
2356
    for(y=0; y<h; y++){
2357
        for(x=0; x<w; x++){
2358
            int i= x + y*stride;
2359
            
2360
            if(x){
2361
                if(use_median){
2362
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2363
                    else  src[i] += src[i - 1];
2364
                }else{
2365
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2366
                    else  src[i] += src[i - 1];
2367
                }
2368
            }else{
2369
                if(y) src[i] += src[i - stride];
2370
            }
2371
        }
2372
    }
2373
}
2374

    
2375
static void encode_header(SnowContext *s){
2376
    int plane_index, level, orientation;
2377

    
2378
    put_cabac(&s->c, s->header_state, s->keyframe); // state clearing stuff?
2379
    if(s->keyframe){
2380
        put_symbol(&s->c, s->header_state, s->version, 0);
2381
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2382
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2383
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2384
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
2385
        put_symbol(&s->c, s->header_state, s->b_width, 0);
2386
        put_symbol(&s->c, s->header_state, s->b_height, 0);
2387
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2388
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
2389
        put_cabac(&s->c, s->header_state, s->spatial_scalability);
2390
//        put_cabac(&s->c, s->header_state, s->rate_scalability);
2391

    
2392
        for(plane_index=0; plane_index<2; plane_index++){
2393
            for(level=0; level<s->spatial_decomposition_count; level++){
2394
                for(orientation=level ? 1:0; orientation<4; orientation++){
2395
                    if(orientation==2) continue;
2396
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2397
                }
2398
            }
2399
        }
2400
    }
2401
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2402
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
2403
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
2404
    put_symbol(&s->c, s->header_state, s->qbias, 1);
2405
}
2406

    
2407
static int decode_header(SnowContext *s){
2408
    int plane_index, level, orientation;
2409

    
2410
    s->keyframe= get_cabac(&s->c, s->header_state);
2411
    if(s->keyframe){
2412
        s->version= get_symbol(&s->c, s->header_state, 0);
2413
        if(s->version>0){
2414
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2415
            return -1;
2416
        }
2417
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2418
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2419
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2420
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2421
        s->b_width= get_symbol(&s->c, s->header_state, 0);
2422
        s->b_height= get_symbol(&s->c, s->header_state, 0);
2423
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2424
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2425
        s->spatial_scalability= get_cabac(&s->c, s->header_state);
2426
//        s->rate_scalability= get_cabac(&s->c, s->header_state);
2427

    
2428
        for(plane_index=0; plane_index<3; plane_index++){
2429
            for(level=0; level<s->spatial_decomposition_count; level++){
2430
                for(orientation=level ? 1:0; orientation<4; orientation++){
2431
                    int q;
2432
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2433
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2434
                    else                    q= get_symbol(&s->c, s->header_state, 1);
2435
                    s->plane[plane_index].band[level][orientation].qlog= q;
2436
                }
2437
            }
2438
        }
2439
    }
2440
    
2441
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2442
    if(s->spatial_decomposition_type > 2){
2443
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2444
        return -1;
2445
    }
2446
    
2447
    s->qlog= get_symbol(&s->c, s->header_state, 1);
2448
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2449
    s->qbias= get_symbol(&s->c, s->header_state, 1);
2450

    
2451
    return 0;
2452
}
2453

    
2454
static int common_init(AVCodecContext *avctx){
2455
    SnowContext *s = avctx->priv_data;
2456
    int width, height;
2457
    int level, orientation, plane_index, dec;
2458

    
2459
    s->avctx= avctx;
2460
        
2461
    dsputil_init(&s->dsp, avctx);
2462

    
2463
#define mcf(dx,dy)\
2464
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
2465
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2466
        mc_block ## dx ## dy;
2467

    
2468
    mcf( 0, 0)
2469
    mcf( 4, 0)
2470
    mcf( 8, 0)
2471
    mcf(12, 0)
2472
    mcf( 0, 4)
2473
    mcf( 4, 4)
2474
    mcf( 8, 4)
2475
    mcf(12, 4)
2476
    mcf( 0, 8)
2477
    mcf( 4, 8)
2478
    mcf( 8, 8)
2479
    mcf(12, 8)
2480
    mcf( 0,12)
2481
    mcf( 4,12)
2482
    mcf( 8,12)
2483
    mcf(12,12)
2484

    
2485
#define mcfh(dx,dy)\
2486
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
2487
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2488
        mc_block_hpel ## dx ## dy;
2489

    
2490
    mcfh(0, 0)
2491
    mcfh(8, 0)
2492
    mcfh(0, 8)
2493
    mcfh(8, 8)
2494
        
2495
    dec= s->spatial_decomposition_count= 5;
2496
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2497
    
2498
    s->chroma_h_shift= 1; //FIXME XXX
2499
    s->chroma_v_shift= 1;
2500
    
2501
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2502
    
2503
    s->b_width = (s->avctx->width +(1<<dec)-1)>>dec;
2504
    s->b_height= (s->avctx->height+(1<<dec)-1)>>dec;
2505
    
2506
    s->spatial_dwt_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
2507
    s->pred_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
2508
    
2509
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2510
    
2511
    for(plane_index=0; plane_index<3; plane_index++){    
2512
        int w= s->avctx->width;
2513
        int h= s->avctx->height;
2514

    
2515
        if(plane_index){
2516
            w>>= s->chroma_h_shift;
2517
            h>>= s->chroma_v_shift;
2518
        }
2519
        s->plane[plane_index].width = w;
2520
        s->plane[plane_index].height= h;
2521
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
2522
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2523
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2524
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2525
                
2526
                b->buf= s->spatial_dwt_buffer;
2527
                b->level= level;
2528
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2529
                b->width = (w + !(orientation&1))>>1;
2530
                b->height= (h + !(orientation>1))>>1;
2531
                
2532
                if(orientation&1) b->buf += (w+1)>>1;
2533
                if(orientation>1) b->buf += b->stride>>1;
2534
                
2535
                if(level)
2536
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
2537
            }
2538
            w= (w+1)>>1;
2539
            h= (h+1)>>1;
2540
        }
2541
    }
2542
    
2543
    //FIXME init_subband() ?
2544
    s->mb_band.stride= s->mv_band[0].stride= s->mv_band[1].stride=
2545
    s->mb_band.width = s->mv_band[0].width = s->mv_band[1].width = (s->avctx->width + 15)>>4;
2546
    s->mb_band.height= s->mv_band[0].height= s->mv_band[1].height= (s->avctx->height+ 15)>>4;
2547
    s->mb_band   .buf= av_mallocz(s->mb_band   .stride * s->mb_band   .height*sizeof(DWTELEM));
2548
    s->mv_band[0].buf= av_mallocz(s->mv_band[0].stride * s->mv_band[0].height*sizeof(DWTELEM));
2549
    s->mv_band[1].buf= av_mallocz(s->mv_band[1].stride * s->mv_band[1].height*sizeof(DWTELEM));
2550

    
2551
    reset_contexts(s);
2552
/*    
2553
    width= s->width= avctx->width;
2554
    height= s->height= avctx->height;
2555
    
2556
    assert(width && height);
2557
*/
2558
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2559
    
2560
    return 0;
2561
}
2562

    
2563

    
2564
static void calculate_vissual_weight(SnowContext *s, Plane *p){
2565
    int width = p->width;
2566
    int height= p->height;
2567
    int i, level, orientation, x, y;
2568

    
2569
    for(level=0; level<s->spatial_decomposition_count; level++){
2570
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2571
            SubBand *b= &p->band[level][orientation];
2572
            DWTELEM *buf= b->buf;
2573
            int64_t error=0;
2574
            
2575
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2576
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
2577
            spatial_idwt(s, s->spatial_dwt_buffer, width, height, width);
2578
            for(y=0; y<height; y++){
2579
                for(x=0; x<width; x++){
2580
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
2581
                    error += d*d;
2582
                }
2583
            }
2584

    
2585
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
2586
            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
2587
        }
2588
    }
2589
}
2590

    
2591
static int encode_init(AVCodecContext *avctx)
2592
{
2593
    SnowContext *s = avctx->priv_data;
2594
    int i;
2595
    int level, orientation, plane_index;
2596

    
2597
    if(avctx->strict_std_compliance >= 0){
2598
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2599
               "use vstrict=-1 to use it anyway\n");
2600
        return -1;
2601
    }
2602
 
2603
    common_init(avctx);
2604
 
2605
    s->version=0;
2606
    
2607
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2608
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2609
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2610
    s->mb_type        = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int16_t));
2611
    s->mb_mean        = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int8_t ));
2612
    s->dummy          = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int32_t));
2613
    h263_encode_init(&s->m); //mv_penalty
2614

    
2615
    for(plane_index=0; plane_index<3; plane_index++){
2616
        calculate_vissual_weight(s, &s->plane[plane_index]);
2617
    }
2618
    
2619
    
2620
    avctx->coded_frame= &s->current_picture;
2621
    switch(avctx->pix_fmt){
2622
//    case PIX_FMT_YUV444P:
2623
//    case PIX_FMT_YUV422P:
2624
    case PIX_FMT_YUV420P:
2625
    case PIX_FMT_GRAY8:
2626
//    case PIX_FMT_YUV411P:
2627
//    case PIX_FMT_YUV410P:
2628
        s->colorspace_type= 0;
2629
        break;
2630
/*    case PIX_FMT_RGBA32:
2631
        s->colorspace= 1;
2632
        break;*/
2633
    default:
2634
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2635
        return -1;
2636
    }
2637
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2638
    s->chroma_h_shift= 1;
2639
    s->chroma_v_shift= 1;
2640
    return 0;
2641
}
2642

    
2643
static int frame_start(SnowContext *s){
2644
   AVFrame tmp;
2645

    
2646
   if(s->keyframe)
2647
        reset_contexts(s);
2648
 
2649
    tmp= s->last_picture;
2650
    s->last_picture= s->current_picture;
2651
    s->current_picture= tmp;
2652
    
2653
    s->current_picture.reference= 1;
2654
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2655
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2656
        return -1;
2657
    }
2658
    
2659
    return 0;
2660
}
2661

    
2662
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2663
    SnowContext *s = avctx->priv_data;
2664
    CABACContext * const c= &s->c;
2665
    AVFrame *pict = data;
2666
    const int width= s->avctx->width;
2667
    const int height= s->avctx->height;
2668
    int used_count= 0;
2669
    int log2_threshold, level, orientation, plane_index, i;
2670

    
2671
    ff_init_cabac_encoder(c, buf, buf_size);
2672
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2673
    
2674
    s->input_picture = *pict;
2675

    
2676
    memset(s->header_state, 0, sizeof(s->header_state));
2677

    
2678
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2679
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2680
    
2681
    if(pict->quality){
2682
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2683
        //<64 >60
2684
        s->qlog += 61;
2685
    }else{
2686
        s->qlog= LOSSLESS_QLOG;
2687
    }
2688

    
2689
    for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
2690
        s->mb_band.buf[i]= s->keyframe;
2691
    }
2692
    
2693
    frame_start(s);
2694

    
2695
    if(pict->pict_type == P_TYPE){
2696
        int block_width = (width +15)>>4;
2697
        int block_height= (height+15)>>4;
2698
        int stride= s->current_picture.linesize[0];
2699
        uint8_t *src_plane= s->input_picture.data[0];
2700
        int src_stride= s->input_picture.linesize[0];
2701
        int x,y;
2702
        
2703
        assert(s->current_picture.data[0]);
2704
        assert(s->last_picture.data[0]);
2705
     
2706
        s->m.avctx= s->avctx;
2707
        s->m.current_picture.data[0]= s->current_picture.data[0];
2708
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
2709
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
2710
        s->m.current_picture_ptr= &s->m.current_picture;
2711
        s->m.   last_picture_ptr= &s->m.   last_picture;
2712
        s->m.linesize=
2713
        s->m.   last_picture.linesize[0]=
2714
        s->m.    new_picture.linesize[0]=
2715
        s->m.current_picture.linesize[0]= stride;
2716
        s->m.width = width;
2717
        s->m.height= height;
2718
        s->m.mb_width = block_width;
2719
        s->m.mb_height= block_height;
2720
        s->m.mb_stride=   s->m.mb_width+1;
2721
        s->m.b8_stride= 2*s->m.mb_width+1;
2722
        s->m.f_code=1;
2723
        s->m.pict_type= pict->pict_type;
2724
        s->m.me_method= s->avctx->me_method;
2725
        s->m.me.scene_change_score=0;
2726
        s->m.flags= s->avctx->flags;
2727
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2728
        s->m.out_format= FMT_H263;
2729
        s->m.unrestricted_mv= 1;
2730

    
2731
        s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
2732
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
2733
        s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
2734

    
2735
        if(!s->motion_val8){
2736
            s->motion_val8 = av_mallocz(s->m.b8_stride*block_height*2*2*sizeof(int16_t));
2737
            s->motion_val16= av_mallocz(s->m.mb_stride*block_height*2*sizeof(int16_t));
2738
        }
2739
        
2740
        s->m.mb_type= s->mb_type;
2741
        
2742
        //dummies, to avoid segfaults
2743
        s->m.current_picture.mb_mean  = s->mb_mean;
2744
        s->m.current_picture.mb_var   = (int16_t*)s->dummy;
2745
        s->m.current_picture.mc_mb_var= (int16_t*)s->dummy;
2746
        s->m.current_picture.mb_type  = s->dummy;
2747
        
2748
        s->m.current_picture.motion_val[0]= s->motion_val8;
2749
        s->m.p_mv_table= s->motion_val16;
2750
        s->m.dsp= s->dsp; //move
2751
        ff_init_me(&s->m);
2752
    
2753
        
2754
        s->m.me.pre_pass=1;
2755
        s->m.me.dia_size= s->avctx->pre_dia_size;
2756
        s->m.first_slice_line=1;
2757
        for(y= block_height-1; y >= 0; y--) {
2758
            uint8_t src[stride*16];
2759

    
2760
            s->m.new_picture.data[0]= src - y*16*stride; //ugly
2761
            s->m.mb_y= y;
2762
            for(i=0; i<16 && i + 16*y<height; i++){
2763
                memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
2764
                for(x=width; x<16*block_width; x++)
2765
                    src[i*stride+x]= src[i*stride+x-1];
2766
            }
2767
            for(; i<16 && i + 16*y<16*block_height; i++)
2768
                memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
2769

    
2770
            for(x=block_width-1; x >=0 ;x--) {
2771
                s->m.mb_x= x;
2772
                ff_init_block_index(&s->m);
2773
                ff_update_block_index(&s->m);
2774
                ff_pre_estimate_p_frame_motion(&s->m, x, y);
2775
            }
2776
            s->m.first_slice_line=0;
2777
        }        
2778
        s->m.me.pre_pass=0;
2779
        
2780
        
2781
        s->m.me.dia_size= s->avctx->dia_size;
2782
        s->m.first_slice_line=1;
2783
        for (y = 0; y < block_height; y++) {
2784
            uint8_t src[stride*16];
2785

    
2786
            s->m.new_picture.data[0]= src - y*16*stride; //ugly
2787
            s->m.mb_y= y;
2788
            
2789
            assert(width <= stride);
2790
            assert(width <= 16*block_width);
2791
    
2792
            for(i=0; i<16 && i + 16*y<height; i++){
2793
                memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
2794
                for(x=width; x<16*block_width; x++)
2795
                    src[i*stride+x]= src[i*stride+x-1];
2796
            }
2797
            for(; i<16 && i + 16*y<16*block_height; i++)
2798
                memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
2799
    
2800
            for (x = 0; x < block_width; x++) {
2801
                int mb_xy= x + y*(s->mb_band.stride);
2802
                s->m.mb_x= x;
2803
                ff_init_block_index(&s->m);
2804
                ff_update_block_index(&s->m);
2805
                
2806
                ff_estimate_p_frame_motion(&s->m, x, y);
2807
                
2808
                s->mb_band   .buf[mb_xy]= (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER)
2809
                 ? 0 : 2;
2810
                s->mv_band[0].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][0];
2811
                s->mv_band[1].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][1];
2812
                
2813
                if(s->mb_band   .buf[x + y*(s->mb_band.stride)]==2 && 0){
2814
                    int dc0=128, dc1=128, dc, dc2, dir;
2815
                    int offset= (s->avctx->flags & CODEC_FLAG_QPEL) ? 64 : 32;
2816
                    
2817
                    dc       =s->mb_mean[x +  y   *s->m.mb_stride    ];
2818
                    if(x) dc0=s->mb_mean[x +  y   *s->m.mb_stride - 1];
2819
                    if(y) dc1=s->mb_mean[x + (y-1)*s->m.mb_stride    ];
2820
                    dc2= (dc0+dc1)>>1;
2821
#if 0
2822
                    if     (ABS(dc0 - dc) < ABS(dc1 - dc) && ABS(dc0 - dc) < ABS(dc2 - dc))
2823
                        dir= 1;
2824
                    else if(ABS(dc0 - dc) >=ABS(dc1 - dc) && ABS(dc1 - dc) < ABS(dc2 - dc))
2825
                        dir=-1;
2826
                    else
2827
                        dir=0;
2828
#endif                    
2829
                    if(ABS(dc0 - dc) < ABS(dc1 - dc) && x){
2830
                        s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + y*(s->mb_band.stride)-1] - offset;
2831
                        s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + y*(s->mb_band.stride)-1];
2832
                        s->mb_mean[x +  y   *s->m.mb_stride    ]= dc0;
2833
                    }else if(y){
2834
                        s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + (y-1)*(s->mb_band.stride)];
2835
                        s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + (y-1)*(s->mb_band.stride)] - offset;
2836
                        s->mb_mean[x +  y   *s->m.mb_stride    ]= dc1;
2837
                    }
2838
                }
2839
//                s->mb_band   .buf[x + y*(s->mb_band.stride)]=1; //FIXME intra only test
2840
            }
2841
            s->m.first_slice_line=0;
2842
        }
2843
        assert(s->m.pict_type == P_TYPE);
2844
        if(s->m.me.scene_change_score > s->avctx->scenechange_threshold){
2845
            s->m.pict_type= 
2846
            pict->pict_type =I_TYPE;
2847
            for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
2848
                s->mb_band.buf[i]= 1;
2849
                s->mv_band[0].buf[i]=
2850
                s->mv_band[1].buf[i]= 0;
2851
            }
2852
    //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
2853
        }        
2854
    }
2855
        
2856
    s->m.first_slice_line=1;
2857
    
2858
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
2859

    
2860
    encode_header(s);
2861
    
2862
    decorrelate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 0, 1);
2863
    decorrelate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 0, 1);
2864
    decorrelate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 0 ,1);
2865
    encode_subband(s, &s->mb_band   , s->mb_band   .buf, NULL, s->mb_band   .stride, 0);
2866
    encode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
2867
    encode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
2868
    
2869
//FIXME avoid this
2870
    correlate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 1, 1);
2871
    correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
2872
    correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
2873
    
2874
    for(plane_index=0; plane_index<3; plane_index++){
2875
        Plane *p= &s->plane[plane_index];
2876
        int w= p->width;
2877
        int h= p->height;
2878
        int x, y;
2879
        int bits= put_bits_count(&s->c.pb);
2880

    
2881
        //FIXME optimize
2882
     if(pict->data[plane_index]) //FIXME gray hack
2883
        for(y=0; y<h; y++){
2884
            for(x=0; x<w; x++){
2885
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
2886
            }
2887
        }
2888
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
2889
        if(s->qlog == LOSSLESS_QLOG){
2890
            for(y=0; y<h; y++){
2891
                for(x=0; x<w; x++){
2892
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + 127)>>8;
2893
                }
2894
            }
2895
        }
2896
 
2897
        spatial_dwt(s, s->spatial_dwt_buffer, w, h, w);
2898

    
2899
        for(level=0; level<s->spatial_decomposition_count; level++){
2900
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2901
                SubBand *b= &p->band[level][orientation];
2902
                
2903
                quantize(s, b, b->buf, b->stride, s->qbias);
2904
                if(orientation==0)
2905
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
2906
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2907
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
2908
                if(orientation==0)
2909
                    correlate(s, b, b->buf, b->stride, 1, 0);
2910
            }
2911
        }
2912
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
2913

    
2914
        for(level=0; level<s->spatial_decomposition_count; level++){
2915
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2916
                SubBand *b= &p->band[level][orientation];
2917

    
2918
                dequantize(s, b, b->buf, b->stride);
2919
            }
2920
        }
2921

    
2922
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
2923
        if(s->qlog == LOSSLESS_QLOG){
2924
            for(y=0; y<h; y++){
2925
                for(x=0; x<w; x++){
2926
                    s->spatial_dwt_buffer[y*w + x]<<=8;
2927
                }
2928
            }
2929
        }
2930
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2931
        //FIXME optimize
2932
        for(y=0; y<h; y++){
2933
            for(x=0; x<w; x++){
2934
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2935
                if(v&(~255)) v= ~(v>>31);
2936
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2937
            }
2938
        }
2939
        if(s->avctx->flags&CODEC_FLAG_PSNR){
2940
            int64_t error= 0;
2941
            
2942
    if(pict->data[plane_index]) //FIXME gray hack
2943
            for(y=0; y<h; y++){
2944
                for(x=0; x<w; x++){
2945
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
2946
                    error += d*d;
2947
                }
2948
            }
2949
            s->avctx->error[plane_index] += error;
2950
            s->avctx->error[3] += error;
2951
        }
2952
    }
2953

    
2954
    if(s->last_picture.data[0])
2955
        avctx->release_buffer(avctx, &s->last_picture);
2956

    
2957
    emms_c();
2958
    
2959
    return put_cabac_terminate(c, 1);
2960
}
2961

    
2962
static void common_end(SnowContext *s){
2963
    av_freep(&s->spatial_dwt_buffer);
2964
    av_freep(&s->mb_band.buf);
2965
    av_freep(&s->mv_band[0].buf);
2966
    av_freep(&s->mv_band[1].buf);
2967

    
2968
    av_freep(&s->m.me.scratchpad);    
2969
    av_freep(&s->m.me.map);
2970
    av_freep(&s->m.me.score_map);
2971
    av_freep(&s->mb_type);
2972
    av_freep(&s->mb_mean);
2973
    av_freep(&s->dummy);
2974
    av_freep(&s->motion_val8);
2975
    av_freep(&s->motion_val16);
2976
}
2977

    
2978
static int encode_end(AVCodecContext *avctx)
2979
{
2980
    SnowContext *s = avctx->priv_data;
2981

    
2982
    common_end(s);
2983

    
2984
    return 0;
2985
}
2986

    
2987
static int decode_init(AVCodecContext *avctx)
2988
{
2989
//    SnowContext *s = avctx->priv_data;
2990

    
2991
    common_init(avctx);
2992
    
2993
    return 0;
2994
}
2995

    
2996
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
2997
    SnowContext *s = avctx->priv_data;
2998
    CABACContext * const c= &s->c;
2999
    const int width= s->avctx->width;
3000
    const int height= s->avctx->height;
3001
    int bytes_read;
3002
    AVFrame *picture = data;
3003
    int log2_threshold, level, orientation, plane_index;
3004
    
3005

    
3006
    /* no supplementary picture */
3007
    if (buf_size == 0)
3008
        return 0;
3009

    
3010
    ff_init_cabac_decoder(c, buf, buf_size);
3011
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3012

    
3013
    memset(s->header_state, 0, sizeof(s->header_state));
3014

    
3015
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3016
    decode_header(s);
3017

    
3018
    frame_start(s);
3019
    //keyframe flag dupliaction mess FIXME
3020
    if(avctx->debug&FF_DEBUG_PICT_INFO)
3021
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3022
    
3023
    decode_subband(s, &s->mb_band   , s->mb_band   .buf, NULL, s->mb_band   .stride, 0);
3024
    decode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
3025
    decode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
3026
    correlate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 1, 1);
3027
    correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
3028
    correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
3029

    
3030
    for(plane_index=0; plane_index<3; plane_index++){
3031
        Plane *p= &s->plane[plane_index];
3032
        int w= p->width;
3033
        int h= p->height;
3034
        int x, y;
3035
        
3036
if(s->avctx->debug&2048){
3037
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3038
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3039

    
3040
        for(y=0; y<h; y++){
3041
            for(x=0; x<w; x++){
3042
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
3043
                if(v&(~255)) v= ~(v>>31);
3044
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3045
            }
3046
        }
3047
}
3048
        for(level=0; level<s->spatial_decomposition_count; level++){
3049
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3050
                SubBand *b= &p->band[level][orientation];
3051

    
3052
                decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3053
                if(orientation==0)
3054
                    correlate(s, b, b->buf, b->stride, 1, 0);
3055
            }
3056
        }
3057
if(!(s->avctx->debug&1024))
3058
        for(level=0; level<s->spatial_decomposition_count; level++){
3059
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3060
                SubBand *b= &p->band[level][orientation];
3061

    
3062
                dequantize(s, b, b->buf, b->stride);
3063
            }
3064
        }
3065

    
3066
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
3067
        if(s->qlog == LOSSLESS_QLOG){
3068
            for(y=0; y<h; y++){
3069
                for(x=0; x<w; x++){
3070
                    s->spatial_dwt_buffer[y*w + x]<<=8;
3071
                }
3072
            }
3073
        }
3074
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3075

    
3076
        //FIXME optimize
3077
        for(y=0; y<h; y++){
3078
            for(x=0; x<w; x++){
3079
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
3080
                if(v&(~255)) v= ~(v>>31);
3081
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
3082
            }
3083
        }
3084
    }
3085
            
3086
    emms_c();
3087

    
3088
    if(s->last_picture.data[0])
3089
        avctx->release_buffer(avctx, &s->last_picture);
3090

    
3091
if(!(s->avctx->debug&2048))        
3092
    *picture= s->current_picture;
3093
else
3094
    *picture= s->mconly_picture;
3095
    
3096
    *data_size = sizeof(AVFrame);
3097
    
3098
    bytes_read= get_cabac_terminate(c);
3099
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
3100

    
3101
    return bytes_read;
3102
}
3103

    
3104
static int decode_end(AVCodecContext *avctx)
3105
{
3106
    SnowContext *s = avctx->priv_data;
3107

    
3108
    common_end(s);
3109

    
3110
    return 0;
3111
}
3112

    
3113
AVCodec snow_decoder = {
3114
    "snow",
3115
    CODEC_TYPE_VIDEO,
3116
    CODEC_ID_SNOW,
3117
    sizeof(SnowContext),
3118
    decode_init,
3119
    NULL,
3120
    decode_end,
3121
    decode_frame,
3122
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3123
    NULL
3124
};
3125

    
3126
AVCodec snow_encoder = {
3127
    "snow",
3128
    CODEC_TYPE_VIDEO,
3129
    CODEC_ID_SNOW,
3130
    sizeof(SnowContext),
3131
    encode_init,
3132
    encode_frame,
3133
    encode_end,
3134
};
3135

    
3136

    
3137
#if 0
3138
#undef malloc
3139
#undef free
3140
#undef printf
3141

3142
int main(){
3143
    int width=256;
3144
    int height=256;
3145
    int buffer[2][width*height];
3146
    SnowContext s;
3147
    int i;
3148
    s.spatial_decomposition_count=6;
3149
    s.spatial_decomposition_type=1;
3150
    
3151
    printf("testing 5/3 DWT\n");
3152
    for(i=0; i<width*height; i++)
3153
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3154
    
3155
    spatial_dwt(&s, buffer[0], width, height, width);
3156
    spatial_idwt(&s, buffer[0], width, height, width);
3157
    
3158
    for(i=0; i<width*height; i++)
3159
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3160

3161
    printf("testing 9/7 DWT\n");
3162
    s.spatial_decomposition_type=0;
3163
    for(i=0; i<width*height; i++)
3164
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3165
    
3166
    spatial_dwt(&s, buffer[0], width, height, width);
3167
    spatial_idwt(&s, buffer[0], width, height, width);
3168
    
3169
    for(i=0; i<width*height; i++)
3170
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3171
        
3172
    printf("testing AC coder\n");
3173
    memset(s.header_state, 0, sizeof(s.header_state));
3174
    ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
3175
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3176
        
3177
    for(i=-256; i<256; i++){
3178
START_TIMER
3179
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3180
STOP_TIMER("put_symbol")
3181
    }
3182
    put_cabac_terminate(&s.c, 1);
3183

3184
    memset(s.header_state, 0, sizeof(s.header_state));
3185
    ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
3186
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3187
    
3188
    for(i=-256; i<256; i++){
3189
        int j;
3190
START_TIMER
3191
        j= get_symbol(&s.c, s.header_state, 1);
3192
STOP_TIMER("get_symbol")
3193
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3194
    }
3195
{
3196
int level, orientation, x, y;
3197
int64_t errors[8][4];
3198
int64_t g=0;
3199

3200
    memset(errors, 0, sizeof(errors));
3201
    s.spatial_decomposition_count=3;
3202
    s.spatial_decomposition_type=0;
3203
    for(level=0; level<s.spatial_decomposition_count; level++){
3204
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3205
            int w= width  >> (s.spatial_decomposition_count-level);
3206
            int h= height >> (s.spatial_decomposition_count-level);
3207
            int stride= width  << (s.spatial_decomposition_count-level);
3208
            DWTELEM *buf= buffer[0];
3209
            int64_t error=0;
3210

3211
            if(orientation&1) buf+=w;
3212
            if(orientation>1) buf+=stride>>1;
3213
            
3214
            memset(buffer[0], 0, sizeof(int)*width*height);
3215
            buf[w/2 + h/2*stride]= 256*256;
3216
            spatial_idwt(&s, buffer[0], width, height, width);
3217
            for(y=0; y<height; y++){
3218
                for(x=0; x<width; x++){
3219
                    int64_t d= buffer[0][x + y*width];
3220
                    error += d*d;
3221
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3222
                }
3223
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3224
            }
3225
            error= (int)(sqrt(error)+0.5);
3226
            errors[level][orientation]= error;
3227
            if(g) g=ff_gcd(g, error);
3228
            else g= error;
3229
        }
3230
    }
3231
    printf("static int const visual_weight[][4]={\n");
3232
    for(level=0; level<s.spatial_decomposition_count; level++){
3233
        printf("  {");
3234
        for(orientation=0; orientation<4; orientation++){
3235
            printf("%8lld,", errors[level][orientation]/g);
3236
        }
3237
        printf("},\n");
3238
    }
3239
    printf("};\n");
3240
    {
3241
            int level=2;
3242
            int orientation=3;
3243
            int w= width  >> (s.spatial_decomposition_count-level);
3244
            int h= height >> (s.spatial_decomposition_count-level);
3245
            int stride= width  << (s.spatial_decomposition_count-level);
3246
            DWTELEM *buf= buffer[0];
3247
            int64_t error=0;
3248

3249
            buf+=w;
3250
            buf+=stride>>1;
3251
            
3252
            memset(buffer[0], 0, sizeof(int)*width*height);
3253
#if 1
3254
            for(y=0; y<height; y++){
3255
                for(x=0; x<width; x++){
3256
                    int tab[4]={0,2,3,1};
3257
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3258
                }
3259
            }
3260
            spatial_dwt(&s, buffer[0], width, height, width);
3261
#else
3262
            for(y=0; y<h; y++){
3263
                for(x=0; x<w; x++){
3264
                    buf[x + y*stride  ]=169;
3265
                    buf[x + y*stride-w]=64;
3266
                }
3267
            }
3268
            spatial_idwt(&s, buffer[0], width, height, width);
3269
#endif
3270
            for(y=0; y<height; y++){
3271
                for(x=0; x<width; x++){
3272
                    int64_t d= buffer[0][x + y*width];
3273
                    error += d*d;
3274
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3275
                }
3276
                if(ABS(height/2-y)<9) printf("\n");
3277
            }
3278
    }
3279

    
3280
}
3281
    return 0;
3282
}
3283
#endif
3284