Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ bc68bfdd

History | View | Annotate | Download (123 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22
#include "cabac.h"
23

    
24
#include "mpegvideo.h"
25

    
26
#undef NDEBUG
27
#include <assert.h>
28

    
29
#define MAX_DECOMPOSITIONS 8
30
#define MAX_PLANES 4
31
#define DWTELEM int
32
#define QROOT 8 
33

    
34
static const int8_t quant3[256]={
35
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
51
};
52
static const int8_t quant3b[256]={
53
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
};
70
static const int8_t quant5[256]={
71
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
72
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
80
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
87
};
88
static const int8_t quant7[256]={
89
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
92
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
93
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
98
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
105
};
106
static const int8_t quant9[256]={
107
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
109
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
116
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
123
};
124
static const int8_t quant11[256]={
125
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
126
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
128
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
134
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
141
};
142
static const int8_t quant13[256]={
143
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
144
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
147
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
152
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
159
};
160

    
161
#define OBMC_MAX 64
162
#if 0 //64*cubic
163
static const uint8_t obmc32[1024]={
164
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
165
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
166
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
167
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
168
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
169
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
170
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
171
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
172
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
173
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
174
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
175
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
176
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
177
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
178
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
179
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
180
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
182
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
183
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
184
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
185
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
186
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
187
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
188
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
189
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
190
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
191
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
192
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
193
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
194
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
195
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
196
//error:0.000022
197
};
198
static const uint8_t obmc16[256]={
199
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
200
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
201
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
202
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
203
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
204
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
205
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
206
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
207
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
209
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
210
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
211
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
212
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
213
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
214
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
215
//error:0.000033
216
};
217
#elif 1 // 64*linear
218
static const uint8_t obmc32[1024]={
219
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
220
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
221
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
222
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
223
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
224
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
225
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
226
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
227
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
228
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
229
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
230
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
231
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
232
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
233
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
234
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
235
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
237
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
238
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
239
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
240
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
241
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
242
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
243
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
244
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
245
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
246
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
247
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
248
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
249
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
250
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
251
 //error:0.000020
252
};
253
static const uint8_t obmc16[256]={
254
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
255
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
256
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
257
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
258
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
259
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
260
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
261
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
262
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
264
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
265
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
266
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
267
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
268
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
269
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
270
//error:0.000015
271
};
272
#else //64*cos
273
static const uint8_t obmc32[1024]={
274
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
275
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
276
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
277
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
278
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
279
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
280
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
281
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
282
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
283
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
284
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
285
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
286
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
287
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
288
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
289
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
290
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
292
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
293
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
294
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
295
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
296
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
297
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
298
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
299
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
300
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
301
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
302
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
303
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
304
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
305
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
306
//error:0.000022
307
};
308
static const uint8_t obmc16[256]={
309
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
310
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
311
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
312
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
313
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
314
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
315
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
316
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
317
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
319
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
320
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
321
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
322
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
323
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
324
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
325
//error:0.000022
326
};
327
#endif
328

    
329
typedef struct SubBand{
330
    int level;
331
    int stride;
332
    int width;
333
    int height;
334
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
335
    DWTELEM *buf;
336
    struct SubBand *parent;
337
    uint8_t state[/*7*2*/ 7 + 512][32];
338
}SubBand;
339

    
340
typedef struct Plane{
341
    int width;
342
    int height;
343
    SubBand band[MAX_DECOMPOSITIONS][4];
344
}Plane;
345

    
346
typedef struct SnowContext{
347
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
348

    
349
    AVCodecContext *avctx;
350
    CABACContext c;
351
    DSPContext dsp;
352
    AVFrame input_picture;
353
    AVFrame current_picture;
354
    AVFrame last_picture;
355
    AVFrame mconly_picture;
356
//     uint8_t q_context[16];
357
    uint8_t header_state[32];
358
    int keyframe;
359
    int version;
360
    int spatial_decomposition_type;
361
    int temporal_decomposition_type;
362
    int spatial_decomposition_count;
363
    int temporal_decomposition_count;
364
    DWTELEM *spatial_dwt_buffer;
365
    DWTELEM *pred_buffer;
366
    int colorspace_type;
367
    int chroma_h_shift;
368
    int chroma_v_shift;
369
    int spatial_scalability;
370
    int qlog;
371
    int mv_scale;
372
    int qbias;
373
#define QBIAS_SHIFT 3
374
    int b_width; //FIXME remove?
375
    int b_height; //FIXME remove?
376
    Plane plane[MAX_PLANES];
377
    SubBand mb_band;
378
    SubBand mv_band[2];
379
    
380
    uint16_t *mb_type;
381
    uint8_t *mb_mean;
382
    uint32_t *dummy;
383
    int16_t (*motion_val8)[2];
384
    int16_t (*motion_val16)[2];
385
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
386
}SnowContext;
387

    
388
#define QEXPSHIFT 7 //FIXME try to change this to 0
389
static const uint8_t qexp[8]={
390
    128, 140, 152, 166, 181, 197, 215, 235
391
//   64,  70,  76,  83,  91,  99, 108, 117
392
//   32,  35,  38,  41,  45,  49,  54,  59
393
//   16,  17,  19,  21,  23,  25,  27,  29
394
//    8,   9,  10,  10,  11,  12,  13,  15
395
};
396

    
397
static inline int mirror(int v, int m){
398
    if     (v<0) return -v;
399
    else if(v>m) return 2*m-v;
400
    else         return v;
401
}
402

    
403
static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
404
    int i;
405

    
406
    if(v){
407
        const int a= ABS(v);
408
        const int e= av_log2(a);
409
#if 1
410
        const int el= FFMIN(e, 10);   
411
        put_cabac(c, state+0, 0);
412

    
413
        for(i=0; i<el; i++){
414
            put_cabac(c, state+1+i, 1);  //1..10
415
        }
416
        for(; i<e; i++){
417
            put_cabac(c, state+1+9, 1);  //1..10
418
        }
419
        put_cabac(c, state+1+FFMIN(i,9), 0);
420

    
421
        for(i=e-1; i>=el; i--){
422
            put_cabac(c, state+22+9, (a>>i)&1); //22..31
423
        }
424
        for(; i>=0; i--){
425
            put_cabac(c, state+22+i, (a>>i)&1); //22..31
426
        }
427

    
428
        if(is_signed)
429
            put_cabac(c, state+11 + el, v < 0); //11..21
430
#else
431
        
432
        put_cabac(c, state+0, 0);
433
        if(e<=9){
434
            for(i=0; i<e; i++){
435
                put_cabac(c, state+1+i, 1);  //1..10
436
            }
437
            put_cabac(c, state+1+i, 0);
438

    
439
            for(i=e-1; i>=0; i--){
440
                put_cabac(c, state+22+i, (a>>i)&1); //22..31
441
            }
442

    
443
            if(is_signed)
444
                put_cabac(c, state+11 + e, v < 0); //11..21
445
        }else{
446
            for(i=0; i<e; i++){
447
                put_cabac(c, state+1+FFMIN(i,9), 1);  //1..10
448
            }
449
            put_cabac(c, state+1+FFMIN(i,9), 0);
450

    
451
            for(i=e-1; i>=0; i--){
452
                put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
453
            }
454

    
455
            if(is_signed)
456
                put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
457
        }
458
#endif
459
    }else{
460
        put_cabac(c, state+0, 1);
461
    }
462
}
463

    
464
static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
465
    if(get_cabac(c, state+0))
466
        return 0;
467
    else{
468
        int i, e, a, el;
469
 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
470
        for(e=0; e<10; e++){ 
471
            if(get_cabac(c, state + 1 + e)==0) // 1..10
472
                break;
473
        }
474
        el= e;
475
 
476
        if(e==10){
477
            while(get_cabac(c, state + 1 + 9)) //10
478
                e++;
479
        }
480
        a= 1;
481
        for(i=e-1; i>=el; i--){
482
            a += a + get_cabac(c, state+22+9); //31
483
        }
484
        for(; i>=0; i--){
485
            a += a + get_cabac(c, state+22+i); //22..31
486
        }
487

    
488
        if(is_signed && get_cabac(c, state+11 + el)) //11..21
489
            return -a;
490
        else
491
            return a;
492
    }
493
}
494

    
495
static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
496
    int i;
497
    int e= av_log2(v<<1);
498

    
499
    assert(v>=0);
500
    if(v==0) assert(e==0);
501
    
502
    while(e > log2){
503
        put_cabac(c, state+log2, 1);
504
        v -= 1<<log2;
505
        assert(v>=0);
506
        e= av_log2(v<<1);
507
        log2++;
508
    }
509
    put_cabac(c, state+log2, 0);
510
    
511
    for(i=log2-1; i>=0; i--){
512
        put_cabac(c, state+31-i, (v>>i)&1);
513
    }
514
    assert(!((v>>i)&1));
515
}
516

    
517
static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
518
    int i;
519
    int v=0;
520

    
521
    while(get_cabac(c, state+log2)){
522
        v+= 1<<log2;
523
        log2++;
524
    }
525
    
526
    for(i=log2-1; i>=0; i--){
527
        v+= get_cabac(c, state+31-i)<<i;
528
    }
529

    
530
    return v;
531
}
532

    
533
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
534
    const int mirror_left= !highpass;
535
    const int mirror_right= (width&1) ^ highpass;
536
    const int w= (width>>1) - 1 + (highpass & width);
537
    int i;
538

    
539
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
540
    if(mirror_left){
541
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
542
        dst += dst_step;
543
        src += src_step;
544
    }
545
    
546
    for(i=0; i<w; i++){
547
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
548
    }
549
    
550
    if(mirror_right){
551
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
552
    }
553
}
554

    
555
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
556
    const int mirror_left= !highpass;
557
    const int mirror_right= (width&1) ^ highpass;
558
    const int w= (width>>1) - 1 + (highpass & width);
559
    int i;
560

    
561
    if(mirror_left){
562
        int r= 3*2*ref[0];
563
        r += r>>4;
564
        r += r>>8;
565
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
566
        dst += dst_step;
567
        src += src_step;
568
    }
569
    
570
    for(i=0; i<w; i++){
571
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
572
        r += r>>4;
573
        r += r>>8;
574
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
575
    }
576
    
577
    if(mirror_right){
578
        int r= 3*2*ref[w*ref_step];
579
        r += r>>4;
580
        r += r>>8;
581
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
582
    }
583
}
584

    
585

    
586
static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
587
    int x, i;
588
    
589
    for(x=start; x<width; x+=2){
590
        int64_t sum=0;
591

    
592
        for(i=0; i<n; i++){
593
            int x2= x + 2*i - n + 1;
594
            if     (x2<     0) x2= -x2;
595
            else if(x2>=width) x2= 2*width-x2-2;
596
            sum += coeffs[i]*(int64_t)dst[x2];
597
        }
598
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
599
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
600
    }
601
}
602

    
603
static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
604
    int x, y, i;
605
    for(y=start; y<height; y+=2){
606
        for(x=0; x<width; x++){
607
            int64_t sum=0;
608
    
609
            for(i=0; i<n; i++){
610
                int y2= y + 2*i - n + 1;
611
                if     (y2<      0) y2= -y2;
612
                else if(y2>=height) y2= 2*height-y2-2;
613
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
614
            }
615
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
616
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
617
        }
618
    }
619
}
620

    
621
#define SCALEX 1
622
#define LX0 0
623
#define LX1 1
624

    
625
#if 0 // more accurate 9/7
626
#define N1 2
627
#define SHIFT1 14
628
#define COEFFS1 (int[]){-25987,-25987}
629
#define N2 2
630
#define SHIFT2 19
631
#define COEFFS2 (int[]){-27777,-27777}
632
#define N3 2
633
#define SHIFT3 15
634
#define COEFFS3 (int[]){28931,28931}
635
#define N4 2
636
#define SHIFT4 15
637
#define COEFFS4 (int[]){14533,14533}
638
#elif 1 // 13/7 CRF
639
#define N1 4
640
#define SHIFT1 4
641
#define COEFFS1 (int[]){1,-9,-9,1}
642
#define N2 4
643
#define SHIFT2 4
644
#define COEFFS2 (int[]){-1,5,5,-1}
645
#define N3 0
646
#define SHIFT3 1
647
#define COEFFS3 NULL
648
#define N4 0
649
#define SHIFT4 1
650
#define COEFFS4 NULL
651
#elif 1 // 3/5
652
#define LX0 1
653
#define LX1 0
654
#define SCALEX 0.5
655
#define N1 2
656
#define SHIFT1 1
657
#define COEFFS1 (int[]){1,1}
658
#define N2 2
659
#define SHIFT2 2
660
#define COEFFS2 (int[]){-1,-1}
661
#define N3 0
662
#define SHIFT3 0
663
#define COEFFS3 NULL
664
#define N4 0
665
#define SHIFT4 0
666
#define COEFFS4 NULL
667
#elif 1 // 11/5 
668
#define N1 0
669
#define SHIFT1 1
670
#define COEFFS1 NULL
671
#define N2 2
672
#define SHIFT2 2
673
#define COEFFS2 (int[]){-1,-1}
674
#define N3 2
675
#define SHIFT3 0
676
#define COEFFS3 (int[]){-1,-1}
677
#define N4 4
678
#define SHIFT4 7
679
#define COEFFS4 (int[]){-5,29,29,-5}
680
#define SCALEX 4
681
#elif 1 // 9/7 CDF
682
#define N1 2
683
#define SHIFT1 7
684
#define COEFFS1 (int[]){-203,-203}
685
#define N2 2
686
#define SHIFT2 12
687
#define COEFFS2 (int[]){-217,-217}
688
#define N3 2
689
#define SHIFT3 7
690
#define COEFFS3 (int[]){113,113}
691
#define N4 2
692
#define SHIFT4 9
693
#define COEFFS4 (int[]){227,227}
694
#define SCALEX 1
695
#elif 1 // 7/5 CDF
696
#define N1 0
697
#define SHIFT1 1
698
#define COEFFS1 NULL
699
#define N2 2
700
#define SHIFT2 2
701
#define COEFFS2 (int[]){-1,-1}
702
#define N3 2
703
#define SHIFT3 0
704
#define COEFFS3 (int[]){-1,-1}
705
#define N4 2
706
#define SHIFT4 4
707
#define COEFFS4 (int[]){3,3}
708
#elif 1 // 9/7 MN
709
#define N1 4
710
#define SHIFT1 4
711
#define COEFFS1 (int[]){1,-9,-9,1}
712
#define N2 2
713
#define SHIFT2 2
714
#define COEFFS2 (int[]){1,1}
715
#define N3 0
716
#define SHIFT3 1
717
#define COEFFS3 NULL
718
#define N4 0
719
#define SHIFT4 1
720
#define COEFFS4 NULL
721
#else // 13/7 CRF
722
#define N1 4
723
#define SHIFT1 4
724
#define COEFFS1 (int[]){1,-9,-9,1}
725
#define N2 4
726
#define SHIFT2 4
727
#define COEFFS2 (int[]){-1,5,5,-1}
728
#define N3 0
729
#define SHIFT3 1
730
#define COEFFS3 NULL
731
#define N4 0
732
#define SHIFT4 1
733
#define COEFFS4 NULL
734
#endif
735
static void horizontal_decomposeX(int *b, int width){
736
    int temp[width];
737
    const int width2= width>>1;
738
    const int w2= (width+1)>>1;
739
    int A1,A2,A3,A4, x;
740

    
741
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
742
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
743
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
744
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
745
    
746
    for(x=0; x<width2; x++){
747
        temp[x   ]= b[2*x    ];
748
        temp[x+w2]= b[2*x + 1];
749
    }
750
    if(width&1)
751
        temp[x   ]= b[2*x    ];
752
    memcpy(b, temp, width*sizeof(int));
753
}
754

    
755
static void horizontal_composeX(int *b, int width){
756
    int temp[width];
757
    const int width2= width>>1;
758
    int A1,A2,A3,A4, x;
759
    const int w2= (width+1)>>1;
760

    
761
    memcpy(temp, b, width*sizeof(int));
762
    for(x=0; x<width2; x++){
763
        b[2*x    ]= temp[x   ];
764
        b[2*x + 1]= temp[x+w2];
765
    }
766
    if(width&1)
767
        b[2*x    ]= temp[x   ];
768

    
769
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
770
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
771
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
772
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
773
}
774

    
775
static void spatial_decomposeX(int *buffer, int width, int height, int stride){
776
    int x, y;
777
  
778
    for(y=0; y<height; y++){
779
        for(x=0; x<width; x++){
780
            buffer[y*stride + x] *= SCALEX;
781
        }
782
    }
783

    
784
    for(y=0; y<height; y++){
785
        horizontal_decomposeX(buffer + y*stride, width);
786
    }
787
    
788
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
789
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
790
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
791
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
792
}
793

    
794
static void spatial_composeX(int *buffer, int width, int height, int stride){
795
    int x, y;
796
  
797
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
798
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
799
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
800
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
801

    
802
    for(y=0; y<height; y++){
803
        horizontal_composeX(buffer + y*stride, width);
804
    }
805

    
806
    for(y=0; y<height; y++){
807
        for(x=0; x<width; x++){
808
            buffer[y*stride + x] /= SCALEX;
809
        }
810
    }
811
}
812

    
813
static void horizontal_decompose53i(int *b, int width){
814
    int temp[width];
815
    const int width2= width>>1;
816
    int A1,A2,A3,A4, x;
817
    const int w2= (width+1)>>1;
818

    
819
    for(x=0; x<width2; x++){
820
        temp[x   ]= b[2*x    ];
821
        temp[x+w2]= b[2*x + 1];
822
    }
823
    if(width&1)
824
        temp[x   ]= b[2*x    ];
825
#if 0
826
    A2= temp[1       ];
827
    A4= temp[0       ];
828
    A1= temp[0+width2];
829
    A1 -= (A2 + A4)>>1;
830
    A4 += (A1 + 1)>>1;
831
    b[0+width2] = A1;
832
    b[0       ] = A4;
833
    for(x=1; x+1<width2; x+=2){
834
        A3= temp[x+width2];
835
        A4= temp[x+1     ];
836
        A3 -= (A2 + A4)>>1;
837
        A2 += (A1 + A3 + 2)>>2;
838
        b[x+width2] = A3;
839
        b[x       ] = A2;
840

841
        A1= temp[x+1+width2];
842
        A2= temp[x+2       ];
843
        A1 -= (A2 + A4)>>1;
844
        A4 += (A1 + A3 + 2)>>2;
845
        b[x+1+width2] = A1;
846
        b[x+1       ] = A4;
847
    }
848
    A3= temp[width-1];
849
    A3 -= A2;
850
    A2 += (A1 + A3 + 2)>>2;
851
    b[width -1] = A3;
852
    b[width2-1] = A2;
853
#else        
854
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
855
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
856
#endif
857
}
858

    
859
static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
860
    int i;
861
    
862
    for(i=0; i<width; i++){
863
        b1[i] -= (b0[i] + b2[i])>>1;
864
    }
865
}
866

    
867
static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
868
    int i;
869
    
870
    for(i=0; i<width; i++){
871
        b1[i] += (b0[i] + b2[i] + 2)>>2;
872
    }
873
}
874

    
875
static void spatial_decompose53i(int *buffer, int width, int height, int stride){
876
    int x, y;
877
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
878
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
879
  
880
    for(y=-2; y<height; y+=2){
881
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
882
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
883

    
884
{START_TIMER
885
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
886
        if(y+2 < height) horizontal_decompose53i(b3, width);
887
STOP_TIMER("horizontal_decompose53i")}
888
        
889
{START_TIMER
890
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
891
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
892
STOP_TIMER("vertical_decompose53i*")}
893
        
894
        b0=b2;
895
        b1=b3;
896
    }
897
}
898

    
899
#define lift5 lift
900
#if 1
901
#define W_AM 3
902
#define W_AO 0
903
#define W_AS 1
904

    
905
#define W_BM 1
906
#define W_BO 8
907
#define W_BS 4
908

    
909
#undef lift5
910
#define W_CM 9999
911
#define W_CO 2
912
#define W_CS 2
913

    
914
#define W_DM 15
915
#define W_DO 16
916
#define W_DS 5
917
#elif 0
918
#define W_AM 55
919
#define W_AO 16
920
#define W_AS 5
921

    
922
#define W_BM 3
923
#define W_BO 32
924
#define W_BS 6
925

    
926
#define W_CM 127
927
#define W_CO 64
928
#define W_CS 7
929

    
930
#define W_DM 7
931
#define W_DO 8
932
#define W_DS 4
933
#elif 0
934
#define W_AM 97
935
#define W_AO 32
936
#define W_AS 6
937

    
938
#define W_BM 63
939
#define W_BO 512
940
#define W_BS 10
941

    
942
#define W_CM 13
943
#define W_CO 8
944
#define W_CS 4
945

    
946
#define W_DM 15
947
#define W_DO 16
948
#define W_DS 5
949

    
950
#else
951

    
952
#define W_AM 203
953
#define W_AO 64
954
#define W_AS 7
955

    
956
#define W_BM 217
957
#define W_BO 2048
958
#define W_BS 12
959

    
960
#define W_CM 113
961
#define W_CO 64
962
#define W_CS 7
963

    
964
#define W_DM 227
965
#define W_DO 128
966
#define W_DS 9
967
#endif
968
static void horizontal_decompose97i(int *b, int width){
969
    int temp[width];
970
    const int w2= (width+1)>>1;
971

    
972
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
973
    lift (temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
974
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
975
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
976
}
977

    
978

    
979
static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
980
    int i;
981
    
982
    for(i=0; i<width; i++){
983
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
984
    }
985
}
986

    
987
static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
988
    int i;
989
    
990
    for(i=0; i<width; i++){
991
#ifdef lift5
992
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
993
#else
994
        int r= 3*(b0[i] + b2[i]);
995
        r+= r>>4;
996
        r+= r>>8;
997
        b1[i] += (r+W_CO)>>W_CS;
998
#endif
999
    }
1000
}
1001

    
1002
static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
1003
    int i;
1004
    
1005
    for(i=0; i<width; i++){
1006
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1007
    }
1008
}
1009

    
1010
static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
1011
    int i;
1012
    
1013
    for(i=0; i<width; i++){
1014
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1015
    }
1016
}
1017

    
1018
static void spatial_decompose97i(int *buffer, int width, int height, int stride){
1019
    int x, y;
1020
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1021
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1022
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1023
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1024
  
1025
    for(y=-4; y<height; y+=2){
1026
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1027
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1028

    
1029
{START_TIMER
1030
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1031
        if(y+4 < height) horizontal_decompose97i(b5, width);
1032
if(width>400){
1033
STOP_TIMER("horizontal_decompose97i")
1034
}}
1035
        
1036
{START_TIMER
1037
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1038
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1039
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1040
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1041

    
1042
if(width>400){
1043
STOP_TIMER("vertical_decompose97i")
1044
}}
1045
        
1046
        b0=b2;
1047
        b1=b3;
1048
        b2=b4;
1049
        b3=b5;
1050
    }
1051
}
1052

    
1053
static void spatial_dwt(SnowContext *s, int *buffer, int width, int height, int stride){
1054
    int level;
1055
    
1056
    for(level=0; level<s->spatial_decomposition_count; level++){
1057
        switch(s->spatial_decomposition_type){
1058
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1059
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1060
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1061
        }
1062
    }
1063
}
1064

    
1065
static void horizontal_compose53i(int *b, int width){
1066
    int temp[width];
1067
    const int width2= width>>1;
1068
    const int w2= (width+1)>>1;
1069
    int A1,A2,A3,A4, x;
1070

    
1071
#if 0
1072
    A2= temp[1       ];
1073
    A4= temp[0       ];
1074
    A1= temp[0+width2];
1075
    A1 -= (A2 + A4)>>1;
1076
    A4 += (A1 + 1)>>1;
1077
    b[0+width2] = A1;
1078
    b[0       ] = A4;
1079
    for(x=1; x+1<width2; x+=2){
1080
        A3= temp[x+width2];
1081
        A4= temp[x+1     ];
1082
        A3 -= (A2 + A4)>>1;
1083
        A2 += (A1 + A3 + 2)>>2;
1084
        b[x+width2] = A3;
1085
        b[x       ] = A2;
1086

1087
        A1= temp[x+1+width2];
1088
        A2= temp[x+2       ];
1089
        A1 -= (A2 + A4)>>1;
1090
        A4 += (A1 + A3 + 2)>>2;
1091
        b[x+1+width2] = A1;
1092
        b[x+1       ] = A4;
1093
    }
1094
    A3= temp[width-1];
1095
    A3 -= A2;
1096
    A2 += (A1 + A3 + 2)>>2;
1097
    b[width -1] = A3;
1098
    b[width2-1] = A2;
1099
#else   
1100
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1101
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1102
#endif
1103
    for(x=0; x<width2; x++){
1104
        b[2*x    ]= temp[x   ];
1105
        b[2*x + 1]= temp[x+w2];
1106
    }
1107
    if(width&1)
1108
        b[2*x    ]= temp[x   ];
1109
}
1110

    
1111
static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
1112
    int i;
1113
    
1114
    for(i=0; i<width; i++){
1115
        b1[i] += (b0[i] + b2[i])>>1;
1116
    }
1117
}
1118

    
1119
static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
1120
    int i;
1121
    
1122
    for(i=0; i<width; i++){
1123
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1124
    }
1125
}
1126

    
1127
static void spatial_compose53i(int *buffer, int width, int height, int stride){
1128
    int x, y;
1129
    DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1130
    DWTELEM *b1= buffer + mirror(-1  , height-1)*stride;
1131
  
1132
    for(y=-1; y<=height; y+=2){
1133
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1134
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1135

    
1136
{START_TIMER
1137
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1138
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1139
STOP_TIMER("vertical_compose53i*")}
1140

    
1141
{START_TIMER
1142
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1143
        if(b0 <= b2) horizontal_compose53i(b1, width);
1144
STOP_TIMER("horizontal_compose53i")}
1145

    
1146
        b0=b2;
1147
        b1=b3;
1148
    }
1149
}   
1150

    
1151
 
1152
static void horizontal_compose97i(int *b, int width){
1153
    int temp[width];
1154
    const int w2= (width+1)>>1;
1155

    
1156
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1157
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1158
    lift (b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1159
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1160
}
1161

    
1162
static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1163
    int i;
1164
    
1165
    for(i=0; i<width; i++){
1166
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1167
    }
1168
}
1169

    
1170
static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1171
    int i;
1172
    
1173
    for(i=0; i<width; i++){
1174
#ifdef lift5
1175
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1176
#else
1177
        int r= 3*(b0[i] + b2[i]);
1178
        r+= r>>4;
1179
        r+= r>>8;
1180
        b1[i] -= (r+W_CO)>>W_CS;
1181
#endif
1182
    }
1183
}
1184

    
1185
static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1186
    int i;
1187
    
1188
    for(i=0; i<width; i++){
1189
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1190
    }
1191
}
1192

    
1193
static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1194
    int i;
1195
    
1196
    for(i=0; i<width; i++){
1197
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1198
    }
1199
}
1200

    
1201
static void spatial_compose97i(int *buffer, int width, int height, int stride){
1202
    int x, y;
1203
    DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1204
    DWTELEM *b1= buffer + mirror(-3  , height-1)*stride;
1205
    DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1206
    DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1207

    
1208
    for(y=-3; y<=height; y+=2){
1209
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1210
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1211

    
1212
        if(stride == width && y+4 < height && 0){ 
1213
            int x;
1214
            for(x=0; x<width/2; x++)
1215
                b5[x] += 64*2;
1216
            for(; x<width; x++)
1217
                b5[x] += 169*2;
1218
        }
1219
        
1220
{START_TIMER
1221
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1222
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1223
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1224
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1225
if(width>400){
1226
STOP_TIMER("vertical_compose97i")}}
1227

    
1228
{START_TIMER
1229
        if(y-1>=  0) horizontal_compose97i(b0, width);
1230
        if(b0 <= b2) horizontal_compose97i(b1, width);
1231
if(width>400 && b0 <= b2){
1232
STOP_TIMER("horizontal_compose97i")}}
1233
        
1234
        b0=b2;
1235
        b1=b3;
1236
        b2=b4;
1237
        b3=b5;
1238
    }
1239
}
1240

    
1241
static void spatial_idwt(SnowContext *s, int *buffer, int width, int height, int stride){
1242
    int level;
1243

    
1244
    for(level=s->spatial_decomposition_count-1; level>=0; level--){
1245
        switch(s->spatial_decomposition_type){
1246
        case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1247
        case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1248
        case 2: spatial_composeX  (buffer, width>>level, height>>level, stride<<level); break;
1249
        }
1250
    }
1251
}
1252

    
1253
static const int hilbert[16][2]={
1254
    {0,0}, {1,0}, {1,1}, {0,1},
1255
    {0,2}, {0,3}, {1,3}, {1,2},
1256
    {2,2}, {2,3}, {3,3}, {3,2},
1257
    {3,1}, {2,1}, {2,0}, {3,0},
1258
};
1259
#if 0
1260
-o o-
1261
 | |
1262
 o-o
1263
 
1264
-o-o o-o-
1265
   | | 
1266
 o-o o-o
1267
 |     |
1268
 o o-o o
1269
 | | | |
1270
 o-o o-o
1271
 
1272
 0112122312232334122323342334
1273
 0123456789ABCDEF0123456789AB
1274
 RLLRMRRLLRRMRLLMLRRLMLLRRLLM
1275
 
1276
 4  B  F 14 1B
1277
 4 11 15 20 27
1278
 
1279
-o o-o-o o-o-o o-
1280
 | |   | |   | |
1281
 o-o o-o o-o o-o
1282
     |     |
1283
 o-o o-o o-o o-o
1284
 | |   | |   | |
1285
 o o-o-o o-o-o o
1286
 |             |
1287
 o-o o-o-o-o o-o
1288
   | |     | | 
1289
 o-o o-o o-o o-o
1290
 |     | |     |
1291
 o o-o o o o-o o
1292
 | | | | | | | |
1293
 o-o o-o o-o o-o
1294

1295
#endif
1296

    
1297
#define SVI(a, i, x, y) \
1298
{\
1299
    a[i][0]= x;\
1300
    a[i][1]= y;\
1301
    i++;\
1302
}
1303

    
1304
static int sig_cmp(const void *a, const void *b){
1305
    const int16_t* da = (const int16_t *) a;
1306
    const int16_t* db = (const int16_t *) b;
1307
    
1308
    if(da[1] != db[1]) return da[1] - db[1];
1309
    else               return da[0] - db[0];
1310
}
1311

    
1312
static int deint(unsigned int a){
1313
    a &= 0x55555555;         //0 1 2 3 4 5 6 7 8 9 A B C D E F
1314
    a +=     a & 0x11111111; // 01  23  45  67  89  AB  CD  EF
1315
    a +=  3*(a & 0x0F0F0F0F);//   0123    4567    89AB    CDEF
1316
    a += 15*(a & 0x00FF00FF);//       01234567        89ABCDEF
1317
    a +=255*(a & 0x0000FFFF);//               0123456789ABCDEF
1318
    return a>>15;
1319
}
1320

    
1321
static void encode_subband_z0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1322
    const int level= b->level;
1323
    const int w= b->width;
1324
    const int h= b->height;
1325
    int x, y, pos;
1326

    
1327
    if(1){
1328
        int run=0;
1329
        int runs[w*h];
1330
        int run_index=0;
1331
        int count=0;
1332
        
1333
        for(pos=0; ; pos++){
1334
            int x= deint(pos   );
1335
            int y= deint(pos>>1);
1336
            int v, p=0, pr=0, pd=0;
1337
            int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;
1338

    
1339
            if(x>=w || y>=h){
1340
                if(x>=w && y>=h)
1341
                    break;
1342
                continue;
1343
            }
1344
            count++;
1345
                
1346
            v= src[x + y*stride];
1347

    
1348
            if(y){
1349
                t= src[x + (y-1)*stride];
1350
                if(x){
1351
                    lt= src[x - 1 + (y-1)*stride];
1352
                }
1353
                if(x + 1 < w){
1354
                    /*rt= src[x + 1 + (y-1)*stride]*/;
1355
                }
1356
            }
1357
            if(x){
1358
                l= src[x - 1 + y*stride];
1359
                /*if(x > 1){
1360
                    if(orientation==1) ll= src[y + (x-2)*stride];
1361
                    else               ll= src[x - 2 + y*stride];
1362
                }*/
1363
            }
1364
            if(parent){
1365
                int px= x>>1;
1366
                int py= y>>1;
1367
                if(px<b->parent->width && py<b->parent->height){
1368
                    p= parent[px + py*2*stride];
1369
                    /*if(px+1<b->parent->width) 
1370
                        pr= parent[px + 1 + py*2*stride];
1371
                    if(py+1<b->parent->height) 
1372
                        pd= parent[px + (py+1)*2*stride];*/
1373
                }
1374
            }
1375
            if(!(/*ll|*/l|lt|t|/*rt|*/p)){
1376
                if(v){
1377
                    runs[run_index++]= run;
1378
                    run=0;
1379
                }else{
1380
                    run++;
1381
                }
1382
            }
1383
        }
1384
        assert(count==w*h);
1385
        runs[run_index++]= run;
1386
        run_index=0;
1387
        run= runs[run_index++];
1388

    
1389
        put_symbol(&s->c, b->state[1], run, 0);
1390
        
1391
        for(pos=0; ; pos++){
1392
            int x= deint(pos   );
1393
            int y= deint(pos>>1);
1394
            int v, p=0, pr=0, pd=0;
1395
            int /*ll=0, */l=0, lt=0, t=0/*, rt=0*/;
1396

    
1397
            if(x>=w || y>=h){
1398
                if(x>=w && y>=h)
1399
                    break;
1400
                continue;
1401
            }
1402
            v= src[x + y*stride];
1403

    
1404
            if(y){
1405
                t= src[x + (y-1)*stride];
1406
                if(x){
1407
                    lt= src[x - 1 + (y-1)*stride];
1408
                }
1409
                if(x + 1 < w){
1410
//                    rt= src[x + 1 + (y-1)*stride];
1411
                }
1412
            }
1413
            if(x){
1414
                l= src[x - 1 + y*stride];
1415
                /*if(x > 1){
1416
                    if(orientation==1) ll= src[y + (x-2)*stride];
1417
                    else               ll= src[x - 2 + y*stride];
1418
                }*/
1419
            }
1420

    
1421
            if(parent){
1422
                int px= x>>1;
1423
                int py= y>>1;
1424
                if(px<b->parent->width && py<b->parent->height){
1425
                    p= parent[px + py*2*stride];
1426
/*                        if(px+1<b->parent->width) 
1427
                        pr= parent[px + 1 + py*2*stride];
1428
                    if(py+1<b->parent->height) 
1429
                        pd= parent[px + (py+1)*2*stride];*/
1430
                }
1431
            }
1432
            if(/*ll|*/l|lt|t|/*rt|*/p){
1433
                int context= av_log2(/*ABS(ll) + */2*(3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p)));
1434

    
1435
                put_cabac(&s->c, &b->state[0][context], !!v);
1436
            }else{
1437
                if(!run){
1438
                    run= runs[run_index++];
1439
                    put_symbol(&s->c, b->state[1], run, 0);
1440
                    assert(v);
1441
                }else{
1442
                    run--;
1443
                    assert(!v);
1444
                }
1445
            }
1446
            if(v){
1447
                int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + /*ABS(rt) +*/ ABS(p));
1448

    
1449
                put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
1450
                put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1451
            }
1452
        }
1453
    }
1454
}
1455

    
1456
static void encode_subband_bp(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1457
    const int level= b->level;
1458
    const int w= b->width;
1459
    const int h= b->height;
1460
    int x, y;
1461

    
1462
#if 0
1463
    int plane;
1464
    for(plane=24; plane>=0; plane--){
1465
        int run=0;
1466
        int runs[w*h];
1467
        int run_index=0;
1468
                
1469
        for(y=0; y<h; y++){
1470
            for(x=0; x<w; x++){
1471
                int v, lv, p=0;
1472
                int d=0, r=0, rd=0, ld=0;
1473
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1474
                v= src[x + y*stride];
1475

1476
                if(y){
1477
                    t= src[x + (y-1)*stride];
1478
                    if(x){
1479
                        lt= src[x - 1 + (y-1)*stride];
1480
                    }
1481
                    if(x + 1 < w){
1482
                        rt= src[x + 1 + (y-1)*stride];
1483
                    }
1484
                }
1485
                if(x){
1486
                    l= src[x - 1 + y*stride];
1487
                    /*if(x > 1){
1488
                        if(orientation==1) ll= src[y + (x-2)*stride];
1489
                        else               ll= src[x - 2 + y*stride];
1490
                    }*/
1491
                }
1492
                if(y+1<h){
1493
                    d= src[x + (y+1)*stride];
1494
                    if(x)         ld= src[x - 1 + (y+1)*stride];
1495
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
1496
                }
1497
                if(x + 1 < w)
1498
                    r= src[x + 1 + y*stride];
1499
                if(parent){
1500
                    int px= x>>1;
1501
                    int py= y>>1;
1502
                    if(px<b->parent->width && py<b->parent->height) 
1503
                        p= parent[px + py*2*stride];
1504
                }
1505
#define HIDE(c, plane) c= c>=0 ? c&((-1)<<(plane)) : -((-c)&((-1)<<(plane)));
1506
                lv=v;
1507
                HIDE( v, plane)
1508
                HIDE(lv, plane+1)
1509
                HIDE( p, plane)
1510
                HIDE( l, plane)
1511
                HIDE(lt, plane)
1512
                HIDE( t, plane)
1513
                HIDE(rt, plane)
1514
                HIDE( r, plane+1)
1515
                HIDE(rd, plane+1)
1516
                HIDE( d, plane+1)
1517
                HIDE(ld, plane+1)
1518
                if(!(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv)){
1519
                    if(v){
1520
                        runs[run_index++]= run;
1521
                        run=0;
1522
                    }else{
1523
                        run++;
1524
                    }
1525
                }
1526
            }
1527
        }
1528
        runs[run_index++]= run;
1529
        run_index=0;
1530
        run= runs[run_index++];
1531

1532
        put_symbol(&s->c, b->state[1], run, 0);
1533
        
1534
        for(y=0; y<h; y++){
1535
            for(x=0; x<w; x++){
1536
                int v, p=0, lv;
1537
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1538
                int d=0, r=0, rd=0, ld=0;
1539
                v= src[x + y*stride];
1540

1541
                if(y){
1542
                    t= src[x + (y-1)*stride];
1543
                    if(x){
1544
                        lt= src[x - 1 + (y-1)*stride];
1545
                    }
1546
                    if(x + 1 < w){
1547
                        rt= src[x + 1 + (y-1)*stride];
1548
                    }
1549
                }
1550
                if(x){
1551
                    l= src[x - 1 + y*stride];
1552
                    /*if(x > 1){
1553
                        if(orientation==1) ll= src[y + (x-2)*stride];
1554
                        else               ll= src[x - 2 + y*stride];
1555
                    }*/
1556
                }
1557
                if(y+1<h){
1558
                    d= src[x + (y+1)*stride];
1559
                    if(x)         ld= src[x - 1 + (y+1)*stride];
1560
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
1561
                }
1562
                if(x + 1 < w)
1563
                    r= src[x + 1 + y*stride];
1564

1565
                if(parent){
1566
                    int px= x>>1;
1567
                    int py= y>>1;
1568
                    if(px<b->parent->width && py<b->parent->height) 
1569
                        p= parent[px + py*2*stride];
1570
                }
1571
                lv=v;
1572
                HIDE( v, plane)
1573
                HIDE(lv, plane+1)
1574
                HIDE( p, plane)
1575
                HIDE( l, plane)
1576
                HIDE(lt, plane)
1577
                HIDE( t, plane)
1578
                HIDE(rt, plane)
1579
                HIDE( r, plane+1)
1580
                HIDE(rd, plane+1)
1581
                HIDE( d, plane+1)
1582
                HIDE(ld, plane+1)
1583
                if(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv){
1584
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)
1585
                                                      +3*ABS(r) + ABS(rd) + 2*ABS(d) + ABS(ld));
1586

1587
                    if(lv) put_cabac(&s->c, &b->state[99][context + 8*(av_log2(ABS(lv))-plane)], !!(v-lv));
1588
                    else   put_cabac(&s->c, &b->state[ 0][context], !!v);
1589
                }else{
1590
                    assert(!lv);
1591
                    if(!run){
1592
                        run= runs[run_index++];
1593
                        put_symbol(&s->c, b->state[1], run, 0);
1594
                        assert(v);
1595
                    }else{
1596
                        run--;
1597
                        assert(!v);
1598
                    }
1599
                }
1600
                if(v && !lv){
1601
                    int context=    clip(quant3b[l&0xFF] + quant3b[r&0xFF], -1,1)
1602
                                + 3*clip(quant3b[t&0xFF] + quant3b[d&0xFF], -1,1);
1603
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + context], v<0);
1604
                }
1605
            }
1606
        }
1607
    }
1608
    return;    
1609
#endif
1610
}
1611

    
1612
static void encode_subband_X(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1613
    const int level= b->level;
1614
    const int w= b->width;
1615
    const int h= b->height;
1616
    int x, y;
1617

    
1618
#if 0
1619
    if(orientation==3 && parent && 0){
1620
        int16_t candidate[w*h][2];
1621
        uint8_t state[w*h];
1622
        int16_t boarder[3][w*h*4][2];
1623
        int16_t significant[w*h][2];
1624
        int candidate_count=0;
1625
        int boarder_count[3]={0,0,0};
1626
        int significant_count=0;
1627
        int rle_pos=0;
1628
        int v, last_v;
1629
        int primary= orientation==1;
1630
        
1631
        memset(candidate, 0, sizeof(candidate));
1632
        memset(state, 0, sizeof(state));
1633
        memset(boarder, 0, sizeof(boarder));
1634
        
1635
        for(y=0; y<h; y++){
1636
            for(x=0; x<w; x++){
1637
                if(parent[(x>>1) + (y>>1)*2*stride])
1638
                    SVI(candidate, candidate_count, x, y)
1639
            }
1640
        }
1641

1642
        for(;;){
1643
            while(candidate_count && !boarder_count[0] && !boarder_count[1] && !boarder_count[2]){
1644
                candidate_count--;
1645
                x= candidate[ candidate_count][0];
1646
                y= candidate[ candidate_count][1];
1647
                if(state[x + y*w])
1648
                    continue;
1649
                state[x + y*w]= 1;
1650
                v= !!src[x + y*stride];
1651
                put_cabac(&s->c, &b->state[0][0], v);
1652
                if(v){
1653
                    SVI(significant, significant_count, x,y)
1654
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1655
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1656
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1657
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1658
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1659
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1660
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1661
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1662
                }
1663
            }
1664
            while(!boarder_count[0] && !boarder_count[1] && !boarder_count[2] && rle_pos < w*h){
1665
                int run=0;
1666
                for(; rle_pos < w*h;){
1667
                    x= rle_pos % w; //FIXME speed
1668
                    y= rle_pos / w;
1669
                    rle_pos++;
1670
                    if(state[x + y*w])
1671
                        continue;
1672
                    state[x + y*w]= 1;
1673
                    v= !!src[x + y*stride];
1674
                    if(v){
1675
                        put_symbol(&s->c, b->state[1], run, 0);
1676
                        SVI(significant, significant_count, x,y)
1677
                        if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1678
                        if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1679
                        if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1680
                        if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1681
                        if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1682
                        if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1683
                        if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1684
                        if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1685
                        break;
1686
//FIXME                note only right & down can be boarders
1687
                    }
1688
                    run++;
1689
                }
1690
            }
1691
            if(!boarder_count[0] && !boarder_count[1] && !boarder_count[2])
1692
                break;
1693
            
1694
            while(boarder_count[0] || boarder_count[1] || boarder_count[2]){
1695
                int index;
1696
                
1697
                if     (boarder_count[  primary]) index=  primary;
1698
                else if(boarder_count[1-primary]) index=1-primary;
1699
                else                              index=2;
1700
                
1701
                boarder_count[index]--;
1702
                x= boarder[index][ boarder_count[index] ][0];
1703
                y= boarder[index][ boarder_count[index] ][1];
1704
                if(state[x + y*w]) //FIXME maybe check earlier
1705
                    continue;
1706
                state[x + y*w]= 1;
1707
                v= !!src[x + y*stride];
1708
                put_cabac(&s->c, &b->state[0][index+1], v);
1709
                if(v){
1710
                    SVI(significant, significant_count, x,y)
1711
                    if(x     && !state[x - 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x-1,y  )
1712
                    if(y     && !state[x     + (y-1)*w]) SVI(boarder[1],boarder_count[1],x  ,y-1)
1713
                    if(x+1<w && !state[x + 1 +  y   *w]) SVI(boarder[0],boarder_count[0],x+1,y  )
1714
                    if(y+1<h && !state[x     + (y+1)*w]) SVI(boarder[1],boarder_count[1],x  ,y+1)
1715
                    if(x     && y     && !state[x - 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x-1,y-1)
1716
                    if(x     && y+1<h && !state[x - 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x-1,y+1)
1717
                    if(x+1<w && y+1<h && !state[x + 1 + (y+1)*w]) SVI(boarder[2],boarder_count[2],x+1,y+1)
1718
                    if(x+1<w && y     && !state[x + 1 + (y-1)*w]) SVI(boarder[2],boarder_count[2],x+1,y-1)
1719
                }
1720
            }
1721
        }
1722
        //FIXME sort significant coeffs maybe
1723
        if(1){
1724
            qsort(significant, significant_count, sizeof(int16_t[2]), sig_cmp);
1725
        }
1726
        
1727
        last_v=1;
1728
        while(significant_count){
1729
            int context= 3 + quant7[last_v&0xFF]; //use significance of suroundings
1730
            significant_count--;
1731
            x= significant[significant_count][0];//FIXME try opposit direction
1732
            y= significant[significant_count][1];
1733
            v= src[x + y*stride];
1734
            put_symbol(&s->c, b->state[context + 2], v, 1); //FIXME try to avoid first bit, try this with the old code too!!
1735
            last_v= v;
1736
        }
1737
    }
1738
#endif
1739
}
1740

    
1741
static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1742
    const int level= b->level;
1743
    const int w= b->width;
1744
    const int h= b->height;
1745
    int x, y;
1746

    
1747
    if(1){
1748
        int run=0;
1749
        int runs[w*h];
1750
        int run_index=0;
1751
                
1752
        for(y=0; y<h; y++){
1753
            for(x=0; x<w; x++){
1754
                int v, p=0;
1755
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1756
                v= src[x + y*stride];
1757

    
1758
                if(y){
1759
                    t= src[x + (y-1)*stride];
1760
                    if(x){
1761
                        lt= src[x - 1 + (y-1)*stride];
1762
                    }
1763
                    if(x + 1 < w){
1764
                        rt= src[x + 1 + (y-1)*stride];
1765
                    }
1766
                }
1767
                if(x){
1768
                    l= src[x - 1 + y*stride];
1769
                    /*if(x > 1){
1770
                        if(orientation==1) ll= src[y + (x-2)*stride];
1771
                        else               ll= src[x - 2 + y*stride];
1772
                    }*/
1773
                }
1774
                if(parent){
1775
                    int px= x>>1;
1776
                    int py= y>>1;
1777
                    if(px<b->parent->width && py<b->parent->height) 
1778
                        p= parent[px + py*2*stride];
1779
                }
1780
                if(!(/*ll|*/l|lt|t|rt|p)){
1781
                    if(v){
1782
                        runs[run_index++]= run;
1783
                        run=0;
1784
                    }else{
1785
                        run++;
1786
                    }
1787
                }
1788
            }
1789
        }
1790
        runs[run_index++]= run;
1791
        run_index=0;
1792
        run= runs[run_index++];
1793

    
1794
        put_symbol2(&s->c, b->state[1], run, 3);
1795
        
1796
        for(y=0; y<h; y++){
1797
            for(x=0; x<w; x++){
1798
                int v, p=0;
1799
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1800
                v= src[x + y*stride];
1801

    
1802
                if(y){
1803
                    t= src[x + (y-1)*stride];
1804
                    if(x){
1805
                        lt= src[x - 1 + (y-1)*stride];
1806
                    }
1807
                    if(x + 1 < w){
1808
                        rt= src[x + 1 + (y-1)*stride];
1809
                    }
1810
                }
1811
                if(x){
1812
                    l= src[x - 1 + y*stride];
1813
                    /*if(x > 1){
1814
                        if(orientation==1) ll= src[y + (x-2)*stride];
1815
                        else               ll= src[x - 2 + y*stride];
1816
                    }*/
1817
                }
1818
                if(parent){
1819
                    int px= x>>1;
1820
                    int py= y>>1;
1821
                    if(px<b->parent->width && py<b->parent->height) 
1822
                        p= parent[px + py*2*stride];
1823
                }
1824
                if(/*ll|*/l|lt|t|rt|p){
1825
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1826

    
1827
                    put_cabac(&s->c, &b->state[0][context], !!v);
1828
                }else{
1829
                    if(!run){
1830
                        run= runs[run_index++];
1831

    
1832
                        put_symbol2(&s->c, b->state[1], run, 3);
1833
                        assert(v);
1834
                    }else{
1835
                        run--;
1836
                        assert(!v);
1837
                    }
1838
                }
1839
                if(v){
1840
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1841

    
1842
                    put_symbol(&s->c, b->state[context + 2], ABS(v)-1, 0);
1843
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1844
                }
1845
            }
1846
        }
1847
    }
1848
}
1849

    
1850
static void encode_subband_dzr(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1851
    const int level= b->level;
1852
    const int w= b->width;
1853
    const int h= b->height;
1854
    int x, y;
1855

    
1856
    if(1){
1857
        int run[16]={0};
1858
        int runs[16][w*h]; //FIXME do something about the size
1859
        int run_index[16]={0};
1860
        int positions[2][w];
1861
        int distances[2][w];
1862
        int dist_count=0;
1863
        int i;
1864
                
1865
        for(y=0; y<h; y++){
1866
            int *     pos = positions[ y&1];
1867
            int *last_pos = positions[(y&1)^1];
1868
            int *     dist= distances[ y&1];
1869
            int *last_dist= distances[(y&1)^1];
1870
            int dist_index=0;
1871
            int last_dist_index=0;
1872
            
1873
            for(x=0; x<w; x++){
1874
                int p=0, l=0, lt=0, t=0, rt=0;
1875
                int v= src[x + y*stride];
1876

    
1877
                if(y){
1878
                    t= src[x + (y-1)*stride];
1879
                    if(x){
1880
                        lt= src[x - 1 + (y-1)*stride];
1881
                    }
1882
                    if(x + 1 < w){
1883
                        rt= src[x + 1 + (y-1)*stride];
1884
                    }
1885
                }
1886
                if(x){
1887
                    l= src[x - 1 + y*stride];
1888
                }
1889
                if(parent){
1890
                    int px= x>>1;
1891
                    int py= y>>1;
1892
                    if(px<b->parent->width && py<b->parent->height) 
1893
                        p= parent[px + py*2*stride];
1894
                }
1895
                if(last_dist_index < dist_count && last_pos[last_dist_index] == x){
1896
                    if(dist_index==0 || x - pos[dist_index-1] > dist[dist_index-1] - last_dist[last_dist_index]){
1897
                        pos[dist_index]= x;
1898
                        dist[dist_index++]= last_dist[last_dist_index];
1899
                    }
1900
                    last_dist_index++;
1901
                }
1902
                
1903
                if(!(l|lt|t|rt|p)){
1904
                    int cur_dist=w>>1;
1905
                    int run_class;
1906
                    
1907
                    if(last_dist_index < dist_count) 
1908
                        cur_dist= last_pos[last_dist_index] - x + y - last_dist[last_dist_index];
1909
                    if(dist_index)
1910
                        cur_dist= FFMIN(cur_dist, x - pos[dist_index-1] + y - dist[dist_index-1]);
1911
                    assert(cur_dist>=2);
1912
                    run_class= av_log2(cur_dist+62);
1913
                    
1914
                    if(v){
1915
                        runs[run_class][run_index[run_class]++]= run[run_class];
1916
                        run[run_class]=0;
1917
                    }else{
1918
                        run[run_class]++;
1919
                    }
1920
                }
1921
                if(v){
1922
                    while(dist_index && x - pos[dist_index-1] <= y - dist[dist_index-1])
1923
                        dist_index--;
1924
                    pos[dist_index]= x;
1925
                    dist[dist_index++]= y;
1926
                }
1927
            }
1928
            dist_count= dist_index;
1929
        }
1930
        for(i=0; i<12; i++){
1931
            runs[i][run_index[i]++]= run[i];
1932
            run_index[i]=0;
1933
            run[i]=0;
1934
        }
1935
        
1936
        dist_count=0;
1937
        
1938
        for(y=0; y<h; y++){
1939
            int *     pos = positions[ y&1];
1940
            int *last_pos = positions[(y&1)^1];
1941
            int *     dist= distances[ y&1];
1942
            int *last_dist= distances[(y&1)^1];
1943
            int dist_index=0;
1944
            int last_dist_index=0;
1945
            
1946
            for(x=0; x<w; x++){
1947
                int p=0, l=0, lt=0, t=0, rt=0;
1948
                int v= src[x + y*stride];
1949

    
1950
                if(y){
1951
                    t= src[x + (y-1)*stride];
1952
                    if(x){
1953
                        lt= src[x - 1 + (y-1)*stride];
1954
                    }
1955
                    if(x + 1 < w){
1956
                        rt= src[x + 1 + (y-1)*stride];
1957
                    }
1958
                }
1959
                if(x){
1960
                    l= src[x - 1 + y*stride];
1961
                }
1962
                if(parent){
1963
                    int px= x>>1;
1964
                    int py= y>>1;
1965
                    if(px<b->parent->width && py<b->parent->height) 
1966
                        p= parent[px + py*2*stride];
1967
                }
1968
                if(last_dist_index < dist_count && last_pos[last_dist_index] == x){
1969
                    if(dist_index==0 || x - pos[dist_index-1] > dist[dist_index-1] - last_dist[last_dist_index]){
1970
                        pos[dist_index]= x;
1971
                        dist[dist_index++]= last_dist[last_dist_index];
1972
                    }
1973
                    last_dist_index++;
1974
                }
1975
                if(l|lt|t|rt|p){
1976
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1977

    
1978
                    put_cabac(&s->c, &b->state[0][context], !!v);
1979
                }else{
1980
                    int cur_dist=w>>1;
1981
                    int run_class;
1982
                    
1983
                    if(last_dist_index < dist_count) 
1984
                        cur_dist= last_pos[last_dist_index] - x + y - last_dist[last_dist_index];
1985
                    if(dist_index)
1986
                        cur_dist= FFMIN(cur_dist, x - pos[dist_index-1] + y - dist[dist_index-1]);
1987
                    assert(cur_dist>=2);
1988
                    assert(!dist_index || (pos[dist_index-1] >= 0 && pos[dist_index-1] <w));
1989
                    assert(last_dist_index >= dist_count || (last_pos[last_dist_index] >= 0 && last_pos[last_dist_index] <w));
1990
                    assert(!dist_index || dist[dist_index-1] <= y);
1991
                    assert(last_dist_index >= dist_count || last_dist[last_dist_index] < y);
1992
                    assert(cur_dist <= y + FFMAX(x, w-x-1));
1993
                    run_class= av_log2(cur_dist+62);
1994

    
1995
                    if(!run_index[run_class]){
1996
                        run[run_class]= runs[run_class][run_index[run_class]++];
1997
                        put_symbol(&s->c, b->state[run_class+1], run[run_class], 0);
1998
                    }
1999
                    if(!run[run_class]){
2000
                        run[run_class]= runs[run_class][run_index[run_class]++];
2001
                        put_symbol(&s->c, b->state[run_class+1], run[run_class], 0);
2002
                        assert(v);
2003
                    }else{
2004
                        run[run_class]--;
2005
                        assert(!v);
2006
                    }
2007
                }
2008
                if(v){
2009
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
2010

    
2011
                    put_symbol(&s->c, b->state[context + 16], ABS(v)-1, 0);
2012
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
2013

    
2014
                    while(dist_index && x - pos[dist_index-1] <= y - dist[dist_index-1])
2015
                        dist_index--;
2016
                    pos[dist_index]= x;
2017
                    dist[dist_index++]= y;
2018
                }
2019
            }
2020
            dist_count= dist_index;
2021
        }
2022
    }
2023
}
2024

    
2025
static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
2026
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
2027
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
2028
    encode_subband_c0run(s, b, src, parent, stride, orientation);
2029
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
2030
}
2031

    
2032
static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
2033
    const int level= b->level;
2034
    const int w= b->width;
2035
    const int h= b->height;
2036
    int x,y;
2037

    
2038
    START_TIMER
2039
#if 0    
2040
    for(y=0; y<b->height; y++)
2041
        memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
2042

2043
    int plane;
2044
    for(plane=24; plane>=0; plane--){
2045
        int run;
2046

2047
        run= get_symbol(&s->c, b->state[1], 0);
2048
                
2049
#define HIDE(c, plane) c= c>=0 ? c&((-1)<<(plane)) : -((-c)&((-1)<<(plane)));
2050
        
2051
        for(y=0; y<h; y++){
2052
            for(x=0; x<w; x++){
2053
                int v, p=0, lv;
2054
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
2055
                int d=0, r=0, rd=0, ld=0;
2056
                lv= src[x + y*stride];
2057

2058
                if(y){
2059
                    t= src[x + (y-1)*stride];
2060
                    if(x){
2061
                        lt= src[x - 1 + (y-1)*stride];
2062
                    }
2063
                    if(x + 1 < w){
2064
                        rt= src[x + 1 + (y-1)*stride];
2065
                    }
2066
                }
2067
                if(x){
2068
                    l= src[x - 1 + y*stride];
2069
                    /*if(x > 1){
2070
                        if(orientation==1) ll= src[y + (x-2)*stride];
2071
                        else               ll= src[x - 2 + y*stride];
2072
                    }*/
2073
                }
2074
                if(y+1<h){
2075
                    d= src[x + (y+1)*stride];
2076
                    if(x)         ld= src[x - 1 + (y+1)*stride];
2077
                    if(x + 1 < w) rd= src[x + 1 + (y+1)*stride];
2078
                }
2079
                if(x + 1 < w)
2080
                    r= src[x + 1 + y*stride];
2081

2082
                if(parent){
2083
                    int px= x>>1;
2084
                    int py= y>>1;
2085
                    if(px<b->parent->width && py<b->parent->height) 
2086
                        p= parent[px + py*2*stride];
2087
                }
2088
                HIDE( p, plane)
2089
                if(/*ll|*/l|lt|t|rt|r|rd|ld|d|p|lv){
2090
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p)
2091
                                                      +3*ABS(r) + ABS(rd) + 2*ABS(d) + ABS(ld));
2092

2093
                    if(lv){
2094
                        assert(context + 8*av_log2(ABS(lv)) < 512 - 100);
2095
                        if(get_cabac(&s->c, &b->state[99][context + 8*(av_log2(ABS(lv))-plane)])){
2096
                            if(lv<0) v= lv - (1<<plane);
2097
                            else     v= lv + (1<<plane);
2098
                        }else
2099
                            v=lv;
2100
                    }else{
2101
                        v= get_cabac(&s->c, &b->state[ 0][context]) << plane;
2102
                    }
2103
                }else{
2104
                    assert(!lv);
2105
                    if(!run){
2106
                        run= get_symbol(&s->c, b->state[1], 0);
2107
                        v= 1<<plane;
2108
                    }else{
2109
                        run--;
2110
                        v=0;
2111
                    }
2112
                }
2113
                if(v && !lv){
2114
                    int context=    clip(quant3b[l&0xFF] + quant3b[r&0xFF], -1,1)
2115
                                + 3*clip(quant3b[t&0xFF] + quant3b[d&0xFF], -1,1);
2116
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + context]))
2117
                        v= -v;
2118
                }
2119
                src[x + y*stride]= v;
2120
            }
2121
        }
2122
    }
2123
    return;    
2124
#endif
2125
#if 0
2126
    int tree[10][w*h]; //FIXME space waste ...
2127
    int treedim[10][2];
2128
    int lev;
2129
    const int max_level= av_log2(2*FFMAX(w,h)-1);
2130
    int w2=w, h2=h;
2131
    memset(tree, 0, sizeof(tree));
2132
    
2133
//    assert(w%2==0 && h%2==0);
2134

2135
    for(lev=max_level; lev>=0; lev--){
2136
        treedim[lev][0]= w2;
2137
        treedim[lev][1]= h2;
2138
        w2= (w2+1)>>1;
2139
        h2= (h2+1)>>1;
2140
    }    
2141
    
2142
    for(lev=0; lev<=max_level; lev++){
2143
        w2= treedim[lev][0];
2144
        h2= treedim[lev][1];
2145
        for(y=0; y<h2; y++){
2146
            for(x=0; x<w2; x++){
2147
                int l= 0, t=0;
2148
                int context;
2149
                if(lev && !tree[lev-1][x/2 + y/2*w])
2150
                    continue;
2151

2152
                if(x) l= tree[lev][x - 1 + y*w];
2153
                if(y) t= tree[lev][x + (y-1)*w];
2154

2155
                context= lev + 8*(!!l) + 16*(!!t);
2156
                tree[lev][x + y*w]= get_cabac(&s->c, &b->state[98][context]);
2157
            }
2158
        }
2159
    }
2160
    if(1){
2161
        for(y=0; y<b->height; y++)
2162
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
2163

2164
        for(y=0; y<h; y++){
2165
            for(x=0; x<w; x++){
2166
                int v, p=0;
2167
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
2168

2169
                if(y){
2170
                    t= src[x + (y-1)*stride];
2171
                    if(x){
2172
                        lt= src[x - 1 + (y-1)*stride];
2173
                    }
2174
                    if(x + 1 < w){
2175
                        rt= src[x + 1 + (y-1)*stride];
2176
                    }
2177
                }
2178
                if(x){
2179
                    l= src[x - 1 + y*stride];
2180
                    /*if(x > 1){
2181
                        if(orientation==1) ll= src[y + (x-2)*stride];
2182
                        else               ll= src[x - 2 + y*stride];
2183
                    }*/
2184
                }
2185
                if(parent){
2186
                    int px= x>>1;
2187
                    int py= y>>1;
2188
                    if(px<b->parent->width && py<b->parent->height) 
2189
                        p= parent[px + py*2*stride];
2190
                }
2191
                if(tree[max_level][x + y*w]){
2192
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
2193
                    v= get_symbol(&s->c, b->state[context + 2], 0) + 1;
2194
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
2195
                        v= -v;
2196
                    src[x + y*stride]= v;
2197
                }
2198
            }
2199
        }
2200
        if(level+1 == s->spatial_decomposition_count){
2201
            STOP_TIMER("decode_subband")
2202
        }
2203
        
2204
        return;
2205
    }
2206
#endif
2207
    if(1){
2208
        int run;
2209
                
2210
        for(y=0; y<b->height; y++)
2211
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
2212

    
2213
        run= get_symbol2(&s->c, b->state[1], 3);
2214
        for(y=0; y<h; y++){
2215
            for(x=0; x<w; x++){
2216
                int v, p=0;
2217
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
2218

    
2219
                if(y){
2220
                    t= src[x + (y-1)*stride];
2221
                    if(x){
2222
                        lt= src[x - 1 + (y-1)*stride];
2223
                    }
2224
                    if(x + 1 < w){
2225
                        rt= src[x + 1 + (y-1)*stride];
2226
                    }
2227
                }
2228
                if(x){
2229
                    l= src[x - 1 + y*stride];
2230
                    /*if(x > 1){
2231
                        if(orientation==1) ll= src[y + (x-2)*stride];
2232
                        else               ll= src[x - 2 + y*stride];
2233
                    }*/
2234
                }
2235
                if(parent){
2236
                    int px= x>>1;
2237
                    int py= y>>1;
2238
                    if(px<b->parent->width && py<b->parent->height) 
2239
                        p= parent[px + py*2*stride];
2240
                }
2241
                if(/*ll|*/l|lt|t|rt|p){
2242
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
2243

    
2244
                    v=get_cabac(&s->c, &b->state[0][context]);
2245
                }else{
2246
                    if(!run){
2247
                        run= get_symbol2(&s->c, b->state[1], 3);
2248
                        //FIXME optimize this here
2249
                        //FIXME try to store a more naive run
2250
                        v=1;
2251
                    }else{
2252
                        run--;
2253
                        v=0;
2254
                    }
2255
                }
2256
                if(v){
2257
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
2258
                    v= get_symbol(&s->c, b->state[context + 2], 0) + 1;
2259
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
2260
                        v= -v;
2261
                    src[x + y*stride]= v;
2262
                }
2263
            }
2264
        }
2265
        if(level+1 == s->spatial_decomposition_count){
2266
            STOP_TIMER("decode_subband")
2267
        }
2268
        
2269
        return;
2270
    }
2271
}
2272

    
2273
static void reset_contexts(SnowContext *s){
2274
    int plane_index, level, orientation;
2275

    
2276
    for(plane_index=0; plane_index<2; plane_index++){
2277
        for(level=0; level<s->spatial_decomposition_count; level++){
2278
            for(orientation=level ? 1:0; orientation<4; orientation++){
2279
                memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
2280
            }
2281
        }
2282
    }
2283
    memset(s->mb_band.state, 0, sizeof(s->mb_band.state));
2284
    memset(s->mv_band[0].state, 0, sizeof(s->mv_band[0].state));
2285
    memset(s->mv_band[1].state, 0, sizeof(s->mv_band[1].state));
2286
    memset(s->header_state, 0, sizeof(s->header_state));
2287
}
2288

    
2289
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2290
    int x, y;
2291

    
2292
    for(y=0; y < b_h+5; y++){
2293
        for(x=0; x < b_w; x++){
2294
            int a0= src[x     + y*stride];
2295
            int a1= src[x + 1 + y*stride];
2296
            int a2= src[x + 2 + y*stride];
2297
            int a3= src[x + 3 + y*stride];
2298
            int a4= src[x + 4 + y*stride];
2299
            int a5= src[x + 5 + y*stride];
2300
//            int am= 9*(a1+a2) - (a0+a3);
2301
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2302
//            int am= 18*(a2+a3) - 2*(a1+a4);
2303
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2304
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2305

    
2306
//            if(b_w==16) am= 8*(a1+a2);
2307

    
2308
            if(dx<8) tmp[x + y*stride]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2309
            else     tmp[x + y*stride]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2310

    
2311
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2312
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2313
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2314
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2315
        }
2316
    }
2317
    for(y=0; y < b_h; y++){
2318
        for(x=0; x < b_w; x++){
2319
            int a0= tmp[x +  y     *stride];
2320
            int a1= tmp[x + (y + 1)*stride];
2321
            int a2= tmp[x + (y + 2)*stride];
2322
            int a3= tmp[x + (y + 3)*stride];
2323
            int a4= tmp[x + (y + 4)*stride];
2324
            int a5= tmp[x + (y + 5)*stride];
2325
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2326
//            int am= 18*(a2+a3) - 2*(a1+a4);
2327
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2328
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2329
            
2330
//            if(b_w==16) am= 8*(a1+a2);
2331

    
2332
            if(dy<8) dst[x + y*stride]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2333
            else     dst[x + y*stride]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2334

    
2335
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2336
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2337
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2338
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2339
        }
2340
    }
2341
}
2342

    
2343
#define mcb(dx,dy,b_w)\
2344
static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
2345
    uint8_t tmp[stride*(b_w+5)];\
2346
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2347
}
2348

    
2349
mcb( 0, 0,16)
2350
mcb( 4, 0,16)
2351
mcb( 8, 0,16)
2352
mcb(12, 0,16)
2353
mcb( 0, 4,16)
2354
mcb( 4, 4,16)
2355
mcb( 8, 4,16)
2356
mcb(12, 4,16)
2357
mcb( 0, 8,16)
2358
mcb( 4, 8,16)
2359
mcb( 8, 8,16)
2360
mcb(12, 8,16)
2361
mcb( 0,12,16)
2362
mcb( 4,12,16)
2363
mcb( 8,12,16)
2364
mcb(12,12,16)
2365

    
2366
#define mca(dx,dy,b_w)\
2367
static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
2368
    uint8_t tmp[stride*(b_w+5)];\
2369
    assert(h==b_w);\
2370
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2371
}
2372

    
2373
mca( 0, 0,16)
2374
mca( 8, 0,16)
2375
mca( 0, 8,16)
2376
mca( 8, 8,16)
2377

    
2378
static void add_xblock(DWTELEM *dst, uint8_t *src, uint8_t *obmc, int s_x, int s_y, int b_w, int b_h, int mv_x, int mv_y, int w, int h, int dst_stride, int src_stride, int obmc_stride, int mb_type, int add){
2379
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME move to context to gurantee alignment
2380
    int x,y;
2381

    
2382
    if(s_x<0){
2383
        obmc -= s_x;
2384
        b_w += s_x;
2385
        s_x=0;
2386
    }else if(s_x + b_w > w){
2387
        b_w = w - s_x;
2388
    }
2389
    if(s_y<0){
2390
        obmc -= s_y*obmc_stride;
2391
        b_h += s_y;
2392
        s_y=0;
2393
    }else if(s_y + b_h> h){
2394
        b_h = h - s_y;
2395
    }
2396

    
2397
    if(b_w<=0 || b_h<=0) return;
2398
    
2399
    dst += s_x + s_y*dst_stride;
2400
    
2401
    if(mb_type==1){
2402
        src += s_x + s_y*src_stride;
2403
        for(y=0; y < b_h; y++){
2404
            for(x=0; x < b_w; x++){
2405
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
2406
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * 128 * (256/OBMC_MAX);
2407
            }
2408
        }
2409
    }else{
2410
        int dx= mv_x&15;
2411
        int dy= mv_y&15;
2412
//        int dxy= (mv_x&1) + 2*(mv_y&1);
2413

    
2414
        s_x += (mv_x>>4) - 2;
2415
        s_y += (mv_y>>4) - 2;
2416
        src += s_x + s_y*src_stride;
2417
        //use dsputil
2418
    
2419
        if(   (unsigned)s_x >= w - b_w - 4
2420
           || (unsigned)s_y >= h - b_h - 4){
2421
            ff_emulated_edge_mc(tmp + 32, src, src_stride, b_w+5, b_h+5, s_x, s_y, w, h);
2422
            src= tmp + 32;
2423
        }
2424

    
2425
        if(mb_type==0){
2426
            mc_block(tmp, src, tmp + 64+8, src_stride, b_w, b_h, dx, dy);
2427
        }else{
2428
            int sum=0;
2429
            for(y=0; y < b_h; y++){
2430
                for(x=0; x < b_w; x++){
2431
                    sum += src[x+  y*src_stride];
2432
                }
2433
            }
2434
            sum= (sum + b_h*b_w/2) / (b_h*b_w);
2435
            for(y=0; y < b_h; y++){
2436
                for(x=0; x < b_w; x++){
2437
                    tmp[x + y*src_stride]= sum; 
2438
                }
2439
            }
2440
        }
2441

    
2442
        for(y=0; y < b_h; y++){
2443
            for(x=0; x < b_w; x++){
2444
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2445
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2446
            }
2447
        }
2448
    }
2449
}
2450

    
2451
static void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2452
    Plane *p= &s->plane[plane_index];
2453
    const int mb_w= s->mb_band.width;
2454
    const int mb_h= s->mb_band.height;
2455
    const int mb_stride= s->mb_band.stride;
2456
    int x, y, mb_x, mb_y;
2457
    int scale      = plane_index ?  s->mv_scale : 2*s->mv_scale;
2458
    int block_w    = plane_index ?  8 : 16;
2459
    uint8_t *obmc  = plane_index ? obmc16 : obmc32;
2460
    int obmc_stride= plane_index ? 16 : 32;
2461
    int ref_stride= s->last_picture.linesize[plane_index];
2462
    uint8_t *ref  = s->last_picture.data[plane_index];
2463
    int w= p->width;
2464
    int h= p->height;
2465
    
2466
if(s->avctx->debug&512){
2467
    for(y=0; y<h; y++){
2468
        for(x=0; x<w; x++){
2469
            if(add) buf[x + y*w]+= 128*256;
2470
            else    buf[x + y*w]-= 128*256;
2471
        }
2472
    }
2473
    
2474
    return;
2475
}
2476
    for(mb_y=-1; mb_y<=mb_h; mb_y++){
2477
        for(mb_x=-1; mb_x<=mb_w; mb_x++){
2478
            int index= clip(mb_x, 0, mb_w-1) + clip(mb_y, 0, mb_h-1)*mb_stride;
2479

    
2480
            add_xblock(buf, ref, obmc, 
2481
                       block_w*mb_x - block_w/2, 
2482
                       block_w*mb_y - block_w/2,
2483
                       2*block_w, 2*block_w,
2484
                       s->mv_band[0].buf[index]*scale, s->mv_band[1].buf[index]*scale,
2485
                       w, h,
2486
                       w, ref_stride, obmc_stride, 
2487
                       s->mb_band.buf[index], add);
2488

    
2489
        }
2490
    }
2491
}
2492

    
2493
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2494
    const int level= b->level;
2495
    const int w= b->width;
2496
    const int h= b->height;
2497
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2498
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2499
    int x,y, thres1, thres2;
2500
    START_TIMER
2501

    
2502
    assert(QROOT==8);
2503

    
2504
    bias= bias ? 0 : (3*qmul)>>3;
2505
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2506
    thres2= 2*thres1;
2507
    
2508
    if(!bias){
2509
        for(y=0; y<h; y++){
2510
            for(x=0; x<w; x++){
2511
                int i= src[x + y*stride];
2512
                
2513
                if((unsigned)(i+thres1) > thres2){
2514
                    if(i>=0){
2515
                        i<<= QEXPSHIFT;
2516
                        i/= qmul; //FIXME optimize
2517
                        src[x + y*stride]=  i;
2518
                    }else{
2519
                        i= -i;
2520
                        i<<= QEXPSHIFT;
2521
                        i/= qmul; //FIXME optimize
2522
                        src[x + y*stride]= -i;
2523
                    }
2524
                }else
2525
                    src[x + y*stride]= 0;
2526
            }
2527
        }
2528
    }else{
2529
        for(y=0; y<h; y++){
2530
            for(x=0; x<w; x++){
2531
                int i= src[x + y*stride]; 
2532
                
2533
                if((unsigned)(i+thres1) > thres2){
2534
                    if(i>=0){
2535
                        i<<= QEXPSHIFT;
2536
                        i= (i + bias) / qmul; //FIXME optimize
2537
                        src[x + y*stride]=  i;
2538
                    }else{
2539
                        i= -i;
2540
                        i<<= QEXPSHIFT;
2541
                        i= (i + bias) / qmul; //FIXME optimize
2542
                        src[x + y*stride]= -i;
2543
                    }
2544
                }else
2545
                    src[x + y*stride]= 0;
2546
            }
2547
        }
2548
    }
2549
    if(level+1 == s->spatial_decomposition_count){
2550
//        STOP_TIMER("quantize")
2551
    }
2552
}
2553

    
2554
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2555
    const int level= b->level;
2556
    const int w= b->width;
2557
    const int h= b->height;
2558
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2559
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2560
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2561
    int x,y;
2562
    
2563
    assert(QROOT==8);
2564

    
2565
    for(y=0; y<h; y++){
2566
        for(x=0; x<w; x++){
2567
            int i= src[x + y*stride];
2568
            if(i<0){
2569
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2570
            }else if(i>0){
2571
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2572
            }
2573
        }
2574
    }
2575
}
2576

    
2577
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2578
    const int w= b->width;
2579
    const int h= b->height;
2580
    int x,y;
2581
    
2582
    for(y=h-1; y>=0; y--){
2583
        for(x=w-1; x>=0; x--){
2584
            int i= x + y*stride;
2585
            
2586
            if(x){
2587
                if(use_median){
2588
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2589
                    else  src[i] -= src[i - 1];
2590
                }else{
2591
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2592
                    else  src[i] -= src[i - 1];
2593
                }
2594
            }else{
2595
                if(y) src[i] -= src[i - stride];
2596
            }
2597
        }
2598
    }
2599
}
2600

    
2601
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2602
    const int w= b->width;
2603
    const int h= b->height;
2604
    int x,y;
2605
    
2606
    for(y=0; y<h; y++){
2607
        for(x=0; x<w; x++){
2608
            int i= x + y*stride;
2609
            
2610
            if(x){
2611
                if(use_median){
2612
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2613
                    else  src[i] += src[i - 1];
2614
                }else{
2615
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2616
                    else  src[i] += src[i - 1];
2617
                }
2618
            }else{
2619
                if(y) src[i] += src[i - stride];
2620
            }
2621
        }
2622
    }
2623
}
2624

    
2625
static void encode_header(SnowContext *s){
2626
    int plane_index, level, orientation;
2627

    
2628
    put_cabac(&s->c, s->header_state, s->keyframe); // state clearing stuff?
2629
    if(s->keyframe){
2630
        put_symbol(&s->c, s->header_state, s->version, 0);
2631
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2632
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2633
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2634
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
2635
        put_symbol(&s->c, s->header_state, s->b_width, 0);
2636
        put_symbol(&s->c, s->header_state, s->b_height, 0);
2637
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2638
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
2639
        put_cabac(&s->c, s->header_state, s->spatial_scalability);
2640
//        put_cabac(&s->c, s->header_state, s->rate_scalability);
2641

    
2642
        for(plane_index=0; plane_index<2; plane_index++){
2643
            for(level=0; level<s->spatial_decomposition_count; level++){
2644
                for(orientation=level ? 1:0; orientation<4; orientation++){
2645
                    if(orientation==2) continue;
2646
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2647
                }
2648
            }
2649
        }
2650
    }
2651
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2652
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
2653
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
2654
    put_symbol(&s->c, s->header_state, s->qbias, 1);
2655
}
2656

    
2657
static int decode_header(SnowContext *s){
2658
    int plane_index, level, orientation;
2659

    
2660
    s->keyframe= get_cabac(&s->c, s->header_state);
2661
    if(s->keyframe){
2662
        s->version= get_symbol(&s->c, s->header_state, 0);
2663
        if(s->version>0){
2664
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2665
            return -1;
2666
        }
2667
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2668
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2669
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2670
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2671
        s->b_width= get_symbol(&s->c, s->header_state, 0);
2672
        s->b_height= get_symbol(&s->c, s->header_state, 0);
2673
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2674
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2675
        s->spatial_scalability= get_cabac(&s->c, s->header_state);
2676
//        s->rate_scalability= get_cabac(&s->c, s->header_state);
2677

    
2678
        for(plane_index=0; plane_index<3; plane_index++){
2679
            for(level=0; level<s->spatial_decomposition_count; level++){
2680
                for(orientation=level ? 1:0; orientation<4; orientation++){
2681
                    int q;
2682
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2683
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2684
                    else                    q= get_symbol(&s->c, s->header_state, 1);
2685
                    s->plane[plane_index].band[level][orientation].qlog= q;
2686
                }
2687
            }
2688
        }
2689
    }
2690
    
2691
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2692
    if(s->spatial_decomposition_type > 2){
2693
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2694
        return -1;
2695
    }
2696
    
2697
    s->qlog= get_symbol(&s->c, s->header_state, 1);
2698
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2699
    s->qbias= get_symbol(&s->c, s->header_state, 1);
2700

    
2701
    return 0;
2702
}
2703

    
2704
static int common_init(AVCodecContext *avctx){
2705
    SnowContext *s = avctx->priv_data;
2706
    int width, height;
2707
    int level, orientation, plane_index, dec;
2708

    
2709
    s->avctx= avctx;
2710
        
2711
    dsputil_init(&s->dsp, avctx);
2712

    
2713
#define mcf(dx,dy)\
2714
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
2715
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2716
        mc_block ## dx ## dy;
2717

    
2718
    mcf( 0, 0)
2719
    mcf( 4, 0)
2720
    mcf( 8, 0)
2721
    mcf(12, 0)
2722
    mcf( 0, 4)
2723
    mcf( 4, 4)
2724
    mcf( 8, 4)
2725
    mcf(12, 4)
2726
    mcf( 0, 8)
2727
    mcf( 4, 8)
2728
    mcf( 8, 8)
2729
    mcf(12, 8)
2730
    mcf( 0,12)
2731
    mcf( 4,12)
2732
    mcf( 8,12)
2733
    mcf(12,12)
2734

    
2735
#define mcfh(dx,dy)\
2736
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
2737
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2738
        mc_block_hpel ## dx ## dy;
2739

    
2740
    mcfh(0, 0)
2741
    mcfh(8, 0)
2742
    mcfh(0, 8)
2743
    mcfh(8, 8)
2744
        
2745
    dec= s->spatial_decomposition_count= 5;
2746
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2747
    
2748
    s->chroma_h_shift= 1; //FIXME XXX
2749
    s->chroma_v_shift= 1;
2750
    
2751
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2752
    
2753
    s->b_width = (s->avctx->width +(1<<dec)-1)>>dec;
2754
    s->b_height= (s->avctx->height+(1<<dec)-1)>>dec;
2755
    
2756
    s->spatial_dwt_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
2757
    s->pred_buffer= av_mallocz(s->b_width*s->b_height*sizeof(DWTELEM)<<(2*dec));
2758
    
2759
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2760
    
2761
    for(plane_index=0; plane_index<3; plane_index++){    
2762
        int w= s->avctx->width;
2763
        int h= s->avctx->height;
2764

    
2765
        if(plane_index){
2766
            w>>= s->chroma_h_shift;
2767
            h>>= s->chroma_v_shift;
2768
        }
2769
        s->plane[plane_index].width = w;
2770
        s->plane[plane_index].height= h;
2771
av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
2772
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2773
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2774
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2775
                
2776
                b->buf= s->spatial_dwt_buffer;
2777
                b->level= level;
2778
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2779
                b->width = (w + !(orientation&1))>>1;
2780
                b->height= (h + !(orientation>1))>>1;
2781
                
2782
                if(orientation&1) b->buf += (w+1)>>1;
2783
                if(orientation>1) b->buf += b->stride>>1;
2784
                
2785
                if(level)
2786
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
2787
            }
2788
            w= (w+1)>>1;
2789
            h= (h+1)>>1;
2790
        }
2791
    }
2792
    
2793
    //FIXME init_subband() ?
2794
    s->mb_band.stride= s->mv_band[0].stride= s->mv_band[1].stride=
2795
    s->mb_band.width = s->mv_band[0].width = s->mv_band[1].width = (s->avctx->width + 15)>>4;
2796
    s->mb_band.height= s->mv_band[0].height= s->mv_band[1].height= (s->avctx->height+ 15)>>4;
2797
    s->mb_band   .buf= av_mallocz(s->mb_band   .stride * s->mb_band   .height*sizeof(DWTELEM));
2798
    s->mv_band[0].buf= av_mallocz(s->mv_band[0].stride * s->mv_band[0].height*sizeof(DWTELEM));
2799
    s->mv_band[1].buf= av_mallocz(s->mv_band[1].stride * s->mv_band[1].height*sizeof(DWTELEM));
2800

    
2801
    reset_contexts(s);
2802
/*    
2803
    width= s->width= avctx->width;
2804
    height= s->height= avctx->height;
2805
    
2806
    assert(width && height);
2807
*/
2808
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2809
    
2810
    return 0;
2811
}
2812

    
2813

    
2814
static void calculate_vissual_weight(SnowContext *s, Plane *p){
2815
    int width = p->width;
2816
    int height= p->height;
2817
    int i, level, orientation, x, y;
2818

    
2819
    for(level=0; level<s->spatial_decomposition_count; level++){
2820
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2821
            SubBand *b= &p->band[level][orientation];
2822
            DWTELEM *buf= b->buf;
2823
            int64_t error=0;
2824
            
2825
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2826
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
2827
            spatial_idwt(s, s->spatial_dwt_buffer, width, height, width);
2828
            for(y=0; y<height; y++){
2829
                for(x=0; x<width; x++){
2830
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
2831
                    error += d*d;
2832
                }
2833
            }
2834

    
2835
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
2836
            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
2837
        }
2838
    }
2839
}
2840

    
2841
static int encode_init(AVCodecContext *avctx)
2842
{
2843
    SnowContext *s = avctx->priv_data;
2844
    int i;
2845
    int level, orientation, plane_index;
2846

    
2847
    if(avctx->strict_std_compliance >= 0){
2848
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2849
               "use vstrict=-1 to use it anyway\n");
2850
        return -1;
2851
    }
2852
 
2853
    common_init(avctx);
2854
 
2855
    s->version=0;
2856
    
2857
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2858
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2859
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2860
    s->mb_type        = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int16_t));
2861
    s->mb_mean        = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int8_t ));
2862
    s->dummy          = av_mallocz((s->mb_band.width+1)*s->mb_band.height*sizeof(int32_t));
2863
    h263_encode_init(&s->m); //mv_penalty
2864

    
2865
    for(plane_index=0; plane_index<3; plane_index++){
2866
        calculate_vissual_weight(s, &s->plane[plane_index]);
2867
    }
2868
    
2869
    
2870
    avctx->coded_frame= &s->current_picture;
2871
    switch(avctx->pix_fmt){
2872
//    case PIX_FMT_YUV444P:
2873
//    case PIX_FMT_YUV422P:
2874
    case PIX_FMT_YUV420P:
2875
    case PIX_FMT_GRAY8:
2876
//    case PIX_FMT_YUV411P:
2877
//    case PIX_FMT_YUV410P:
2878
        s->colorspace_type= 0;
2879
        break;
2880
/*    case PIX_FMT_RGBA32:
2881
        s->colorspace= 1;
2882
        break;*/
2883
    default:
2884
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2885
        return -1;
2886
    }
2887
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2888
    s->chroma_h_shift= 1;
2889
    s->chroma_v_shift= 1;
2890
    return 0;
2891
}
2892

    
2893
static int frame_start(SnowContext *s){
2894
   AVFrame tmp;
2895

    
2896
   if(s->keyframe)
2897
        reset_contexts(s);
2898
 
2899
    tmp= s->last_picture;
2900
    s->last_picture= s->current_picture;
2901
    s->current_picture= tmp;
2902
    
2903
    s->current_picture.reference= 1;
2904
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2905
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2906
        return -1;
2907
    }
2908
    
2909
    return 0;
2910
}
2911

    
2912
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2913
    SnowContext *s = avctx->priv_data;
2914
    CABACContext * const c= &s->c;
2915
    AVFrame *pict = data;
2916
    const int width= s->avctx->width;
2917
    const int height= s->avctx->height;
2918
    int used_count= 0;
2919
    int log2_threshold, level, orientation, plane_index, i;
2920

    
2921
    ff_init_cabac_encoder(c, buf, buf_size);
2922
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2923
    
2924
    s->input_picture = *pict;
2925

    
2926
    memset(s->header_state, 0, sizeof(s->header_state));
2927

    
2928
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2929
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2930
    
2931
    s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2932
    //<64 >60
2933
    s->qlog += 61;
2934

    
2935
    for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
2936
        s->mb_band.buf[i]= s->keyframe;
2937
    }
2938
    
2939
    frame_start(s);
2940

    
2941
    if(pict->pict_type == P_TYPE){
2942
        int block_width = (width +15)>>4;
2943
        int block_height= (height+15)>>4;
2944
        int stride= s->current_picture.linesize[0];
2945
        uint8_t *src_plane= s->input_picture.data[0];
2946
        int src_stride= s->input_picture.linesize[0];
2947
        int x,y;
2948
        
2949
        assert(s->current_picture.data[0]);
2950
        assert(s->last_picture.data[0]);
2951
     
2952
        s->m.avctx= s->avctx;
2953
        s->m.current_picture.data[0]= s->current_picture.data[0];
2954
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
2955
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
2956
        s->m.current_picture_ptr= &s->m.current_picture;
2957
        s->m.   last_picture_ptr= &s->m.   last_picture;
2958
        s->m.linesize=
2959
        s->m.   last_picture.linesize[0]=
2960
        s->m.    new_picture.linesize[0]=
2961
        s->m.current_picture.linesize[0]= stride;
2962
        s->m.width = width;
2963
        s->m.height= height;
2964
        s->m.mb_width = block_width;
2965
        s->m.mb_height= block_height;
2966
        s->m.mb_stride=   s->m.mb_width+1;
2967
        s->m.b8_stride= 2*s->m.mb_width+1;
2968
        s->m.f_code=1;
2969
        s->m.pict_type= pict->pict_type;
2970
        s->m.me_method= s->avctx->me_method;
2971
        s->m.me.scene_change_score=0;
2972
        s->m.flags= s->avctx->flags;
2973
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2974
        s->m.out_format= FMT_H263;
2975
        s->m.unrestricted_mv= 1;
2976

    
2977
        s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
2978
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
2979
        s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
2980

    
2981
        if(!s->motion_val8){
2982
            s->motion_val8 = av_mallocz(s->m.b8_stride*block_height*2*2*sizeof(int16_t));
2983
            s->motion_val16= av_mallocz(s->m.mb_stride*block_height*2*sizeof(int16_t));
2984
        }
2985
        
2986
        s->m.mb_type= s->mb_type;
2987
        
2988
        //dummies, to avoid segfaults
2989
        s->m.current_picture.mb_mean  = s->mb_mean;
2990
        s->m.current_picture.mb_var   = (int16_t*)s->dummy;
2991
        s->m.current_picture.mc_mb_var= (int16_t*)s->dummy;
2992
        s->m.current_picture.mb_type  = s->dummy;
2993
        
2994
        s->m.current_picture.motion_val[0]= s->motion_val8;
2995
        s->m.p_mv_table= s->motion_val16;
2996
        s->m.dsp= s->dsp; //move
2997
        ff_init_me(&s->m);
2998
    
2999
        
3000
        s->m.me.pre_pass=1;
3001
        s->m.me.dia_size= s->avctx->pre_dia_size;
3002
        s->m.first_slice_line=1;
3003
        for(y= block_height-1; y >= 0; y--) {
3004
            uint8_t src[stride*16];
3005

    
3006
            s->m.new_picture.data[0]= src - y*16*stride; //ugly
3007
            s->m.mb_y= y;
3008
            for(i=0; i<16 && i + 16*y<height; i++){
3009
                memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
3010
                for(x=width; x<16*block_width; x++)
3011
                    src[i*stride+x]= src[i*stride+x-1];
3012
            }
3013
            for(; i<16 && i + 16*y<16*block_height; i++)
3014
                memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
3015

    
3016
            for(x=block_width-1; x >=0 ;x--) {
3017
                s->m.mb_x= x;
3018
                ff_init_block_index(&s->m);
3019
                ff_update_block_index(&s->m);
3020
                ff_pre_estimate_p_frame_motion(&s->m, x, y);
3021
            }
3022
            s->m.first_slice_line=0;
3023
        }        
3024
        s->m.me.pre_pass=0;
3025
        
3026
        
3027
        s->m.me.dia_size= s->avctx->dia_size;
3028
        s->m.first_slice_line=1;
3029
        for (y = 0; y < block_height; y++) {
3030
            uint8_t src[stride*16];
3031

    
3032
            s->m.new_picture.data[0]= src - y*16*stride; //ugly
3033
            s->m.mb_y= y;
3034
            
3035
            assert(width <= stride);
3036
            assert(width <= 16*block_width);
3037
    
3038
            for(i=0; i<16 && i + 16*y<height; i++){
3039
                memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
3040
                for(x=width; x<16*block_width; x++)
3041
                    src[i*stride+x]= src[i*stride+x-1];
3042
            }
3043
            for(; i<16 && i + 16*y<16*block_height; i++)
3044
                memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
3045
    
3046
            for (x = 0; x < block_width; x++) {
3047
                int mb_xy= x + y*(s->mb_band.stride);
3048
                s->m.mb_x= x;
3049
                ff_init_block_index(&s->m);
3050
                ff_update_block_index(&s->m);
3051
                
3052
                ff_estimate_p_frame_motion(&s->m, x, y);
3053
                
3054
                s->mb_band   .buf[mb_xy]= (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER)
3055
                 ? 0 : 2;
3056
                s->mv_band[0].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][0];
3057
                s->mv_band[1].buf[mb_xy]= s->motion_val16[x + y*s->m.mb_stride][1];
3058
                
3059
                if(s->mb_band   .buf[x + y*(s->mb_band.stride)]==2 && 0){
3060
                    int dc0=128, dc1=128, dc, dc2, dir;
3061
                    int offset= (s->avctx->flags & CODEC_FLAG_QPEL) ? 64 : 32;
3062
                    
3063
                    dc       =s->mb_mean[x +  y   *s->m.mb_stride    ];
3064
                    if(x) dc0=s->mb_mean[x +  y   *s->m.mb_stride - 1];
3065
                    if(y) dc1=s->mb_mean[x + (y-1)*s->m.mb_stride    ];
3066
                    dc2= (dc0+dc1)>>1;
3067
#if 0
3068
                    if     (ABS(dc0 - dc) < ABS(dc1 - dc) && ABS(dc0 - dc) < ABS(dc2 - dc))
3069
                        dir= 1;
3070
                    else if(ABS(dc0 - dc) >=ABS(dc1 - dc) && ABS(dc1 - dc) < ABS(dc2 - dc))
3071
                        dir=-1;
3072
                    else
3073
                        dir=0;
3074
#endif                    
3075
                    if(ABS(dc0 - dc) < ABS(dc1 - dc) && x){
3076
                        s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + y*(s->mb_band.stride)-1] - offset;
3077
                        s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + y*(s->mb_band.stride)-1];
3078
                        s->mb_mean[x +  y   *s->m.mb_stride    ]= dc0;
3079
                    }else if(y){
3080
                        s->mv_band[0].buf[mb_xy]= s->mv_band[0].buf[x + (y-1)*(s->mb_band.stride)];
3081
                        s->mv_band[1].buf[mb_xy]= s->mv_band[1].buf[x + (y-1)*(s->mb_band.stride)] - offset;
3082
                        s->mb_mean[x +  y   *s->m.mb_stride    ]= dc1;
3083
                    }
3084
                }
3085
//                s->mb_band   .buf[x + y*(s->mb_band.stride)]=1; //FIXME intra only test
3086
            }
3087
            s->m.first_slice_line=0;
3088
        }
3089
        assert(s->m.pict_type == P_TYPE);
3090
        if(s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3091
            s->m.pict_type= 
3092
            pict->pict_type =I_TYPE;
3093
            for(i=0; i<s->mb_band.stride * s->mb_band.height; i++){
3094
                s->mb_band.buf[i]= 1;
3095
                s->mv_band[0].buf[i]=
3096
                s->mv_band[1].buf[i]= 0;
3097
            }
3098
    //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
3099
        }        
3100
    }
3101
        
3102
    s->m.first_slice_line=1;
3103
    
3104
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3105

    
3106
    encode_header(s);
3107
    
3108
    decorrelate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 0, 1);
3109
    decorrelate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 0, 1);
3110
    decorrelate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 0 ,1);
3111
    encode_subband(s, &s->mb_band   , s->mb_band   .buf, NULL, s->mb_band   .stride, 0);
3112
    encode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
3113
    encode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
3114
    
3115
//FIXME avoid this
3116
    correlate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 1, 1);
3117
    correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
3118
    correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
3119
    
3120
    for(plane_index=0; plane_index<3; plane_index++){
3121
        Plane *p= &s->plane[plane_index];
3122
        int w= p->width;
3123
        int h= p->height;
3124
        int x, y;
3125
        int bits= put_bits_count(&s->c.pb);
3126

    
3127
        //FIXME optimize
3128
#if QPRED
3129
        memset(s->pred_buffer, 0, sizeof(DWTELEM)*w*h);
3130
        predict_plane(s, s->pred_buffer, plane_index, 1);
3131
        spatial_dwt(s, s->pred_buffer, w, h, w);
3132
        for(level=0; level<s->spatial_decomposition_count; level++){
3133
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3134
                SubBand *b= &p->band[level][orientation];
3135
                int delta= ((int)s->pred_buffer - (int)s->spatial_dwt_buffer)/sizeof(DWTELEM);
3136
                
3137
                quantize  (s, b, b->buf + delta, b->stride, s->qbias);
3138
                dequantize(s, b, b->buf + delta, b->stride);
3139
            }
3140
        }
3141
        for(y=0; y<h; y++){
3142
            for(x=0; x<w; x++){
3143
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
3144
            }
3145
        }
3146
        spatial_dwt(s, s->spatial_dwt_buffer, w, h, w);
3147
        for(y=0; y<h; y++){
3148
            for(x=0; x<w; x++){
3149
                s->spatial_dwt_buffer[y*w + x]-= s->pred_buffer[y*w + x];
3150
            }
3151
        }
3152
#else
3153
     if(pict->data[plane_index]) //FIXME gray hack
3154
        for(y=0; y<h; y++){
3155
            for(x=0; x<w; x++){
3156
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
3157
            }
3158
        }
3159
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3160
        spatial_dwt(s, s->spatial_dwt_buffer, w, h, w);
3161
#endif
3162
 
3163
        for(level=0; level<s->spatial_decomposition_count; level++){
3164
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3165
                SubBand *b= &p->band[level][orientation];
3166
                
3167
                quantize(s, b, b->buf, b->stride, s->qbias);
3168
                if(orientation==0)
3169
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3170
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3171
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
3172
                if(orientation==0)
3173
                    correlate(s, b, b->buf, b->stride, 1, 0);
3174
            }
3175
        }
3176
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3177

    
3178
        for(level=0; level<s->spatial_decomposition_count; level++){
3179
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3180
                SubBand *b= &p->band[level][orientation];
3181

    
3182
                dequantize(s, b, b->buf, b->stride);
3183
            }
3184
        }
3185
        
3186
#if QPRED
3187
        for(y=0; y<h; y++){
3188
            for(x=0; x<w; x++){
3189
                s->spatial_dwt_buffer[y*w + x]+= s->pred_buffer[y*w + x];
3190
            }
3191
        }
3192
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
3193
#else
3194
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
3195
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3196
#endif
3197
        //FIXME optimize
3198
        for(y=0; y<h; y++){
3199
            for(x=0; x<w; x++){
3200
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
3201
                if(v&(~255)) v= ~(v>>31);
3202
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
3203
            }
3204
        }
3205
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3206
            int64_t error= 0;
3207
            
3208
    if(pict->data[plane_index]) //FIXME gray hack
3209
            for(y=0; y<h; y++){
3210
                for(x=0; x<w; x++){
3211
                    int d= s->spatial_dwt_buffer[y*w + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x]*256;
3212
                    error += d*d;
3213
                }
3214
            }
3215
            error= (error + 128*256)>>16;
3216
            s->avctx->error[plane_index] += error;
3217
            s->avctx->error[3] += error;
3218
        }
3219
    }
3220

    
3221
    if(s->last_picture.data[0])
3222
        avctx->release_buffer(avctx, &s->last_picture);
3223

    
3224
    emms_c();
3225
    
3226
    return put_cabac_terminate(c, 1);
3227
}
3228

    
3229
static void common_end(SnowContext *s){
3230
    av_freep(&s->spatial_dwt_buffer);
3231
    av_freep(&s->mb_band.buf);
3232
    av_freep(&s->mv_band[0].buf);
3233
    av_freep(&s->mv_band[1].buf);
3234

    
3235
    av_freep(&s->m.me.scratchpad);    
3236
    av_freep(&s->m.me.map);
3237
    av_freep(&s->m.me.score_map);
3238
    av_freep(&s->mb_type);
3239
    av_freep(&s->mb_mean);
3240
    av_freep(&s->dummy);
3241
    av_freep(&s->motion_val8);
3242
    av_freep(&s->motion_val16);
3243
}
3244

    
3245
static int encode_end(AVCodecContext *avctx)
3246
{
3247
    SnowContext *s = avctx->priv_data;
3248

    
3249
    common_end(s);
3250

    
3251
    return 0;
3252
}
3253

    
3254
static int decode_init(AVCodecContext *avctx)
3255
{
3256
//    SnowContext *s = avctx->priv_data;
3257

    
3258
    common_init(avctx);
3259
    
3260
    return 0;
3261
}
3262

    
3263
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3264
    SnowContext *s = avctx->priv_data;
3265
    CABACContext * const c= &s->c;
3266
    const int width= s->avctx->width;
3267
    const int height= s->avctx->height;
3268
    int bytes_read;
3269
    AVFrame *picture = data;
3270
    int log2_threshold, level, orientation, plane_index;
3271
    
3272

    
3273
    /* no supplementary picture */
3274
    if (buf_size == 0)
3275
        return 0;
3276

    
3277
    ff_init_cabac_decoder(c, buf, buf_size);
3278
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3279

    
3280
    memset(s->header_state, 0, sizeof(s->header_state));
3281

    
3282
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3283
    decode_header(s);
3284

    
3285
    frame_start(s);
3286
    //keyframe flag dupliaction mess FIXME
3287
    if(avctx->debug&FF_DEBUG_PICT_INFO)
3288
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3289
    
3290
    decode_subband(s, &s->mb_band   , s->mb_band   .buf, NULL, s->mb_band   .stride, 0);
3291
    decode_subband(s, &s->mv_band[0], s->mv_band[0].buf, NULL, s->mv_band[0].stride, 0);
3292
    decode_subband(s, &s->mv_band[1], s->mv_band[1].buf, NULL, s->mv_band[1].stride, 0);
3293
    correlate(s, &s->mb_band   , s->mb_band   .buf, s->mb_band   .stride, 1, 1);
3294
    correlate(s, &s->mv_band[0], s->mv_band[0].buf, s->mv_band[0].stride, 1, 1);
3295
    correlate(s, &s->mv_band[1], s->mv_band[1].buf, s->mv_band[1].stride, 1, 1);
3296

    
3297
    for(plane_index=0; plane_index<3; plane_index++){
3298
        Plane *p= &s->plane[plane_index];
3299
        int w= p->width;
3300
        int h= p->height;
3301
        int x, y;
3302
        
3303
if(s->avctx->debug&2048){
3304
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3305
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3306

    
3307
        for(y=0; y<h; y++){
3308
            for(x=0; x<w; x++){
3309
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
3310
                if(v&(~255)) v= ~(v>>31);
3311
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3312
            }
3313
        }
3314
}
3315
        for(level=0; level<s->spatial_decomposition_count; level++){
3316
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3317
                SubBand *b= &p->band[level][orientation];
3318

    
3319
                decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3320
                if(orientation==0)
3321
                    correlate(s, b, b->buf, b->stride, 1, 0);
3322
            }
3323
        }
3324
if(!(s->avctx->debug&1024))
3325
        for(level=0; level<s->spatial_decomposition_count; level++){
3326
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3327
                SubBand *b= &p->band[level][orientation];
3328

    
3329
                dequantize(s, b, b->buf, b->stride);
3330
            }
3331
        }
3332

    
3333
#if QPRED
3334
        memset(s->pred_buffer, 0, sizeof(DWTELEM)*w*h);
3335
        predict_plane(s, s->pred_buffer, plane_index, 1);
3336
        spatial_dwt(s, s->pred_buffer, w, h, w);
3337
        for(level=0; level<s->spatial_decomposition_count; level++){
3338
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3339
                SubBand *b= &p->band[level][orientation];
3340
                int delta= ((int)s->pred_buffer - (int)s->spatial_dwt_buffer)/sizeof(DWTELEM);
3341
                
3342
                quantize  (s, b, b->buf + delta, b->stride, s->qbias);
3343
                dequantize(s, b, b->buf + delta, b->stride);
3344
            }
3345
        }
3346
        for(y=0; y<h; y++){
3347
            for(x=0; x<w; x++){
3348
                s->spatial_dwt_buffer[y*w + x]+= s->pred_buffer[y*w + x];
3349
            }
3350
        }
3351
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
3352
#else
3353
        spatial_idwt(s, s->spatial_dwt_buffer, w, h, w);
3354
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3355
#endif
3356

    
3357
        //FIXME optimize
3358
        for(y=0; y<h; y++){
3359
            for(x=0; x<w; x++){
3360
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
3361
                if(v&(~255)) v= ~(v>>31);
3362
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
3363
            }
3364
        }
3365
    }
3366
            
3367
    emms_c();
3368

    
3369
    if(s->last_picture.data[0])
3370
        avctx->release_buffer(avctx, &s->last_picture);
3371

    
3372
if(!(s->avctx->debug&2048))        
3373
    *picture= s->current_picture;
3374
else
3375
    *picture= s->mconly_picture;
3376
    
3377
    *data_size = sizeof(AVFrame);
3378
    
3379
    bytes_read= get_cabac_terminate(c);
3380
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
3381

    
3382
    return bytes_read;
3383
}
3384

    
3385
static int decode_end(AVCodecContext *avctx)
3386
{
3387
    SnowContext *s = avctx->priv_data;
3388

    
3389
    common_end(s);
3390

    
3391
    return 0;
3392
}
3393

    
3394
AVCodec snow_decoder = {
3395
    "snow",
3396
    CODEC_TYPE_VIDEO,
3397
    CODEC_ID_SNOW,
3398
    sizeof(SnowContext),
3399
    decode_init,
3400
    NULL,
3401
    decode_end,
3402
    decode_frame,
3403
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3404
    NULL
3405
};
3406

    
3407
AVCodec snow_encoder = {
3408
    "snow",
3409
    CODEC_TYPE_VIDEO,
3410
    CODEC_ID_SNOW,
3411
    sizeof(SnowContext),
3412
    encode_init,
3413
    encode_frame,
3414
    encode_end,
3415
};
3416

    
3417

    
3418
#if 0
3419
#undef malloc
3420
#undef free
3421
#undef printf
3422

3423
int main(){
3424
    int width=256;
3425
    int height=256;
3426
    int buffer[2][width*height];
3427
    SnowContext s;
3428
    int i;
3429
    s.spatial_decomposition_count=6;
3430
    s.spatial_decomposition_type=1;
3431
    
3432
    printf("testing 5/3 DWT\n");
3433
    for(i=0; i<width*height; i++)
3434
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3435
    
3436
    spatial_dwt(&s, buffer[0], width, height, width);
3437
    spatial_idwt(&s, buffer[0], width, height, width);
3438
    
3439
    for(i=0; i<width*height; i++)
3440
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3441

3442
    printf("testing 9/7 DWT\n");
3443
    s.spatial_decomposition_type=0;
3444
    for(i=0; i<width*height; i++)
3445
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3446
    
3447
    spatial_dwt(&s, buffer[0], width, height, width);
3448
    spatial_idwt(&s, buffer[0], width, height, width);
3449
    
3450
    for(i=0; i<width*height; i++)
3451
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3452
        
3453
    printf("testing AC coder\n");
3454
    memset(s.header_state, 0, sizeof(s.header_state));
3455
    ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
3456
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3457
        
3458
    for(i=-256; i<256; i++){
3459
START_TIMER
3460
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3461
STOP_TIMER("put_symbol")
3462
    }
3463
    put_cabac_terminate(&s.c, 1);
3464

3465
    memset(s.header_state, 0, sizeof(s.header_state));
3466
    ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
3467
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3468
    
3469
    for(i=-256; i<256; i++){
3470
        int j;
3471
START_TIMER
3472
        j= get_symbol(&s.c, s.header_state, 1);
3473
STOP_TIMER("get_symbol")
3474
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3475
    }
3476
{
3477
int level, orientation, x, y;
3478
int64_t errors[8][4];
3479
int64_t g=0;
3480

3481
    memset(errors, 0, sizeof(errors));
3482
    s.spatial_decomposition_count=3;
3483
    s.spatial_decomposition_type=0;
3484
    for(level=0; level<s.spatial_decomposition_count; level++){
3485
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3486
            int w= width  >> (s.spatial_decomposition_count-level);
3487
            int h= height >> (s.spatial_decomposition_count-level);
3488
            int stride= width  << (s.spatial_decomposition_count-level);
3489
            DWTELEM *buf= buffer[0];
3490
            int64_t error=0;
3491

3492
            if(orientation&1) buf+=w;
3493
            if(orientation>1) buf+=stride>>1;
3494
            
3495
            memset(buffer[0], 0, sizeof(int)*width*height);
3496
            buf[w/2 + h/2*stride]= 256*256;
3497
            spatial_idwt(&s, buffer[0], width, height, width);
3498
            for(y=0; y<height; y++){
3499
                for(x=0; x<width; x++){
3500
                    int64_t d= buffer[0][x + y*width];
3501
                    error += d*d;
3502
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3503
                }
3504
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3505
            }
3506
            error= (int)(sqrt(error)+0.5);
3507
            errors[level][orientation]= error;
3508
            if(g) g=ff_gcd(g, error);
3509
            else g= error;
3510
        }
3511
    }
3512
    printf("static int const visual_weight[][4]={\n");
3513
    for(level=0; level<s.spatial_decomposition_count; level++){
3514
        printf("  {");
3515
        for(orientation=0; orientation<4; orientation++){
3516
            printf("%8lld,", errors[level][orientation]/g);
3517
        }
3518
        printf("},\n");
3519
    }
3520
    printf("};\n");
3521
    {
3522
            int level=2;
3523
            int orientation=3;
3524
            int w= width  >> (s.spatial_decomposition_count-level);
3525
            int h= height >> (s.spatial_decomposition_count-level);
3526
            int stride= width  << (s.spatial_decomposition_count-level);
3527
            DWTELEM *buf= buffer[0];
3528
            int64_t error=0;
3529

3530
            buf+=w;
3531
            buf+=stride>>1;
3532
            
3533
            memset(buffer[0], 0, sizeof(int)*width*height);
3534
#if 1
3535
            for(y=0; y<height; y++){
3536
                for(x=0; x<width; x++){
3537
                    int tab[4]={0,2,3,1};
3538
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3539
                }
3540
            }
3541
            spatial_dwt(&s, buffer[0], width, height, width);
3542
#else
3543
            for(y=0; y<h; y++){
3544
                for(x=0; x<w; x++){
3545
                    buf[x + y*stride  ]=169;
3546
                    buf[x + y*stride-w]=64;
3547
                }
3548
            }
3549
            spatial_idwt(&s, buffer[0], width, height, width);
3550
#endif
3551
            for(y=0; y<height; y++){
3552
                for(x=0; x<width; x++){
3553
                    int64_t d= buffer[0][x + y*width];
3554
                    error += d*d;
3555
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3556
                }
3557
                if(ABS(height/2-y)<9) printf("\n");
3558
            }
3559
    }
3560

    
3561
}
3562
    return 0;
3563
}
3564
#endif
3565