Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 7c2425d2

History | View | Annotate | Download (105 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22
#include "cabac.h"
23

    
24
#include "mpegvideo.h"
25

    
26
#undef NDEBUG
27
#include <assert.h>
28

    
29
#define MAX_DECOMPOSITIONS 8
30
#define MAX_PLANES 4
31
#define DWTELEM int
32
#define QROOT 8 
33
#define LOSSLESS_QLOG -128
34

    
35
static const int8_t quant3[256]={
36
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
52
};
53
static const int8_t quant3b[256]={
54
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
};
71
static const int8_t quant5[256]={
72
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
88
};
89
static const int8_t quant7[256]={
90
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
93
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
106
};
107
static const int8_t quant9[256]={
108
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
124
};
125
static const int8_t quant11[256]={
126
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
142
};
143
static const int8_t quant13[256]={
144
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
145
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
160
};
161

    
162
#define OBMC_MAX 64
163
#if 0 //64*cubic
164
static const uint8_t obmc32[1024]={
165
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
167
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
168
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
169
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
170
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
171
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
172
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
173
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
174
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
175
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
176
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
177
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
178
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
179
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
180
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
182
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
183
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
184
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
185
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
186
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
187
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
190
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
191
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
194
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
195
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
196
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197
//error:0.000022
198
};
199
static const uint8_t obmc16[256]={
200
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
201
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
202
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
203
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
204
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
205
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
206
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
207
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
209
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
210
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
211
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
212
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
213
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
214
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216
//error:0.000033
217
};
218
#elif 1 // 64*linear
219
static const uint8_t obmc32[1024]={
220
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
221
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
222
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
223
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
224
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
225
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
226
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
227
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
228
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
229
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
230
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
231
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
232
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
233
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
234
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
235
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
237
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
238
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
239
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
240
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
241
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
242
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
243
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
244
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
245
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
246
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
249
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
250
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
251
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
252
 //error:0.000020
253
};
254
static const uint8_t obmc16[256]={
255
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
256
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
257
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
258
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
259
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
260
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
261
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
262
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
264
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
265
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
266
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
267
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
268
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
269
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
270
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
271
//error:0.000015
272
};
273
#else //64*cos
274
static const uint8_t obmc32[1024]={
275
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
277
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
278
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
279
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
280
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
281
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
282
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
283
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
284
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
285
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
286
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
287
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
288
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
289
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
290
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
292
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
293
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
294
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
295
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
296
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
297
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
300
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
301
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
304
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
305
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
306
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307
//error:0.000022
308
};
309
static const uint8_t obmc16[256]={
310
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
311
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
312
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
313
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
314
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
315
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
316
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
317
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
319
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
320
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
321
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
322
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
323
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
324
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326
//error:0.000022
327
};
328
#endif
329

    
330
//linear *64
331
static const uint8_t obmc8[64]={
332
 1, 3, 5, 7, 7, 5, 3, 1,
333
 3, 9,15,21,21,15, 9, 3,
334
 5,15,25,35,35,25,15, 5,
335
 7,21,35,49,49,35,21, 7,
336
 7,21,35,49,49,35,21, 7,
337
 5,15,25,35,35,25,15, 5,
338
 3, 9,15,21,21,15, 9, 3,
339
 1, 3, 5, 7, 7, 5, 3, 1,
340
//error:0.000000
341
};
342

    
343
//linear *64
344
static const uint8_t obmc4[16]={
345
 4,12,12, 4,
346
12,36,36,12,
347
12,36,36,12,
348
 4,12,12, 4,
349
//error:0.000000
350
};
351

    
352
static const uint8_t *obmc_tab[4]={
353
    obmc32, obmc16, obmc8, obmc4
354
};
355

    
356
typedef struct BlockNode{
357
    int16_t mx;
358
    int16_t my;
359
    uint8_t color[3];
360
    uint8_t type;
361
//#define TYPE_SPLIT    1
362
#define BLOCK_INTRA   1
363
//#define TYPE_NOCOLOR  4
364
    uint8_t level; //FIXME merge into type?
365
}BlockNode;
366

    
367
#define LOG2_MB_SIZE 4
368
#define MB_SIZE (1<<LOG2_MB_SIZE)
369

    
370
typedef struct SubBand{
371
    int level;
372
    int stride;
373
    int width;
374
    int height;
375
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
376
    DWTELEM *buf;
377
    int16_t *x;
378
    DWTELEM *coeff;
379
    struct SubBand *parent;
380
    uint8_t state[/*7*2*/ 7 + 512][32];
381
}SubBand;
382

    
383
typedef struct Plane{
384
    int width;
385
    int height;
386
    SubBand band[MAX_DECOMPOSITIONS][4];
387
}Plane;
388

    
389
typedef struct SnowContext{
390
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
391

    
392
    AVCodecContext *avctx;
393
    CABACContext c;
394
    DSPContext dsp;
395
    AVFrame input_picture;
396
    AVFrame current_picture;
397
    AVFrame last_picture;
398
    AVFrame mconly_picture;
399
//     uint8_t q_context[16];
400
    uint8_t header_state[32];
401
    uint8_t block_state[128 + 32*128];
402
    int keyframe;
403
    int always_reset;
404
    int version;
405
    int spatial_decomposition_type;
406
    int temporal_decomposition_type;
407
    int spatial_decomposition_count;
408
    int temporal_decomposition_count;
409
    DWTELEM *spatial_dwt_buffer;
410
    DWTELEM *pred_buffer;
411
    int colorspace_type;
412
    int chroma_h_shift;
413
    int chroma_v_shift;
414
    int spatial_scalability;
415
    int qlog;
416
    int lambda;
417
    int lambda2;
418
    int mv_scale;
419
    int qbias;
420
#define QBIAS_SHIFT 3
421
    int b_width;
422
    int b_height;
423
    int block_max_depth;
424
    Plane plane[MAX_PLANES];
425
    BlockNode *block;
426

    
427
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
428
}SnowContext;
429

    
430
#define QEXPSHIFT 7 //FIXME try to change this to 0
431
static const uint8_t qexp[8]={
432
    128, 140, 152, 166, 181, 197, 215, 235
433
//   64,  70,  76,  83,  91,  99, 108, 117
434
//   32,  35,  38,  41,  45,  49,  54,  59
435
//   16,  17,  19,  21,  23,  25,  27,  29
436
//    8,   9,  10,  10,  11,  12,  13,  15
437
};
438

    
439
static inline int mirror(int v, int m){
440
    if     (v<0) return -v;
441
    else if(v>m) return 2*m-v;
442
    else         return v;
443
}
444

    
445
static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
446
    int i;
447

    
448
    if(v){
449
        const int a= ABS(v);
450
        const int e= av_log2(a);
451
#if 1
452
        const int el= FFMIN(e, 10);   
453
        put_cabac(c, state+0, 0);
454

    
455
        for(i=0; i<el; i++){
456
            put_cabac(c, state+1+i, 1);  //1..10
457
        }
458
        for(; i<e; i++){
459
            put_cabac(c, state+1+9, 1);  //1..10
460
        }
461
        put_cabac(c, state+1+FFMIN(i,9), 0);
462

    
463
        for(i=e-1; i>=el; i--){
464
            put_cabac(c, state+22+9, (a>>i)&1); //22..31
465
        }
466
        for(; i>=0; i--){
467
            put_cabac(c, state+22+i, (a>>i)&1); //22..31
468
        }
469

    
470
        if(is_signed)
471
            put_cabac(c, state+11 + el, v < 0); //11..21
472
#else
473
        
474
        put_cabac(c, state+0, 0);
475
        if(e<=9){
476
            for(i=0; i<e; i++){
477
                put_cabac(c, state+1+i, 1);  //1..10
478
            }
479
            put_cabac(c, state+1+i, 0);
480

    
481
            for(i=e-1; i>=0; i--){
482
                put_cabac(c, state+22+i, (a>>i)&1); //22..31
483
            }
484

    
485
            if(is_signed)
486
                put_cabac(c, state+11 + e, v < 0); //11..21
487
        }else{
488
            for(i=0; i<e; i++){
489
                put_cabac(c, state+1+FFMIN(i,9), 1);  //1..10
490
            }
491
            put_cabac(c, state+1+FFMIN(i,9), 0);
492

    
493
            for(i=e-1; i>=0; i--){
494
                put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
495
            }
496

    
497
            if(is_signed)
498
                put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
499
        }
500
#endif
501
    }else{
502
        put_cabac(c, state+0, 1);
503
    }
504
}
505

    
506
static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
507
    if(get_cabac(c, state+0))
508
        return 0;
509
    else{
510
        int i, e, a;
511
        e= 0;
512
        while(get_cabac(c, state+1 + FFMIN(e,9))){ //1..10
513
            e++;
514
        }
515

    
516
        a= 1;
517
        for(i=e-1; i>=0; i--){
518
            a += a + get_cabac(c, state+22 + FFMIN(i,9)); //22..31
519
        }
520

    
521
        if(is_signed && get_cabac(c, state+11 + FFMIN(e,10))) //11..21
522
            return -a;
523
        else
524
            return a;
525
    }
526
}
527

    
528
static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
529
    int i;
530
    int r= log2>=0 ? 1<<log2 : 1;
531

    
532
    assert(v>=0);
533
    assert(log2>=-4);
534

    
535
    while(v >= r){
536
        put_cabac(c, state+4+log2, 1);
537
        v -= r;
538
        log2++;
539
        if(log2>0) r+=r;
540
    }
541
    put_cabac(c, state+4+log2, 0);
542
    
543
    for(i=log2-1; i>=0; i--){
544
        put_cabac(c, state+31-i, (v>>i)&1);
545
    }
546
}
547

    
548
static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
549
    int i;
550
    int r= log2>=0 ? 1<<log2 : 1;
551
    int v=0;
552

    
553
    assert(log2>=-4);
554

    
555
    while(get_cabac(c, state+4+log2)){
556
        v+= r;
557
        log2++;
558
        if(log2>0) r+=r;
559
    }
560
    
561
    for(i=log2-1; i>=0; i--){
562
        v+= get_cabac(c, state+31-i)<<i;
563
    }
564

    
565
    return v;
566
}
567

    
568
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
569
    const int mirror_left= !highpass;
570
    const int mirror_right= (width&1) ^ highpass;
571
    const int w= (width>>1) - 1 + (highpass & width);
572
    int i;
573

    
574
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
575
    if(mirror_left){
576
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
577
        dst += dst_step;
578
        src += src_step;
579
    }
580
    
581
    for(i=0; i<w; i++){
582
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
583
    }
584
    
585
    if(mirror_right){
586
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
587
    }
588
}
589

    
590
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
591
    const int mirror_left= !highpass;
592
    const int mirror_right= (width&1) ^ highpass;
593
    const int w= (width>>1) - 1 + (highpass & width);
594
    int i;
595

    
596
    if(mirror_left){
597
        int r= 3*2*ref[0];
598
        r += r>>4;
599
        r += r>>8;
600
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
601
        dst += dst_step;
602
        src += src_step;
603
    }
604
    
605
    for(i=0; i<w; i++){
606
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
607
        r += r>>4;
608
        r += r>>8;
609
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
610
    }
611
    
612
    if(mirror_right){
613
        int r= 3*2*ref[w*ref_step];
614
        r += r>>4;
615
        r += r>>8;
616
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
617
    }
618
}
619

    
620

    
621
static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
622
    int x, i;
623
    
624
    for(x=start; x<width; x+=2){
625
        int64_t sum=0;
626

    
627
        for(i=0; i<n; i++){
628
            int x2= x + 2*i - n + 1;
629
            if     (x2<     0) x2= -x2;
630
            else if(x2>=width) x2= 2*width-x2-2;
631
            sum += coeffs[i]*(int64_t)dst[x2];
632
        }
633
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
634
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
635
    }
636
}
637

    
638
static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
639
    int x, y, i;
640
    for(y=start; y<height; y+=2){
641
        for(x=0; x<width; x++){
642
            int64_t sum=0;
643
    
644
            for(i=0; i<n; i++){
645
                int y2= y + 2*i - n + 1;
646
                if     (y2<      0) y2= -y2;
647
                else if(y2>=height) y2= 2*height-y2-2;
648
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
649
            }
650
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
651
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
652
        }
653
    }
654
}
655

    
656
#define SCALEX 1
657
#define LX0 0
658
#define LX1 1
659

    
660
#if 0 // more accurate 9/7
661
#define N1 2
662
#define SHIFT1 14
663
#define COEFFS1 (int[]){-25987,-25987}
664
#define N2 2
665
#define SHIFT2 19
666
#define COEFFS2 (int[]){-27777,-27777}
667
#define N3 2
668
#define SHIFT3 15
669
#define COEFFS3 (int[]){28931,28931}
670
#define N4 2
671
#define SHIFT4 15
672
#define COEFFS4 (int[]){14533,14533}
673
#elif 1 // 13/7 CRF
674
#define N1 4
675
#define SHIFT1 4
676
#define COEFFS1 (int[]){1,-9,-9,1}
677
#define N2 4
678
#define SHIFT2 4
679
#define COEFFS2 (int[]){-1,5,5,-1}
680
#define N3 0
681
#define SHIFT3 1
682
#define COEFFS3 NULL
683
#define N4 0
684
#define SHIFT4 1
685
#define COEFFS4 NULL
686
#elif 1 // 3/5
687
#define LX0 1
688
#define LX1 0
689
#define SCALEX 0.5
690
#define N1 2
691
#define SHIFT1 1
692
#define COEFFS1 (int[]){1,1}
693
#define N2 2
694
#define SHIFT2 2
695
#define COEFFS2 (int[]){-1,-1}
696
#define N3 0
697
#define SHIFT3 0
698
#define COEFFS3 NULL
699
#define N4 0
700
#define SHIFT4 0
701
#define COEFFS4 NULL
702
#elif 1 // 11/5 
703
#define N1 0
704
#define SHIFT1 1
705
#define COEFFS1 NULL
706
#define N2 2
707
#define SHIFT2 2
708
#define COEFFS2 (int[]){-1,-1}
709
#define N3 2
710
#define SHIFT3 0
711
#define COEFFS3 (int[]){-1,-1}
712
#define N4 4
713
#define SHIFT4 7
714
#define COEFFS4 (int[]){-5,29,29,-5}
715
#define SCALEX 4
716
#elif 1 // 9/7 CDF
717
#define N1 2
718
#define SHIFT1 7
719
#define COEFFS1 (int[]){-203,-203}
720
#define N2 2
721
#define SHIFT2 12
722
#define COEFFS2 (int[]){-217,-217}
723
#define N3 2
724
#define SHIFT3 7
725
#define COEFFS3 (int[]){113,113}
726
#define N4 2
727
#define SHIFT4 9
728
#define COEFFS4 (int[]){227,227}
729
#define SCALEX 1
730
#elif 1 // 7/5 CDF
731
#define N1 0
732
#define SHIFT1 1
733
#define COEFFS1 NULL
734
#define N2 2
735
#define SHIFT2 2
736
#define COEFFS2 (int[]){-1,-1}
737
#define N3 2
738
#define SHIFT3 0
739
#define COEFFS3 (int[]){-1,-1}
740
#define N4 2
741
#define SHIFT4 4
742
#define COEFFS4 (int[]){3,3}
743
#elif 1 // 9/7 MN
744
#define N1 4
745
#define SHIFT1 4
746
#define COEFFS1 (int[]){1,-9,-9,1}
747
#define N2 2
748
#define SHIFT2 2
749
#define COEFFS2 (int[]){1,1}
750
#define N3 0
751
#define SHIFT3 1
752
#define COEFFS3 NULL
753
#define N4 0
754
#define SHIFT4 1
755
#define COEFFS4 NULL
756
#else // 13/7 CRF
757
#define N1 4
758
#define SHIFT1 4
759
#define COEFFS1 (int[]){1,-9,-9,1}
760
#define N2 4
761
#define SHIFT2 4
762
#define COEFFS2 (int[]){-1,5,5,-1}
763
#define N3 0
764
#define SHIFT3 1
765
#define COEFFS3 NULL
766
#define N4 0
767
#define SHIFT4 1
768
#define COEFFS4 NULL
769
#endif
770
static void horizontal_decomposeX(int *b, int width){
771
    int temp[width];
772
    const int width2= width>>1;
773
    const int w2= (width+1)>>1;
774
    int A1,A2,A3,A4, x;
775

    
776
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
777
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
778
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
779
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
780
    
781
    for(x=0; x<width2; x++){
782
        temp[x   ]= b[2*x    ];
783
        temp[x+w2]= b[2*x + 1];
784
    }
785
    if(width&1)
786
        temp[x   ]= b[2*x    ];
787
    memcpy(b, temp, width*sizeof(int));
788
}
789

    
790
static void horizontal_composeX(int *b, int width){
791
    int temp[width];
792
    const int width2= width>>1;
793
    int A1,A2,A3,A4, x;
794
    const int w2= (width+1)>>1;
795

    
796
    memcpy(temp, b, width*sizeof(int));
797
    for(x=0; x<width2; x++){
798
        b[2*x    ]= temp[x   ];
799
        b[2*x + 1]= temp[x+w2];
800
    }
801
    if(width&1)
802
        b[2*x    ]= temp[x   ];
803

    
804
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
805
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
806
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
807
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
808
}
809

    
810
static void spatial_decomposeX(int *buffer, int width, int height, int stride){
811
    int x, y;
812
  
813
    for(y=0; y<height; y++){
814
        for(x=0; x<width; x++){
815
            buffer[y*stride + x] *= SCALEX;
816
        }
817
    }
818

    
819
    for(y=0; y<height; y++){
820
        horizontal_decomposeX(buffer + y*stride, width);
821
    }
822
    
823
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
824
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
825
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
826
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
827
}
828

    
829
static void spatial_composeX(int *buffer, int width, int height, int stride){
830
    int x, y;
831
  
832
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
833
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
834
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
835
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
836

    
837
    for(y=0; y<height; y++){
838
        horizontal_composeX(buffer + y*stride, width);
839
    }
840

    
841
    for(y=0; y<height; y++){
842
        for(x=0; x<width; x++){
843
            buffer[y*stride + x] /= SCALEX;
844
        }
845
    }
846
}
847

    
848
static void horizontal_decompose53i(int *b, int width){
849
    int temp[width];
850
    const int width2= width>>1;
851
    int A1,A2,A3,A4, x;
852
    const int w2= (width+1)>>1;
853

    
854
    for(x=0; x<width2; x++){
855
        temp[x   ]= b[2*x    ];
856
        temp[x+w2]= b[2*x + 1];
857
    }
858
    if(width&1)
859
        temp[x   ]= b[2*x    ];
860
#if 0
861
    A2= temp[1       ];
862
    A4= temp[0       ];
863
    A1= temp[0+width2];
864
    A1 -= (A2 + A4)>>1;
865
    A4 += (A1 + 1)>>1;
866
    b[0+width2] = A1;
867
    b[0       ] = A4;
868
    for(x=1; x+1<width2; x+=2){
869
        A3= temp[x+width2];
870
        A4= temp[x+1     ];
871
        A3 -= (A2 + A4)>>1;
872
        A2 += (A1 + A3 + 2)>>2;
873
        b[x+width2] = A3;
874
        b[x       ] = A2;
875

876
        A1= temp[x+1+width2];
877
        A2= temp[x+2       ];
878
        A1 -= (A2 + A4)>>1;
879
        A4 += (A1 + A3 + 2)>>2;
880
        b[x+1+width2] = A1;
881
        b[x+1       ] = A4;
882
    }
883
    A3= temp[width-1];
884
    A3 -= A2;
885
    A2 += (A1 + A3 + 2)>>2;
886
    b[width -1] = A3;
887
    b[width2-1] = A2;
888
#else        
889
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
890
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
891
#endif
892
}
893

    
894
static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
895
    int i;
896
    
897
    for(i=0; i<width; i++){
898
        b1[i] -= (b0[i] + b2[i])>>1;
899
    }
900
}
901

    
902
static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
903
    int i;
904
    
905
    for(i=0; i<width; i++){
906
        b1[i] += (b0[i] + b2[i] + 2)>>2;
907
    }
908
}
909

    
910
static void spatial_decompose53i(int *buffer, int width, int height, int stride){
911
    int y;
912
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
913
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
914
  
915
    for(y=-2; y<height; y+=2){
916
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
917
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
918

    
919
{START_TIMER
920
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
921
        if(y+2 < height) horizontal_decompose53i(b3, width);
922
STOP_TIMER("horizontal_decompose53i")}
923
        
924
{START_TIMER
925
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
926
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
927
STOP_TIMER("vertical_decompose53i*")}
928
        
929
        b0=b2;
930
        b1=b3;
931
    }
932
}
933

    
934
#define lift5 lift
935
#if 1
936
#define W_AM 3
937
#define W_AO 0
938
#define W_AS 1
939

    
940
#define W_BM 1
941
#define W_BO 8
942
#define W_BS 4
943

    
944
#undef lift5
945
#define W_CM 9999
946
#define W_CO 2
947
#define W_CS 2
948

    
949
#define W_DM 15
950
#define W_DO 16
951
#define W_DS 5
952
#elif 0
953
#define W_AM 55
954
#define W_AO 16
955
#define W_AS 5
956

    
957
#define W_BM 3
958
#define W_BO 32
959
#define W_BS 6
960

    
961
#define W_CM 127
962
#define W_CO 64
963
#define W_CS 7
964

    
965
#define W_DM 7
966
#define W_DO 8
967
#define W_DS 4
968
#elif 0
969
#define W_AM 97
970
#define W_AO 32
971
#define W_AS 6
972

    
973
#define W_BM 63
974
#define W_BO 512
975
#define W_BS 10
976

    
977
#define W_CM 13
978
#define W_CO 8
979
#define W_CS 4
980

    
981
#define W_DM 15
982
#define W_DO 16
983
#define W_DS 5
984

    
985
#else
986

    
987
#define W_AM 203
988
#define W_AO 64
989
#define W_AS 7
990

    
991
#define W_BM 217
992
#define W_BO 2048
993
#define W_BS 12
994

    
995
#define W_CM 113
996
#define W_CO 64
997
#define W_CS 7
998

    
999
#define W_DM 227
1000
#define W_DO 128
1001
#define W_DS 9
1002
#endif
1003
static void horizontal_decompose97i(int *b, int width){
1004
    int temp[width];
1005
    const int w2= (width+1)>>1;
1006

    
1007
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1008
    lift (temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1009
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1010
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1011
}
1012

    
1013

    
1014
static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
1015
    int i;
1016
    
1017
    for(i=0; i<width; i++){
1018
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1019
    }
1020
}
1021

    
1022
static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
1023
    int i;
1024
    
1025
    for(i=0; i<width; i++){
1026
#ifdef lift5
1027
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1028
#else
1029
        int r= 3*(b0[i] + b2[i]);
1030
        r+= r>>4;
1031
        r+= r>>8;
1032
        b1[i] += (r+W_CO)>>W_CS;
1033
#endif
1034
    }
1035
}
1036

    
1037
static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
1038
    int i;
1039
    
1040
    for(i=0; i<width; i++){
1041
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1042
    }
1043
}
1044

    
1045
static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
1046
    int i;
1047
    
1048
    for(i=0; i<width; i++){
1049
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1050
    }
1051
}
1052

    
1053
static void spatial_decompose97i(int *buffer, int width, int height, int stride){
1054
    int y;
1055
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1056
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1057
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1058
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1059
  
1060
    for(y=-4; y<height; y+=2){
1061
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1062
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1063

    
1064
{START_TIMER
1065
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1066
        if(y+4 < height) horizontal_decompose97i(b5, width);
1067
if(width>400){
1068
STOP_TIMER("horizontal_decompose97i")
1069
}}
1070
        
1071
{START_TIMER
1072
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1073
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1074
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1075
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1076

    
1077
if(width>400){
1078
STOP_TIMER("vertical_decompose97i")
1079
}}
1080
        
1081
        b0=b2;
1082
        b1=b3;
1083
        b2=b4;
1084
        b3=b5;
1085
    }
1086
}
1087

    
1088
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
1089
    int level;
1090
    
1091
    for(level=0; level<decomposition_count; level++){
1092
        switch(type){
1093
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1094
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1095
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1096
        }
1097
    }
1098
}
1099

    
1100
static void horizontal_compose53i(int *b, int width){
1101
    int temp[width];
1102
    const int width2= width>>1;
1103
    const int w2= (width+1)>>1;
1104
    int A1,A2,A3,A4, x;
1105

    
1106
#if 0
1107
    A2= temp[1       ];
1108
    A4= temp[0       ];
1109
    A1= temp[0+width2];
1110
    A1 -= (A2 + A4)>>1;
1111
    A4 += (A1 + 1)>>1;
1112
    b[0+width2] = A1;
1113
    b[0       ] = A4;
1114
    for(x=1; x+1<width2; x+=2){
1115
        A3= temp[x+width2];
1116
        A4= temp[x+1     ];
1117
        A3 -= (A2 + A4)>>1;
1118
        A2 += (A1 + A3 + 2)>>2;
1119
        b[x+width2] = A3;
1120
        b[x       ] = A2;
1121

1122
        A1= temp[x+1+width2];
1123
        A2= temp[x+2       ];
1124
        A1 -= (A2 + A4)>>1;
1125
        A4 += (A1 + A3 + 2)>>2;
1126
        b[x+1+width2] = A1;
1127
        b[x+1       ] = A4;
1128
    }
1129
    A3= temp[width-1];
1130
    A3 -= A2;
1131
    A2 += (A1 + A3 + 2)>>2;
1132
    b[width -1] = A3;
1133
    b[width2-1] = A2;
1134
#else   
1135
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1136
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1137
#endif
1138
    for(x=0; x<width2; x++){
1139
        b[2*x    ]= temp[x   ];
1140
        b[2*x + 1]= temp[x+w2];
1141
    }
1142
    if(width&1)
1143
        b[2*x    ]= temp[x   ];
1144
}
1145

    
1146
static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
1147
    int i;
1148
    
1149
    for(i=0; i<width; i++){
1150
        b1[i] += (b0[i] + b2[i])>>1;
1151
    }
1152
}
1153

    
1154
static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
1155
    int i;
1156
    
1157
    for(i=0; i<width; i++){
1158
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1159
    }
1160
}
1161

    
1162
static void spatial_compose53i(int *buffer, int width, int height, int stride){
1163
    int y;
1164
    DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1165
    DWTELEM *b1= buffer + mirror(-1  , height-1)*stride;
1166
  
1167
    for(y=-1; y<=height; y+=2){
1168
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1169
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1170

    
1171
{START_TIMER
1172
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1173
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1174
STOP_TIMER("vertical_compose53i*")}
1175

    
1176
{START_TIMER
1177
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1178
        if(b0 <= b2) horizontal_compose53i(b1, width);
1179
STOP_TIMER("horizontal_compose53i")}
1180

    
1181
        b0=b2;
1182
        b1=b3;
1183
    }
1184
}   
1185

    
1186
 
1187
static void horizontal_compose97i(int *b, int width){
1188
    int temp[width];
1189
    const int w2= (width+1)>>1;
1190

    
1191
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1192
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1193
    lift (b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1194
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1195
}
1196

    
1197
static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1198
    int i;
1199
    
1200
    for(i=0; i<width; i++){
1201
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1202
    }
1203
}
1204

    
1205
static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1206
    int i;
1207
    
1208
    for(i=0; i<width; i++){
1209
#ifdef lift5
1210
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1211
#else
1212
        int r= 3*(b0[i] + b2[i]);
1213
        r+= r>>4;
1214
        r+= r>>8;
1215
        b1[i] -= (r+W_CO)>>W_CS;
1216
#endif
1217
    }
1218
}
1219

    
1220
static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1221
    int i;
1222
    
1223
    for(i=0; i<width; i++){
1224
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1225
    }
1226
}
1227

    
1228
static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1229
    int i;
1230
    
1231
    for(i=0; i<width; i++){
1232
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1233
    }
1234
}
1235

    
1236
static void spatial_compose97i(int *buffer, int width, int height, int stride){
1237
    int y;
1238
    DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1239
    DWTELEM *b1= buffer + mirror(-3  , height-1)*stride;
1240
    DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1241
    DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1242

    
1243
    for(y=-3; y<=height; y+=2){
1244
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1245
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1246

    
1247
        if(stride == width && y+4 < height && 0){ 
1248
            int x;
1249
            for(x=0; x<width/2; x++)
1250
                b5[x] += 64*2;
1251
            for(; x<width; x++)
1252
                b5[x] += 169*2;
1253
        }
1254
        
1255
{START_TIMER
1256
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1257
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1258
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1259
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1260
if(width>400){
1261
STOP_TIMER("vertical_compose97i")}}
1262

    
1263
{START_TIMER
1264
        if(y-1>=  0) horizontal_compose97i(b0, width);
1265
        if(b0 <= b2) horizontal_compose97i(b1, width);
1266
if(width>400 && b0 <= b2){
1267
STOP_TIMER("horizontal_compose97i")}}
1268
        
1269
        b0=b2;
1270
        b1=b3;
1271
        b2=b4;
1272
        b3=b5;
1273
    }
1274
}
1275

    
1276
void ff_spatial_idwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
1277
    int level;
1278

    
1279
    for(level=decomposition_count-1; level>=0; level--){
1280
        switch(type){
1281
        case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1282
        case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1283
        case 2: spatial_composeX  (buffer, width>>level, height>>level, stride<<level); break;
1284
        }
1285
    }
1286
}
1287

    
1288
static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1289
    const int w= b->width;
1290
    const int h= b->height;
1291
    int x, y;
1292

    
1293
    if(1){
1294
        int run=0;
1295
        int runs[w*h];
1296
        int run_index=0;
1297
                
1298
        for(y=0; y<h; y++){
1299
            for(x=0; x<w; x++){
1300
                int v, p=0;
1301
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1302
                v= src[x + y*stride];
1303

    
1304
                if(y){
1305
                    t= src[x + (y-1)*stride];
1306
                    if(x){
1307
                        lt= src[x - 1 + (y-1)*stride];
1308
                    }
1309
                    if(x + 1 < w){
1310
                        rt= src[x + 1 + (y-1)*stride];
1311
                    }
1312
                }
1313
                if(x){
1314
                    l= src[x - 1 + y*stride];
1315
                    /*if(x > 1){
1316
                        if(orientation==1) ll= src[y + (x-2)*stride];
1317
                        else               ll= src[x - 2 + y*stride];
1318
                    }*/
1319
                }
1320
                if(parent){
1321
                    int px= x>>1;
1322
                    int py= y>>1;
1323
                    if(px<b->parent->width && py<b->parent->height) 
1324
                        p= parent[px + py*2*stride];
1325
                }
1326
                if(!(/*ll|*/l|lt|t|rt|p)){
1327
                    if(v){
1328
                        runs[run_index++]= run;
1329
                        run=0;
1330
                    }else{
1331
                        run++;
1332
                    }
1333
                }
1334
            }
1335
        }
1336
        runs[run_index++]= run;
1337
        run_index=0;
1338
        run= runs[run_index++];
1339

    
1340
        put_symbol2(&s->c, b->state[1], run, 3);
1341
        
1342
        for(y=0; y<h; y++){
1343
            for(x=0; x<w; x++){
1344
                int v, p=0;
1345
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1346
                v= src[x + y*stride];
1347

    
1348
                if(y){
1349
                    t= src[x + (y-1)*stride];
1350
                    if(x){
1351
                        lt= src[x - 1 + (y-1)*stride];
1352
                    }
1353
                    if(x + 1 < w){
1354
                        rt= src[x + 1 + (y-1)*stride];
1355
                    }
1356
                }
1357
                if(x){
1358
                    l= src[x - 1 + y*stride];
1359
                    /*if(x > 1){
1360
                        if(orientation==1) ll= src[y + (x-2)*stride];
1361
                        else               ll= src[x - 2 + y*stride];
1362
                    }*/
1363
                }
1364
                if(parent){
1365
                    int px= x>>1;
1366
                    int py= y>>1;
1367
                    if(px<b->parent->width && py<b->parent->height) 
1368
                        p= parent[px + py*2*stride];
1369
                }
1370
                if(/*ll|*/l|lt|t|rt|p){
1371
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1372

    
1373
                    put_cabac(&s->c, &b->state[0][context], !!v);
1374
                }else{
1375
                    if(!run){
1376
                        run= runs[run_index++];
1377

    
1378
                        put_symbol2(&s->c, b->state[1], run, 3);
1379
                        assert(v);
1380
                    }else{
1381
                        run--;
1382
                        assert(!v);
1383
                    }
1384
                }
1385
                if(v){
1386
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1387

    
1388
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1389
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1390
                }
1391
            }
1392
        }
1393
    }
1394
}
1395

    
1396
static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1397
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1398
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1399
    encode_subband_c0run(s, b, src, parent, stride, orientation);
1400
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1401
}
1402

    
1403
static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1404
    const int w= b->width;
1405
    const int h= b->height;
1406
    int x,y;
1407
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
1408
    int qmul= qexp[qlog&7]<<(qlog>>3);
1409
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1410
    
1411
    START_TIMER
1412

    
1413
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1414
        qadd= 0;
1415
        qmul= 1<<QEXPSHIFT;
1416
    }
1417

    
1418
    if(1){
1419
        int run;
1420
        int index=0;
1421
        int prev_index=-1;
1422
        int prev2_index=0;
1423
        int parent_index= 0;
1424
        int prev_parent_index= 0;
1425
        
1426
        for(y=0; y<b->height; y++)
1427
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1428

    
1429
        run= get_symbol2(&s->c, b->state[1], 3);
1430
        for(y=0; y<h; y++){
1431
            int v=0;
1432
            int lt=0, t=0, rt=0;
1433

    
1434
            if(y && b->x[prev_index] == 0){
1435
                rt= b->coeff[prev_index];
1436
            }
1437
            for(x=0; x<w; x++){
1438
                int p=0;
1439
                const int l= v;
1440
                
1441
                lt= t; t= rt;
1442

    
1443
                if(y){
1444
                    if(b->x[prev_index] <= x)
1445
                        prev_index++;
1446
                    if(b->x[prev_index] == x + 1)
1447
                        rt= b->coeff[prev_index];
1448
                    else
1449
                        rt=0;
1450
                }
1451
                if(parent){
1452
                    if(x>>1 > b->parent->x[parent_index]){
1453
                        parent_index++;
1454
                    }
1455
                    if(x>>1 == b->parent->x[parent_index]){
1456
                        p= b->parent->coeff[parent_index];
1457
                    }
1458
                }
1459
                if(/*ll|*/l|lt|t|rt|p){
1460
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1461

    
1462
                    v=get_cabac(&s->c, &b->state[0][context]);
1463
                }else{
1464
                    if(!run){
1465
                        run= get_symbol2(&s->c, b->state[1], 3);
1466
                        v=1;
1467
                    }else{
1468
                        run--;
1469
                        v=0;
1470

    
1471
                        if(y && parent){
1472
                            int max_run;
1473

    
1474
                            max_run= FFMIN(run, b->x[prev_index] - x - 2);
1475
                            max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1476
                            x+= max_run;
1477
                            run-= max_run;
1478
                        }
1479
                    }
1480
                }
1481
                if(v){
1482
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1483
                    v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
1484
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
1485
                        src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
1486
                        v= -v;
1487
                    }else{
1488
                        src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1489
                    }
1490
                    b->x[index]=x; //FIXME interleave x/coeff
1491
                    b->coeff[index++]= v;
1492
                }
1493
            }
1494
            b->x[index++]= w+1; //end marker
1495
            prev_index= prev2_index;
1496
            prev2_index= index;
1497
            
1498
            if(parent){
1499
                while(b->parent->x[parent_index] != b->parent->width+1)
1500
                    parent_index++;
1501
                parent_index++;
1502
                if(y&1){
1503
                    prev_parent_index= parent_index;
1504
                }else{
1505
                    parent_index= prev_parent_index;
1506
                }
1507
            }
1508
        }
1509
        b->x[index++]= w+1; //end marker
1510
        if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
1511
            STOP_TIMER("decode_subband")
1512
        }
1513
        
1514
        return;
1515
    }
1516
}
1517

    
1518
static void reset_contexts(SnowContext *s){
1519
    int plane_index, level, orientation;
1520

    
1521
    for(plane_index=0; plane_index<3; plane_index++){
1522
        for(level=0; level<s->spatial_decomposition_count; level++){
1523
            for(orientation=level ? 1:0; orientation<4; orientation++){
1524
                memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
1525
            }
1526
        }
1527
    }
1528
    memset(s->header_state, 0, sizeof(s->header_state));
1529
    memset(s->block_state, 0, sizeof(s->block_state));
1530
}
1531

    
1532
static int alloc_blocks(SnowContext *s){
1533
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1534
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1535
    
1536
    s->b_width = w;
1537
    s->b_height= h;
1538
    
1539
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1540
    return 0;
1541
}
1542

    
1543
static inline void copy_cabac_state(CABACContext *d, CABACContext *s){
1544
    PutBitContext bak= d->pb;
1545
    *d= *s;
1546
    d->pb= bak;
1547
}
1548

    
1549
//near copy & paste from dsputil, FIXME
1550
static int pix_sum(uint8_t * pix, int line_size, int w)
1551
{
1552
    int s, i, j;
1553

    
1554
    s = 0;
1555
    for (i = 0; i < w; i++) {
1556
        for (j = 0; j < w; j++) {
1557
            s += pix[0];
1558
            pix ++;
1559
        }
1560
        pix += line_size - w;
1561
    }
1562
    return s;
1563
}
1564

    
1565
//near copy & paste from dsputil, FIXME
1566
static int pix_norm1(uint8_t * pix, int line_size, int w)
1567
{
1568
    int s, i, j;
1569
    uint32_t *sq = squareTbl + 256;
1570

    
1571
    s = 0;
1572
    for (i = 0; i < w; i++) {
1573
        for (j = 0; j < w; j ++) {
1574
            s += sq[pix[0]];
1575
            pix ++;
1576
        }
1577
        pix += line_size - w;
1578
    }
1579
    return s;
1580
}
1581

    
1582
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1583
    const int w= s->b_width << s->block_max_depth;
1584
    const int rem_depth= s->block_max_depth - level;
1585
    const int index= (x + y*w) << rem_depth;
1586
    const int block_w= 1<<rem_depth;
1587
    BlockNode block;
1588
    int i,j;
1589
    
1590
    block.color[0]= l;
1591
    block.color[1]= cb;
1592
    block.color[2]= cr;
1593
    block.mx= mx;
1594
    block.my= my;
1595
    block.type= type;
1596
    block.level= level;
1597

    
1598
    for(j=0; j<block_w; j++){
1599
        for(i=0; i<block_w; i++){
1600
            s->block[index + i + j*w]= block;
1601
        }
1602
    }
1603
}
1604

    
1605
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1606
    const int offset[3]= {
1607
          y*c->  stride + x,
1608
        ((y*c->uvstride + x)>>1),
1609
        ((y*c->uvstride + x)>>1),
1610
    };
1611
    int i;
1612
    for(i=0; i<3; i++){
1613
        c->src[0][i]= src [i];
1614
        c->ref[0][i]= ref [i] + offset[i];
1615
    }
1616
    assert(!ref_index);
1617
}
1618

    
1619
//FIXME copy&paste
1620
#define P_LEFT P[1]
1621
#define P_TOP P[2]
1622
#define P_TOPRIGHT P[3]
1623
#define P_MEDIAN P[4]
1624
#define P_MV1 P[9]
1625
#define FLAG_QPEL   1 //must be 1
1626

    
1627
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1628
    uint8_t p_buffer[1024];
1629
    uint8_t i_buffer[1024];
1630
    uint8_t p_state[sizeof(s->block_state)];
1631
    uint8_t i_state[sizeof(s->block_state)];
1632
    CABACContext pc, ic;
1633
    PutBitContext pbbak= s->c.pb;
1634
    int score, score2, iscore, i_len, p_len, block_s, sum;
1635
    const int w= s->b_width  << s->block_max_depth;
1636
    const int h= s->b_height << s->block_max_depth;
1637
    const int rem_depth= s->block_max_depth - level;
1638
    const int index= (x + y*w) << rem_depth;
1639
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1640
    static BlockNode null_block= { //FIXME add border maybe
1641
        .color= {128,128,128},
1642
        .mx= 0,
1643
        .my= 0,
1644
        .type= 0,
1645
        .level= 0,
1646
    };
1647
    int trx= (x+1)<<rem_depth;
1648
    int try= (y+1)<<rem_depth;
1649
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
1650
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
1651
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1652
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1653
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1654
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1655
    int pl = left->color[0];
1656
    int pcb= left->color[1];
1657
    int pcr= left->color[2];
1658
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
1659
    int pmy= mid_pred(left->my, top->my, tr->my);
1660
    int mx=0, my=0;
1661
    int l,cr,cb, i;
1662
    const int stride= s->current_picture.linesize[0];
1663
    const int uvstride= s->current_picture.linesize[1];
1664
    const int instride= s->input_picture.linesize[0];
1665
    const int uvinstride= s->input_picture.linesize[1];
1666
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
1667
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1668
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
1669
    uint8_t current_mb[3][stride*block_w];
1670
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1671
    int P[10][2];
1672
    int16_t last_mv[3][2];
1673
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1674
    const int shift= 1+qpel;
1675
    MotionEstContext *c= &s->m.me;
1676
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
1677
    int my_context= av_log2(2*ABS(left->my - top->my));
1678
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1679

    
1680
    assert(sizeof(s->block_state) >= 256);
1681
    if(s->keyframe){
1682
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1683
        return 0;
1684
    }
1685

    
1686
    //FIXME optimize
1687
    for(i=0; i<block_w; i++)
1688
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
1689
    for(i=0; i<block_w>>1; i++)
1690
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1691
    for(i=0; i<block_w>>1; i++)
1692
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1693

    
1694
//    clip predictors / edge ?
1695

    
1696
    P_LEFT[0]= left->mx;
1697
    P_LEFT[1]= left->my;
1698
    P_TOP [0]= top->mx;
1699
    P_TOP [1]= top->my;
1700
    P_TOPRIGHT[0]= tr->mx;
1701
    P_TOPRIGHT[1]= tr->my;
1702
    
1703
    last_mv[0][0]= s->block[index].mx;
1704
    last_mv[0][1]= s->block[index].my;
1705
    last_mv[1][0]= right->mx;
1706
    last_mv[1][1]= right->my;
1707
    last_mv[2][0]= bottom->mx;
1708
    last_mv[2][1]= bottom->my;
1709
    
1710
    s->m.mb_stride=2;
1711
    s->m.mb_x= 
1712
    s->m.mb_y= 0;
1713
    s->m.me.skip= 0;
1714

    
1715
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1716
    
1717
    assert(s->m.me.  stride ==   stride);
1718
    assert(s->m.me.uvstride == uvstride);
1719
    
1720
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1721
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1722
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1723
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1724
    
1725
    c->xmin = - x*block_w - 16+2;
1726
    c->ymin = - y*block_w - 16+2;
1727
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1728
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1729

    
1730
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1731
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
1732
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1733
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1734
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1735
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1736
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1737

    
1738
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1739
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1740

    
1741
    if (!y) {
1742
        c->pred_x= P_LEFT[0];
1743
        c->pred_y= P_LEFT[1];
1744
    } else {
1745
        c->pred_x = P_MEDIAN[0];
1746
        c->pred_y = P_MEDIAN[1];
1747
    }
1748

    
1749
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
1750
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1751

    
1752
    assert(mx >= c->xmin);
1753
    assert(mx <= c->xmax);
1754
    assert(my >= c->ymin);
1755
    assert(my <= c->ymax);
1756
    
1757
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1758
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1759
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1760
                             
1761
  //  subpel search
1762
    pc= s->c;
1763
    init_put_bits(&pc.pb, p_buffer, sizeof(p_buffer));
1764
    memcpy(p_state, s->block_state, sizeof(s->block_state));
1765

    
1766
    if(level!=s->block_max_depth)
1767
        put_cabac(&pc, &p_state[4 + s_context], 1);
1768
    put_cabac(&pc, &p_state[1 + left->type + top->type], 0);
1769
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1770
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
1771
    p_len= put_bits_count(&pc.pb);
1772
    score += (s->lambda2*(p_len + pc.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1773

    
1774
    block_s= block_w*block_w;
1775
    sum = pix_sum(&current_mb[0][0], stride, block_w);
1776
    l= (sum + block_s/2)/block_s;
1777
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1778
    
1779
    block_s= block_w*block_w>>2;
1780
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1781
    cb= (sum + block_s/2)/block_s;
1782
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1783
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1784
    cr= (sum + block_s/2)/block_s;
1785
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1786

    
1787
    ic= s->c;
1788
    init_put_bits(&ic.pb, i_buffer, sizeof(i_buffer));
1789
    memcpy(i_state, s->block_state, sizeof(s->block_state));
1790
    if(level!=s->block_max_depth)
1791
        put_cabac(&ic, &i_state[4 + s_context], 1);
1792
    put_cabac(&ic, &i_state[1 + left->type + top->type], 1);
1793
    put_symbol(&ic, &i_state[32],  l-pl , 1);
1794
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
1795
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
1796
    i_len= put_bits_count(&ic.pb);
1797
    iscore += (s->lambda2*(i_len + ic.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1798

    
1799
//    assert(score==256*256*256*64-1);
1800
    assert(iscore < 255*255*256 + s->lambda2*10);
1801
    assert(iscore >= 0);
1802
    assert(l>=0 && l<=255);
1803
    assert(pl>=0 && pl<=255);
1804

    
1805
    if(level==0){
1806
        int varc= iscore >> 8;
1807
        int vard= score >> 8;
1808
        if (vard <= 64 || vard < varc)
1809
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1810
        else
1811
            c->scene_change_score+= s->m.qscale;
1812
    }
1813
        
1814
    if(level!=s->block_max_depth){
1815
        put_cabac(&s->c, &s->block_state[4 + s_context], 0);
1816
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1817
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1818
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1819
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1820
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1821
    
1822
        if(score2 < score && score2 < iscore)
1823
            return score2;
1824
    }
1825
    
1826
    if(iscore < score){
1827
        flush_put_bits(&ic.pb);
1828
        ff_copy_bits(&pbbak, i_buffer, i_len);
1829
        s->c= ic;
1830
        s->c.pb= pbbak;
1831
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1832
        memcpy(s->block_state, i_state, sizeof(s->block_state));
1833
        return iscore;
1834
    }else{
1835
        flush_put_bits(&pc.pb);
1836
        ff_copy_bits(&pbbak, p_buffer, p_len);
1837
        s->c= pc;
1838
        s->c.pb= pbbak;
1839
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1840
        memcpy(s->block_state, p_state, sizeof(s->block_state));
1841
        return score;
1842
    }
1843
}
1844

    
1845
static void decode_q_branch(SnowContext *s, int level, int x, int y){
1846
    const int w= s->b_width << s->block_max_depth;
1847
    const int rem_depth= s->block_max_depth - level;
1848
    const int index= (x + y*w) << rem_depth;
1849
    static BlockNode null_block= { //FIXME add border maybe
1850
        .color= {128,128,128},
1851
        .mx= 0,
1852
        .my= 0,
1853
        .type= 0,
1854
        .level= 0,
1855
    };
1856
    int trx= (x+1)<<rem_depth;
1857
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
1858
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
1859
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1860
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1861
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1862
    
1863
    if(s->keyframe){
1864
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1865
        return;
1866
    }
1867

    
1868
    if(level==s->block_max_depth || get_cabac(&s->c, &s->block_state[4 + s_context])){
1869
        int type;
1870
        int l = left->color[0];
1871
        int cb= left->color[1];
1872
        int cr= left->color[2];
1873
        int mx= mid_pred(left->mx, top->mx, tr->mx);
1874
        int my= mid_pred(left->my, top->my, tr->my);
1875
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1876
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1877
        
1878
        type= get_cabac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1879

    
1880
        if(type){
1881
            l += get_symbol(&s->c, &s->block_state[32], 1);
1882
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
1883
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
1884
        }else{
1885
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1886
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1887
        }
1888
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1889
    }else{
1890
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1891
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1892
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1893
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1894
    }
1895
}
1896

    
1897
static void encode_blocks(SnowContext *s){
1898
    int x, y;
1899
    int w= s->b_width;
1900
    int h= s->b_height;
1901

    
1902
    for(y=0; y<h; y++){
1903
        for(x=0; x<w; x++){
1904
            encode_q_branch(s, 0, x, y);
1905
        }
1906
    }
1907
}
1908

    
1909
static void decode_blocks(SnowContext *s){
1910
    int x, y;
1911
    int w= s->b_width;
1912
    int h= s->b_height;
1913

    
1914
    for(y=0; y<h; y++){
1915
        for(x=0; x<w; x++){
1916
            decode_q_branch(s, 0, x, y);
1917
        }
1918
    }
1919
}
1920

    
1921
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1922
    int x, y;
1923
START_TIMER
1924
    for(y=0; y < b_h+5; y++){
1925
        for(x=0; x < b_w; x++){
1926
            int a0= src[x    ];
1927
            int a1= src[x + 1];
1928
            int a2= src[x + 2];
1929
            int a3= src[x + 3];
1930
            int a4= src[x + 4];
1931
            int a5= src[x + 5];
1932
//            int am= 9*(a1+a2) - (a0+a3);
1933
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1934
//            int am= 18*(a2+a3) - 2*(a1+a4);
1935
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1936
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1937

    
1938
//            if(b_w==16) am= 8*(a1+a2);
1939

    
1940
            if(dx<8) tmp[x]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
1941
            else     tmp[x]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
1942

    
1943
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
1944
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
1945
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
1946
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1947
        }
1948
        tmp += stride;
1949
        src += stride;
1950
    }
1951
    tmp -= (b_h+5)*stride;
1952
    
1953
    for(y=0; y < b_h; y++){
1954
        for(x=0; x < b_w; x++){
1955
            int a0= tmp[x + 0*stride];
1956
            int a1= tmp[x + 1*stride];
1957
            int a2= tmp[x + 2*stride];
1958
            int a3= tmp[x + 3*stride];
1959
            int a4= tmp[x + 4*stride];
1960
            int a5= tmp[x + 5*stride];
1961
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1962
//            int am= 18*(a2+a3) - 2*(a1+a4);
1963
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1964
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1965
            
1966
//            if(b_w==16) am= 8*(a1+a2);
1967

    
1968
            if(dy<8) dst[x]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
1969
            else     dst[x]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
1970

    
1971
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
1972
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
1973
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
1974
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
1975
        }
1976
        dst += stride;
1977
        tmp += stride;
1978
    }
1979
STOP_TIMER("mc_block")
1980
}
1981

    
1982
#define mca(dx,dy,b_w)\
1983
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
1984
    uint8_t tmp[stride*(b_w+5)];\
1985
    assert(h==b_w);\
1986
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
1987
}
1988

    
1989
mca( 0, 0,16)
1990
mca( 8, 0,16)
1991
mca( 0, 8,16)
1992
mca( 8, 8,16)
1993
mca( 0, 0,8)
1994
mca( 8, 0,8)
1995
mca( 0, 8,8)
1996
mca( 8, 8,8)
1997

    
1998
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1999
    if(block->type){
2000
        int x, y;
2001
        const int color= block->color[plane_index];
2002
        for(y=0; y < b_h; y++){
2003
            for(x=0; x < b_w; x++){
2004
                dst[x + y*stride]= color;
2005
            }
2006
        }
2007
    }else{
2008
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2009
        int mx= block->mx*scale;
2010
        int my= block->my*scale;
2011
        const int dx= mx&15;
2012
        const int dy= my&15;
2013
        sx += (mx>>4) - 2;
2014
        sy += (my>>4) - 2;
2015
        src += sx + sy*stride;
2016
        if(   (unsigned)sx >= w - b_w - 4
2017
           || (unsigned)sy >= h - b_h - 4){
2018
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2019
            src= tmp + MB_SIZE;
2020
        }
2021
        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2022
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2023
        else
2024
            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2025
    }
2026
}
2027

    
2028
static always_inline int same_block(BlockNode *a, BlockNode *b){
2029
    return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2030
}
2031

    
2032
//FIXME name clenup (b_w, block_w, b_width stuff)
2033
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2034
    const int b_width = s->b_width  << s->block_max_depth;
2035
    const int b_height= s->b_height << s->block_max_depth;
2036
    const int b_stride= b_width;
2037
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2038
    BlockNode *rt= lt+1;
2039
    BlockNode *lb= lt+b_stride;
2040
    BlockNode *rb= lb+1;
2041
    uint8_t *block[4]; 
2042
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2043
    int x,y;
2044

    
2045
    if(b_x<0){
2046
        lt= rt;
2047
        lb= rb;
2048
    }else if(b_x + 1 >= b_width){
2049
        rt= lt;
2050
        rb= lb;
2051
    }
2052
    if(b_y<0){
2053
        lt= lb;
2054
        rt= rb;
2055
    }else if(b_y + 1 >= b_height){
2056
        lb= lt;
2057
        rb= rt;
2058
    }
2059
        
2060
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2061
        obmc -= src_x;
2062
        b_w += src_x;
2063
        src_x=0;
2064
    }else if(src_x + b_w > w){
2065
        b_w = w - src_x;
2066
    }
2067
    if(src_y<0){
2068
        obmc -= src_y*obmc_stride;
2069
        b_h += src_y;
2070
        src_y=0;
2071
    }else if(src_y + b_h> h){
2072
        b_h = h - src_y;
2073
    }
2074
    
2075
    if(b_w<=0 || b_h<=0) return;
2076

    
2077
assert(src_stride > 7*MB_SIZE);
2078
    dst += src_x + src_y*dst_stride;
2079
//    src += src_x + src_y*src_stride;
2080

    
2081
    block[0]= tmp+3*MB_SIZE;
2082
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2083

    
2084
    if(same_block(lt, rt)){
2085
        block[1]= block[0];
2086
    }else{
2087
        block[1]= tmp + 4*MB_SIZE;
2088
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2089
    }
2090
        
2091
    if(same_block(lt, lb)){
2092
        block[2]= block[0];
2093
    }else if(same_block(rt, lb)){
2094
        block[2]= block[1];
2095
    }else{
2096
        block[2]= tmp+5*MB_SIZE;
2097
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2098
    }
2099

    
2100
    if(same_block(lt, rb) ){
2101
        block[3]= block[0];
2102
    }else if(same_block(rt, rb)){
2103
        block[3]= block[1];
2104
    }else if(same_block(lb, rb)){
2105
        block[3]= block[2];
2106
    }else{
2107
        block[3]= tmp+6*MB_SIZE;
2108
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2109
    }
2110
#if 0
2111
    for(y=0; y<b_h; y++){
2112
        for(x=0; x<b_w; x++){
2113
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2114
            if(add) dst[x + y*dst_stride] += v;
2115
            else    dst[x + y*dst_stride] -= v;
2116
        }
2117
    }
2118
    for(y=0; y<b_h; y++){
2119
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2120
        for(x=0; x<b_w; x++){
2121
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2122
            if(add) dst[x + y*dst_stride] += v;
2123
            else    dst[x + y*dst_stride] -= v;
2124
        }
2125
    }
2126
    for(y=0; y<b_h; y++){
2127
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2128
        for(x=0; x<b_w; x++){
2129
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2130
            if(add) dst[x + y*dst_stride] += v;
2131
            else    dst[x + y*dst_stride] -= v;
2132
        }
2133
    }
2134
    for(y=0; y<b_h; y++){
2135
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2136
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2137
        for(x=0; x<b_w; x++){
2138
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2139
            if(add) dst[x + y*dst_stride] += v;
2140
            else    dst[x + y*dst_stride] -= v;
2141
        }
2142
    }
2143
#else
2144
    for(y=0; y<b_h; y++){
2145
        //FIXME ugly missue of obmc_stride
2146
        uint8_t *obmc1= obmc + y*obmc_stride;
2147
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2148
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2149
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2150
        for(x=0; x<b_w; x++){
2151
            int v=   obmc1[x] * block[3][x + y*src_stride]
2152
                    +obmc2[x] * block[2][x + y*src_stride]
2153
                    +obmc3[x] * block[1][x + y*src_stride]
2154
                    +obmc4[x] * block[0][x + y*src_stride];
2155
            if(add) dst[x + y*dst_stride] += v * (256/OBMC_MAX);
2156
            else    dst[x + y*dst_stride] -= v * (256/OBMC_MAX);
2157
        }
2158
    }
2159
#endif
2160
}
2161

    
2162
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2163
    Plane *p= &s->plane[plane_index];
2164
    const int mb_w= s->b_width  << s->block_max_depth;
2165
    const int mb_h= s->b_height << s->block_max_depth;
2166
    int x, y, mb_x, mb_y;
2167
    int block_size = MB_SIZE >> s->block_max_depth;
2168
    int block_w    = plane_index ? block_size/2 : block_size;
2169
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2170
    int obmc_stride= plane_index ? block_size : 2*block_size;
2171
    int ref_stride= s->last_picture.linesize[plane_index];
2172
    uint8_t *ref  = s->last_picture.data[plane_index];
2173
    int w= p->width;
2174
    int h= p->height;
2175
    START_TIMER
2176
    
2177
    if(s->keyframe || (s->avctx->debug&512)){
2178
        for(y=0; y<h; y++){
2179
            for(x=0; x<w; x++){
2180
                if(add) buf[x + y*w]+= 128*256;
2181
                else    buf[x + y*w]-= 128*256;
2182
            }
2183
        }
2184

    
2185
        return;
2186
    }
2187
    
2188
    for(mb_y=0; mb_y<=mb_h; mb_y++){
2189
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2190
            START_TIMER
2191

    
2192
            add_yblock(s, buf, ref, obmc, 
2193
                       block_w*mb_x - block_w/2,
2194
                       block_w*mb_y - block_w/2,
2195
                       block_w, block_w,
2196
                       w, h,
2197
                       w, ref_stride, obmc_stride,
2198
                       mb_x - 1, mb_y - 1,
2199
                       add, plane_index);
2200
            
2201
            STOP_TIMER("add_yblock")
2202
        }
2203
    }
2204
    
2205
    STOP_TIMER("predict_plane")
2206
}
2207

    
2208
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2209
    const int level= b->level;
2210
    const int w= b->width;
2211
    const int h= b->height;
2212
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2213
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2214
    int x,y, thres1, thres2;
2215
    START_TIMER
2216

    
2217
    assert(QROOT==8);
2218

    
2219
    if(s->qlog == LOSSLESS_QLOG) return;
2220
 
2221
    bias= bias ? 0 : (3*qmul)>>3;
2222
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2223
    thres2= 2*thres1;
2224
    
2225
    if(!bias){
2226
        for(y=0; y<h; y++){
2227
            for(x=0; x<w; x++){
2228
                int i= src[x + y*stride];
2229
                
2230
                if((unsigned)(i+thres1) > thres2){
2231
                    if(i>=0){
2232
                        i<<= QEXPSHIFT;
2233
                        i/= qmul; //FIXME optimize
2234
                        src[x + y*stride]=  i;
2235
                    }else{
2236
                        i= -i;
2237
                        i<<= QEXPSHIFT;
2238
                        i/= qmul; //FIXME optimize
2239
                        src[x + y*stride]= -i;
2240
                    }
2241
                }else
2242
                    src[x + y*stride]= 0;
2243
            }
2244
        }
2245
    }else{
2246
        for(y=0; y<h; y++){
2247
            for(x=0; x<w; x++){
2248
                int i= src[x + y*stride]; 
2249
                
2250
                if((unsigned)(i+thres1) > thres2){
2251
                    if(i>=0){
2252
                        i<<= QEXPSHIFT;
2253
                        i= (i + bias) / qmul; //FIXME optimize
2254
                        src[x + y*stride]=  i;
2255
                    }else{
2256
                        i= -i;
2257
                        i<<= QEXPSHIFT;
2258
                        i= (i + bias) / qmul; //FIXME optimize
2259
                        src[x + y*stride]= -i;
2260
                    }
2261
                }else
2262
                    src[x + y*stride]= 0;
2263
            }
2264
        }
2265
    }
2266
    if(level+1 == s->spatial_decomposition_count){
2267
//        STOP_TIMER("quantize")
2268
    }
2269
}
2270

    
2271
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2272
    const int w= b->width;
2273
    const int h= b->height;
2274
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2275
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2276
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2277
    int x,y;
2278
    START_TIMER
2279
    
2280
    if(s->qlog == LOSSLESS_QLOG) return;
2281
    
2282
    assert(QROOT==8);
2283

    
2284
    for(y=0; y<h; y++){
2285
        for(x=0; x<w; x++){
2286
            int i= src[x + y*stride];
2287
            if(i<0){
2288
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2289
            }else if(i>0){
2290
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2291
            }
2292
        }
2293
    }
2294
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2295
        STOP_TIMER("dquant")
2296
    }
2297
}
2298

    
2299
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2300
    const int w= b->width;
2301
    const int h= b->height;
2302
    int x,y;
2303
    
2304
    for(y=h-1; y>=0; y--){
2305
        for(x=w-1; x>=0; x--){
2306
            int i= x + y*stride;
2307
            
2308
            if(x){
2309
                if(use_median){
2310
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2311
                    else  src[i] -= src[i - 1];
2312
                }else{
2313
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2314
                    else  src[i] -= src[i - 1];
2315
                }
2316
            }else{
2317
                if(y) src[i] -= src[i - stride];
2318
            }
2319
        }
2320
    }
2321
}
2322

    
2323
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2324
    const int w= b->width;
2325
    const int h= b->height;
2326
    int x,y;
2327
    
2328
    for(y=0; y<h; y++){
2329
        for(x=0; x<w; x++){
2330
            int i= x + y*stride;
2331
            
2332
            if(x){
2333
                if(use_median){
2334
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2335
                    else  src[i] += src[i - 1];
2336
                }else{
2337
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2338
                    else  src[i] += src[i - 1];
2339
                }
2340
            }else{
2341
                if(y) src[i] += src[i - stride];
2342
            }
2343
        }
2344
    }
2345
}
2346

    
2347
static void encode_header(SnowContext *s){
2348
    int plane_index, level, orientation;
2349
    uint8_t kstate[32]={0};    
2350

    
2351
    put_cabac(&s->c, kstate, s->keyframe);
2352
    if(s->keyframe || s->always_reset)
2353
        reset_contexts(s);
2354
    if(s->keyframe){
2355
        put_symbol(&s->c, s->header_state, s->version, 0);
2356
        put_cabac(&s->c, s->header_state, s->always_reset);
2357
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2358
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2359
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2360
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
2361
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2362
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
2363
        put_cabac(&s->c, s->header_state, s->spatial_scalability);
2364
//        put_cabac(&s->c, s->header_state, s->rate_scalability);
2365

    
2366
        for(plane_index=0; plane_index<2; plane_index++){
2367
            for(level=0; level<s->spatial_decomposition_count; level++){
2368
                for(orientation=level ? 1:0; orientation<4; orientation++){
2369
                    if(orientation==2) continue;
2370
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2371
                }
2372
            }
2373
        }
2374
    }
2375
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2376
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
2377
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
2378
    put_symbol(&s->c, s->header_state, s->qbias, 1);
2379
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
2380
}
2381

    
2382
static int decode_header(SnowContext *s){
2383
    int plane_index, level, orientation;
2384
    uint8_t kstate[32]={0};    
2385

    
2386
    s->keyframe= get_cabac(&s->c, kstate);
2387
    if(s->keyframe || s->always_reset)
2388
        reset_contexts(s);
2389
    if(s->keyframe){
2390
        s->version= get_symbol(&s->c, s->header_state, 0);
2391
        if(s->version>0){
2392
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2393
            return -1;
2394
        }
2395
        s->always_reset= get_cabac(&s->c, s->header_state);
2396
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2397
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2398
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2399
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2400
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2401
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2402
        s->spatial_scalability= get_cabac(&s->c, s->header_state);
2403
//        s->rate_scalability= get_cabac(&s->c, s->header_state);
2404

    
2405
        for(plane_index=0; plane_index<3; plane_index++){
2406
            for(level=0; level<s->spatial_decomposition_count; level++){
2407
                for(orientation=level ? 1:0; orientation<4; orientation++){
2408
                    int q;
2409
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2410
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2411
                    else                    q= get_symbol(&s->c, s->header_state, 1);
2412
                    s->plane[plane_index].band[level][orientation].qlog= q;
2413
                }
2414
            }
2415
        }
2416
    }
2417
    
2418
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2419
    if(s->spatial_decomposition_type > 2){
2420
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2421
        return -1;
2422
    }
2423
    
2424
    s->qlog= get_symbol(&s->c, s->header_state, 1);
2425
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2426
    s->qbias= get_symbol(&s->c, s->header_state, 1);
2427
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
2428

    
2429
    return 0;
2430
}
2431

    
2432
static int common_init(AVCodecContext *avctx){
2433
    SnowContext *s = avctx->priv_data;
2434
    int width, height;
2435
    int level, orientation, plane_index, dec;
2436

    
2437
    s->avctx= avctx;
2438
        
2439
    dsputil_init(&s->dsp, avctx);
2440

    
2441
#define mcf(dx,dy)\
2442
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
2443
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2444
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
2445
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
2446
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
2447
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
2448

    
2449
    mcf( 0, 0)
2450
    mcf( 4, 0)
2451
    mcf( 8, 0)
2452
    mcf(12, 0)
2453
    mcf( 0, 4)
2454
    mcf( 4, 4)
2455
    mcf( 8, 4)
2456
    mcf(12, 4)
2457
    mcf( 0, 8)
2458
    mcf( 4, 8)
2459
    mcf( 8, 8)
2460
    mcf(12, 8)
2461
    mcf( 0,12)
2462
    mcf( 4,12)
2463
    mcf( 8,12)
2464
    mcf(12,12)
2465

    
2466
#define mcfh(dx,dy)\
2467
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
2468
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2469
        mc_block_hpel ## dx ## dy ## 16;\
2470
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
2471
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
2472
        mc_block_hpel ## dx ## dy ## 8;
2473

    
2474
    mcfh(0, 0)
2475
    mcfh(8, 0)
2476
    mcfh(0, 8)
2477
    mcfh(8, 8)
2478
        
2479
    dec= s->spatial_decomposition_count= 5;
2480
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2481
    
2482
    s->chroma_h_shift= 1; //FIXME XXX
2483
    s->chroma_v_shift= 1;
2484
    
2485
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2486
    
2487
    width= s->avctx->width;
2488
    height= s->avctx->height;
2489

    
2490
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
2491
    s->pred_buffer= av_mallocz(width*height*sizeof(DWTELEM));
2492
    
2493
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2494
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
2495
    
2496
    for(plane_index=0; plane_index<3; plane_index++){    
2497
        int w= s->avctx->width;
2498
        int h= s->avctx->height;
2499

    
2500
        if(plane_index){
2501
            w>>= s->chroma_h_shift;
2502
            h>>= s->chroma_v_shift;
2503
        }
2504
        s->plane[plane_index].width = w;
2505
        s->plane[plane_index].height= h;
2506
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
2507
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2508
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2509
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2510
                
2511
                b->buf= s->spatial_dwt_buffer;
2512
                b->level= level;
2513
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2514
                b->width = (w + !(orientation&1))>>1;
2515
                b->height= (h + !(orientation>1))>>1;
2516
                
2517
                if(orientation&1) b->buf += (w+1)>>1;
2518
                if(orientation>1) b->buf += b->stride>>1;
2519
                
2520
                if(level)
2521
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
2522
                b->x    = av_mallocz(((b->width+1) * b->height+1)*sizeof(int16_t));
2523
                b->coeff= av_mallocz(((b->width+1) * b->height+1)*sizeof(DWTELEM));
2524
            }
2525
            w= (w+1)>>1;
2526
            h= (h+1)>>1;
2527
        }
2528
    }
2529
    
2530
    reset_contexts(s);
2531
/*    
2532
    width= s->width= avctx->width;
2533
    height= s->height= avctx->height;
2534
    
2535
    assert(width && height);
2536
*/
2537
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2538
    
2539
    return 0;
2540
}
2541

    
2542

    
2543
static void calculate_vissual_weight(SnowContext *s, Plane *p){
2544
    int width = p->width;
2545
    int height= p->height;
2546
    int level, orientation, x, y;
2547

    
2548
    for(level=0; level<s->spatial_decomposition_count; level++){
2549
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2550
            SubBand *b= &p->band[level][orientation];
2551
            DWTELEM *buf= b->buf;
2552
            int64_t error=0;
2553
            
2554
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2555
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
2556
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
2557
            for(y=0; y<height; y++){
2558
                for(x=0; x<width; x++){
2559
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
2560
                    error += d*d;
2561
                }
2562
            }
2563

    
2564
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
2565
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
2566
        }
2567
    }
2568
}
2569

    
2570
static int encode_init(AVCodecContext *avctx)
2571
{
2572
    SnowContext *s = avctx->priv_data;
2573
    int plane_index;
2574

    
2575
    if(avctx->strict_std_compliance >= 0){
2576
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2577
               "use vstrict=-1 to use it anyway\n");
2578
        return -1;
2579
    }
2580
 
2581
    common_init(avctx);
2582
    alloc_blocks(s);
2583
 
2584
    s->version=0;
2585
    
2586
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2587
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2588
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2589
    h263_encode_init(&s->m); //mv_penalty
2590

    
2591
    for(plane_index=0; plane_index<3; plane_index++){
2592
        calculate_vissual_weight(s, &s->plane[plane_index]);
2593
    }
2594
    
2595
    
2596
    avctx->coded_frame= &s->current_picture;
2597
    switch(avctx->pix_fmt){
2598
//    case PIX_FMT_YUV444P:
2599
//    case PIX_FMT_YUV422P:
2600
    case PIX_FMT_YUV420P:
2601
    case PIX_FMT_GRAY8:
2602
//    case PIX_FMT_YUV411P:
2603
//    case PIX_FMT_YUV410P:
2604
        s->colorspace_type= 0;
2605
        break;
2606
/*    case PIX_FMT_RGBA32:
2607
        s->colorspace= 1;
2608
        break;*/
2609
    default:
2610
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2611
        return -1;
2612
    }
2613
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2614
    s->chroma_h_shift= 1;
2615
    s->chroma_v_shift= 1;
2616
    return 0;
2617
}
2618

    
2619
static int frame_start(SnowContext *s){
2620
   AVFrame tmp;
2621
   int w= s->avctx->width; //FIXME round up to x16 ?
2622
   int h= s->avctx->height;
2623

    
2624
    if(s->current_picture.data[0]){
2625
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
2626
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2627
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2628
    }
2629

    
2630
    tmp= s->last_picture;
2631
    s->last_picture= s->current_picture;
2632
    s->current_picture= tmp;
2633
    
2634
    s->current_picture.reference= 1;
2635
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2636
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2637
        return -1;
2638
    }
2639
    
2640
    return 0;
2641
}
2642

    
2643
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2644
    SnowContext *s = avctx->priv_data;
2645
    CABACContext * const c= &s->c;
2646
    AVFrame *pict = data;
2647
    const int width= s->avctx->width;
2648
    const int height= s->avctx->height;
2649
    int level, orientation, plane_index;
2650

    
2651
    ff_init_cabac_encoder(c, buf, buf_size);
2652
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2653
    
2654
    s->input_picture = *pict;
2655

    
2656
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2657
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2658
    
2659
    if(pict->quality){
2660
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2661
        //<64 >60
2662
        s->qlog += 61;
2663
    }else{
2664
        s->qlog= LOSSLESS_QLOG;
2665
    }
2666

    
2667
    frame_start(s);
2668
    s->current_picture.key_frame= s->keyframe;
2669

    
2670
    if(pict->pict_type == P_TYPE){
2671
        int block_width = (width +15)>>4;
2672
        int block_height= (height+15)>>4;
2673
        int stride= s->current_picture.linesize[0];
2674
        
2675
        assert(s->current_picture.data[0]);
2676
        assert(s->last_picture.data[0]);
2677
     
2678
        s->m.avctx= s->avctx;
2679
        s->m.current_picture.data[0]= s->current_picture.data[0];
2680
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
2681
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
2682
        s->m.current_picture_ptr= &s->m.current_picture;
2683
        s->m.   last_picture_ptr= &s->m.   last_picture;
2684
        s->m.linesize=
2685
        s->m.   last_picture.linesize[0]=
2686
        s->m.    new_picture.linesize[0]=
2687
        s->m.current_picture.linesize[0]= stride;
2688
        s->m.uvlinesize= s->current_picture.linesize[1];
2689
        s->m.width = width;
2690
        s->m.height= height;
2691
        s->m.mb_width = block_width;
2692
        s->m.mb_height= block_height;
2693
        s->m.mb_stride=   s->m.mb_width+1;
2694
        s->m.b8_stride= 2*s->m.mb_width+1;
2695
        s->m.f_code=1;
2696
        s->m.pict_type= pict->pict_type;
2697
        s->m.me_method= s->avctx->me_method;
2698
        s->m.me.scene_change_score=0;
2699
        s->m.flags= s->avctx->flags;
2700
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2701
        s->m.out_format= FMT_H263;
2702
        s->m.unrestricted_mv= 1;
2703

    
2704
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
2705
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
2706
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
2707

    
2708
        s->m.dsp= s->dsp; //move
2709
        ff_init_me(&s->m);
2710
    }
2711
    
2712
redo_frame:
2713
        
2714
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
2715

    
2716
    encode_header(s);
2717
    encode_blocks(s);
2718
      
2719
    for(plane_index=0; plane_index<3; plane_index++){
2720
        Plane *p= &s->plane[plane_index];
2721
        int w= p->width;
2722
        int h= p->height;
2723
        int x, y;
2724
//        int bits= put_bits_count(&s->c.pb);
2725

    
2726
        //FIXME optimize
2727
     if(pict->data[plane_index]) //FIXME gray hack
2728
        for(y=0; y<h; y++){
2729
            for(x=0; x<w; x++){
2730
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
2731
            }
2732
        }
2733
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
2734
        
2735
        if(   plane_index==0 
2736
           && pict->pict_type == P_TYPE 
2737
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
2738
            ff_init_cabac_encoder(c, buf, buf_size);
2739
            ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2740
            pict->pict_type= FF_I_TYPE;
2741
            s->keyframe=1;
2742
            reset_contexts(s);
2743
            goto redo_frame;
2744
        }
2745
        
2746
        if(s->qlog == LOSSLESS_QLOG){
2747
            for(y=0; y<h; y++){
2748
                for(x=0; x<w; x++){
2749
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + 127)>>8;
2750
                }
2751
            }
2752
        }
2753
 
2754
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2755

    
2756
        for(level=0; level<s->spatial_decomposition_count; level++){
2757
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2758
                SubBand *b= &p->band[level][orientation];
2759
                
2760
                quantize(s, b, b->buf, b->stride, s->qbias);
2761
                if(orientation==0)
2762
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
2763
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2764
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
2765
                if(orientation==0)
2766
                    correlate(s, b, b->buf, b->stride, 1, 0);
2767
            }
2768
        }
2769
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
2770

    
2771
        for(level=0; level<s->spatial_decomposition_count; level++){
2772
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2773
                SubBand *b= &p->band[level][orientation];
2774

    
2775
                dequantize(s, b, b->buf, b->stride);
2776
            }
2777
        }
2778

    
2779
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2780
        if(s->qlog == LOSSLESS_QLOG){
2781
            for(y=0; y<h; y++){
2782
                for(x=0; x<w; x++){
2783
                    s->spatial_dwt_buffer[y*w + x]<<=8;
2784
                }
2785
            }
2786
        }
2787
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2788
        //FIXME optimize
2789
        for(y=0; y<h; y++){
2790
            for(x=0; x<w; x++){
2791
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2792
                if(v&(~255)) v= ~(v>>31);
2793
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2794
            }
2795
        }
2796
        if(s->avctx->flags&CODEC_FLAG_PSNR){
2797
            int64_t error= 0;
2798
            
2799
    if(pict->data[plane_index]) //FIXME gray hack
2800
            for(y=0; y<h; y++){
2801
                for(x=0; x<w; x++){
2802
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
2803
                    error += d*d;
2804
                }
2805
            }
2806
            s->avctx->error[plane_index] += error;
2807
            s->current_picture.error[plane_index] = error;
2808
        }
2809
    }
2810

    
2811
    if(s->last_picture.data[0])
2812
        avctx->release_buffer(avctx, &s->last_picture);
2813

    
2814
    emms_c();
2815
    
2816
    return put_cabac_terminate(c, 1);
2817
}
2818

    
2819
static void common_end(SnowContext *s){
2820
    int plane_index, level, orientation;
2821

    
2822
    av_freep(&s->spatial_dwt_buffer);
2823

    
2824
    av_freep(&s->m.me.scratchpad);    
2825
    av_freep(&s->m.me.map);
2826
    av_freep(&s->m.me.score_map);
2827
 
2828
    av_freep(&s->block);
2829

    
2830
    for(plane_index=0; plane_index<3; plane_index++){    
2831
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2832
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2833
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2834
                
2835
                av_freep(&b->x);
2836
                av_freep(&b->coeff);
2837
            }
2838
        }
2839
    }
2840
}
2841

    
2842
static int encode_end(AVCodecContext *avctx)
2843
{
2844
    SnowContext *s = avctx->priv_data;
2845

    
2846
    common_end(s);
2847

    
2848
    return 0;
2849
}
2850

    
2851
static int decode_init(AVCodecContext *avctx)
2852
{
2853
//    SnowContext *s = avctx->priv_data;
2854

    
2855
    common_init(avctx);
2856
    
2857
    return 0;
2858
}
2859

    
2860
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
2861
    SnowContext *s = avctx->priv_data;
2862
    CABACContext * const c= &s->c;
2863
    int bytes_read;
2864
    AVFrame *picture = data;
2865
    int level, orientation, plane_index;
2866
    
2867

    
2868
    /* no supplementary picture */
2869
    if (buf_size == 0)
2870
        return 0;
2871

    
2872
    ff_init_cabac_decoder(c, buf, buf_size);
2873
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2874

    
2875
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2876
    decode_header(s);
2877
    if(!s->block) alloc_blocks(s);
2878

    
2879
    frame_start(s);
2880
    //keyframe flag dupliaction mess FIXME
2881
    if(avctx->debug&FF_DEBUG_PICT_INFO)
2882
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2883
    
2884
    decode_blocks(s);
2885

    
2886
    for(plane_index=0; plane_index<3; plane_index++){
2887
        Plane *p= &s->plane[plane_index];
2888
        int w= p->width;
2889
        int h= p->height;
2890
        int x, y;
2891
        
2892
if(s->avctx->debug&2048){
2893
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2894
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2895

    
2896
        for(y=0; y<h; y++){
2897
            for(x=0; x<w; x++){
2898
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2899
                if(v&(~255)) v= ~(v>>31);
2900
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2901
            }
2902
        }
2903
}
2904
        for(level=0; level<s->spatial_decomposition_count; level++){
2905
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2906
                SubBand *b= &p->band[level][orientation];
2907

    
2908
                decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2909
                if(orientation==0){
2910
                    correlate(s, b, b->buf, b->stride, 1, 0);
2911
                    dequantize(s, b, b->buf, b->stride);
2912
                    assert(b->buf == s->spatial_dwt_buffer);
2913
                }
2914
            }
2915
        }
2916

    
2917
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2918
        if(s->qlog == LOSSLESS_QLOG){
2919
            for(y=0; y<h; y++){
2920
                for(x=0; x<w; x++){
2921
                    s->spatial_dwt_buffer[y*w + x]<<=8;
2922
                }
2923
            }
2924
        }
2925
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2926

    
2927
        //FIXME optimize
2928
        for(y=0; y<h; y++){
2929
            for(x=0; x<w; x++){
2930
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2931
                if(v&(~255)) v= ~(v>>31);
2932
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2933
            }
2934
        }
2935
    }
2936
            
2937
    emms_c();
2938

    
2939
    if(s->last_picture.data[0])
2940
        avctx->release_buffer(avctx, &s->last_picture);
2941

    
2942
if(!(s->avctx->debug&2048))        
2943
    *picture= s->current_picture;
2944
else
2945
    *picture= s->mconly_picture;
2946
    
2947
    *data_size = sizeof(AVFrame);
2948
    
2949
    bytes_read= get_cabac_terminate(c);
2950
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
2951

    
2952
    return bytes_read;
2953
}
2954

    
2955
static int decode_end(AVCodecContext *avctx)
2956
{
2957
    SnowContext *s = avctx->priv_data;
2958

    
2959
    common_end(s);
2960

    
2961
    return 0;
2962
}
2963

    
2964
AVCodec snow_decoder = {
2965
    "snow",
2966
    CODEC_TYPE_VIDEO,
2967
    CODEC_ID_SNOW,
2968
    sizeof(SnowContext),
2969
    decode_init,
2970
    NULL,
2971
    decode_end,
2972
    decode_frame,
2973
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2974
    NULL
2975
};
2976

    
2977
AVCodec snow_encoder = {
2978
    "snow",
2979
    CODEC_TYPE_VIDEO,
2980
    CODEC_ID_SNOW,
2981
    sizeof(SnowContext),
2982
    encode_init,
2983
    encode_frame,
2984
    encode_end,
2985
};
2986

    
2987

    
2988
#if 0
2989
#undef malloc
2990
#undef free
2991
#undef printf
2992

2993
int main(){
2994
    int width=256;
2995
    int height=256;
2996
    int buffer[2][width*height];
2997
    SnowContext s;
2998
    int i;
2999
    s.spatial_decomposition_count=6;
3000
    s.spatial_decomposition_type=1;
3001
    
3002
    printf("testing 5/3 DWT\n");
3003
    for(i=0; i<width*height; i++)
3004
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3005
    
3006
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3007
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3008
    
3009
    for(i=0; i<width*height; i++)
3010
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3011

3012
    printf("testing 9/7 DWT\n");
3013
    s.spatial_decomposition_type=0;
3014
    for(i=0; i<width*height; i++)
3015
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3016
    
3017
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3018
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3019
    
3020
    for(i=0; i<width*height; i++)
3021
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3022
        
3023
    printf("testing AC coder\n");
3024
    memset(s.header_state, 0, sizeof(s.header_state));
3025
    ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
3026
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3027
        
3028
    for(i=-256; i<256; i++){
3029
START_TIMER
3030
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3031
STOP_TIMER("put_symbol")
3032
    }
3033
    put_cabac_terminate(&s.c, 1);
3034

3035
    memset(s.header_state, 0, sizeof(s.header_state));
3036
    ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
3037
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3038
    
3039
    for(i=-256; i<256; i++){
3040
        int j;
3041
START_TIMER
3042
        j= get_symbol(&s.c, s.header_state, 1);
3043
STOP_TIMER("get_symbol")
3044
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3045
    }
3046
{
3047
int level, orientation, x, y;
3048
int64_t errors[8][4];
3049
int64_t g=0;
3050

3051
    memset(errors, 0, sizeof(errors));
3052
    s.spatial_decomposition_count=3;
3053
    s.spatial_decomposition_type=0;
3054
    for(level=0; level<s.spatial_decomposition_count; level++){
3055
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3056
            int w= width  >> (s.spatial_decomposition_count-level);
3057
            int h= height >> (s.spatial_decomposition_count-level);
3058
            int stride= width  << (s.spatial_decomposition_count-level);
3059
            DWTELEM *buf= buffer[0];
3060
            int64_t error=0;
3061

3062
            if(orientation&1) buf+=w;
3063
            if(orientation>1) buf+=stride>>1;
3064
            
3065
            memset(buffer[0], 0, sizeof(int)*width*height);
3066
            buf[w/2 + h/2*stride]= 256*256;
3067
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3068
            for(y=0; y<height; y++){
3069
                for(x=0; x<width; x++){
3070
                    int64_t d= buffer[0][x + y*width];
3071
                    error += d*d;
3072
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3073
                }
3074
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3075
            }
3076
            error= (int)(sqrt(error)+0.5);
3077
            errors[level][orientation]= error;
3078
            if(g) g=ff_gcd(g, error);
3079
            else g= error;
3080
        }
3081
    }
3082
    printf("static int const visual_weight[][4]={\n");
3083
    for(level=0; level<s.spatial_decomposition_count; level++){
3084
        printf("  {");
3085
        for(orientation=0; orientation<4; orientation++){
3086
            printf("%8lld,", errors[level][orientation]/g);
3087
        }
3088
        printf("},\n");
3089
    }
3090
    printf("};\n");
3091
    {
3092
            int level=2;
3093
            int orientation=3;
3094
            int w= width  >> (s.spatial_decomposition_count-level);
3095
            int h= height >> (s.spatial_decomposition_count-level);
3096
            int stride= width  << (s.spatial_decomposition_count-level);
3097
            DWTELEM *buf= buffer[0];
3098
            int64_t error=0;
3099

3100
            buf+=w;
3101
            buf+=stride>>1;
3102
            
3103
            memset(buffer[0], 0, sizeof(int)*width*height);
3104
#if 1
3105
            for(y=0; y<height; y++){
3106
                for(x=0; x<width; x++){
3107
                    int tab[4]={0,2,3,1};
3108
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3109
                }
3110
            }
3111
            ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3112
#else
3113
            for(y=0; y<h; y++){
3114
                for(x=0; x<w; x++){
3115
                    buf[x + y*stride  ]=169;
3116
                    buf[x + y*stride-w]=64;
3117
                }
3118
            }
3119
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3120
#endif
3121
            for(y=0; y<height; y++){
3122
                for(x=0; x<width; x++){
3123
                    int64_t d= buffer[0][x + y*width];
3124
                    error += d*d;
3125
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3126
                }
3127
                if(ABS(height/2-y)<9) printf("\n");
3128
            }
3129
    }
3130

    
3131
}
3132
    return 0;
3133
}
3134
#endif
3135