Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 19aa028d

History | View | Annotate | Download (101 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22
#include "cabac.h"
23

    
24
#include "mpegvideo.h"
25

    
26
#undef NDEBUG
27
#include <assert.h>
28

    
29
#define MAX_DECOMPOSITIONS 8
30
#define MAX_PLANES 4
31
#define DWTELEM int
32
#define QROOT 8 
33
#define LOSSLESS_QLOG -128
34

    
35
static const int8_t quant3[256]={
36
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
52
};
53
static const int8_t quant3b[256]={
54
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
};
71
static const int8_t quant5[256]={
72
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
88
};
89
static const int8_t quant7[256]={
90
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
93
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
106
};
107
static const int8_t quant9[256]={
108
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
124
};
125
static const int8_t quant11[256]={
126
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
142
};
143
static const int8_t quant13[256]={
144
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
145
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
160
};
161

    
162
#define OBMC_MAX 64
163
#if 0 //64*cubic
164
static const uint8_t obmc32[1024]={
165
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
167
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
168
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
169
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
170
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
171
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
172
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
173
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
174
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
175
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
176
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
177
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
178
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
179
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
180
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
182
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
183
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
184
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
185
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
186
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
187
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
190
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
191
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
194
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
195
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
196
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197
//error:0.000022
198
};
199
static const uint8_t obmc16[256]={
200
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
201
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
202
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
203
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
204
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
205
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
206
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
207
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
209
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
210
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
211
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
212
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
213
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
214
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216
//error:0.000033
217
};
218
#elif 1 // 64*linear
219
static const uint8_t obmc32[1024]={
220
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
221
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
222
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
223
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
224
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
225
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
226
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
227
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
228
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
229
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
230
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
231
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
232
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
233
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
234
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
235
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
237
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
238
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
239
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
240
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
241
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
242
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
243
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
244
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
245
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
246
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
249
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
250
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
251
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
252
 //error:0.000020
253
};
254
static const uint8_t obmc16[256]={
255
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
256
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
257
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
258
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
259
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
260
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
261
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
262
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
264
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
265
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
266
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
267
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
268
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
269
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
270
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
271
//error:0.000015
272
};
273
#else //64*cos
274
static const uint8_t obmc32[1024]={
275
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
277
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
278
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
279
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
280
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
281
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
282
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
283
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
284
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
285
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
286
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
287
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
288
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
289
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
290
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
292
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
293
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
294
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
295
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
296
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
297
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
300
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
301
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
304
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
305
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
306
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307
//error:0.000022
308
};
309
static const uint8_t obmc16[256]={
310
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
311
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
312
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
313
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
314
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
315
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
316
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
317
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
319
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
320
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
321
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
322
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
323
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
324
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326
//error:0.000022
327
};
328
#endif
329

    
330
//linear *64
331
static const uint8_t obmc8[64]={
332
 1, 3, 5, 7, 7, 5, 3, 1,
333
 3, 9,15,21,21,15, 9, 3,
334
 5,15,25,35,35,25,15, 5,
335
 7,21,35,49,49,35,21, 7,
336
 7,21,35,49,49,35,21, 7,
337
 5,15,25,35,35,25,15, 5,
338
 3, 9,15,21,21,15, 9, 3,
339
 1, 3, 5, 7, 7, 5, 3, 1,
340
//error:0.000000
341
};
342

    
343
//linear *64
344
static const uint8_t obmc4[16]={
345
 4,12,12, 4,
346
12,36,36,12,
347
12,36,36,12,
348
 4,12,12, 4,
349
//error:0.000000
350
};
351

    
352
static const uint8_t *obmc_tab[4]={
353
    obmc32, obmc16, obmc8, obmc4
354
};
355

    
356
typedef struct BlockNode{
357
    int16_t mx;
358
    int16_t my;
359
    uint8_t color[3];
360
    uint8_t type;
361
//#define TYPE_SPLIT    1
362
#define BLOCK_INTRA   1
363
//#define TYPE_NOCOLOR  4
364
    uint8_t level; //FIXME merge into type?
365
}BlockNode;
366

    
367
#define LOG2_MB_SIZE 4
368
#define MB_SIZE (1<<LOG2_MB_SIZE)
369

    
370
typedef struct SubBand{
371
    int level;
372
    int stride;
373
    int width;
374
    int height;
375
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
376
    DWTELEM *buf;
377
    int16_t *x;
378
    DWTELEM *coeff;
379
    struct SubBand *parent;
380
    uint8_t state[/*7*2*/ 7 + 512][32];
381
}SubBand;
382

    
383
typedef struct Plane{
384
    int width;
385
    int height;
386
    SubBand band[MAX_DECOMPOSITIONS][4];
387
}Plane;
388

    
389
typedef struct SnowContext{
390
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
391

    
392
    AVCodecContext *avctx;
393
    CABACContext c;
394
    DSPContext dsp;
395
    AVFrame input_picture;
396
    AVFrame current_picture;
397
    AVFrame last_picture;
398
    AVFrame mconly_picture;
399
//     uint8_t q_context[16];
400
    uint8_t header_state[32];
401
    uint8_t block_state[128 + 32*128];
402
    int keyframe;
403
    int always_reset;
404
    int version;
405
    int spatial_decomposition_type;
406
    int temporal_decomposition_type;
407
    int spatial_decomposition_count;
408
    int temporal_decomposition_count;
409
    DWTELEM *spatial_dwt_buffer;
410
    DWTELEM *pred_buffer;
411
    int colorspace_type;
412
    int chroma_h_shift;
413
    int chroma_v_shift;
414
    int spatial_scalability;
415
    int qlog;
416
    int lambda;
417
    int lambda2;
418
    int mv_scale;
419
    int qbias;
420
#define QBIAS_SHIFT 3
421
    int b_width;
422
    int b_height;
423
    int block_max_depth;
424
    Plane plane[MAX_PLANES];
425
    BlockNode *block;
426

    
427
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
428
}SnowContext;
429

    
430
#define QEXPSHIFT 7 //FIXME try to change this to 0
431
static const uint8_t qexp[8]={
432
    128, 140, 152, 166, 181, 197, 215, 235
433
//   64,  70,  76,  83,  91,  99, 108, 117
434
//   32,  35,  38,  41,  45,  49,  54,  59
435
//   16,  17,  19,  21,  23,  25,  27,  29
436
//    8,   9,  10,  10,  11,  12,  13,  15
437
};
438

    
439
static inline int mirror(int v, int m){
440
    if     (v<0) return -v;
441
    else if(v>m) return 2*m-v;
442
    else         return v;
443
}
444

    
445
static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
446
    int i;
447

    
448
    if(v){
449
        const int a= ABS(v);
450
        const int e= av_log2(a);
451
#if 1
452
        const int el= FFMIN(e, 10);   
453
        put_cabac(c, state+0, 0);
454

    
455
        for(i=0; i<el; i++){
456
            put_cabac(c, state+1+i, 1);  //1..10
457
        }
458
        for(; i<e; i++){
459
            put_cabac(c, state+1+9, 1);  //1..10
460
        }
461
        put_cabac(c, state+1+FFMIN(i,9), 0);
462

    
463
        for(i=e-1; i>=el; i--){
464
            put_cabac(c, state+22+9, (a>>i)&1); //22..31
465
        }
466
        for(; i>=0; i--){
467
            put_cabac(c, state+22+i, (a>>i)&1); //22..31
468
        }
469

    
470
        if(is_signed)
471
            put_cabac(c, state+11 + el, v < 0); //11..21
472
#else
473
        
474
        put_cabac(c, state+0, 0);
475
        if(e<=9){
476
            for(i=0; i<e; i++){
477
                put_cabac(c, state+1+i, 1);  //1..10
478
            }
479
            put_cabac(c, state+1+i, 0);
480

    
481
            for(i=e-1; i>=0; i--){
482
                put_cabac(c, state+22+i, (a>>i)&1); //22..31
483
            }
484

    
485
            if(is_signed)
486
                put_cabac(c, state+11 + e, v < 0); //11..21
487
        }else{
488
            for(i=0; i<e; i++){
489
                put_cabac(c, state+1+FFMIN(i,9), 1);  //1..10
490
            }
491
            put_cabac(c, state+1+FFMIN(i,9), 0);
492

    
493
            for(i=e-1; i>=0; i--){
494
                put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
495
            }
496

    
497
            if(is_signed)
498
                put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
499
        }
500
#endif
501
    }else{
502
        put_cabac(c, state+0, 1);
503
    }
504
}
505

    
506
static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
507
    if(get_cabac(c, state+0))
508
        return 0;
509
    else{
510
        int i, e, a, el;
511
 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
512
        for(e=0; e<10; e++){ 
513
            if(get_cabac(c, state + 1 + e)==0) // 1..10
514
                break;
515
        }
516
        el= e;
517
 
518
        if(e==10){
519
            while(get_cabac(c, state + 1 + 9)) //10
520
                e++;
521
        }
522
        a= 1;
523
        for(i=e-1; i>=el; i--){
524
            a += a + get_cabac(c, state+22+9); //31
525
        }
526
        for(; i>=0; i--){
527
            a += a + get_cabac(c, state+22+i); //22..31
528
        }
529

    
530
        if(is_signed && get_cabac(c, state+11 + el)) //11..21
531
            return -a;
532
        else
533
            return a;
534
    }
535
}
536

    
537
static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
538
    int i;
539
    int r= log2>=0 ? 1<<log2 : 1;
540

    
541
    assert(v>=0);
542
    assert(log2>=-4);
543

    
544
    while(v >= r){
545
        put_cabac(c, state+4+log2, 1);
546
        v -= r;
547
        log2++;
548
        if(log2>0) r+=r;
549
    }
550
    put_cabac(c, state+4+log2, 0);
551
    
552
    for(i=log2-1; i>=0; i--){
553
        put_cabac(c, state+31-i, (v>>i)&1);
554
    }
555
}
556

    
557
static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
558
    int i;
559
    int r= log2>=0 ? 1<<log2 : 1;
560
    int v=0;
561

    
562
    assert(log2>=-4);
563

    
564
    while(get_cabac(c, state+4+log2)){
565
        v+= r;
566
        log2++;
567
        if(log2>0) r+=r;
568
    }
569
    
570
    for(i=log2-1; i>=0; i--){
571
        v+= get_cabac(c, state+31-i)<<i;
572
    }
573

    
574
    return v;
575
}
576

    
577
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
578
    const int mirror_left= !highpass;
579
    const int mirror_right= (width&1) ^ highpass;
580
    const int w= (width>>1) - 1 + (highpass & width);
581
    int i;
582

    
583
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
584
    if(mirror_left){
585
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
586
        dst += dst_step;
587
        src += src_step;
588
    }
589
    
590
    for(i=0; i<w; i++){
591
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
592
    }
593
    
594
    if(mirror_right){
595
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
596
    }
597
}
598

    
599
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
600
    const int mirror_left= !highpass;
601
    const int mirror_right= (width&1) ^ highpass;
602
    const int w= (width>>1) - 1 + (highpass & width);
603
    int i;
604

    
605
    if(mirror_left){
606
        int r= 3*2*ref[0];
607
        r += r>>4;
608
        r += r>>8;
609
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
610
        dst += dst_step;
611
        src += src_step;
612
    }
613
    
614
    for(i=0; i<w; i++){
615
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
616
        r += r>>4;
617
        r += r>>8;
618
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
619
    }
620
    
621
    if(mirror_right){
622
        int r= 3*2*ref[w*ref_step];
623
        r += r>>4;
624
        r += r>>8;
625
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
626
    }
627
}
628

    
629

    
630
static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
631
    int x, i;
632
    
633
    for(x=start; x<width; x+=2){
634
        int64_t sum=0;
635

    
636
        for(i=0; i<n; i++){
637
            int x2= x + 2*i - n + 1;
638
            if     (x2<     0) x2= -x2;
639
            else if(x2>=width) x2= 2*width-x2-2;
640
            sum += coeffs[i]*(int64_t)dst[x2];
641
        }
642
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
643
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
644
    }
645
}
646

    
647
static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
648
    int x, y, i;
649
    for(y=start; y<height; y+=2){
650
        for(x=0; x<width; x++){
651
            int64_t sum=0;
652
    
653
            for(i=0; i<n; i++){
654
                int y2= y + 2*i - n + 1;
655
                if     (y2<      0) y2= -y2;
656
                else if(y2>=height) y2= 2*height-y2-2;
657
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
658
            }
659
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
660
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
661
        }
662
    }
663
}
664

    
665
#define SCALEX 1
666
#define LX0 0
667
#define LX1 1
668

    
669
#if 0 // more accurate 9/7
670
#define N1 2
671
#define SHIFT1 14
672
#define COEFFS1 (int[]){-25987,-25987}
673
#define N2 2
674
#define SHIFT2 19
675
#define COEFFS2 (int[]){-27777,-27777}
676
#define N3 2
677
#define SHIFT3 15
678
#define COEFFS3 (int[]){28931,28931}
679
#define N4 2
680
#define SHIFT4 15
681
#define COEFFS4 (int[]){14533,14533}
682
#elif 1 // 13/7 CRF
683
#define N1 4
684
#define SHIFT1 4
685
#define COEFFS1 (int[]){1,-9,-9,1}
686
#define N2 4
687
#define SHIFT2 4
688
#define COEFFS2 (int[]){-1,5,5,-1}
689
#define N3 0
690
#define SHIFT3 1
691
#define COEFFS3 NULL
692
#define N4 0
693
#define SHIFT4 1
694
#define COEFFS4 NULL
695
#elif 1 // 3/5
696
#define LX0 1
697
#define LX1 0
698
#define SCALEX 0.5
699
#define N1 2
700
#define SHIFT1 1
701
#define COEFFS1 (int[]){1,1}
702
#define N2 2
703
#define SHIFT2 2
704
#define COEFFS2 (int[]){-1,-1}
705
#define N3 0
706
#define SHIFT3 0
707
#define COEFFS3 NULL
708
#define N4 0
709
#define SHIFT4 0
710
#define COEFFS4 NULL
711
#elif 1 // 11/5 
712
#define N1 0
713
#define SHIFT1 1
714
#define COEFFS1 NULL
715
#define N2 2
716
#define SHIFT2 2
717
#define COEFFS2 (int[]){-1,-1}
718
#define N3 2
719
#define SHIFT3 0
720
#define COEFFS3 (int[]){-1,-1}
721
#define N4 4
722
#define SHIFT4 7
723
#define COEFFS4 (int[]){-5,29,29,-5}
724
#define SCALEX 4
725
#elif 1 // 9/7 CDF
726
#define N1 2
727
#define SHIFT1 7
728
#define COEFFS1 (int[]){-203,-203}
729
#define N2 2
730
#define SHIFT2 12
731
#define COEFFS2 (int[]){-217,-217}
732
#define N3 2
733
#define SHIFT3 7
734
#define COEFFS3 (int[]){113,113}
735
#define N4 2
736
#define SHIFT4 9
737
#define COEFFS4 (int[]){227,227}
738
#define SCALEX 1
739
#elif 1 // 7/5 CDF
740
#define N1 0
741
#define SHIFT1 1
742
#define COEFFS1 NULL
743
#define N2 2
744
#define SHIFT2 2
745
#define COEFFS2 (int[]){-1,-1}
746
#define N3 2
747
#define SHIFT3 0
748
#define COEFFS3 (int[]){-1,-1}
749
#define N4 2
750
#define SHIFT4 4
751
#define COEFFS4 (int[]){3,3}
752
#elif 1 // 9/7 MN
753
#define N1 4
754
#define SHIFT1 4
755
#define COEFFS1 (int[]){1,-9,-9,1}
756
#define N2 2
757
#define SHIFT2 2
758
#define COEFFS2 (int[]){1,1}
759
#define N3 0
760
#define SHIFT3 1
761
#define COEFFS3 NULL
762
#define N4 0
763
#define SHIFT4 1
764
#define COEFFS4 NULL
765
#else // 13/7 CRF
766
#define N1 4
767
#define SHIFT1 4
768
#define COEFFS1 (int[]){1,-9,-9,1}
769
#define N2 4
770
#define SHIFT2 4
771
#define COEFFS2 (int[]){-1,5,5,-1}
772
#define N3 0
773
#define SHIFT3 1
774
#define COEFFS3 NULL
775
#define N4 0
776
#define SHIFT4 1
777
#define COEFFS4 NULL
778
#endif
779
static void horizontal_decomposeX(int *b, int width){
780
    int temp[width];
781
    const int width2= width>>1;
782
    const int w2= (width+1)>>1;
783
    int A1,A2,A3,A4, x;
784

    
785
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
786
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
787
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
788
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
789
    
790
    for(x=0; x<width2; x++){
791
        temp[x   ]= b[2*x    ];
792
        temp[x+w2]= b[2*x + 1];
793
    }
794
    if(width&1)
795
        temp[x   ]= b[2*x    ];
796
    memcpy(b, temp, width*sizeof(int));
797
}
798

    
799
static void horizontal_composeX(int *b, int width){
800
    int temp[width];
801
    const int width2= width>>1;
802
    int A1,A2,A3,A4, x;
803
    const int w2= (width+1)>>1;
804

    
805
    memcpy(temp, b, width*sizeof(int));
806
    for(x=0; x<width2; x++){
807
        b[2*x    ]= temp[x   ];
808
        b[2*x + 1]= temp[x+w2];
809
    }
810
    if(width&1)
811
        b[2*x    ]= temp[x   ];
812

    
813
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
814
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
815
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
816
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
817
}
818

    
819
static void spatial_decomposeX(int *buffer, int width, int height, int stride){
820
    int x, y;
821
  
822
    for(y=0; y<height; y++){
823
        for(x=0; x<width; x++){
824
            buffer[y*stride + x] *= SCALEX;
825
        }
826
    }
827

    
828
    for(y=0; y<height; y++){
829
        horizontal_decomposeX(buffer + y*stride, width);
830
    }
831
    
832
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
833
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
834
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
835
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
836
}
837

    
838
static void spatial_composeX(int *buffer, int width, int height, int stride){
839
    int x, y;
840
  
841
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
842
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
843
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
844
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
845

    
846
    for(y=0; y<height; y++){
847
        horizontal_composeX(buffer + y*stride, width);
848
    }
849

    
850
    for(y=0; y<height; y++){
851
        for(x=0; x<width; x++){
852
            buffer[y*stride + x] /= SCALEX;
853
        }
854
    }
855
}
856

    
857
static void horizontal_decompose53i(int *b, int width){
858
    int temp[width];
859
    const int width2= width>>1;
860
    int A1,A2,A3,A4, x;
861
    const int w2= (width+1)>>1;
862

    
863
    for(x=0; x<width2; x++){
864
        temp[x   ]= b[2*x    ];
865
        temp[x+w2]= b[2*x + 1];
866
    }
867
    if(width&1)
868
        temp[x   ]= b[2*x    ];
869
#if 0
870
    A2= temp[1       ];
871
    A4= temp[0       ];
872
    A1= temp[0+width2];
873
    A1 -= (A2 + A4)>>1;
874
    A4 += (A1 + 1)>>1;
875
    b[0+width2] = A1;
876
    b[0       ] = A4;
877
    for(x=1; x+1<width2; x+=2){
878
        A3= temp[x+width2];
879
        A4= temp[x+1     ];
880
        A3 -= (A2 + A4)>>1;
881
        A2 += (A1 + A3 + 2)>>2;
882
        b[x+width2] = A3;
883
        b[x       ] = A2;
884

885
        A1= temp[x+1+width2];
886
        A2= temp[x+2       ];
887
        A1 -= (A2 + A4)>>1;
888
        A4 += (A1 + A3 + 2)>>2;
889
        b[x+1+width2] = A1;
890
        b[x+1       ] = A4;
891
    }
892
    A3= temp[width-1];
893
    A3 -= A2;
894
    A2 += (A1 + A3 + 2)>>2;
895
    b[width -1] = A3;
896
    b[width2-1] = A2;
897
#else        
898
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
899
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
900
#endif
901
}
902

    
903
static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
904
    int i;
905
    
906
    for(i=0; i<width; i++){
907
        b1[i] -= (b0[i] + b2[i])>>1;
908
    }
909
}
910

    
911
static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
912
    int i;
913
    
914
    for(i=0; i<width; i++){
915
        b1[i] += (b0[i] + b2[i] + 2)>>2;
916
    }
917
}
918

    
919
static void spatial_decompose53i(int *buffer, int width, int height, int stride){
920
    int y;
921
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
922
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
923
  
924
    for(y=-2; y<height; y+=2){
925
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
926
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
927

    
928
{START_TIMER
929
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
930
        if(y+2 < height) horizontal_decompose53i(b3, width);
931
STOP_TIMER("horizontal_decompose53i")}
932
        
933
{START_TIMER
934
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
935
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
936
STOP_TIMER("vertical_decompose53i*")}
937
        
938
        b0=b2;
939
        b1=b3;
940
    }
941
}
942

    
943
#define lift5 lift
944
#if 1
945
#define W_AM 3
946
#define W_AO 0
947
#define W_AS 1
948

    
949
#define W_BM 1
950
#define W_BO 8
951
#define W_BS 4
952

    
953
#undef lift5
954
#define W_CM 9999
955
#define W_CO 2
956
#define W_CS 2
957

    
958
#define W_DM 15
959
#define W_DO 16
960
#define W_DS 5
961
#elif 0
962
#define W_AM 55
963
#define W_AO 16
964
#define W_AS 5
965

    
966
#define W_BM 3
967
#define W_BO 32
968
#define W_BS 6
969

    
970
#define W_CM 127
971
#define W_CO 64
972
#define W_CS 7
973

    
974
#define W_DM 7
975
#define W_DO 8
976
#define W_DS 4
977
#elif 0
978
#define W_AM 97
979
#define W_AO 32
980
#define W_AS 6
981

    
982
#define W_BM 63
983
#define W_BO 512
984
#define W_BS 10
985

    
986
#define W_CM 13
987
#define W_CO 8
988
#define W_CS 4
989

    
990
#define W_DM 15
991
#define W_DO 16
992
#define W_DS 5
993

    
994
#else
995

    
996
#define W_AM 203
997
#define W_AO 64
998
#define W_AS 7
999

    
1000
#define W_BM 217
1001
#define W_BO 2048
1002
#define W_BS 12
1003

    
1004
#define W_CM 113
1005
#define W_CO 64
1006
#define W_CS 7
1007

    
1008
#define W_DM 227
1009
#define W_DO 128
1010
#define W_DS 9
1011
#endif
1012
static void horizontal_decompose97i(int *b, int width){
1013
    int temp[width];
1014
    const int w2= (width+1)>>1;
1015

    
1016
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1017
    lift (temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1018
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1019
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1020
}
1021

    
1022

    
1023
static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
1024
    int i;
1025
    
1026
    for(i=0; i<width; i++){
1027
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1028
    }
1029
}
1030

    
1031
static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
1032
    int i;
1033
    
1034
    for(i=0; i<width; i++){
1035
#ifdef lift5
1036
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1037
#else
1038
        int r= 3*(b0[i] + b2[i]);
1039
        r+= r>>4;
1040
        r+= r>>8;
1041
        b1[i] += (r+W_CO)>>W_CS;
1042
#endif
1043
    }
1044
}
1045

    
1046
static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
1047
    int i;
1048
    
1049
    for(i=0; i<width; i++){
1050
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1051
    }
1052
}
1053

    
1054
static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
1055
    int i;
1056
    
1057
    for(i=0; i<width; i++){
1058
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1059
    }
1060
}
1061

    
1062
static void spatial_decompose97i(int *buffer, int width, int height, int stride){
1063
    int y;
1064
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1065
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1066
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1067
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1068
  
1069
    for(y=-4; y<height; y+=2){
1070
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1071
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1072

    
1073
{START_TIMER
1074
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1075
        if(y+4 < height) horizontal_decompose97i(b5, width);
1076
if(width>400){
1077
STOP_TIMER("horizontal_decompose97i")
1078
}}
1079
        
1080
{START_TIMER
1081
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1082
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1083
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1084
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1085

    
1086
if(width>400){
1087
STOP_TIMER("vertical_decompose97i")
1088
}}
1089
        
1090
        b0=b2;
1091
        b1=b3;
1092
        b2=b4;
1093
        b3=b5;
1094
    }
1095
}
1096

    
1097
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
1098
    int level;
1099
    
1100
    for(level=0; level<decomposition_count; level++){
1101
        switch(type){
1102
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1103
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1104
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1105
        }
1106
    }
1107
}
1108

    
1109
static void horizontal_compose53i(int *b, int width){
1110
    int temp[width];
1111
    const int width2= width>>1;
1112
    const int w2= (width+1)>>1;
1113
    int A1,A2,A3,A4, x;
1114

    
1115
#if 0
1116
    A2= temp[1       ];
1117
    A4= temp[0       ];
1118
    A1= temp[0+width2];
1119
    A1 -= (A2 + A4)>>1;
1120
    A4 += (A1 + 1)>>1;
1121
    b[0+width2] = A1;
1122
    b[0       ] = A4;
1123
    for(x=1; x+1<width2; x+=2){
1124
        A3= temp[x+width2];
1125
        A4= temp[x+1     ];
1126
        A3 -= (A2 + A4)>>1;
1127
        A2 += (A1 + A3 + 2)>>2;
1128
        b[x+width2] = A3;
1129
        b[x       ] = A2;
1130

1131
        A1= temp[x+1+width2];
1132
        A2= temp[x+2       ];
1133
        A1 -= (A2 + A4)>>1;
1134
        A4 += (A1 + A3 + 2)>>2;
1135
        b[x+1+width2] = A1;
1136
        b[x+1       ] = A4;
1137
    }
1138
    A3= temp[width-1];
1139
    A3 -= A2;
1140
    A2 += (A1 + A3 + 2)>>2;
1141
    b[width -1] = A3;
1142
    b[width2-1] = A2;
1143
#else   
1144
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1145
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1146
#endif
1147
    for(x=0; x<width2; x++){
1148
        b[2*x    ]= temp[x   ];
1149
        b[2*x + 1]= temp[x+w2];
1150
    }
1151
    if(width&1)
1152
        b[2*x    ]= temp[x   ];
1153
}
1154

    
1155
static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
1156
    int i;
1157
    
1158
    for(i=0; i<width; i++){
1159
        b1[i] += (b0[i] + b2[i])>>1;
1160
    }
1161
}
1162

    
1163
static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
1164
    int i;
1165
    
1166
    for(i=0; i<width; i++){
1167
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1168
    }
1169
}
1170

    
1171
static void spatial_compose53i(int *buffer, int width, int height, int stride){
1172
    int y;
1173
    DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1174
    DWTELEM *b1= buffer + mirror(-1  , height-1)*stride;
1175
  
1176
    for(y=-1; y<=height; y+=2){
1177
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1178
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1179

    
1180
{START_TIMER
1181
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1182
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1183
STOP_TIMER("vertical_compose53i*")}
1184

    
1185
{START_TIMER
1186
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1187
        if(b0 <= b2) horizontal_compose53i(b1, width);
1188
STOP_TIMER("horizontal_compose53i")}
1189

    
1190
        b0=b2;
1191
        b1=b3;
1192
    }
1193
}   
1194

    
1195
 
1196
static void horizontal_compose97i(int *b, int width){
1197
    int temp[width];
1198
    const int w2= (width+1)>>1;
1199

    
1200
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1201
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1202
    lift (b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1203
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1204
}
1205

    
1206
static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1207
    int i;
1208
    
1209
    for(i=0; i<width; i++){
1210
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1211
    }
1212
}
1213

    
1214
static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1215
    int i;
1216
    
1217
    for(i=0; i<width; i++){
1218
#ifdef lift5
1219
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1220
#else
1221
        int r= 3*(b0[i] + b2[i]);
1222
        r+= r>>4;
1223
        r+= r>>8;
1224
        b1[i] -= (r+W_CO)>>W_CS;
1225
#endif
1226
    }
1227
}
1228

    
1229
static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1230
    int i;
1231
    
1232
    for(i=0; i<width; i++){
1233
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1234
    }
1235
}
1236

    
1237
static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1238
    int i;
1239
    
1240
    for(i=0; i<width; i++){
1241
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1242
    }
1243
}
1244

    
1245
static void spatial_compose97i(int *buffer, int width, int height, int stride){
1246
    int y;
1247
    DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1248
    DWTELEM *b1= buffer + mirror(-3  , height-1)*stride;
1249
    DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1250
    DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1251

    
1252
    for(y=-3; y<=height; y+=2){
1253
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1254
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1255

    
1256
        if(stride == width && y+4 < height && 0){ 
1257
            int x;
1258
            for(x=0; x<width/2; x++)
1259
                b5[x] += 64*2;
1260
            for(; x<width; x++)
1261
                b5[x] += 169*2;
1262
        }
1263
        
1264
{START_TIMER
1265
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1266
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1267
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1268
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1269
if(width>400){
1270
STOP_TIMER("vertical_compose97i")}}
1271

    
1272
{START_TIMER
1273
        if(y-1>=  0) horizontal_compose97i(b0, width);
1274
        if(b0 <= b2) horizontal_compose97i(b1, width);
1275
if(width>400 && b0 <= b2){
1276
STOP_TIMER("horizontal_compose97i")}}
1277
        
1278
        b0=b2;
1279
        b1=b3;
1280
        b2=b4;
1281
        b3=b5;
1282
    }
1283
}
1284

    
1285
void ff_spatial_idwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
1286
    int level;
1287

    
1288
    for(level=decomposition_count-1; level>=0; level--){
1289
        switch(type){
1290
        case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1291
        case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1292
        case 2: spatial_composeX  (buffer, width>>level, height>>level, stride<<level); break;
1293
        }
1294
    }
1295
}
1296

    
1297
static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1298
    const int w= b->width;
1299
    const int h= b->height;
1300
    int x, y;
1301

    
1302
    if(1){
1303
        int run=0;
1304
        int runs[w*h];
1305
        int run_index=0;
1306
                
1307
        for(y=0; y<h; y++){
1308
            for(x=0; x<w; x++){
1309
                int v, p=0;
1310
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1311
                v= src[x + y*stride];
1312

    
1313
                if(y){
1314
                    t= src[x + (y-1)*stride];
1315
                    if(x){
1316
                        lt= src[x - 1 + (y-1)*stride];
1317
                    }
1318
                    if(x + 1 < w){
1319
                        rt= src[x + 1 + (y-1)*stride];
1320
                    }
1321
                }
1322
                if(x){
1323
                    l= src[x - 1 + y*stride];
1324
                    /*if(x > 1){
1325
                        if(orientation==1) ll= src[y + (x-2)*stride];
1326
                        else               ll= src[x - 2 + y*stride];
1327
                    }*/
1328
                }
1329
                if(parent){
1330
                    int px= x>>1;
1331
                    int py= y>>1;
1332
                    if(px<b->parent->width && py<b->parent->height) 
1333
                        p= parent[px + py*2*stride];
1334
                }
1335
                if(!(/*ll|*/l|lt|t|rt|p)){
1336
                    if(v){
1337
                        runs[run_index++]= run;
1338
                        run=0;
1339
                    }else{
1340
                        run++;
1341
                    }
1342
                }
1343
            }
1344
        }
1345
        runs[run_index++]= run;
1346
        run_index=0;
1347
        run= runs[run_index++];
1348

    
1349
        put_symbol2(&s->c, b->state[1], run, 3);
1350
        
1351
        for(y=0; y<h; y++){
1352
            for(x=0; x<w; x++){
1353
                int v, p=0;
1354
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1355
                v= src[x + y*stride];
1356

    
1357
                if(y){
1358
                    t= src[x + (y-1)*stride];
1359
                    if(x){
1360
                        lt= src[x - 1 + (y-1)*stride];
1361
                    }
1362
                    if(x + 1 < w){
1363
                        rt= src[x + 1 + (y-1)*stride];
1364
                    }
1365
                }
1366
                if(x){
1367
                    l= src[x - 1 + y*stride];
1368
                    /*if(x > 1){
1369
                        if(orientation==1) ll= src[y + (x-2)*stride];
1370
                        else               ll= src[x - 2 + y*stride];
1371
                    }*/
1372
                }
1373
                if(parent){
1374
                    int px= x>>1;
1375
                    int py= y>>1;
1376
                    if(px<b->parent->width && py<b->parent->height) 
1377
                        p= parent[px + py*2*stride];
1378
                }
1379
                if(/*ll|*/l|lt|t|rt|p){
1380
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1381

    
1382
                    put_cabac(&s->c, &b->state[0][context], !!v);
1383
                }else{
1384
                    if(!run){
1385
                        run= runs[run_index++];
1386

    
1387
                        put_symbol2(&s->c, b->state[1], run, 3);
1388
                        assert(v);
1389
                    }else{
1390
                        run--;
1391
                        assert(!v);
1392
                    }
1393
                }
1394
                if(v){
1395
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1396

    
1397
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1398
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1399
                }
1400
            }
1401
        }
1402
    }
1403
}
1404

    
1405
static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1406
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1407
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1408
    encode_subband_c0run(s, b, src, parent, stride, orientation);
1409
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1410
}
1411

    
1412
static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1413
    const int w= b->width;
1414
    const int h= b->height;
1415
    int x,y;
1416
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
1417
    int qmul= qexp[qlog&7]<<(qlog>>3);
1418
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1419
    
1420
    START_TIMER
1421

    
1422
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1423
        qadd= 0;
1424
        qmul= 1<<QEXPSHIFT;
1425
    }
1426

    
1427
    if(1){
1428
        int run;
1429
        int index=0;
1430
        int prev_index=-1;
1431
        int prev2_index=0;
1432
        int parent_index= 0;
1433
        int prev_parent_index= 0;
1434
        
1435
        for(y=0; y<b->height; y++)
1436
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1437

    
1438
        run= get_symbol2(&s->c, b->state[1], 3);
1439
        for(y=0; y<h; y++){
1440
            int v=0;
1441
            int lt=0, t=0, rt=0;
1442

    
1443
            if(y && b->x[prev_index] == 0){
1444
                rt= b->coeff[prev_index];
1445
            }
1446
            for(x=0; x<w; x++){
1447
                int p=0;
1448
                const int l= v;
1449
                
1450
                lt= t; t= rt;
1451

    
1452
                if(y){
1453
                    if(b->x[prev_index] <= x)
1454
                        prev_index++;
1455
                    if(b->x[prev_index] == x + 1)
1456
                        rt= b->coeff[prev_index];
1457
                    else
1458
                        rt=0;
1459
                }
1460
                if(parent){
1461
                    if(x>>1 > b->parent->x[parent_index]){
1462
                        parent_index++;
1463
                    }
1464
                    if(x>>1 == b->parent->x[parent_index]){
1465
                        p= b->parent->coeff[parent_index];
1466
                    }
1467
                }
1468
                if(/*ll|*/l|lt|t|rt|p){
1469
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1470

    
1471
                    v=get_cabac(&s->c, &b->state[0][context]);
1472
                }else{
1473
                    if(!run){
1474
                        run= get_symbol2(&s->c, b->state[1], 3);
1475
                        v=1;
1476
                    }else{
1477
                        run--;
1478
                        v=0;
1479

    
1480
                        if(y && parent){
1481
                            int max_run;
1482

    
1483
                            max_run= FFMIN(run, b->x[prev_index] - x - 2);
1484
                            max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1485
                            x+= max_run;
1486
                            run-= max_run;
1487
                        }
1488
                    }
1489
                }
1490
                if(v){
1491
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1492
                    v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
1493
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
1494
                        src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
1495
                        v= -v;
1496
                    }else{
1497
                        src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1498
                    }
1499
                    b->x[index]=x; //FIXME interleave x/coeff
1500
                    b->coeff[index++]= v;
1501
                }
1502
            }
1503
            b->x[index++]= w+1; //end marker
1504
            prev_index= prev2_index;
1505
            prev2_index= index;
1506
            
1507
            if(parent){
1508
                while(b->parent->x[parent_index] != b->parent->width+1)
1509
                    parent_index++;
1510
                parent_index++;
1511
                if(y&1){
1512
                    prev_parent_index= parent_index;
1513
                }else{
1514
                    parent_index= prev_parent_index;
1515
                }
1516
            }
1517
        }
1518
        b->x[index++]= w+1; //end marker
1519
        if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
1520
            STOP_TIMER("decode_subband")
1521
        }
1522
        
1523
        return;
1524
    }
1525
}
1526

    
1527
static void reset_contexts(SnowContext *s){
1528
    int plane_index, level, orientation;
1529

    
1530
    for(plane_index=0; plane_index<3; plane_index++){
1531
        for(level=0; level<s->spatial_decomposition_count; level++){
1532
            for(orientation=level ? 1:0; orientation<4; orientation++){
1533
                memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
1534
            }
1535
        }
1536
    }
1537
    memset(s->header_state, 0, sizeof(s->header_state));
1538
    memset(s->block_state, 0, sizeof(s->block_state));
1539
}
1540

    
1541
static int alloc_blocks(SnowContext *s){
1542
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1543
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1544
    
1545
    s->b_width = w;
1546
    s->b_height= h;
1547
    
1548
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1549
    return 0;
1550
}
1551

    
1552
static inline void copy_cabac_state(CABACContext *d, CABACContext *s){
1553
    PutBitContext bak= d->pb;
1554
    *d= *s;
1555
    d->pb= bak;
1556
}
1557

    
1558
//near copy & paste from dsputil, FIXME
1559
static int pix_sum(uint8_t * pix, int line_size, int w)
1560
{
1561
    int s, i, j;
1562

    
1563
    s = 0;
1564
    for (i = 0; i < w; i++) {
1565
        for (j = 0; j < w; j++) {
1566
            s += pix[0];
1567
            pix ++;
1568
        }
1569
        pix += line_size - w;
1570
    }
1571
    return s;
1572
}
1573

    
1574
//near copy & paste from dsputil, FIXME
1575
static int pix_norm1(uint8_t * pix, int line_size, int w)
1576
{
1577
    int s, i, j;
1578
    uint32_t *sq = squareTbl + 256;
1579

    
1580
    s = 0;
1581
    for (i = 0; i < w; i++) {
1582
        for (j = 0; j < w; j ++) {
1583
            s += sq[pix[0]];
1584
            pix ++;
1585
        }
1586
        pix += line_size - w;
1587
    }
1588
    return s;
1589
}
1590

    
1591
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1592
    const int w= s->b_width << s->block_max_depth;
1593
    const int rem_depth= s->block_max_depth - level;
1594
    const int index= (x + y*w) << rem_depth;
1595
    const int block_w= 1<<rem_depth;
1596
    BlockNode block;
1597
    int i,j;
1598
    
1599
    block.color[0]= l;
1600
    block.color[1]= cb;
1601
    block.color[2]= cr;
1602
    block.mx= mx;
1603
    block.my= my;
1604
    block.type= type;
1605
    block.level= level;
1606

    
1607
    for(j=0; j<block_w; j++){
1608
        for(i=0; i<block_w; i++){
1609
            s->block[index + i + j*w]= block;
1610
        }
1611
    }
1612
}
1613

    
1614
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1615
    const int offset[3]= {
1616
          y*c->  stride + x,
1617
        ((y*c->uvstride + x)>>1),
1618
        ((y*c->uvstride + x)>>1),
1619
    };
1620
    int i;
1621
    for(i=0; i<3; i++){
1622
        c->src[0][i]= src [i];
1623
        c->ref[0][i]= ref [i] + offset[i];
1624
    }
1625
    assert(!ref_index);
1626
}
1627

    
1628
//FIXME copy&paste
1629
#define P_LEFT P[1]
1630
#define P_TOP P[2]
1631
#define P_TOPRIGHT P[3]
1632
#define P_MEDIAN P[4]
1633
#define P_MV1 P[9]
1634
#define FLAG_QPEL   1 //must be 1
1635

    
1636
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1637
    uint8_t p_buffer[1024];
1638
    uint8_t i_buffer[1024];
1639
    uint8_t p_state[sizeof(s->block_state)];
1640
    uint8_t i_state[sizeof(s->block_state)];
1641
    CABACContext pc, ic;
1642
    PutBitContext pbbak= s->c.pb;
1643
    int score, score2, iscore, i_len, p_len, block_s, sum;
1644
    const int w= s->b_width  << s->block_max_depth;
1645
    const int h= s->b_height << s->block_max_depth;
1646
    const int rem_depth= s->block_max_depth - level;
1647
    const int index= (x + y*w) << rem_depth;
1648
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1649
    static BlockNode null_block= { //FIXME add border maybe
1650
        .color= {128,128,128},
1651
        .mx= 0,
1652
        .my= 0,
1653
        .type= 0,
1654
        .level= 0,
1655
    };
1656
    int trx= (x+1)<<rem_depth;
1657
    int try= (y+1)<<rem_depth;
1658
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
1659
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
1660
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1661
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1662
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1663
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1664
    int pl = left->color[0];
1665
    int pcb= left->color[1];
1666
    int pcr= left->color[2];
1667
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
1668
    int pmy= mid_pred(left->my, top->my, tr->my);
1669
    int mx=0, my=0;
1670
    int l,cr,cb, i;
1671
    const int stride= s->current_picture.linesize[0];
1672
    const int uvstride= s->current_picture.linesize[1];
1673
    const int instride= s->input_picture.linesize[0];
1674
    const int uvinstride= s->input_picture.linesize[1];
1675
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
1676
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1677
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
1678
    uint8_t current_mb[3][stride*block_w];
1679
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1680
    int P[10][2];
1681
    int16_t last_mv[3][2];
1682
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1683
    const int shift= 1+qpel;
1684
    MotionEstContext *c= &s->m.me;
1685
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
1686
    int my_context= av_log2(2*ABS(left->my - top->my));
1687
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1688

    
1689
    assert(sizeof(s->block_state) >= 256);
1690
    if(s->keyframe){
1691
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1692
        return 0;
1693
    }
1694

    
1695
    //FIXME optimize
1696
    for(i=0; i<block_w; i++)
1697
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
1698
    for(i=0; i<block_w>>1; i++)
1699
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1700
    for(i=0; i<block_w>>1; i++)
1701
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1702

    
1703
//    clip predictors / edge ?
1704

    
1705
    P_LEFT[0]= left->mx;
1706
    P_LEFT[1]= left->my;
1707
    P_TOP [0]= top->mx;
1708
    P_TOP [1]= top->my;
1709
    P_TOPRIGHT[0]= tr->mx;
1710
    P_TOPRIGHT[1]= tr->my;
1711
    
1712
    last_mv[0][0]= s->block[index].mx;
1713
    last_mv[0][1]= s->block[index].my;
1714
    last_mv[1][0]= right->mx;
1715
    last_mv[1][1]= right->my;
1716
    last_mv[2][0]= bottom->mx;
1717
    last_mv[2][1]= bottom->my;
1718
    
1719
    s->m.mb_stride=2;
1720
    s->m.mb_x= 
1721
    s->m.mb_y= 0;
1722
    s->m.me.skip= 0;
1723

    
1724
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1725
    
1726
    assert(s->m.me.  stride ==   stride);
1727
    assert(s->m.me.uvstride == uvstride);
1728
    
1729
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1730
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1731
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1732
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1733
    
1734
    c->xmin = - x*block_w - 16;
1735
    c->ymin = - y*block_w - 16;
1736
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16;
1737
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16;
1738

    
1739
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1740
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
1741
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1742
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1743
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1744
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1745
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1746

    
1747
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1748
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1749

    
1750
    if (!y) {
1751
        c->pred_x= P_LEFT[0];
1752
        c->pred_y= P_LEFT[1];
1753
    } else {
1754
        c->pred_x = P_MEDIAN[0];
1755
        c->pred_y = P_MEDIAN[1];
1756
    }
1757

    
1758
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
1759
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1760

    
1761
    assert(mx >= c->xmin);
1762
    assert(mx <= c->xmax);
1763
    assert(my >= c->ymin);
1764
    assert(my <= c->ymax);
1765
    
1766
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1767
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1768
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1769
                             
1770
  //  subpel search
1771
    pc= s->c;
1772
    init_put_bits(&pc.pb, p_buffer, sizeof(p_buffer));
1773
    memcpy(p_state, s->block_state, sizeof(s->block_state));
1774

    
1775
    if(level!=s->block_max_depth)
1776
        put_cabac(&pc, &p_state[4 + s_context], 1);
1777
    put_cabac(&pc, &p_state[1 + left->type + top->type], 0);
1778
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1779
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
1780
    p_len= put_bits_count(&pc.pb);
1781
    score += (s->lambda2*(p_len + pc.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1782

    
1783
    block_s= block_w*block_w;
1784
    sum = pix_sum(&current_mb[0][0], stride, block_w);
1785
    l= (sum + block_s/2)/block_s;
1786
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1787
    
1788
    block_s= block_w*block_w>>2;
1789
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1790
    cb= (sum + block_s/2)/block_s;
1791
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1792
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1793
    cr= (sum + block_s/2)/block_s;
1794
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1795

    
1796
    ic= s->c;
1797
    init_put_bits(&ic.pb, i_buffer, sizeof(i_buffer));
1798
    memcpy(i_state, s->block_state, sizeof(s->block_state));
1799
    if(level!=s->block_max_depth)
1800
        put_cabac(&ic, &i_state[4 + s_context], 1);
1801
    put_cabac(&ic, &i_state[1 + left->type + top->type], 1);
1802
    put_symbol(&ic, &i_state[32],  l-pl , 1);
1803
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
1804
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
1805
    i_len= put_bits_count(&ic.pb);
1806
    iscore += (s->lambda2*(i_len + ic.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1807

    
1808
//    assert(score==256*256*256*64-1);
1809
    assert(iscore < 255*255*256 + s->lambda2*10);
1810
    assert(iscore >= 0);
1811
    assert(l>=0 && l<=255);
1812
    assert(pl>=0 && pl<=255);
1813

    
1814
    if(level==0){
1815
        int varc= iscore >> 8;
1816
        int vard= score >> 8;
1817
        if (vard <= 64 || vard < varc)
1818
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1819
        else
1820
            c->scene_change_score+= s->m.qscale;
1821
    }
1822
        
1823
    if(level!=s->block_max_depth){
1824
        put_cabac(&s->c, &s->block_state[4 + s_context], 0);
1825
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1826
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1827
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1828
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1829
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1830
    
1831
        if(score2 < score && score2 < iscore)
1832
            return score2;
1833
    }
1834
    
1835
    if(iscore < score){
1836
        flush_put_bits(&ic.pb);
1837
        ff_copy_bits(&pbbak, i_buffer, i_len);
1838
        s->c= ic;
1839
        s->c.pb= pbbak;
1840
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1841
        memcpy(s->block_state, i_state, sizeof(s->block_state));
1842
        return iscore;
1843
    }else{
1844
        flush_put_bits(&pc.pb);
1845
        ff_copy_bits(&pbbak, p_buffer, p_len);
1846
        s->c= pc;
1847
        s->c.pb= pbbak;
1848
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1849
        memcpy(s->block_state, p_state, sizeof(s->block_state));
1850
        return score;
1851
    }
1852
}
1853

    
1854
static void decode_q_branch(SnowContext *s, int level, int x, int y){
1855
    const int w= s->b_width << s->block_max_depth;
1856
    const int rem_depth= s->block_max_depth - level;
1857
    const int index= (x + y*w) << rem_depth;
1858
    static BlockNode null_block= { //FIXME add border maybe
1859
        .color= {128,128,128},
1860
        .mx= 0,
1861
        .my= 0,
1862
        .type= 0,
1863
        .level= 0,
1864
    };
1865
    int trx= (x+1)<<rem_depth;
1866
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
1867
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
1868
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1869
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1870
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1871
    
1872
    if(s->keyframe){
1873
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1874
        return;
1875
    }
1876

    
1877
    if(level==s->block_max_depth || get_cabac(&s->c, &s->block_state[4 + s_context])){
1878
        int type;
1879
        int l = left->color[0];
1880
        int cb= left->color[1];
1881
        int cr= left->color[2];
1882
        int mx= mid_pred(left->mx, top->mx, tr->mx);
1883
        int my= mid_pred(left->my, top->my, tr->my);
1884
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1885
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1886
        
1887
        type= get_cabac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1888

    
1889
        if(type){
1890
            l += get_symbol(&s->c, &s->block_state[32], 1);
1891
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
1892
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
1893
        }else{
1894
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1895
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1896
        }
1897
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1898
    }else{
1899
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1900
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1901
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1902
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1903
    }
1904
}
1905

    
1906
static void encode_blocks(SnowContext *s){
1907
    int x, y;
1908
    int w= s->b_width;
1909
    int h= s->b_height;
1910

    
1911
    for(y=0; y<h; y++){
1912
        for(x=0; x<w; x++){
1913
            encode_q_branch(s, 0, x, y);
1914
        }
1915
    }
1916
}
1917

    
1918
static void decode_blocks(SnowContext *s){
1919
    int x, y;
1920
    int w= s->b_width;
1921
    int h= s->b_height;
1922

    
1923
    for(y=0; y<h; y++){
1924
        for(x=0; x<w; x++){
1925
            decode_q_branch(s, 0, x, y);
1926
        }
1927
    }
1928
}
1929

    
1930
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1931
    int x, y;
1932

    
1933
    for(y=0; y < b_h+5; y++){
1934
        for(x=0; x < b_w; x++){
1935
            int a0= src[x     + y*stride];
1936
            int a1= src[x + 1 + y*stride];
1937
            int a2= src[x + 2 + y*stride];
1938
            int a3= src[x + 3 + y*stride];
1939
            int a4= src[x + 4 + y*stride];
1940
            int a5= src[x + 5 + y*stride];
1941
//            int am= 9*(a1+a2) - (a0+a3);
1942
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1943
//            int am= 18*(a2+a3) - 2*(a1+a4);
1944
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1945
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1946

    
1947
//            if(b_w==16) am= 8*(a1+a2);
1948

    
1949
            if(dx<8) tmp[x + y*stride]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
1950
            else     tmp[x + y*stride]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
1951

    
1952
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
1953
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
1954
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
1955
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1956
        }
1957
    }
1958
    for(y=0; y < b_h; y++){
1959
        for(x=0; x < b_w; x++){
1960
            int a0= tmp[x +  y     *stride];
1961
            int a1= tmp[x + (y + 1)*stride];
1962
            int a2= tmp[x + (y + 2)*stride];
1963
            int a3= tmp[x + (y + 3)*stride];
1964
            int a4= tmp[x + (y + 4)*stride];
1965
            int a5= tmp[x + (y + 5)*stride];
1966
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1967
//            int am= 18*(a2+a3) - 2*(a1+a4);
1968
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1969
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1970
            
1971
//            if(b_w==16) am= 8*(a1+a2);
1972

    
1973
            if(dy<8) dst[x + y*stride]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
1974
            else     dst[x + y*stride]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
1975

    
1976
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
1977
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
1978
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
1979
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
1980
        }
1981
    }
1982
}
1983

    
1984
#define mcb(dx,dy,b_w)\
1985
static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
1986
    uint8_t tmp[stride*(b_w+5)];\
1987
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
1988
}
1989

    
1990
mcb( 0, 0,16)
1991
mcb( 4, 0,16)
1992
mcb( 8, 0,16)
1993
mcb(12, 0,16)
1994
mcb( 0, 4,16)
1995
mcb( 4, 4,16)
1996
mcb( 8, 4,16)
1997
mcb(12, 4,16)
1998
mcb( 0, 8,16)
1999
mcb( 4, 8,16)
2000
mcb( 8, 8,16)
2001
mcb(12, 8,16)
2002
mcb( 0,12,16)
2003
mcb( 4,12,16)
2004
mcb( 8,12,16)
2005
mcb(12,12,16)
2006

    
2007
#define mca(dx,dy,b_w)\
2008
static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
2009
    uint8_t tmp[stride*(b_w+5)];\
2010
    assert(h==b_w);\
2011
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2012
}
2013

    
2014
mca( 0, 0,16)
2015
mca( 8, 0,16)
2016
mca( 0, 8,16)
2017
mca( 8, 8,16)
2018

    
2019
static always_inline void add_xblock(SnowContext *s, DWTELEM *dst, uint8_t *src, uint8_t *obmc, int s_x, int s_y, int b_w, int b_h, int mv_x, int mv_y, int w, int h, int dst_stride, int src_stride, int obmc_stride, int mb_type, int add, int color){
2020
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME move to context to gurantee alignment
2021
    int x,y;
2022

    
2023
    if(s_x<0){
2024
        obmc -= s_x;
2025
        b_w += s_x;
2026
        s_x=0;
2027
    }else if(s_x + b_w > w){
2028
        b_w = w - s_x;
2029
    }
2030
    if(s_y<0){
2031
        obmc -= s_y*obmc_stride;
2032
        b_h += s_y;
2033
        s_y=0;
2034
    }else if(s_y + b_h> h){
2035
        b_h = h - s_y;
2036
    }
2037

    
2038
    if(b_w<=0 || b_h<=0) return;
2039
    
2040
    dst += s_x + s_y*dst_stride;
2041

    
2042
    if(mb_type==BLOCK_INTRA){
2043
        for(y=0; y < b_h; y++){
2044
            for(x=0; x < b_w; x++){
2045
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * color * (256/OBMC_MAX);
2046
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * color * (256/OBMC_MAX);
2047
            }
2048
        }
2049
    }else{
2050
        int dx= mv_x&15;
2051
        int dy= mv_y&15;
2052
//        int dxy= (mv_x&1) + 2*(mv_y&1);
2053

    
2054
        s_x += (mv_x>>4) - 2;
2055
        s_y += (mv_y>>4) - 2;
2056
        src += s_x + s_y*src_stride;
2057
        //use dsputil
2058
    
2059
        if(   (unsigned)s_x >= w - b_w - 4
2060
           || (unsigned)s_y >= h - b_h - 4){
2061
            ff_emulated_edge_mc(tmp + 32, src, src_stride, b_w+5, b_h+5, s_x, s_y, w, h);
2062
            src= tmp + 32;
2063
        }
2064

    
2065
        assert(mb_type==0);
2066
        mc_block(tmp, src, tmp + 64+8, src_stride, b_w, b_h, dx, dy);
2067
        for(y=0; y < b_h; y++){
2068
            for(x=0; x < b_w; x++){
2069
                if(add) dst[x + y*dst_stride] += obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2070
                else    dst[x + y*dst_stride] -= obmc[x + y*obmc_stride] * tmp[x + y*src_stride] * (256/OBMC_MAX);
2071
            }
2072
        }
2073
    }
2074
}
2075

    
2076
static void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2077
    Plane *p= &s->plane[plane_index];
2078
    const int mb_w= s->b_width  << s->block_max_depth;
2079
    const int mb_h= s->b_height << s->block_max_depth;
2080
    const int mb_stride= mb_w;
2081
    int x, y, mb_x, mb_y;
2082
    int scale      = plane_index ?  s->mv_scale : 2*s->mv_scale;
2083
    int block_size = MB_SIZE >> s->block_max_depth;
2084
    int block_w    = plane_index ? block_size/2 : block_size;
2085
    uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2086
    int obmc_stride= plane_index ? block_size : 2*block_size;
2087
    int ref_stride= s->last_picture.linesize[plane_index];
2088
    uint8_t *ref  = s->last_picture.data[plane_index];
2089
    int w= p->width;
2090
    int h= p->height;
2091
    START_TIMER
2092
    
2093
if(s->avctx->debug&512){
2094
    for(y=0; y<h; y++){
2095
        for(x=0; x<w; x++){
2096
            if(add) buf[x + y*w]+= 128*256;
2097
            else    buf[x + y*w]-= 128*256;
2098
        }
2099
    }
2100
    
2101
    return;
2102
}
2103
    for(mb_y=-1; mb_y<=mb_h; mb_y++){
2104
        for(mb_x=-1; mb_x<=mb_w; mb_x++){
2105
            int index= clip(mb_x, 0, mb_w-1) + clip(mb_y, 0, mb_h-1)*mb_stride;
2106

    
2107
            START_TIMER
2108
            add_xblock(s, buf, ref, obmc, 
2109
                       block_w*mb_x - block_w/2, 
2110
                       block_w*mb_y - block_w/2,
2111
                       2*block_w, 2*block_w,
2112
                       s->block[index].mx*scale, s->block[index].my*scale,
2113
                       w, h,
2114
                       w, ref_stride, obmc_stride, 
2115
                       s->block[index].type, add, s->block[index].color[plane_index]);
2116

    
2117
            STOP_TIMER("add_xblock")
2118
        }
2119
    }
2120
    
2121
    STOP_TIMER("predict_plane")
2122
}
2123

    
2124
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2125
    const int level= b->level;
2126
    const int w= b->width;
2127
    const int h= b->height;
2128
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2129
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2130
    int x,y, thres1, thres2;
2131
    START_TIMER
2132

    
2133
    assert(QROOT==8);
2134

    
2135
    if(s->qlog == LOSSLESS_QLOG) return;
2136
 
2137
    bias= bias ? 0 : (3*qmul)>>3;
2138
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2139
    thres2= 2*thres1;
2140
    
2141
    if(!bias){
2142
        for(y=0; y<h; y++){
2143
            for(x=0; x<w; x++){
2144
                int i= src[x + y*stride];
2145
                
2146
                if((unsigned)(i+thres1) > thres2){
2147
                    if(i>=0){
2148
                        i<<= QEXPSHIFT;
2149
                        i/= qmul; //FIXME optimize
2150
                        src[x + y*stride]=  i;
2151
                    }else{
2152
                        i= -i;
2153
                        i<<= QEXPSHIFT;
2154
                        i/= qmul; //FIXME optimize
2155
                        src[x + y*stride]= -i;
2156
                    }
2157
                }else
2158
                    src[x + y*stride]= 0;
2159
            }
2160
        }
2161
    }else{
2162
        for(y=0; y<h; y++){
2163
            for(x=0; x<w; x++){
2164
                int i= src[x + y*stride]; 
2165
                
2166
                if((unsigned)(i+thres1) > thres2){
2167
                    if(i>=0){
2168
                        i<<= QEXPSHIFT;
2169
                        i= (i + bias) / qmul; //FIXME optimize
2170
                        src[x + y*stride]=  i;
2171
                    }else{
2172
                        i= -i;
2173
                        i<<= QEXPSHIFT;
2174
                        i= (i + bias) / qmul; //FIXME optimize
2175
                        src[x + y*stride]= -i;
2176
                    }
2177
                }else
2178
                    src[x + y*stride]= 0;
2179
            }
2180
        }
2181
    }
2182
    if(level+1 == s->spatial_decomposition_count){
2183
//        STOP_TIMER("quantize")
2184
    }
2185
}
2186

    
2187
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2188
    const int w= b->width;
2189
    const int h= b->height;
2190
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2191
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2192
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2193
    int x,y;
2194
    START_TIMER
2195
    
2196
    if(s->qlog == LOSSLESS_QLOG) return;
2197
    
2198
    assert(QROOT==8);
2199

    
2200
    for(y=0; y<h; y++){
2201
        for(x=0; x<w; x++){
2202
            int i= src[x + y*stride];
2203
            if(i<0){
2204
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2205
            }else if(i>0){
2206
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2207
            }
2208
        }
2209
    }
2210
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2211
        STOP_TIMER("dquant")
2212
    }
2213
}
2214

    
2215
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2216
    const int w= b->width;
2217
    const int h= b->height;
2218
    int x,y;
2219
    
2220
    for(y=h-1; y>=0; y--){
2221
        for(x=w-1; x>=0; x--){
2222
            int i= x + y*stride;
2223
            
2224
            if(x){
2225
                if(use_median){
2226
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2227
                    else  src[i] -= src[i - 1];
2228
                }else{
2229
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2230
                    else  src[i] -= src[i - 1];
2231
                }
2232
            }else{
2233
                if(y) src[i] -= src[i - stride];
2234
            }
2235
        }
2236
    }
2237
}
2238

    
2239
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2240
    const int w= b->width;
2241
    const int h= b->height;
2242
    int x,y;
2243
    
2244
    for(y=0; y<h; y++){
2245
        for(x=0; x<w; x++){
2246
            int i= x + y*stride;
2247
            
2248
            if(x){
2249
                if(use_median){
2250
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2251
                    else  src[i] += src[i - 1];
2252
                }else{
2253
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2254
                    else  src[i] += src[i - 1];
2255
                }
2256
            }else{
2257
                if(y) src[i] += src[i - stride];
2258
            }
2259
        }
2260
    }
2261
}
2262

    
2263
static void encode_header(SnowContext *s){
2264
    int plane_index, level, orientation;
2265
    uint8_t kstate[32]={0};    
2266

    
2267
    put_cabac(&s->c, kstate, s->keyframe);
2268
    if(s->keyframe || s->always_reset)
2269
        reset_contexts(s);
2270
    if(s->keyframe){
2271
        put_symbol(&s->c, s->header_state, s->version, 0);
2272
        put_cabac(&s->c, s->header_state, s->always_reset);
2273
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2274
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2275
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2276
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
2277
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2278
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
2279
        put_cabac(&s->c, s->header_state, s->spatial_scalability);
2280
//        put_cabac(&s->c, s->header_state, s->rate_scalability);
2281

    
2282
        for(plane_index=0; plane_index<2; plane_index++){
2283
            for(level=0; level<s->spatial_decomposition_count; level++){
2284
                for(orientation=level ? 1:0; orientation<4; orientation++){
2285
                    if(orientation==2) continue;
2286
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2287
                }
2288
            }
2289
        }
2290
    }
2291
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2292
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
2293
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
2294
    put_symbol(&s->c, s->header_state, s->qbias, 1);
2295
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
2296
}
2297

    
2298
static int decode_header(SnowContext *s){
2299
    int plane_index, level, orientation;
2300
    uint8_t kstate[32]={0};    
2301

    
2302
    s->keyframe= get_cabac(&s->c, kstate);
2303
    if(s->keyframe || s->always_reset)
2304
        reset_contexts(s);
2305
    if(s->keyframe){
2306
        s->version= get_symbol(&s->c, s->header_state, 0);
2307
        if(s->version>0){
2308
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2309
            return -1;
2310
        }
2311
        s->always_reset= get_cabac(&s->c, s->header_state);
2312
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2313
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2314
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2315
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2316
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2317
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2318
        s->spatial_scalability= get_cabac(&s->c, s->header_state);
2319
//        s->rate_scalability= get_cabac(&s->c, s->header_state);
2320

    
2321
        for(plane_index=0; plane_index<3; plane_index++){
2322
            for(level=0; level<s->spatial_decomposition_count; level++){
2323
                for(orientation=level ? 1:0; orientation<4; orientation++){
2324
                    int q;
2325
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2326
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2327
                    else                    q= get_symbol(&s->c, s->header_state, 1);
2328
                    s->plane[plane_index].band[level][orientation].qlog= q;
2329
                }
2330
            }
2331
        }
2332
    }
2333
    
2334
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2335
    if(s->spatial_decomposition_type > 2){
2336
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2337
        return -1;
2338
    }
2339
    
2340
    s->qlog= get_symbol(&s->c, s->header_state, 1);
2341
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2342
    s->qbias= get_symbol(&s->c, s->header_state, 1);
2343
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
2344

    
2345
    return 0;
2346
}
2347

    
2348
static int common_init(AVCodecContext *avctx){
2349
    SnowContext *s = avctx->priv_data;
2350
    int width, height;
2351
    int level, orientation, plane_index, dec;
2352

    
2353
    s->avctx= avctx;
2354
        
2355
    dsputil_init(&s->dsp, avctx);
2356

    
2357
#define mcf(dx,dy)\
2358
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
2359
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2360
        mc_block ## dx ## dy;
2361

    
2362
    mcf( 0, 0)
2363
    mcf( 4, 0)
2364
    mcf( 8, 0)
2365
    mcf(12, 0)
2366
    mcf( 0, 4)
2367
    mcf( 4, 4)
2368
    mcf( 8, 4)
2369
    mcf(12, 4)
2370
    mcf( 0, 8)
2371
    mcf( 4, 8)
2372
    mcf( 8, 8)
2373
    mcf(12, 8)
2374
    mcf( 0,12)
2375
    mcf( 4,12)
2376
    mcf( 8,12)
2377
    mcf(12,12)
2378

    
2379
#define mcfh(dx,dy)\
2380
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
2381
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2382
        mc_block_hpel ## dx ## dy;
2383

    
2384
    mcfh(0, 0)
2385
    mcfh(8, 0)
2386
    mcfh(0, 8)
2387
    mcfh(8, 8)
2388
        
2389
    dec= s->spatial_decomposition_count= 5;
2390
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2391
    
2392
    s->chroma_h_shift= 1; //FIXME XXX
2393
    s->chroma_v_shift= 1;
2394
    
2395
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2396
    
2397
    width= s->avctx->width;
2398
    height= s->avctx->height;
2399

    
2400
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
2401
    s->pred_buffer= av_mallocz(width*height*sizeof(DWTELEM));
2402
    
2403
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2404
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
2405
    
2406
    for(plane_index=0; plane_index<3; plane_index++){    
2407
        int w= s->avctx->width;
2408
        int h= s->avctx->height;
2409

    
2410
        if(plane_index){
2411
            w>>= s->chroma_h_shift;
2412
            h>>= s->chroma_v_shift;
2413
        }
2414
        s->plane[plane_index].width = w;
2415
        s->plane[plane_index].height= h;
2416
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
2417
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2418
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2419
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2420
                
2421
                b->buf= s->spatial_dwt_buffer;
2422
                b->level= level;
2423
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2424
                b->width = (w + !(orientation&1))>>1;
2425
                b->height= (h + !(orientation>1))>>1;
2426
                
2427
                if(orientation&1) b->buf += (w+1)>>1;
2428
                if(orientation>1) b->buf += b->stride>>1;
2429
                
2430
                if(level)
2431
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
2432
                b->x    = av_mallocz(((b->width+1) * b->height+1)*sizeof(int16_t));
2433
                b->coeff= av_mallocz(((b->width+1) * b->height+1)*sizeof(DWTELEM));
2434
            }
2435
            w= (w+1)>>1;
2436
            h= (h+1)>>1;
2437
        }
2438
    }
2439
    
2440
    reset_contexts(s);
2441
/*    
2442
    width= s->width= avctx->width;
2443
    height= s->height= avctx->height;
2444
    
2445
    assert(width && height);
2446
*/
2447
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2448
    
2449
    return 0;
2450
}
2451

    
2452

    
2453
static void calculate_vissual_weight(SnowContext *s, Plane *p){
2454
    int width = p->width;
2455
    int height= p->height;
2456
    int level, orientation, x, y;
2457

    
2458
    for(level=0; level<s->spatial_decomposition_count; level++){
2459
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2460
            SubBand *b= &p->band[level][orientation];
2461
            DWTELEM *buf= b->buf;
2462
            int64_t error=0;
2463
            
2464
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2465
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
2466
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
2467
            for(y=0; y<height; y++){
2468
                for(x=0; x<width; x++){
2469
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
2470
                    error += d*d;
2471
                }
2472
            }
2473

    
2474
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
2475
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
2476
        }
2477
    }
2478
}
2479

    
2480
static int encode_init(AVCodecContext *avctx)
2481
{
2482
    SnowContext *s = avctx->priv_data;
2483
    int plane_index;
2484

    
2485
    if(avctx->strict_std_compliance >= 0){
2486
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2487
               "use vstrict=-1 to use it anyway\n");
2488
        return -1;
2489
    }
2490
 
2491
    common_init(avctx);
2492
    alloc_blocks(s);
2493
 
2494
    s->version=0;
2495
    
2496
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2497
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2498
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2499
    h263_encode_init(&s->m); //mv_penalty
2500

    
2501
    for(plane_index=0; plane_index<3; plane_index++){
2502
        calculate_vissual_weight(s, &s->plane[plane_index]);
2503
    }
2504
    
2505
    
2506
    avctx->coded_frame= &s->current_picture;
2507
    switch(avctx->pix_fmt){
2508
//    case PIX_FMT_YUV444P:
2509
//    case PIX_FMT_YUV422P:
2510
    case PIX_FMT_YUV420P:
2511
    case PIX_FMT_GRAY8:
2512
//    case PIX_FMT_YUV411P:
2513
//    case PIX_FMT_YUV410P:
2514
        s->colorspace_type= 0;
2515
        break;
2516
/*    case PIX_FMT_RGBA32:
2517
        s->colorspace= 1;
2518
        break;*/
2519
    default:
2520
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2521
        return -1;
2522
    }
2523
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2524
    s->chroma_h_shift= 1;
2525
    s->chroma_v_shift= 1;
2526
    return 0;
2527
}
2528

    
2529
static int frame_start(SnowContext *s){
2530
   AVFrame tmp;
2531
   int w= s->avctx->width; //FIXME round up to x16 ?
2532
   int h= s->avctx->height;
2533

    
2534
    if(s->current_picture.data[0]){
2535
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
2536
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2537
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2538
    }
2539

    
2540
    tmp= s->last_picture;
2541
    s->last_picture= s->current_picture;
2542
    s->current_picture= tmp;
2543
    
2544
    s->current_picture.reference= 1;
2545
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2546
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2547
        return -1;
2548
    }
2549
    
2550
    return 0;
2551
}
2552

    
2553
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2554
    SnowContext *s = avctx->priv_data;
2555
    CABACContext * const c= &s->c;
2556
    AVFrame *pict = data;
2557
    const int width= s->avctx->width;
2558
    const int height= s->avctx->height;
2559
    int level, orientation, plane_index;
2560

    
2561
    ff_init_cabac_encoder(c, buf, buf_size);
2562
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2563
    
2564
    s->input_picture = *pict;
2565

    
2566
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2567
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2568
    
2569
    if(pict->quality){
2570
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2571
        //<64 >60
2572
        s->qlog += 61;
2573
    }else{
2574
        s->qlog= LOSSLESS_QLOG;
2575
    }
2576

    
2577
    frame_start(s);
2578
    s->current_picture.key_frame= s->keyframe;
2579

    
2580
    if(pict->pict_type == P_TYPE){
2581
        int block_width = (width +15)>>4;
2582
        int block_height= (height+15)>>4;
2583
        int stride= s->current_picture.linesize[0];
2584
        
2585
        assert(s->current_picture.data[0]);
2586
        assert(s->last_picture.data[0]);
2587
     
2588
        s->m.avctx= s->avctx;
2589
        s->m.current_picture.data[0]= s->current_picture.data[0];
2590
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
2591
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
2592
        s->m.current_picture_ptr= &s->m.current_picture;
2593
        s->m.   last_picture_ptr= &s->m.   last_picture;
2594
        s->m.linesize=
2595
        s->m.   last_picture.linesize[0]=
2596
        s->m.    new_picture.linesize[0]=
2597
        s->m.current_picture.linesize[0]= stride;
2598
        s->m.uvlinesize= s->current_picture.linesize[1];
2599
        s->m.width = width;
2600
        s->m.height= height;
2601
        s->m.mb_width = block_width;
2602
        s->m.mb_height= block_height;
2603
        s->m.mb_stride=   s->m.mb_width+1;
2604
        s->m.b8_stride= 2*s->m.mb_width+1;
2605
        s->m.f_code=1;
2606
        s->m.pict_type= pict->pict_type;
2607
        s->m.me_method= s->avctx->me_method;
2608
        s->m.me.scene_change_score=0;
2609
        s->m.flags= s->avctx->flags;
2610
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2611
        s->m.out_format= FMT_H263;
2612
        s->m.unrestricted_mv= 1;
2613

    
2614
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
2615
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
2616
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
2617

    
2618
        s->m.dsp= s->dsp; //move
2619
        ff_init_me(&s->m);
2620
    }
2621
    
2622
redo_frame:
2623
        
2624
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
2625

    
2626
    encode_header(s);
2627
    encode_blocks(s);
2628
      
2629
    for(plane_index=0; plane_index<3; plane_index++){
2630
        Plane *p= &s->plane[plane_index];
2631
        int w= p->width;
2632
        int h= p->height;
2633
        int x, y;
2634
//        int bits= put_bits_count(&s->c.pb);
2635

    
2636
        //FIXME optimize
2637
     if(pict->data[plane_index]) //FIXME gray hack
2638
        for(y=0; y<h; y++){
2639
            for(x=0; x<w; x++){
2640
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
2641
            }
2642
        }
2643
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
2644
        
2645
        if(   plane_index==0 
2646
           && pict->pict_type == P_TYPE 
2647
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
2648
            ff_init_cabac_encoder(c, buf, buf_size);
2649
            ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2650
            pict->pict_type= FF_I_TYPE;
2651
            s->keyframe=1;
2652
            reset_contexts(s);
2653
            goto redo_frame;
2654
        }
2655
        
2656
        if(s->qlog == LOSSLESS_QLOG){
2657
            for(y=0; y<h; y++){
2658
                for(x=0; x<w; x++){
2659
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + 127)>>8;
2660
                }
2661
            }
2662
        }
2663
 
2664
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2665

    
2666
        for(level=0; level<s->spatial_decomposition_count; level++){
2667
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2668
                SubBand *b= &p->band[level][orientation];
2669
                
2670
                quantize(s, b, b->buf, b->stride, s->qbias);
2671
                if(orientation==0)
2672
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
2673
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2674
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
2675
                if(orientation==0)
2676
                    correlate(s, b, b->buf, b->stride, 1, 0);
2677
            }
2678
        }
2679
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
2680

    
2681
        for(level=0; level<s->spatial_decomposition_count; level++){
2682
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2683
                SubBand *b= &p->band[level][orientation];
2684

    
2685
                dequantize(s, b, b->buf, b->stride);
2686
            }
2687
        }
2688

    
2689
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2690
        if(s->qlog == LOSSLESS_QLOG){
2691
            for(y=0; y<h; y++){
2692
                for(x=0; x<w; x++){
2693
                    s->spatial_dwt_buffer[y*w + x]<<=8;
2694
                }
2695
            }
2696
        }
2697
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2698
        //FIXME optimize
2699
        for(y=0; y<h; y++){
2700
            for(x=0; x<w; x++){
2701
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2702
                if(v&(~255)) v= ~(v>>31);
2703
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2704
            }
2705
        }
2706
        if(s->avctx->flags&CODEC_FLAG_PSNR){
2707
            int64_t error= 0;
2708
            
2709
    if(pict->data[plane_index]) //FIXME gray hack
2710
            for(y=0; y<h; y++){
2711
                for(x=0; x<w; x++){
2712
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
2713
                    error += d*d;
2714
                }
2715
            }
2716
            s->avctx->error[plane_index] += error;
2717
            s->avctx->error[3] += error;
2718
        }
2719
    }
2720

    
2721
    if(s->last_picture.data[0])
2722
        avctx->release_buffer(avctx, &s->last_picture);
2723

    
2724
    emms_c();
2725
    
2726
    return put_cabac_terminate(c, 1);
2727
}
2728

    
2729
static void common_end(SnowContext *s){
2730
    int plane_index, level, orientation;
2731

    
2732
    av_freep(&s->spatial_dwt_buffer);
2733

    
2734
    av_freep(&s->m.me.scratchpad);    
2735
    av_freep(&s->m.me.map);
2736
    av_freep(&s->m.me.score_map);
2737
 
2738
    av_freep(&s->block);
2739

    
2740
    for(plane_index=0; plane_index<3; plane_index++){    
2741
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2742
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2743
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2744
                
2745
                av_freep(&b->x);
2746
                av_freep(&b->coeff);
2747
            }
2748
        }
2749
    }
2750
}
2751

    
2752
static int encode_end(AVCodecContext *avctx)
2753
{
2754
    SnowContext *s = avctx->priv_data;
2755

    
2756
    common_end(s);
2757

    
2758
    return 0;
2759
}
2760

    
2761
static int decode_init(AVCodecContext *avctx)
2762
{
2763
//    SnowContext *s = avctx->priv_data;
2764

    
2765
    common_init(avctx);
2766
    
2767
    return 0;
2768
}
2769

    
2770
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
2771
    SnowContext *s = avctx->priv_data;
2772
    CABACContext * const c= &s->c;
2773
    int bytes_read;
2774
    AVFrame *picture = data;
2775
    int level, orientation, plane_index;
2776
    
2777

    
2778
    /* no supplementary picture */
2779
    if (buf_size == 0)
2780
        return 0;
2781

    
2782
    ff_init_cabac_decoder(c, buf, buf_size);
2783
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2784

    
2785
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2786
    decode_header(s);
2787
    if(!s->block) alloc_blocks(s);
2788

    
2789
    frame_start(s);
2790
    //keyframe flag dupliaction mess FIXME
2791
    if(avctx->debug&FF_DEBUG_PICT_INFO)
2792
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2793
    
2794
    decode_blocks(s);
2795

    
2796
    for(plane_index=0; plane_index<3; plane_index++){
2797
        Plane *p= &s->plane[plane_index];
2798
        int w= p->width;
2799
        int h= p->height;
2800
        int x, y;
2801
        
2802
if(s->avctx->debug&2048){
2803
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2804
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2805

    
2806
        for(y=0; y<h; y++){
2807
            for(x=0; x<w; x++){
2808
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2809
                if(v&(~255)) v= ~(v>>31);
2810
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2811
            }
2812
        }
2813
}
2814
        for(level=0; level<s->spatial_decomposition_count; level++){
2815
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2816
                SubBand *b= &p->band[level][orientation];
2817

    
2818
                decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2819
                if(orientation==0){
2820
                    correlate(s, b, b->buf, b->stride, 1, 0);
2821
                    dequantize(s, b, b->buf, b->stride);
2822
                    assert(b->buf == s->spatial_dwt_buffer);
2823
                }
2824
            }
2825
        }
2826

    
2827
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2828
        if(s->qlog == LOSSLESS_QLOG){
2829
            for(y=0; y<h; y++){
2830
                for(x=0; x<w; x++){
2831
                    s->spatial_dwt_buffer[y*w + x]<<=8;
2832
                }
2833
            }
2834
        }
2835
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2836

    
2837
        //FIXME optimize
2838
        for(y=0; y<h; y++){
2839
            for(x=0; x<w; x++){
2840
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2841
                if(v&(~255)) v= ~(v>>31);
2842
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2843
            }
2844
        }
2845
    }
2846
            
2847
    emms_c();
2848

    
2849
    if(s->last_picture.data[0])
2850
        avctx->release_buffer(avctx, &s->last_picture);
2851

    
2852
if(!(s->avctx->debug&2048))        
2853
    *picture= s->current_picture;
2854
else
2855
    *picture= s->mconly_picture;
2856
    
2857
    *data_size = sizeof(AVFrame);
2858
    
2859
    bytes_read= get_cabac_terminate(c);
2860
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
2861

    
2862
    return bytes_read;
2863
}
2864

    
2865
static int decode_end(AVCodecContext *avctx)
2866
{
2867
    SnowContext *s = avctx->priv_data;
2868

    
2869
    common_end(s);
2870

    
2871
    return 0;
2872
}
2873

    
2874
AVCodec snow_decoder = {
2875
    "snow",
2876
    CODEC_TYPE_VIDEO,
2877
    CODEC_ID_SNOW,
2878
    sizeof(SnowContext),
2879
    decode_init,
2880
    NULL,
2881
    decode_end,
2882
    decode_frame,
2883
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2884
    NULL
2885
};
2886

    
2887
AVCodec snow_encoder = {
2888
    "snow",
2889
    CODEC_TYPE_VIDEO,
2890
    CODEC_ID_SNOW,
2891
    sizeof(SnowContext),
2892
    encode_init,
2893
    encode_frame,
2894
    encode_end,
2895
};
2896

    
2897

    
2898
#if 0
2899
#undef malloc
2900
#undef free
2901
#undef printf
2902

2903
int main(){
2904
    int width=256;
2905
    int height=256;
2906
    int buffer[2][width*height];
2907
    SnowContext s;
2908
    int i;
2909
    s.spatial_decomposition_count=6;
2910
    s.spatial_decomposition_type=1;
2911
    
2912
    printf("testing 5/3 DWT\n");
2913
    for(i=0; i<width*height; i++)
2914
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
2915
    
2916
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
2917
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
2918
    
2919
    for(i=0; i<width*height; i++)
2920
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
2921

2922
    printf("testing 9/7 DWT\n");
2923
    s.spatial_decomposition_type=0;
2924
    for(i=0; i<width*height; i++)
2925
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
2926
    
2927
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
2928
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
2929
    
2930
    for(i=0; i<width*height; i++)
2931
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
2932
        
2933
    printf("testing AC coder\n");
2934
    memset(s.header_state, 0, sizeof(s.header_state));
2935
    ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
2936
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2937
        
2938
    for(i=-256; i<256; i++){
2939
START_TIMER
2940
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
2941
STOP_TIMER("put_symbol")
2942
    }
2943
    put_cabac_terminate(&s.c, 1);
2944

2945
    memset(s.header_state, 0, sizeof(s.header_state));
2946
    ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
2947
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2948
    
2949
    for(i=-256; i<256; i++){
2950
        int j;
2951
START_TIMER
2952
        j= get_symbol(&s.c, s.header_state, 1);
2953
STOP_TIMER("get_symbol")
2954
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
2955
    }
2956
{
2957
int level, orientation, x, y;
2958
int64_t errors[8][4];
2959
int64_t g=0;
2960

2961
    memset(errors, 0, sizeof(errors));
2962
    s.spatial_decomposition_count=3;
2963
    s.spatial_decomposition_type=0;
2964
    for(level=0; level<s.spatial_decomposition_count; level++){
2965
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2966
            int w= width  >> (s.spatial_decomposition_count-level);
2967
            int h= height >> (s.spatial_decomposition_count-level);
2968
            int stride= width  << (s.spatial_decomposition_count-level);
2969
            DWTELEM *buf= buffer[0];
2970
            int64_t error=0;
2971

2972
            if(orientation&1) buf+=w;
2973
            if(orientation>1) buf+=stride>>1;
2974
            
2975
            memset(buffer[0], 0, sizeof(int)*width*height);
2976
            buf[w/2 + h/2*stride]= 256*256;
2977
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
2978
            for(y=0; y<height; y++){
2979
                for(x=0; x<width; x++){
2980
                    int64_t d= buffer[0][x + y*width];
2981
                    error += d*d;
2982
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
2983
                }
2984
                if(ABS(height/2-y)<9 && level==2) printf("\n");
2985
            }
2986
            error= (int)(sqrt(error)+0.5);
2987
            errors[level][orientation]= error;
2988
            if(g) g=ff_gcd(g, error);
2989
            else g= error;
2990
        }
2991
    }
2992
    printf("static int const visual_weight[][4]={\n");
2993
    for(level=0; level<s.spatial_decomposition_count; level++){
2994
        printf("  {");
2995
        for(orientation=0; orientation<4; orientation++){
2996
            printf("%8lld,", errors[level][orientation]/g);
2997
        }
2998
        printf("},\n");
2999
    }
3000
    printf("};\n");
3001
    {
3002
            int level=2;
3003
            int orientation=3;
3004
            int w= width  >> (s.spatial_decomposition_count-level);
3005
            int h= height >> (s.spatial_decomposition_count-level);
3006
            int stride= width  << (s.spatial_decomposition_count-level);
3007
            DWTELEM *buf= buffer[0];
3008
            int64_t error=0;
3009

3010
            buf+=w;
3011
            buf+=stride>>1;
3012
            
3013
            memset(buffer[0], 0, sizeof(int)*width*height);
3014
#if 1
3015
            for(y=0; y<height; y++){
3016
                for(x=0; x<width; x++){
3017
                    int tab[4]={0,2,3,1};
3018
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3019
                }
3020
            }
3021
            ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3022
#else
3023
            for(y=0; y<h; y++){
3024
                for(x=0; x<w; x++){
3025
                    buf[x + y*stride  ]=169;
3026
                    buf[x + y*stride-w]=64;
3027
                }
3028
            }
3029
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3030
#endif
3031
            for(y=0; y<height; y++){
3032
                for(x=0; x<width; x++){
3033
                    int64_t d= buffer[0][x + y*width];
3034
                    error += d*d;
3035
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3036
                }
3037
                if(ABS(height/2-y)<9) printf("\n");
3038
            }
3039
    }
3040

    
3041
}
3042
    return 0;
3043
}
3044
#endif
3045