Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ ec697587

History | View | Annotate | Download (105 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22
#include "cabac.h"
23

    
24
#include "mpegvideo.h"
25

    
26
#undef NDEBUG
27
#include <assert.h>
28

    
29
#define MAX_DECOMPOSITIONS 8
30
#define MAX_PLANES 4
31
#define DWTELEM int
32
#define QROOT 8 
33
#define LOSSLESS_QLOG -128
34

    
35
static const int8_t quant3[256]={
36
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
52
};
53
static const int8_t quant3b[256]={
54
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
};
71
static const int8_t quant5[256]={
72
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
73
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
74
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
75
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
81
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
82
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
83
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
88
};
89
static const int8_t quant7[256]={
90
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
93
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
94
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
95
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
96
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
99
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
100
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
101
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
106
};
107
static const int8_t quant9[256]={
108
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
110
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
111
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
112
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
117
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
118
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
119
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
124
};
125
static const int8_t quant11[256]={
126
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
127
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
129
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
130
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
131
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
135
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
136
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
137
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
142
};
143
static const int8_t quant13[256]={
144
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
145
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
148
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
149
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
150
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
153
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
154
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
155
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
160
};
161

    
162
#define OBMC_MAX 64
163
#if 0 //64*cubic
164
static const uint8_t obmc32[1024]={
165
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
167
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
168
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
169
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
170
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
171
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
172
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
173
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
174
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
175
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
176
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
177
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
178
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
179
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
180
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
181
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
182
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
183
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
184
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
185
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
186
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
187
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
190
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
191
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
194
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
195
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
196
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
197
//error:0.000022
198
};
199
static const uint8_t obmc16[256]={
200
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
201
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
202
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
203
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
204
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
205
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
206
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
207
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
208
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
209
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
210
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
211
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
212
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
213
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
214
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216
//error:0.000033
217
};
218
#elif 1 // 64*linear
219
static const uint8_t obmc32[1024]={
220
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
221
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
222
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
223
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
224
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
225
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
226
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
227
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
228
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
229
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
230
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
231
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
232
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
233
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
234
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
235
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
236
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
237
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
238
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
239
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
240
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
241
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
242
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
243
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
244
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
245
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
246
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
249
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
250
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
251
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
252
 //error:0.000020
253
};
254
static const uint8_t obmc16[256]={
255
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
256
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
257
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
258
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
259
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
260
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
261
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
262
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
263
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
264
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
265
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
266
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
267
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
268
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
269
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
270
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
271
//error:0.000015
272
};
273
#else //64*cos
274
static const uint8_t obmc32[1024]={
275
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
276
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
277
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
278
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
279
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
280
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
281
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
282
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
283
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
284
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
285
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
286
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
287
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
288
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
289
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
290
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
291
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
292
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
293
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
294
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
295
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
296
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
297
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
300
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
301
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
304
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
305
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
306
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307
//error:0.000022
308
};
309
static const uint8_t obmc16[256]={
310
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
311
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
312
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
313
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
314
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
315
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
316
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
317
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
318
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
319
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
320
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
321
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
322
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
323
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
324
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326
//error:0.000022
327
};
328
#endif
329

    
330
//linear *64
331
static const uint8_t obmc8[64]={
332
 1, 3, 5, 7, 7, 5, 3, 1,
333
 3, 9,15,21,21,15, 9, 3,
334
 5,15,25,35,35,25,15, 5,
335
 7,21,35,49,49,35,21, 7,
336
 7,21,35,49,49,35,21, 7,
337
 5,15,25,35,35,25,15, 5,
338
 3, 9,15,21,21,15, 9, 3,
339
 1, 3, 5, 7, 7, 5, 3, 1,
340
//error:0.000000
341
};
342

    
343
//linear *64
344
static const uint8_t obmc4[16]={
345
 4,12,12, 4,
346
12,36,36,12,
347
12,36,36,12,
348
 4,12,12, 4,
349
//error:0.000000
350
};
351

    
352
static const uint8_t *obmc_tab[4]={
353
    obmc32, obmc16, obmc8, obmc4
354
};
355

    
356
typedef struct BlockNode{
357
    int16_t mx;
358
    int16_t my;
359
    uint8_t color[3];
360
    uint8_t type;
361
//#define TYPE_SPLIT    1
362
#define BLOCK_INTRA   1
363
//#define TYPE_NOCOLOR  4
364
    uint8_t level; //FIXME merge into type?
365
}BlockNode;
366

    
367
#define LOG2_MB_SIZE 4
368
#define MB_SIZE (1<<LOG2_MB_SIZE)
369

    
370
typedef struct SubBand{
371
    int level;
372
    int stride;
373
    int width;
374
    int height;
375
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
376
    DWTELEM *buf;
377
    int16_t *x;
378
    DWTELEM *coeff;
379
    struct SubBand *parent;
380
    uint8_t state[/*7*2*/ 7 + 512][32];
381
}SubBand;
382

    
383
typedef struct Plane{
384
    int width;
385
    int height;
386
    SubBand band[MAX_DECOMPOSITIONS][4];
387
}Plane;
388

    
389
typedef struct SnowContext{
390
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
391

    
392
    AVCodecContext *avctx;
393
    CABACContext c;
394
    DSPContext dsp;
395
    AVFrame input_picture;
396
    AVFrame current_picture;
397
    AVFrame last_picture;
398
    AVFrame mconly_picture;
399
//     uint8_t q_context[16];
400
    uint8_t header_state[32];
401
    uint8_t block_state[128 + 32*128];
402
    int keyframe;
403
    int always_reset;
404
    int version;
405
    int spatial_decomposition_type;
406
    int temporal_decomposition_type;
407
    int spatial_decomposition_count;
408
    int temporal_decomposition_count;
409
    DWTELEM *spatial_dwt_buffer;
410
    DWTELEM *pred_buffer;
411
    int colorspace_type;
412
    int chroma_h_shift;
413
    int chroma_v_shift;
414
    int spatial_scalability;
415
    int qlog;
416
    int lambda;
417
    int lambda2;
418
    int mv_scale;
419
    int qbias;
420
#define QBIAS_SHIFT 3
421
    int b_width;
422
    int b_height;
423
    int block_max_depth;
424
    Plane plane[MAX_PLANES];
425
    BlockNode *block;
426

    
427
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
428
}SnowContext;
429

    
430
#define QEXPSHIFT 7 //FIXME try to change this to 0
431
static const uint8_t qexp[8]={
432
    128, 140, 152, 166, 181, 197, 215, 235
433
//   64,  70,  76,  83,  91,  99, 108, 117
434
//   32,  35,  38,  41,  45,  49,  54,  59
435
//   16,  17,  19,  21,  23,  25,  27,  29
436
//    8,   9,  10,  10,  11,  12,  13,  15
437
};
438

    
439
static inline int mirror(int v, int m){
440
    if     (v<0) return -v;
441
    else if(v>m) return 2*m-v;
442
    else         return v;
443
}
444

    
445
static inline void put_symbol(CABACContext *c, uint8_t *state, int v, int is_signed){
446
    int i;
447

    
448
    if(v){
449
        const int a= ABS(v);
450
        const int e= av_log2(a);
451
#if 1
452
        const int el= FFMIN(e, 10);   
453
        put_cabac(c, state+0, 0);
454

    
455
        for(i=0; i<el; i++){
456
            put_cabac(c, state+1+i, 1);  //1..10
457
        }
458
        for(; i<e; i++){
459
            put_cabac(c, state+1+9, 1);  //1..10
460
        }
461
        put_cabac(c, state+1+FFMIN(i,9), 0);
462

    
463
        for(i=e-1; i>=el; i--){
464
            put_cabac(c, state+22+9, (a>>i)&1); //22..31
465
        }
466
        for(; i>=0; i--){
467
            put_cabac(c, state+22+i, (a>>i)&1); //22..31
468
        }
469

    
470
        if(is_signed)
471
            put_cabac(c, state+11 + el, v < 0); //11..21
472
#else
473
        
474
        put_cabac(c, state+0, 0);
475
        if(e<=9){
476
            for(i=0; i<e; i++){
477
                put_cabac(c, state+1+i, 1);  //1..10
478
            }
479
            put_cabac(c, state+1+i, 0);
480

    
481
            for(i=e-1; i>=0; i--){
482
                put_cabac(c, state+22+i, (a>>i)&1); //22..31
483
            }
484

    
485
            if(is_signed)
486
                put_cabac(c, state+11 + e, v < 0); //11..21
487
        }else{
488
            for(i=0; i<e; i++){
489
                put_cabac(c, state+1+FFMIN(i,9), 1);  //1..10
490
            }
491
            put_cabac(c, state+1+FFMIN(i,9), 0);
492

    
493
            for(i=e-1; i>=0; i--){
494
                put_cabac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
495
            }
496

    
497
            if(is_signed)
498
                put_cabac(c, state+11 + FFMIN(e,10), v < 0); //11..21
499
        }
500
#endif
501
    }else{
502
        put_cabac(c, state+0, 1);
503
    }
504
}
505

    
506
static inline int get_symbol(CABACContext *c, uint8_t *state, int is_signed){
507
    if(get_cabac(c, state+0))
508
        return 0;
509
    else{
510
        int i, e, a, el;
511
 //FIXME try to merge loops with FFMIN() maybe they are equally fast and they are surly cuter
512
        for(e=0; e<10; e++){ 
513
            if(get_cabac(c, state + 1 + e)==0) // 1..10
514
                break;
515
        }
516
        el= e;
517
 
518
        if(e==10){
519
            while(get_cabac(c, state + 1 + 9)) //10
520
                e++;
521
        }
522
        a= 1;
523
        for(i=e-1; i>=el; i--){
524
            a += a + get_cabac(c, state+22+9); //31
525
        }
526
        for(; i>=0; i--){
527
            a += a + get_cabac(c, state+22+i); //22..31
528
        }
529

    
530
        if(is_signed && get_cabac(c, state+11 + el)) //11..21
531
            return -a;
532
        else
533
            return a;
534
    }
535
}
536

    
537
static inline void put_symbol2(CABACContext *c, uint8_t *state, int v, int log2){
538
    int i;
539
    int r= log2>=0 ? 1<<log2 : 1;
540

    
541
    assert(v>=0);
542
    assert(log2>=-4);
543

    
544
    while(v >= r){
545
        put_cabac(c, state+4+log2, 1);
546
        v -= r;
547
        log2++;
548
        if(log2>0) r+=r;
549
    }
550
    put_cabac(c, state+4+log2, 0);
551
    
552
    for(i=log2-1; i>=0; i--){
553
        put_cabac(c, state+31-i, (v>>i)&1);
554
    }
555
}
556

    
557
static inline int get_symbol2(CABACContext *c, uint8_t *state, int log2){
558
    int i;
559
    int r= log2>=0 ? 1<<log2 : 1;
560
    int v=0;
561

    
562
    assert(log2>=-4);
563

    
564
    while(get_cabac(c, state+4+log2)){
565
        v+= r;
566
        log2++;
567
        if(log2>0) r+=r;
568
    }
569
    
570
    for(i=log2-1; i>=0; i--){
571
        v+= get_cabac(c, state+31-i)<<i;
572
    }
573

    
574
    return v;
575
}
576

    
577
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
578
    const int mirror_left= !highpass;
579
    const int mirror_right= (width&1) ^ highpass;
580
    const int w= (width>>1) - 1 + (highpass & width);
581
    int i;
582

    
583
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
584
    if(mirror_left){
585
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
586
        dst += dst_step;
587
        src += src_step;
588
    }
589
    
590
    for(i=0; i<w; i++){
591
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
592
    }
593
    
594
    if(mirror_right){
595
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
596
    }
597
}
598

    
599
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
600
    const int mirror_left= !highpass;
601
    const int mirror_right= (width&1) ^ highpass;
602
    const int w= (width>>1) - 1 + (highpass & width);
603
    int i;
604

    
605
    if(mirror_left){
606
        int r= 3*2*ref[0];
607
        r += r>>4;
608
        r += r>>8;
609
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
610
        dst += dst_step;
611
        src += src_step;
612
    }
613
    
614
    for(i=0; i<w; i++){
615
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
616
        r += r>>4;
617
        r += r>>8;
618
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
619
    }
620
    
621
    if(mirror_right){
622
        int r= 3*2*ref[w*ref_step];
623
        r += r>>4;
624
        r += r>>8;
625
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
626
    }
627
}
628

    
629

    
630
static void inplace_lift(int *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
631
    int x, i;
632
    
633
    for(x=start; x<width; x+=2){
634
        int64_t sum=0;
635

    
636
        for(i=0; i<n; i++){
637
            int x2= x + 2*i - n + 1;
638
            if     (x2<     0) x2= -x2;
639
            else if(x2>=width) x2= 2*width-x2-2;
640
            sum += coeffs[i]*(int64_t)dst[x2];
641
        }
642
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
643
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
644
    }
645
}
646

    
647
static void inplace_liftV(int *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
648
    int x, y, i;
649
    for(y=start; y<height; y+=2){
650
        for(x=0; x<width; x++){
651
            int64_t sum=0;
652
    
653
            for(i=0; i<n; i++){
654
                int y2= y + 2*i - n + 1;
655
                if     (y2<      0) y2= -y2;
656
                else if(y2>=height) y2= 2*height-y2-2;
657
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
658
            }
659
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
660
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
661
        }
662
    }
663
}
664

    
665
#define SCALEX 1
666
#define LX0 0
667
#define LX1 1
668

    
669
#if 0 // more accurate 9/7
670
#define N1 2
671
#define SHIFT1 14
672
#define COEFFS1 (int[]){-25987,-25987}
673
#define N2 2
674
#define SHIFT2 19
675
#define COEFFS2 (int[]){-27777,-27777}
676
#define N3 2
677
#define SHIFT3 15
678
#define COEFFS3 (int[]){28931,28931}
679
#define N4 2
680
#define SHIFT4 15
681
#define COEFFS4 (int[]){14533,14533}
682
#elif 1 // 13/7 CRF
683
#define N1 4
684
#define SHIFT1 4
685
#define COEFFS1 (int[]){1,-9,-9,1}
686
#define N2 4
687
#define SHIFT2 4
688
#define COEFFS2 (int[]){-1,5,5,-1}
689
#define N3 0
690
#define SHIFT3 1
691
#define COEFFS3 NULL
692
#define N4 0
693
#define SHIFT4 1
694
#define COEFFS4 NULL
695
#elif 1 // 3/5
696
#define LX0 1
697
#define LX1 0
698
#define SCALEX 0.5
699
#define N1 2
700
#define SHIFT1 1
701
#define COEFFS1 (int[]){1,1}
702
#define N2 2
703
#define SHIFT2 2
704
#define COEFFS2 (int[]){-1,-1}
705
#define N3 0
706
#define SHIFT3 0
707
#define COEFFS3 NULL
708
#define N4 0
709
#define SHIFT4 0
710
#define COEFFS4 NULL
711
#elif 1 // 11/5 
712
#define N1 0
713
#define SHIFT1 1
714
#define COEFFS1 NULL
715
#define N2 2
716
#define SHIFT2 2
717
#define COEFFS2 (int[]){-1,-1}
718
#define N3 2
719
#define SHIFT3 0
720
#define COEFFS3 (int[]){-1,-1}
721
#define N4 4
722
#define SHIFT4 7
723
#define COEFFS4 (int[]){-5,29,29,-5}
724
#define SCALEX 4
725
#elif 1 // 9/7 CDF
726
#define N1 2
727
#define SHIFT1 7
728
#define COEFFS1 (int[]){-203,-203}
729
#define N2 2
730
#define SHIFT2 12
731
#define COEFFS2 (int[]){-217,-217}
732
#define N3 2
733
#define SHIFT3 7
734
#define COEFFS3 (int[]){113,113}
735
#define N4 2
736
#define SHIFT4 9
737
#define COEFFS4 (int[]){227,227}
738
#define SCALEX 1
739
#elif 1 // 7/5 CDF
740
#define N1 0
741
#define SHIFT1 1
742
#define COEFFS1 NULL
743
#define N2 2
744
#define SHIFT2 2
745
#define COEFFS2 (int[]){-1,-1}
746
#define N3 2
747
#define SHIFT3 0
748
#define COEFFS3 (int[]){-1,-1}
749
#define N4 2
750
#define SHIFT4 4
751
#define COEFFS4 (int[]){3,3}
752
#elif 1 // 9/7 MN
753
#define N1 4
754
#define SHIFT1 4
755
#define COEFFS1 (int[]){1,-9,-9,1}
756
#define N2 2
757
#define SHIFT2 2
758
#define COEFFS2 (int[]){1,1}
759
#define N3 0
760
#define SHIFT3 1
761
#define COEFFS3 NULL
762
#define N4 0
763
#define SHIFT4 1
764
#define COEFFS4 NULL
765
#else // 13/7 CRF
766
#define N1 4
767
#define SHIFT1 4
768
#define COEFFS1 (int[]){1,-9,-9,1}
769
#define N2 4
770
#define SHIFT2 4
771
#define COEFFS2 (int[]){-1,5,5,-1}
772
#define N3 0
773
#define SHIFT3 1
774
#define COEFFS3 NULL
775
#define N4 0
776
#define SHIFT4 1
777
#define COEFFS4 NULL
778
#endif
779
static void horizontal_decomposeX(int *b, int width){
780
    int temp[width];
781
    const int width2= width>>1;
782
    const int w2= (width+1)>>1;
783
    int A1,A2,A3,A4, x;
784

    
785
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
786
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
787
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
788
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
789
    
790
    for(x=0; x<width2; x++){
791
        temp[x   ]= b[2*x    ];
792
        temp[x+w2]= b[2*x + 1];
793
    }
794
    if(width&1)
795
        temp[x   ]= b[2*x    ];
796
    memcpy(b, temp, width*sizeof(int));
797
}
798

    
799
static void horizontal_composeX(int *b, int width){
800
    int temp[width];
801
    const int width2= width>>1;
802
    int A1,A2,A3,A4, x;
803
    const int w2= (width+1)>>1;
804

    
805
    memcpy(temp, b, width*sizeof(int));
806
    for(x=0; x<width2; x++){
807
        b[2*x    ]= temp[x   ];
808
        b[2*x + 1]= temp[x+w2];
809
    }
810
    if(width&1)
811
        b[2*x    ]= temp[x   ];
812

    
813
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
814
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
815
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
816
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
817
}
818

    
819
static void spatial_decomposeX(int *buffer, int width, int height, int stride){
820
    int x, y;
821
  
822
    for(y=0; y<height; y++){
823
        for(x=0; x<width; x++){
824
            buffer[y*stride + x] *= SCALEX;
825
        }
826
    }
827

    
828
    for(y=0; y<height; y++){
829
        horizontal_decomposeX(buffer + y*stride, width);
830
    }
831
    
832
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
833
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
834
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
835
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
836
}
837

    
838
static void spatial_composeX(int *buffer, int width, int height, int stride){
839
    int x, y;
840
  
841
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
842
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
843
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
844
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
845

    
846
    for(y=0; y<height; y++){
847
        horizontal_composeX(buffer + y*stride, width);
848
    }
849

    
850
    for(y=0; y<height; y++){
851
        for(x=0; x<width; x++){
852
            buffer[y*stride + x] /= SCALEX;
853
        }
854
    }
855
}
856

    
857
static void horizontal_decompose53i(int *b, int width){
858
    int temp[width];
859
    const int width2= width>>1;
860
    int A1,A2,A3,A4, x;
861
    const int w2= (width+1)>>1;
862

    
863
    for(x=0; x<width2; x++){
864
        temp[x   ]= b[2*x    ];
865
        temp[x+w2]= b[2*x + 1];
866
    }
867
    if(width&1)
868
        temp[x   ]= b[2*x    ];
869
#if 0
870
    A2= temp[1       ];
871
    A4= temp[0       ];
872
    A1= temp[0+width2];
873
    A1 -= (A2 + A4)>>1;
874
    A4 += (A1 + 1)>>1;
875
    b[0+width2] = A1;
876
    b[0       ] = A4;
877
    for(x=1; x+1<width2; x+=2){
878
        A3= temp[x+width2];
879
        A4= temp[x+1     ];
880
        A3 -= (A2 + A4)>>1;
881
        A2 += (A1 + A3 + 2)>>2;
882
        b[x+width2] = A3;
883
        b[x       ] = A2;
884

885
        A1= temp[x+1+width2];
886
        A2= temp[x+2       ];
887
        A1 -= (A2 + A4)>>1;
888
        A4 += (A1 + A3 + 2)>>2;
889
        b[x+1+width2] = A1;
890
        b[x+1       ] = A4;
891
    }
892
    A3= temp[width-1];
893
    A3 -= A2;
894
    A2 += (A1 + A3 + 2)>>2;
895
    b[width -1] = A3;
896
    b[width2-1] = A2;
897
#else        
898
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
899
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
900
#endif
901
}
902

    
903
static void vertical_decompose53iH0(int *b0, int *b1, int *b2, int width){
904
    int i;
905
    
906
    for(i=0; i<width; i++){
907
        b1[i] -= (b0[i] + b2[i])>>1;
908
    }
909
}
910

    
911
static void vertical_decompose53iL0(int *b0, int *b1, int *b2, int width){
912
    int i;
913
    
914
    for(i=0; i<width; i++){
915
        b1[i] += (b0[i] + b2[i] + 2)>>2;
916
    }
917
}
918

    
919
static void spatial_decompose53i(int *buffer, int width, int height, int stride){
920
    int y;
921
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
922
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
923
  
924
    for(y=-2; y<height; y+=2){
925
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
926
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
927

    
928
{START_TIMER
929
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
930
        if(y+2 < height) horizontal_decompose53i(b3, width);
931
STOP_TIMER("horizontal_decompose53i")}
932
        
933
{START_TIMER
934
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
935
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
936
STOP_TIMER("vertical_decompose53i*")}
937
        
938
        b0=b2;
939
        b1=b3;
940
    }
941
}
942

    
943
#define lift5 lift
944
#if 1
945
#define W_AM 3
946
#define W_AO 0
947
#define W_AS 1
948

    
949
#define W_BM 1
950
#define W_BO 8
951
#define W_BS 4
952

    
953
#undef lift5
954
#define W_CM 9999
955
#define W_CO 2
956
#define W_CS 2
957

    
958
#define W_DM 15
959
#define W_DO 16
960
#define W_DS 5
961
#elif 0
962
#define W_AM 55
963
#define W_AO 16
964
#define W_AS 5
965

    
966
#define W_BM 3
967
#define W_BO 32
968
#define W_BS 6
969

    
970
#define W_CM 127
971
#define W_CO 64
972
#define W_CS 7
973

    
974
#define W_DM 7
975
#define W_DO 8
976
#define W_DS 4
977
#elif 0
978
#define W_AM 97
979
#define W_AO 32
980
#define W_AS 6
981

    
982
#define W_BM 63
983
#define W_BO 512
984
#define W_BS 10
985

    
986
#define W_CM 13
987
#define W_CO 8
988
#define W_CS 4
989

    
990
#define W_DM 15
991
#define W_DO 16
992
#define W_DS 5
993

    
994
#else
995

    
996
#define W_AM 203
997
#define W_AO 64
998
#define W_AS 7
999

    
1000
#define W_BM 217
1001
#define W_BO 2048
1002
#define W_BS 12
1003

    
1004
#define W_CM 113
1005
#define W_CO 64
1006
#define W_CS 7
1007

    
1008
#define W_DM 227
1009
#define W_DO 128
1010
#define W_DS 9
1011
#endif
1012
static void horizontal_decompose97i(int *b, int width){
1013
    int temp[width];
1014
    const int w2= (width+1)>>1;
1015

    
1016
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1017
    lift (temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1018
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1019
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1020
}
1021

    
1022

    
1023
static void vertical_decompose97iH0(int *b0, int *b1, int *b2, int width){
1024
    int i;
1025
    
1026
    for(i=0; i<width; i++){
1027
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1028
    }
1029
}
1030

    
1031
static void vertical_decompose97iH1(int *b0, int *b1, int *b2, int width){
1032
    int i;
1033
    
1034
    for(i=0; i<width; i++){
1035
#ifdef lift5
1036
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1037
#else
1038
        int r= 3*(b0[i] + b2[i]);
1039
        r+= r>>4;
1040
        r+= r>>8;
1041
        b1[i] += (r+W_CO)>>W_CS;
1042
#endif
1043
    }
1044
}
1045

    
1046
static void vertical_decompose97iL0(int *b0, int *b1, int *b2, int width){
1047
    int i;
1048
    
1049
    for(i=0; i<width; i++){
1050
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1051
    }
1052
}
1053

    
1054
static void vertical_decompose97iL1(int *b0, int *b1, int *b2, int width){
1055
    int i;
1056
    
1057
    for(i=0; i<width; i++){
1058
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1059
    }
1060
}
1061

    
1062
static void spatial_decompose97i(int *buffer, int width, int height, int stride){
1063
    int y;
1064
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1065
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1066
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1067
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1068
  
1069
    for(y=-4; y<height; y+=2){
1070
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1071
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1072

    
1073
{START_TIMER
1074
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1075
        if(y+4 < height) horizontal_decompose97i(b5, width);
1076
if(width>400){
1077
STOP_TIMER("horizontal_decompose97i")
1078
}}
1079
        
1080
{START_TIMER
1081
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1082
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1083
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1084
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1085

    
1086
if(width>400){
1087
STOP_TIMER("vertical_decompose97i")
1088
}}
1089
        
1090
        b0=b2;
1091
        b1=b3;
1092
        b2=b4;
1093
        b3=b5;
1094
    }
1095
}
1096

    
1097
void ff_spatial_dwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
1098
    int level;
1099
    
1100
    for(level=0; level<decomposition_count; level++){
1101
        switch(type){
1102
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1103
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1104
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1105
        }
1106
    }
1107
}
1108

    
1109
static void horizontal_compose53i(int *b, int width){
1110
    int temp[width];
1111
    const int width2= width>>1;
1112
    const int w2= (width+1)>>1;
1113
    int A1,A2,A3,A4, x;
1114

    
1115
#if 0
1116
    A2= temp[1       ];
1117
    A4= temp[0       ];
1118
    A1= temp[0+width2];
1119
    A1 -= (A2 + A4)>>1;
1120
    A4 += (A1 + 1)>>1;
1121
    b[0+width2] = A1;
1122
    b[0       ] = A4;
1123
    for(x=1; x+1<width2; x+=2){
1124
        A3= temp[x+width2];
1125
        A4= temp[x+1     ];
1126
        A3 -= (A2 + A4)>>1;
1127
        A2 += (A1 + A3 + 2)>>2;
1128
        b[x+width2] = A3;
1129
        b[x       ] = A2;
1130

1131
        A1= temp[x+1+width2];
1132
        A2= temp[x+2       ];
1133
        A1 -= (A2 + A4)>>1;
1134
        A4 += (A1 + A3 + 2)>>2;
1135
        b[x+1+width2] = A1;
1136
        b[x+1       ] = A4;
1137
    }
1138
    A3= temp[width-1];
1139
    A3 -= A2;
1140
    A2 += (A1 + A3 + 2)>>2;
1141
    b[width -1] = A3;
1142
    b[width2-1] = A2;
1143
#else   
1144
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1145
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1146
#endif
1147
    for(x=0; x<width2; x++){
1148
        b[2*x    ]= temp[x   ];
1149
        b[2*x + 1]= temp[x+w2];
1150
    }
1151
    if(width&1)
1152
        b[2*x    ]= temp[x   ];
1153
}
1154

    
1155
static void vertical_compose53iH0(int *b0, int *b1, int *b2, int width){
1156
    int i;
1157
    
1158
    for(i=0; i<width; i++){
1159
        b1[i] += (b0[i] + b2[i])>>1;
1160
    }
1161
}
1162

    
1163
static void vertical_compose53iL0(int *b0, int *b1, int *b2, int width){
1164
    int i;
1165
    
1166
    for(i=0; i<width; i++){
1167
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1168
    }
1169
}
1170

    
1171
static void spatial_compose53i(int *buffer, int width, int height, int stride){
1172
    int y;
1173
    DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1174
    DWTELEM *b1= buffer + mirror(-1  , height-1)*stride;
1175
  
1176
    for(y=-1; y<=height; y+=2){
1177
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1178
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1179

    
1180
{START_TIMER
1181
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1182
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1183
STOP_TIMER("vertical_compose53i*")}
1184

    
1185
{START_TIMER
1186
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1187
        if(b0 <= b2) horizontal_compose53i(b1, width);
1188
STOP_TIMER("horizontal_compose53i")}
1189

    
1190
        b0=b2;
1191
        b1=b3;
1192
    }
1193
}   
1194

    
1195
 
1196
static void horizontal_compose97i(int *b, int width){
1197
    int temp[width];
1198
    const int w2= (width+1)>>1;
1199

    
1200
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1201
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1202
    lift (b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1203
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1204
}
1205

    
1206
static void vertical_compose97iH0(int *b0, int *b1, int *b2, int width){
1207
    int i;
1208
    
1209
    for(i=0; i<width; i++){
1210
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1211
    }
1212
}
1213

    
1214
static void vertical_compose97iH1(int *b0, int *b1, int *b2, int width){
1215
    int i;
1216
    
1217
    for(i=0; i<width; i++){
1218
#ifdef lift5
1219
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1220
#else
1221
        int r= 3*(b0[i] + b2[i]);
1222
        r+= r>>4;
1223
        r+= r>>8;
1224
        b1[i] -= (r+W_CO)>>W_CS;
1225
#endif
1226
    }
1227
}
1228

    
1229
static void vertical_compose97iL0(int *b0, int *b1, int *b2, int width){
1230
    int i;
1231
    
1232
    for(i=0; i<width; i++){
1233
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1234
    }
1235
}
1236

    
1237
static void vertical_compose97iL1(int *b0, int *b1, int *b2, int width){
1238
    int i;
1239
    
1240
    for(i=0; i<width; i++){
1241
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1242
    }
1243
}
1244

    
1245
static void spatial_compose97i(int *buffer, int width, int height, int stride){
1246
    int y;
1247
    DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1248
    DWTELEM *b1= buffer + mirror(-3  , height-1)*stride;
1249
    DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1250
    DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1251

    
1252
    for(y=-3; y<=height; y+=2){
1253
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1254
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1255

    
1256
        if(stride == width && y+4 < height && 0){ 
1257
            int x;
1258
            for(x=0; x<width/2; x++)
1259
                b5[x] += 64*2;
1260
            for(; x<width; x++)
1261
                b5[x] += 169*2;
1262
        }
1263
        
1264
{START_TIMER
1265
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1266
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1267
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1268
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1269
if(width>400){
1270
STOP_TIMER("vertical_compose97i")}}
1271

    
1272
{START_TIMER
1273
        if(y-1>=  0) horizontal_compose97i(b0, width);
1274
        if(b0 <= b2) horizontal_compose97i(b1, width);
1275
if(width>400 && b0 <= b2){
1276
STOP_TIMER("horizontal_compose97i")}}
1277
        
1278
        b0=b2;
1279
        b1=b3;
1280
        b2=b4;
1281
        b3=b5;
1282
    }
1283
}
1284

    
1285
void ff_spatial_idwt(int *buffer, int width, int height, int stride, int type, int decomposition_count){
1286
    int level;
1287

    
1288
    for(level=decomposition_count-1; level>=0; level--){
1289
        switch(type){
1290
        case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1291
        case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1292
        case 2: spatial_composeX  (buffer, width>>level, height>>level, stride<<level); break;
1293
        }
1294
    }
1295
}
1296

    
1297
static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1298
    const int w= b->width;
1299
    const int h= b->height;
1300
    int x, y;
1301

    
1302
    if(1){
1303
        int run=0;
1304
        int runs[w*h];
1305
        int run_index=0;
1306
                
1307
        for(y=0; y<h; y++){
1308
            for(x=0; x<w; x++){
1309
                int v, p=0;
1310
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1311
                v= src[x + y*stride];
1312

    
1313
                if(y){
1314
                    t= src[x + (y-1)*stride];
1315
                    if(x){
1316
                        lt= src[x - 1 + (y-1)*stride];
1317
                    }
1318
                    if(x + 1 < w){
1319
                        rt= src[x + 1 + (y-1)*stride];
1320
                    }
1321
                }
1322
                if(x){
1323
                    l= src[x - 1 + y*stride];
1324
                    /*if(x > 1){
1325
                        if(orientation==1) ll= src[y + (x-2)*stride];
1326
                        else               ll= src[x - 2 + y*stride];
1327
                    }*/
1328
                }
1329
                if(parent){
1330
                    int px= x>>1;
1331
                    int py= y>>1;
1332
                    if(px<b->parent->width && py<b->parent->height) 
1333
                        p= parent[px + py*2*stride];
1334
                }
1335
                if(!(/*ll|*/l|lt|t|rt|p)){
1336
                    if(v){
1337
                        runs[run_index++]= run;
1338
                        run=0;
1339
                    }else{
1340
                        run++;
1341
                    }
1342
                }
1343
            }
1344
        }
1345
        runs[run_index++]= run;
1346
        run_index=0;
1347
        run= runs[run_index++];
1348

    
1349
        put_symbol2(&s->c, b->state[1], run, 3);
1350
        
1351
        for(y=0; y<h; y++){
1352
            for(x=0; x<w; x++){
1353
                int v, p=0;
1354
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1355
                v= src[x + y*stride];
1356

    
1357
                if(y){
1358
                    t= src[x + (y-1)*stride];
1359
                    if(x){
1360
                        lt= src[x - 1 + (y-1)*stride];
1361
                    }
1362
                    if(x + 1 < w){
1363
                        rt= src[x + 1 + (y-1)*stride];
1364
                    }
1365
                }
1366
                if(x){
1367
                    l= src[x - 1 + y*stride];
1368
                    /*if(x > 1){
1369
                        if(orientation==1) ll= src[y + (x-2)*stride];
1370
                        else               ll= src[x - 2 + y*stride];
1371
                    }*/
1372
                }
1373
                if(parent){
1374
                    int px= x>>1;
1375
                    int py= y>>1;
1376
                    if(px<b->parent->width && py<b->parent->height) 
1377
                        p= parent[px + py*2*stride];
1378
                }
1379
                if(/*ll|*/l|lt|t|rt|p){
1380
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1381

    
1382
                    put_cabac(&s->c, &b->state[0][context], !!v);
1383
                }else{
1384
                    if(!run){
1385
                        run= runs[run_index++];
1386

    
1387
                        put_symbol2(&s->c, b->state[1], run, 3);
1388
                        assert(v);
1389
                    }else{
1390
                        run--;
1391
                        assert(!v);
1392
                    }
1393
                }
1394
                if(v){
1395
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1396

    
1397
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1398
                    put_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1399
                }
1400
            }
1401
        }
1402
    }
1403
}
1404

    
1405
static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1406
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1407
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1408
    encode_subband_c0run(s, b, src, parent, stride, orientation);
1409
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1410
}
1411

    
1412
static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1413
    const int w= b->width;
1414
    const int h= b->height;
1415
    int x,y;
1416
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
1417
    int qmul= qexp[qlog&7]<<(qlog>>3);
1418
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1419
    
1420
    START_TIMER
1421

    
1422
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1423
        qadd= 0;
1424
        qmul= 1<<QEXPSHIFT;
1425
    }
1426

    
1427
    if(1){
1428
        int run;
1429
        int index=0;
1430
        int prev_index=-1;
1431
        int prev2_index=0;
1432
        int parent_index= 0;
1433
        int prev_parent_index= 0;
1434
        
1435
        for(y=0; y<b->height; y++)
1436
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1437

    
1438
        run= get_symbol2(&s->c, b->state[1], 3);
1439
        for(y=0; y<h; y++){
1440
            int v=0;
1441
            int lt=0, t=0, rt=0;
1442

    
1443
            if(y && b->x[prev_index] == 0){
1444
                rt= b->coeff[prev_index];
1445
            }
1446
            for(x=0; x<w; x++){
1447
                int p=0;
1448
                const int l= v;
1449
                
1450
                lt= t; t= rt;
1451

    
1452
                if(y){
1453
                    if(b->x[prev_index] <= x)
1454
                        prev_index++;
1455
                    if(b->x[prev_index] == x + 1)
1456
                        rt= b->coeff[prev_index];
1457
                    else
1458
                        rt=0;
1459
                }
1460
                if(parent){
1461
                    if(x>>1 > b->parent->x[parent_index]){
1462
                        parent_index++;
1463
                    }
1464
                    if(x>>1 == b->parent->x[parent_index]){
1465
                        p= b->parent->coeff[parent_index];
1466
                    }
1467
                }
1468
                if(/*ll|*/l|lt|t|rt|p){
1469
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1470

    
1471
                    v=get_cabac(&s->c, &b->state[0][context]);
1472
                }else{
1473
                    if(!run){
1474
                        run= get_symbol2(&s->c, b->state[1], 3);
1475
                        v=1;
1476
                    }else{
1477
                        run--;
1478
                        v=0;
1479

    
1480
                        if(y && parent){
1481
                            int max_run;
1482

    
1483
                            max_run= FFMIN(run, b->x[prev_index] - x - 2);
1484
                            max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1485
                            x+= max_run;
1486
                            run-= max_run;
1487
                        }
1488
                    }
1489
                }
1490
                if(v){
1491
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1492
                    v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
1493
                    if(get_cabac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
1494
                        src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
1495
                        v= -v;
1496
                    }else{
1497
                        src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1498
                    }
1499
                    b->x[index]=x; //FIXME interleave x/coeff
1500
                    b->coeff[index++]= v;
1501
                }
1502
            }
1503
            b->x[index++]= w+1; //end marker
1504
            prev_index= prev2_index;
1505
            prev2_index= index;
1506
            
1507
            if(parent){
1508
                while(b->parent->x[parent_index] != b->parent->width+1)
1509
                    parent_index++;
1510
                parent_index++;
1511
                if(y&1){
1512
                    prev_parent_index= parent_index;
1513
                }else{
1514
                    parent_index= prev_parent_index;
1515
                }
1516
            }
1517
        }
1518
        b->x[index++]= w+1; //end marker
1519
        if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
1520
            STOP_TIMER("decode_subband")
1521
        }
1522
        
1523
        return;
1524
    }
1525
}
1526

    
1527
static void reset_contexts(SnowContext *s){
1528
    int plane_index, level, orientation;
1529

    
1530
    for(plane_index=0; plane_index<3; plane_index++){
1531
        for(level=0; level<s->spatial_decomposition_count; level++){
1532
            for(orientation=level ? 1:0; orientation<4; orientation++){
1533
                memset(s->plane[plane_index].band[level][orientation].state, 0, sizeof(s->plane[plane_index].band[level][orientation].state));
1534
            }
1535
        }
1536
    }
1537
    memset(s->header_state, 0, sizeof(s->header_state));
1538
    memset(s->block_state, 0, sizeof(s->block_state));
1539
}
1540

    
1541
static int alloc_blocks(SnowContext *s){
1542
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1543
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1544
    
1545
    s->b_width = w;
1546
    s->b_height= h;
1547
    
1548
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1549
    return 0;
1550
}
1551

    
1552
static inline void copy_cabac_state(CABACContext *d, CABACContext *s){
1553
    PutBitContext bak= d->pb;
1554
    *d= *s;
1555
    d->pb= bak;
1556
}
1557

    
1558
//near copy & paste from dsputil, FIXME
1559
static int pix_sum(uint8_t * pix, int line_size, int w)
1560
{
1561
    int s, i, j;
1562

    
1563
    s = 0;
1564
    for (i = 0; i < w; i++) {
1565
        for (j = 0; j < w; j++) {
1566
            s += pix[0];
1567
            pix ++;
1568
        }
1569
        pix += line_size - w;
1570
    }
1571
    return s;
1572
}
1573

    
1574
//near copy & paste from dsputil, FIXME
1575
static int pix_norm1(uint8_t * pix, int line_size, int w)
1576
{
1577
    int s, i, j;
1578
    uint32_t *sq = squareTbl + 256;
1579

    
1580
    s = 0;
1581
    for (i = 0; i < w; i++) {
1582
        for (j = 0; j < w; j ++) {
1583
            s += sq[pix[0]];
1584
            pix ++;
1585
        }
1586
        pix += line_size - w;
1587
    }
1588
    return s;
1589
}
1590

    
1591
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1592
    const int w= s->b_width << s->block_max_depth;
1593
    const int rem_depth= s->block_max_depth - level;
1594
    const int index= (x + y*w) << rem_depth;
1595
    const int block_w= 1<<rem_depth;
1596
    BlockNode block;
1597
    int i,j;
1598
    
1599
    block.color[0]= l;
1600
    block.color[1]= cb;
1601
    block.color[2]= cr;
1602
    block.mx= mx;
1603
    block.my= my;
1604
    block.type= type;
1605
    block.level= level;
1606

    
1607
    for(j=0; j<block_w; j++){
1608
        for(i=0; i<block_w; i++){
1609
            s->block[index + i + j*w]= block;
1610
        }
1611
    }
1612
}
1613

    
1614
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1615
    const int offset[3]= {
1616
          y*c->  stride + x,
1617
        ((y*c->uvstride + x)>>1),
1618
        ((y*c->uvstride + x)>>1),
1619
    };
1620
    int i;
1621
    for(i=0; i<3; i++){
1622
        c->src[0][i]= src [i];
1623
        c->ref[0][i]= ref [i] + offset[i];
1624
    }
1625
    assert(!ref_index);
1626
}
1627

    
1628
//FIXME copy&paste
1629
#define P_LEFT P[1]
1630
#define P_TOP P[2]
1631
#define P_TOPRIGHT P[3]
1632
#define P_MEDIAN P[4]
1633
#define P_MV1 P[9]
1634
#define FLAG_QPEL   1 //must be 1
1635

    
1636
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1637
    uint8_t p_buffer[1024];
1638
    uint8_t i_buffer[1024];
1639
    uint8_t p_state[sizeof(s->block_state)];
1640
    uint8_t i_state[sizeof(s->block_state)];
1641
    CABACContext pc, ic;
1642
    PutBitContext pbbak= s->c.pb;
1643
    int score, score2, iscore, i_len, p_len, block_s, sum;
1644
    const int w= s->b_width  << s->block_max_depth;
1645
    const int h= s->b_height << s->block_max_depth;
1646
    const int rem_depth= s->block_max_depth - level;
1647
    const int index= (x + y*w) << rem_depth;
1648
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1649
    static BlockNode null_block= { //FIXME add border maybe
1650
        .color= {128,128,128},
1651
        .mx= 0,
1652
        .my= 0,
1653
        .type= 0,
1654
        .level= 0,
1655
    };
1656
    int trx= (x+1)<<rem_depth;
1657
    int try= (y+1)<<rem_depth;
1658
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
1659
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
1660
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1661
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1662
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1663
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1664
    int pl = left->color[0];
1665
    int pcb= left->color[1];
1666
    int pcr= left->color[2];
1667
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
1668
    int pmy= mid_pred(left->my, top->my, tr->my);
1669
    int mx=0, my=0;
1670
    int l,cr,cb, i;
1671
    const int stride= s->current_picture.linesize[0];
1672
    const int uvstride= s->current_picture.linesize[1];
1673
    const int instride= s->input_picture.linesize[0];
1674
    const int uvinstride= s->input_picture.linesize[1];
1675
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
1676
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1677
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
1678
    uint8_t current_mb[3][stride*block_w];
1679
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1680
    int P[10][2];
1681
    int16_t last_mv[3][2];
1682
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1683
    const int shift= 1+qpel;
1684
    MotionEstContext *c= &s->m.me;
1685
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
1686
    int my_context= av_log2(2*ABS(left->my - top->my));
1687
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1688

    
1689
    assert(sizeof(s->block_state) >= 256);
1690
    if(s->keyframe){
1691
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1692
        return 0;
1693
    }
1694

    
1695
    //FIXME optimize
1696
    for(i=0; i<block_w; i++)
1697
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
1698
    for(i=0; i<block_w>>1; i++)
1699
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1700
    for(i=0; i<block_w>>1; i++)
1701
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1702

    
1703
//    clip predictors / edge ?
1704

    
1705
    P_LEFT[0]= left->mx;
1706
    P_LEFT[1]= left->my;
1707
    P_TOP [0]= top->mx;
1708
    P_TOP [1]= top->my;
1709
    P_TOPRIGHT[0]= tr->mx;
1710
    P_TOPRIGHT[1]= tr->my;
1711
    
1712
    last_mv[0][0]= s->block[index].mx;
1713
    last_mv[0][1]= s->block[index].my;
1714
    last_mv[1][0]= right->mx;
1715
    last_mv[1][1]= right->my;
1716
    last_mv[2][0]= bottom->mx;
1717
    last_mv[2][1]= bottom->my;
1718
    
1719
    s->m.mb_stride=2;
1720
    s->m.mb_x= 
1721
    s->m.mb_y= 0;
1722
    s->m.me.skip= 0;
1723

    
1724
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1725
    
1726
    assert(s->m.me.  stride ==   stride);
1727
    assert(s->m.me.uvstride == uvstride);
1728
    
1729
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1730
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1731
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1732
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1733
    
1734
    c->xmin = - x*block_w - 16+2;
1735
    c->ymin = - y*block_w - 16+2;
1736
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1737
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1738

    
1739
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1740
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
1741
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1742
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1743
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1744
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1745
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1746

    
1747
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1748
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1749

    
1750
    if (!y) {
1751
        c->pred_x= P_LEFT[0];
1752
        c->pred_y= P_LEFT[1];
1753
    } else {
1754
        c->pred_x = P_MEDIAN[0];
1755
        c->pred_y = P_MEDIAN[1];
1756
    }
1757

    
1758
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
1759
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1760

    
1761
    assert(mx >= c->xmin);
1762
    assert(mx <= c->xmax);
1763
    assert(my >= c->ymin);
1764
    assert(my <= c->ymax);
1765
    
1766
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1767
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1768
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1769
                             
1770
  //  subpel search
1771
    pc= s->c;
1772
    init_put_bits(&pc.pb, p_buffer, sizeof(p_buffer));
1773
    memcpy(p_state, s->block_state, sizeof(s->block_state));
1774

    
1775
    if(level!=s->block_max_depth)
1776
        put_cabac(&pc, &p_state[4 + s_context], 1);
1777
    put_cabac(&pc, &p_state[1 + left->type + top->type], 0);
1778
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1779
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
1780
    p_len= put_bits_count(&pc.pb);
1781
    score += (s->lambda2*(p_len + pc.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1782

    
1783
    block_s= block_w*block_w;
1784
    sum = pix_sum(&current_mb[0][0], stride, block_w);
1785
    l= (sum + block_s/2)/block_s;
1786
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1787
    
1788
    block_s= block_w*block_w>>2;
1789
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1790
    cb= (sum + block_s/2)/block_s;
1791
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1792
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1793
    cr= (sum + block_s/2)/block_s;
1794
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1795

    
1796
    ic= s->c;
1797
    init_put_bits(&ic.pb, i_buffer, sizeof(i_buffer));
1798
    memcpy(i_state, s->block_state, sizeof(s->block_state));
1799
    if(level!=s->block_max_depth)
1800
        put_cabac(&ic, &i_state[4 + s_context], 1);
1801
    put_cabac(&ic, &i_state[1 + left->type + top->type], 1);
1802
    put_symbol(&ic, &i_state[32],  l-pl , 1);
1803
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
1804
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
1805
    i_len= put_bits_count(&ic.pb);
1806
    iscore += (s->lambda2*(i_len + ic.outstanding_count - s->c.outstanding_count))>>FF_LAMBDA_SHIFT;
1807

    
1808
//    assert(score==256*256*256*64-1);
1809
    assert(iscore < 255*255*256 + s->lambda2*10);
1810
    assert(iscore >= 0);
1811
    assert(l>=0 && l<=255);
1812
    assert(pl>=0 && pl<=255);
1813

    
1814
    if(level==0){
1815
        int varc= iscore >> 8;
1816
        int vard= score >> 8;
1817
        if (vard <= 64 || vard < varc)
1818
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1819
        else
1820
            c->scene_change_score+= s->m.qscale;
1821
    }
1822
        
1823
    if(level!=s->block_max_depth){
1824
        put_cabac(&s->c, &s->block_state[4 + s_context], 0);
1825
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1826
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1827
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1828
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1829
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1830
    
1831
        if(score2 < score && score2 < iscore)
1832
            return score2;
1833
    }
1834
    
1835
    if(iscore < score){
1836
        flush_put_bits(&ic.pb);
1837
        ff_copy_bits(&pbbak, i_buffer, i_len);
1838
        s->c= ic;
1839
        s->c.pb= pbbak;
1840
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1841
        memcpy(s->block_state, i_state, sizeof(s->block_state));
1842
        return iscore;
1843
    }else{
1844
        flush_put_bits(&pc.pb);
1845
        ff_copy_bits(&pbbak, p_buffer, p_len);
1846
        s->c= pc;
1847
        s->c.pb= pbbak;
1848
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1849
        memcpy(s->block_state, p_state, sizeof(s->block_state));
1850
        return score;
1851
    }
1852
}
1853

    
1854
static void decode_q_branch(SnowContext *s, int level, int x, int y){
1855
    const int w= s->b_width << s->block_max_depth;
1856
    const int rem_depth= s->block_max_depth - level;
1857
    const int index= (x + y*w) << rem_depth;
1858
    static BlockNode null_block= { //FIXME add border maybe
1859
        .color= {128,128,128},
1860
        .mx= 0,
1861
        .my= 0,
1862
        .type= 0,
1863
        .level= 0,
1864
    };
1865
    int trx= (x+1)<<rem_depth;
1866
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
1867
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
1868
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1869
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1870
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1871
    
1872
    if(s->keyframe){
1873
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1874
        return;
1875
    }
1876

    
1877
    if(level==s->block_max_depth || get_cabac(&s->c, &s->block_state[4 + s_context])){
1878
        int type;
1879
        int l = left->color[0];
1880
        int cb= left->color[1];
1881
        int cr= left->color[2];
1882
        int mx= mid_pred(left->mx, top->mx, tr->mx);
1883
        int my= mid_pred(left->my, top->my, tr->my);
1884
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1885
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1886
        
1887
        type= get_cabac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1888

    
1889
        if(type){
1890
            l += get_symbol(&s->c, &s->block_state[32], 1);
1891
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
1892
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
1893
        }else{
1894
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1895
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1896
        }
1897
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1898
    }else{
1899
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1900
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1901
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1902
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1903
    }
1904
}
1905

    
1906
static void encode_blocks(SnowContext *s){
1907
    int x, y;
1908
    int w= s->b_width;
1909
    int h= s->b_height;
1910

    
1911
    for(y=0; y<h; y++){
1912
        for(x=0; x<w; x++){
1913
            encode_q_branch(s, 0, x, y);
1914
        }
1915
    }
1916
}
1917

    
1918
static void decode_blocks(SnowContext *s){
1919
    int x, y;
1920
    int w= s->b_width;
1921
    int h= s->b_height;
1922

    
1923
    for(y=0; y<h; y++){
1924
        for(x=0; x<w; x++){
1925
            decode_q_branch(s, 0, x, y);
1926
        }
1927
    }
1928
}
1929

    
1930
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1931
    int x, y;
1932
START_TIMER
1933
    for(y=0; y < b_h+5; y++){
1934
        for(x=0; x < b_w; x++){
1935
            int a0= src[x    ];
1936
            int a1= src[x + 1];
1937
            int a2= src[x + 2];
1938
            int a3= src[x + 3];
1939
            int a4= src[x + 4];
1940
            int a5= src[x + 5];
1941
//            int am= 9*(a1+a2) - (a0+a3);
1942
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1943
//            int am= 18*(a2+a3) - 2*(a1+a4);
1944
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1945
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1946

    
1947
//            if(b_w==16) am= 8*(a1+a2);
1948

    
1949
            if(dx<8) tmp[x]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
1950
            else     tmp[x]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
1951

    
1952
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
1953
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
1954
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
1955
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1956
        }
1957
        tmp += stride;
1958
        src += stride;
1959
    }
1960
    tmp -= (b_h+5)*stride;
1961
    
1962
    for(y=0; y < b_h; y++){
1963
        for(x=0; x < b_w; x++){
1964
            int a0= tmp[x + 0*stride];
1965
            int a1= tmp[x + 1*stride];
1966
            int a2= tmp[x + 2*stride];
1967
            int a3= tmp[x + 3*stride];
1968
            int a4= tmp[x + 4*stride];
1969
            int a5= tmp[x + 5*stride];
1970
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1971
//            int am= 18*(a2+a3) - 2*(a1+a4);
1972
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1973
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1974
            
1975
//            if(b_w==16) am= 8*(a1+a2);
1976

    
1977
            if(dy<8) dst[x]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
1978
            else     dst[x]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
1979

    
1980
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
1981
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
1982
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
1983
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
1984
        }
1985
        dst += stride;
1986
        tmp += stride;
1987
    }
1988
STOP_TIMER("mc_block")
1989
}
1990

    
1991
#define mca(dx,dy,b_w)\
1992
static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
1993
    uint8_t tmp[stride*(b_w+5)];\
1994
    assert(h==b_w);\
1995
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
1996
}
1997

    
1998
mca( 0, 0,16)
1999
mca( 8, 0,16)
2000
mca( 0, 8,16)
2001
mca( 8, 8,16)
2002

    
2003
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2004
    if(block->type){
2005
        int x, y;
2006
        const int color= block->color[plane_index];
2007
        for(y=0; y < b_h; y++){
2008
            for(x=0; x < b_w; x++){
2009
                dst[x + y*stride]= color;
2010
            }
2011
        }
2012
    }else{
2013
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2014
        int mx= block->mx*scale;
2015
        int my= block->my*scale;
2016
        const int dx= mx&15;
2017
        const int dy= my&15;
2018
        sx += (mx>>4) - 2;
2019
        sy += (my>>4) - 2;
2020
        src += sx + sy*stride;
2021
        if(   (unsigned)sx >= w - b_w - 4
2022
           || (unsigned)sy >= h - b_h - 4){
2023
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2024
            src= tmp + MB_SIZE;
2025
        }
2026
        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2027
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2028
        else
2029
            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2030
    }
2031
}
2032

    
2033
static always_inline int same_block(BlockNode *a, BlockNode *b){
2034
    return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2035
}
2036

    
2037
//FIXME name clenup (b_w, block_w, b_width stuff)
2038
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2039
    const int b_width = s->b_width  << s->block_max_depth;
2040
    const int b_height= s->b_height << s->block_max_depth;
2041
    const int b_stride= b_width;
2042
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2043
    BlockNode *rt= lt+1;
2044
    BlockNode *lb= lt+b_stride;
2045
    BlockNode *rb= lb+1;
2046
    uint8_t *block[4]; 
2047
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2048
    int x,y;
2049

    
2050
    if(b_x<0){
2051
        lt= rt;
2052
        lb= rb;
2053
    }else if(b_x + 1 >= b_width){
2054
        rt= lt;
2055
        rb= lb;
2056
    }
2057
    if(b_y<0){
2058
        lt= lb;
2059
        rt= rb;
2060
    }else if(b_y + 1 >= b_height){
2061
        lb= lt;
2062
        rb= rt;
2063
    }
2064
        
2065
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2066
        obmc -= src_x;
2067
        b_w += src_x;
2068
        src_x=0;
2069
    }else if(src_x + b_w > w){
2070
        b_w = w - src_x;
2071
    }
2072
    if(src_y<0){
2073
        obmc -= src_y*obmc_stride;
2074
        b_h += src_y;
2075
        src_y=0;
2076
    }else if(src_y + b_h> h){
2077
        b_h = h - src_y;
2078
    }
2079
    
2080
    if(b_w<=0 || b_h<=0) return;
2081

    
2082
assert(src_stride > 7*MB_SIZE);
2083
    dst += src_x + src_y*dst_stride;
2084
//    src += src_x + src_y*src_stride;
2085

    
2086
    block[0]= tmp+3*MB_SIZE;
2087
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2088

    
2089
    if(same_block(lt, rt)){
2090
        block[1]= block[0];
2091
    }else{
2092
        block[1]= tmp + 4*MB_SIZE;
2093
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2094
    }
2095
        
2096
    if(same_block(lt, lb)){
2097
        block[2]= block[0];
2098
    }else if(same_block(rt, lb)){
2099
        block[2]= block[1];
2100
    }else{
2101
        block[2]= tmp+5*MB_SIZE;
2102
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2103
    }
2104

    
2105
    if(same_block(lt, rb) ){
2106
        block[3]= block[0];
2107
    }else if(same_block(rt, rb)){
2108
        block[3]= block[1];
2109
    }else if(same_block(lb, rb)){
2110
        block[3]= block[2];
2111
    }else{
2112
        block[3]= tmp+6*MB_SIZE;
2113
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2114
    }
2115
#if 0
2116
    for(y=0; y<b_h; y++){
2117
        for(x=0; x<b_w; x++){
2118
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2119
            if(add) dst[x + y*dst_stride] += v;
2120
            else    dst[x + y*dst_stride] -= v;
2121
        }
2122
    }
2123
    for(y=0; y<b_h; y++){
2124
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2125
        for(x=0; x<b_w; x++){
2126
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2127
            if(add) dst[x + y*dst_stride] += v;
2128
            else    dst[x + y*dst_stride] -= v;
2129
        }
2130
    }
2131
    for(y=0; y<b_h; y++){
2132
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2133
        for(x=0; x<b_w; x++){
2134
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2135
            if(add) dst[x + y*dst_stride] += v;
2136
            else    dst[x + y*dst_stride] -= v;
2137
        }
2138
    }
2139
    for(y=0; y<b_h; y++){
2140
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2141
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2142
        for(x=0; x<b_w; x++){
2143
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2144
            if(add) dst[x + y*dst_stride] += v;
2145
            else    dst[x + y*dst_stride] -= v;
2146
        }
2147
    }
2148
#else
2149
    for(y=0; y<b_h; y++){
2150
        //FIXME ugly missue of obmc_stride
2151
        uint8_t *obmc1= obmc + y*obmc_stride;
2152
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2153
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2154
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2155
        for(x=0; x<b_w; x++){
2156
            int v=   obmc1[x] * block[3][x + y*src_stride]
2157
                    +obmc2[x] * block[2][x + y*src_stride]
2158
                    +obmc3[x] * block[1][x + y*src_stride]
2159
                    +obmc4[x] * block[0][x + y*src_stride];
2160
            if(add) dst[x + y*dst_stride] += v * (256/OBMC_MAX);
2161
            else    dst[x + y*dst_stride] -= v * (256/OBMC_MAX);
2162
        }
2163
    }
2164
#endif
2165
}
2166

    
2167
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2168
    Plane *p= &s->plane[plane_index];
2169
    const int mb_w= s->b_width  << s->block_max_depth;
2170
    const int mb_h= s->b_height << s->block_max_depth;
2171
    int x, y, mb_x, mb_y;
2172
    int block_size = MB_SIZE >> s->block_max_depth;
2173
    int block_w    = plane_index ? block_size/2 : block_size;
2174
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2175
    int obmc_stride= plane_index ? block_size : 2*block_size;
2176
    int ref_stride= s->last_picture.linesize[plane_index];
2177
    uint8_t *ref  = s->last_picture.data[plane_index];
2178
    int w= p->width;
2179
    int h= p->height;
2180
    START_TIMER
2181
    
2182
    if(s->keyframe || (s->avctx->debug&512)){
2183
        for(y=0; y<h; y++){
2184
            for(x=0; x<w; x++){
2185
                if(add) buf[x + y*w]+= 128*256;
2186
                else    buf[x + y*w]-= 128*256;
2187
            }
2188
        }
2189

    
2190
        return;
2191
    }
2192
    
2193
    for(mb_y=0; mb_y<=mb_h; mb_y++){
2194
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2195
            START_TIMER
2196

    
2197
            add_yblock(s, buf, ref, obmc, 
2198
                       block_w*mb_x - block_w/2,
2199
                       block_w*mb_y - block_w/2,
2200
                       block_w, block_w,
2201
                       w, h,
2202
                       w, ref_stride, obmc_stride,
2203
                       mb_x - 1, mb_y - 1,
2204
                       add, plane_index);
2205
            
2206
            STOP_TIMER("add_yblock")
2207
        }
2208
    }
2209
    
2210
    STOP_TIMER("predict_plane")
2211
}
2212

    
2213
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2214
    const int level= b->level;
2215
    const int w= b->width;
2216
    const int h= b->height;
2217
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2218
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2219
    int x,y, thres1, thres2;
2220
    START_TIMER
2221

    
2222
    assert(QROOT==8);
2223

    
2224
    if(s->qlog == LOSSLESS_QLOG) return;
2225
 
2226
    bias= bias ? 0 : (3*qmul)>>3;
2227
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2228
    thres2= 2*thres1;
2229
    
2230
    if(!bias){
2231
        for(y=0; y<h; y++){
2232
            for(x=0; x<w; x++){
2233
                int i= src[x + y*stride];
2234
                
2235
                if((unsigned)(i+thres1) > thres2){
2236
                    if(i>=0){
2237
                        i<<= QEXPSHIFT;
2238
                        i/= qmul; //FIXME optimize
2239
                        src[x + y*stride]=  i;
2240
                    }else{
2241
                        i= -i;
2242
                        i<<= QEXPSHIFT;
2243
                        i/= qmul; //FIXME optimize
2244
                        src[x + y*stride]= -i;
2245
                    }
2246
                }else
2247
                    src[x + y*stride]= 0;
2248
            }
2249
        }
2250
    }else{
2251
        for(y=0; y<h; y++){
2252
            for(x=0; x<w; x++){
2253
                int i= src[x + y*stride]; 
2254
                
2255
                if((unsigned)(i+thres1) > thres2){
2256
                    if(i>=0){
2257
                        i<<= QEXPSHIFT;
2258
                        i= (i + bias) / qmul; //FIXME optimize
2259
                        src[x + y*stride]=  i;
2260
                    }else{
2261
                        i= -i;
2262
                        i<<= QEXPSHIFT;
2263
                        i= (i + bias) / qmul; //FIXME optimize
2264
                        src[x + y*stride]= -i;
2265
                    }
2266
                }else
2267
                    src[x + y*stride]= 0;
2268
            }
2269
        }
2270
    }
2271
    if(level+1 == s->spatial_decomposition_count){
2272
//        STOP_TIMER("quantize")
2273
    }
2274
}
2275

    
2276
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2277
    const int w= b->width;
2278
    const int h= b->height;
2279
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2280
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2281
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2282
    int x,y;
2283
    START_TIMER
2284
    
2285
    if(s->qlog == LOSSLESS_QLOG) return;
2286
    
2287
    assert(QROOT==8);
2288

    
2289
    for(y=0; y<h; y++){
2290
        for(x=0; x<w; x++){
2291
            int i= src[x + y*stride];
2292
            if(i<0){
2293
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2294
            }else if(i>0){
2295
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2296
            }
2297
        }
2298
    }
2299
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2300
        STOP_TIMER("dquant")
2301
    }
2302
}
2303

    
2304
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2305
    const int w= b->width;
2306
    const int h= b->height;
2307
    int x,y;
2308
    
2309
    for(y=h-1; y>=0; y--){
2310
        for(x=w-1; x>=0; x--){
2311
            int i= x + y*stride;
2312
            
2313
            if(x){
2314
                if(use_median){
2315
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2316
                    else  src[i] -= src[i - 1];
2317
                }else{
2318
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2319
                    else  src[i] -= src[i - 1];
2320
                }
2321
            }else{
2322
                if(y) src[i] -= src[i - stride];
2323
            }
2324
        }
2325
    }
2326
}
2327

    
2328
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2329
    const int w= b->width;
2330
    const int h= b->height;
2331
    int x,y;
2332
    
2333
    for(y=0; y<h; y++){
2334
        for(x=0; x<w; x++){
2335
            int i= x + y*stride;
2336
            
2337
            if(x){
2338
                if(use_median){
2339
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2340
                    else  src[i] += src[i - 1];
2341
                }else{
2342
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2343
                    else  src[i] += src[i - 1];
2344
                }
2345
            }else{
2346
                if(y) src[i] += src[i - stride];
2347
            }
2348
        }
2349
    }
2350
}
2351

    
2352
static void encode_header(SnowContext *s){
2353
    int plane_index, level, orientation;
2354
    uint8_t kstate[32]={0};    
2355

    
2356
    put_cabac(&s->c, kstate, s->keyframe);
2357
    if(s->keyframe || s->always_reset)
2358
        reset_contexts(s);
2359
    if(s->keyframe){
2360
        put_symbol(&s->c, s->header_state, s->version, 0);
2361
        put_cabac(&s->c, s->header_state, s->always_reset);
2362
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2363
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2364
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2365
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
2366
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2367
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
2368
        put_cabac(&s->c, s->header_state, s->spatial_scalability);
2369
//        put_cabac(&s->c, s->header_state, s->rate_scalability);
2370

    
2371
        for(plane_index=0; plane_index<2; plane_index++){
2372
            for(level=0; level<s->spatial_decomposition_count; level++){
2373
                for(orientation=level ? 1:0; orientation<4; orientation++){
2374
                    if(orientation==2) continue;
2375
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2376
                }
2377
            }
2378
        }
2379
    }
2380
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2381
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
2382
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
2383
    put_symbol(&s->c, s->header_state, s->qbias, 1);
2384
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
2385
}
2386

    
2387
static int decode_header(SnowContext *s){
2388
    int plane_index, level, orientation;
2389
    uint8_t kstate[32]={0};    
2390

    
2391
    s->keyframe= get_cabac(&s->c, kstate);
2392
    if(s->keyframe || s->always_reset)
2393
        reset_contexts(s);
2394
    if(s->keyframe){
2395
        s->version= get_symbol(&s->c, s->header_state, 0);
2396
        if(s->version>0){
2397
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2398
            return -1;
2399
        }
2400
        s->always_reset= get_cabac(&s->c, s->header_state);
2401
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2402
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2403
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2404
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2405
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2406
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2407
        s->spatial_scalability= get_cabac(&s->c, s->header_state);
2408
//        s->rate_scalability= get_cabac(&s->c, s->header_state);
2409

    
2410
        for(plane_index=0; plane_index<3; plane_index++){
2411
            for(level=0; level<s->spatial_decomposition_count; level++){
2412
                for(orientation=level ? 1:0; orientation<4; orientation++){
2413
                    int q;
2414
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2415
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2416
                    else                    q= get_symbol(&s->c, s->header_state, 1);
2417
                    s->plane[plane_index].band[level][orientation].qlog= q;
2418
                }
2419
            }
2420
        }
2421
    }
2422
    
2423
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2424
    if(s->spatial_decomposition_type > 2){
2425
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2426
        return -1;
2427
    }
2428
    
2429
    s->qlog= get_symbol(&s->c, s->header_state, 1);
2430
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2431
    s->qbias= get_symbol(&s->c, s->header_state, 1);
2432
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
2433

    
2434
    return 0;
2435
}
2436

    
2437
static int common_init(AVCodecContext *avctx){
2438
    SnowContext *s = avctx->priv_data;
2439
    int width, height;
2440
    int level, orientation, plane_index, dec;
2441

    
2442
    s->avctx= avctx;
2443
        
2444
    dsputil_init(&s->dsp, avctx);
2445

    
2446
#define mcf(dx,dy)\
2447
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
2448
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2449
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];
2450

    
2451
    mcf( 0, 0)
2452
    mcf( 4, 0)
2453
    mcf( 8, 0)
2454
    mcf(12, 0)
2455
    mcf( 0, 4)
2456
    mcf( 4, 4)
2457
    mcf( 8, 4)
2458
    mcf(12, 4)
2459
    mcf( 0, 8)
2460
    mcf( 4, 8)
2461
    mcf( 8, 8)
2462
    mcf(12, 8)
2463
    mcf( 0,12)
2464
    mcf( 4,12)
2465
    mcf( 8,12)
2466
    mcf(12,12)
2467

    
2468
#define mcfh(dx,dy)\
2469
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
2470
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2471
        mc_block_hpel ## dx ## dy;
2472

    
2473
    mcfh(0, 0)
2474
    mcfh(8, 0)
2475
    mcfh(0, 8)
2476
    mcfh(8, 8)
2477
        
2478
    dec= s->spatial_decomposition_count= 5;
2479
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2480
    
2481
    s->chroma_h_shift= 1; //FIXME XXX
2482
    s->chroma_v_shift= 1;
2483
    
2484
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2485
    
2486
    width= s->avctx->width;
2487
    height= s->avctx->height;
2488

    
2489
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
2490
    s->pred_buffer= av_mallocz(width*height*sizeof(DWTELEM));
2491
    
2492
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2493
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
2494
    
2495
    for(plane_index=0; plane_index<3; plane_index++){    
2496
        int w= s->avctx->width;
2497
        int h= s->avctx->height;
2498

    
2499
        if(plane_index){
2500
            w>>= s->chroma_h_shift;
2501
            h>>= s->chroma_v_shift;
2502
        }
2503
        s->plane[plane_index].width = w;
2504
        s->plane[plane_index].height= h;
2505
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
2506
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2507
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2508
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2509
                
2510
                b->buf= s->spatial_dwt_buffer;
2511
                b->level= level;
2512
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2513
                b->width = (w + !(orientation&1))>>1;
2514
                b->height= (h + !(orientation>1))>>1;
2515
                
2516
                if(orientation&1) b->buf += (w+1)>>1;
2517
                if(orientation>1) b->buf += b->stride>>1;
2518
                
2519
                if(level)
2520
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
2521
                b->x    = av_mallocz(((b->width+1) * b->height+1)*sizeof(int16_t));
2522
                b->coeff= av_mallocz(((b->width+1) * b->height+1)*sizeof(DWTELEM));
2523
            }
2524
            w= (w+1)>>1;
2525
            h= (h+1)>>1;
2526
        }
2527
    }
2528
    
2529
    reset_contexts(s);
2530
/*    
2531
    width= s->width= avctx->width;
2532
    height= s->height= avctx->height;
2533
    
2534
    assert(width && height);
2535
*/
2536
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2537
    
2538
    return 0;
2539
}
2540

    
2541

    
2542
static void calculate_vissual_weight(SnowContext *s, Plane *p){
2543
    int width = p->width;
2544
    int height= p->height;
2545
    int level, orientation, x, y;
2546

    
2547
    for(level=0; level<s->spatial_decomposition_count; level++){
2548
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2549
            SubBand *b= &p->band[level][orientation];
2550
            DWTELEM *buf= b->buf;
2551
            int64_t error=0;
2552
            
2553
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2554
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
2555
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
2556
            for(y=0; y<height; y++){
2557
                for(x=0; x<width; x++){
2558
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
2559
                    error += d*d;
2560
                }
2561
            }
2562

    
2563
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
2564
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
2565
        }
2566
    }
2567
}
2568

    
2569
static int encode_init(AVCodecContext *avctx)
2570
{
2571
    SnowContext *s = avctx->priv_data;
2572
    int plane_index;
2573

    
2574
    if(avctx->strict_std_compliance >= 0){
2575
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2576
               "use vstrict=-1 to use it anyway\n");
2577
        return -1;
2578
    }
2579
 
2580
    common_init(avctx);
2581
    alloc_blocks(s);
2582
 
2583
    s->version=0;
2584
    
2585
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2586
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2587
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2588
    h263_encode_init(&s->m); //mv_penalty
2589

    
2590
    for(plane_index=0; plane_index<3; plane_index++){
2591
        calculate_vissual_weight(s, &s->plane[plane_index]);
2592
    }
2593
    
2594
    
2595
    avctx->coded_frame= &s->current_picture;
2596
    switch(avctx->pix_fmt){
2597
//    case PIX_FMT_YUV444P:
2598
//    case PIX_FMT_YUV422P:
2599
    case PIX_FMT_YUV420P:
2600
    case PIX_FMT_GRAY8:
2601
//    case PIX_FMT_YUV411P:
2602
//    case PIX_FMT_YUV410P:
2603
        s->colorspace_type= 0;
2604
        break;
2605
/*    case PIX_FMT_RGBA32:
2606
        s->colorspace= 1;
2607
        break;*/
2608
    default:
2609
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2610
        return -1;
2611
    }
2612
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2613
    s->chroma_h_shift= 1;
2614
    s->chroma_v_shift= 1;
2615
    return 0;
2616
}
2617

    
2618
static int frame_start(SnowContext *s){
2619
   AVFrame tmp;
2620
   int w= s->avctx->width; //FIXME round up to x16 ?
2621
   int h= s->avctx->height;
2622

    
2623
    if(s->current_picture.data[0]){
2624
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
2625
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2626
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2627
    }
2628

    
2629
    tmp= s->last_picture;
2630
    s->last_picture= s->current_picture;
2631
    s->current_picture= tmp;
2632
    
2633
    s->current_picture.reference= 1;
2634
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2635
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2636
        return -1;
2637
    }
2638
    
2639
    return 0;
2640
}
2641

    
2642
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2643
    SnowContext *s = avctx->priv_data;
2644
    CABACContext * const c= &s->c;
2645
    AVFrame *pict = data;
2646
    const int width= s->avctx->width;
2647
    const int height= s->avctx->height;
2648
    int level, orientation, plane_index;
2649

    
2650
    ff_init_cabac_encoder(c, buf, buf_size);
2651
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2652
    
2653
    s->input_picture = *pict;
2654

    
2655
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2656
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2657
    
2658
    if(pict->quality){
2659
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2660
        //<64 >60
2661
        s->qlog += 61;
2662
    }else{
2663
        s->qlog= LOSSLESS_QLOG;
2664
    }
2665

    
2666
    frame_start(s);
2667
    s->current_picture.key_frame= s->keyframe;
2668

    
2669
    if(pict->pict_type == P_TYPE){
2670
        int block_width = (width +15)>>4;
2671
        int block_height= (height+15)>>4;
2672
        int stride= s->current_picture.linesize[0];
2673
        
2674
        assert(s->current_picture.data[0]);
2675
        assert(s->last_picture.data[0]);
2676
     
2677
        s->m.avctx= s->avctx;
2678
        s->m.current_picture.data[0]= s->current_picture.data[0];
2679
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
2680
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
2681
        s->m.current_picture_ptr= &s->m.current_picture;
2682
        s->m.   last_picture_ptr= &s->m.   last_picture;
2683
        s->m.linesize=
2684
        s->m.   last_picture.linesize[0]=
2685
        s->m.    new_picture.linesize[0]=
2686
        s->m.current_picture.linesize[0]= stride;
2687
        s->m.uvlinesize= s->current_picture.linesize[1];
2688
        s->m.width = width;
2689
        s->m.height= height;
2690
        s->m.mb_width = block_width;
2691
        s->m.mb_height= block_height;
2692
        s->m.mb_stride=   s->m.mb_width+1;
2693
        s->m.b8_stride= 2*s->m.mb_width+1;
2694
        s->m.f_code=1;
2695
        s->m.pict_type= pict->pict_type;
2696
        s->m.me_method= s->avctx->me_method;
2697
        s->m.me.scene_change_score=0;
2698
        s->m.flags= s->avctx->flags;
2699
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2700
        s->m.out_format= FMT_H263;
2701
        s->m.unrestricted_mv= 1;
2702

    
2703
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
2704
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
2705
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
2706

    
2707
        s->m.dsp= s->dsp; //move
2708
        ff_init_me(&s->m);
2709
    }
2710
    
2711
redo_frame:
2712
        
2713
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
2714

    
2715
    encode_header(s);
2716
    encode_blocks(s);
2717
      
2718
    for(plane_index=0; plane_index<3; plane_index++){
2719
        Plane *p= &s->plane[plane_index];
2720
        int w= p->width;
2721
        int h= p->height;
2722
        int x, y;
2723
//        int bits= put_bits_count(&s->c.pb);
2724

    
2725
        //FIXME optimize
2726
     if(pict->data[plane_index]) //FIXME gray hack
2727
        for(y=0; y<h; y++){
2728
            for(x=0; x<w; x++){
2729
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<8;
2730
            }
2731
        }
2732
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
2733
        
2734
        if(   plane_index==0 
2735
           && pict->pict_type == P_TYPE 
2736
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
2737
            ff_init_cabac_encoder(c, buf, buf_size);
2738
            ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2739
            pict->pict_type= FF_I_TYPE;
2740
            s->keyframe=1;
2741
            reset_contexts(s);
2742
            goto redo_frame;
2743
        }
2744
        
2745
        if(s->qlog == LOSSLESS_QLOG){
2746
            for(y=0; y<h; y++){
2747
                for(x=0; x<w; x++){
2748
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + 127)>>8;
2749
                }
2750
            }
2751
        }
2752
 
2753
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2754

    
2755
        for(level=0; level<s->spatial_decomposition_count; level++){
2756
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2757
                SubBand *b= &p->band[level][orientation];
2758
                
2759
                quantize(s, b, b->buf, b->stride, s->qbias);
2760
                if(orientation==0)
2761
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
2762
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2763
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
2764
                if(orientation==0)
2765
                    correlate(s, b, b->buf, b->stride, 1, 0);
2766
            }
2767
        }
2768
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
2769

    
2770
        for(level=0; level<s->spatial_decomposition_count; level++){
2771
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2772
                SubBand *b= &p->band[level][orientation];
2773

    
2774
                dequantize(s, b, b->buf, b->stride);
2775
            }
2776
        }
2777

    
2778
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2779
        if(s->qlog == LOSSLESS_QLOG){
2780
            for(y=0; y<h; y++){
2781
                for(x=0; x<w; x++){
2782
                    s->spatial_dwt_buffer[y*w + x]<<=8;
2783
                }
2784
            }
2785
        }
2786
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2787
        //FIXME optimize
2788
        for(y=0; y<h; y++){
2789
            for(x=0; x<w; x++){
2790
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2791
                if(v&(~255)) v= ~(v>>31);
2792
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2793
            }
2794
        }
2795
        if(s->avctx->flags&CODEC_FLAG_PSNR){
2796
            int64_t error= 0;
2797
            
2798
    if(pict->data[plane_index]) //FIXME gray hack
2799
            for(y=0; y<h; y++){
2800
                for(x=0; x<w; x++){
2801
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
2802
                    error += d*d;
2803
                }
2804
            }
2805
            s->avctx->error[plane_index] += error;
2806
            s->avctx->error[3] += error;
2807
        }
2808
    }
2809

    
2810
    if(s->last_picture.data[0])
2811
        avctx->release_buffer(avctx, &s->last_picture);
2812

    
2813
    emms_c();
2814
    
2815
    return put_cabac_terminate(c, 1);
2816
}
2817

    
2818
static void common_end(SnowContext *s){
2819
    int plane_index, level, orientation;
2820

    
2821
    av_freep(&s->spatial_dwt_buffer);
2822

    
2823
    av_freep(&s->m.me.scratchpad);    
2824
    av_freep(&s->m.me.map);
2825
    av_freep(&s->m.me.score_map);
2826
 
2827
    av_freep(&s->block);
2828

    
2829
    for(plane_index=0; plane_index<3; plane_index++){    
2830
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2831
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2832
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2833
                
2834
                av_freep(&b->x);
2835
                av_freep(&b->coeff);
2836
            }
2837
        }
2838
    }
2839
}
2840

    
2841
static int encode_end(AVCodecContext *avctx)
2842
{
2843
    SnowContext *s = avctx->priv_data;
2844

    
2845
    common_end(s);
2846

    
2847
    return 0;
2848
}
2849

    
2850
static int decode_init(AVCodecContext *avctx)
2851
{
2852
//    SnowContext *s = avctx->priv_data;
2853

    
2854
    common_init(avctx);
2855
    
2856
    return 0;
2857
}
2858

    
2859
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
2860
    SnowContext *s = avctx->priv_data;
2861
    CABACContext * const c= &s->c;
2862
    int bytes_read;
2863
    AVFrame *picture = data;
2864
    int level, orientation, plane_index;
2865
    
2866

    
2867
    /* no supplementary picture */
2868
    if (buf_size == 0)
2869
        return 0;
2870

    
2871
    ff_init_cabac_decoder(c, buf, buf_size);
2872
    ff_init_cabac_states(c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
2873

    
2874
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2875
    decode_header(s);
2876
    if(!s->block) alloc_blocks(s);
2877

    
2878
    frame_start(s);
2879
    //keyframe flag dupliaction mess FIXME
2880
    if(avctx->debug&FF_DEBUG_PICT_INFO)
2881
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2882
    
2883
    decode_blocks(s);
2884

    
2885
    for(plane_index=0; plane_index<3; plane_index++){
2886
        Plane *p= &s->plane[plane_index];
2887
        int w= p->width;
2888
        int h= p->height;
2889
        int x, y;
2890
        
2891
if(s->avctx->debug&2048){
2892
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2893
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2894

    
2895
        for(y=0; y<h; y++){
2896
            for(x=0; x<w; x++){
2897
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2898
                if(v&(~255)) v= ~(v>>31);
2899
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2900
            }
2901
        }
2902
}
2903
        for(level=0; level<s->spatial_decomposition_count; level++){
2904
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2905
                SubBand *b= &p->band[level][orientation];
2906

    
2907
                decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2908
                if(orientation==0){
2909
                    correlate(s, b, b->buf, b->stride, 1, 0);
2910
                    dequantize(s, b, b->buf, b->stride);
2911
                    assert(b->buf == s->spatial_dwt_buffer);
2912
                }
2913
            }
2914
        }
2915

    
2916
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2917
        if(s->qlog == LOSSLESS_QLOG){
2918
            for(y=0; y<h; y++){
2919
                for(x=0; x<w; x++){
2920
                    s->spatial_dwt_buffer[y*w + x]<<=8;
2921
                }
2922
            }
2923
        }
2924
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2925

    
2926
        //FIXME optimize
2927
        for(y=0; y<h; y++){
2928
            for(x=0; x<w; x++){
2929
                int v= (s->spatial_dwt_buffer[y*w + x]+128)>>8;
2930
                if(v&(~255)) v= ~(v>>31);
2931
                s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v;
2932
            }
2933
        }
2934
    }
2935
            
2936
    emms_c();
2937

    
2938
    if(s->last_picture.data[0])
2939
        avctx->release_buffer(avctx, &s->last_picture);
2940

    
2941
if(!(s->avctx->debug&2048))        
2942
    *picture= s->current_picture;
2943
else
2944
    *picture= s->mconly_picture;
2945
    
2946
    *data_size = sizeof(AVFrame);
2947
    
2948
    bytes_read= get_cabac_terminate(c);
2949
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n");
2950

    
2951
    return bytes_read;
2952
}
2953

    
2954
static int decode_end(AVCodecContext *avctx)
2955
{
2956
    SnowContext *s = avctx->priv_data;
2957

    
2958
    common_end(s);
2959

    
2960
    return 0;
2961
}
2962

    
2963
AVCodec snow_decoder = {
2964
    "snow",
2965
    CODEC_TYPE_VIDEO,
2966
    CODEC_ID_SNOW,
2967
    sizeof(SnowContext),
2968
    decode_init,
2969
    NULL,
2970
    decode_end,
2971
    decode_frame,
2972
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2973
    NULL
2974
};
2975

    
2976
AVCodec snow_encoder = {
2977
    "snow",
2978
    CODEC_TYPE_VIDEO,
2979
    CODEC_ID_SNOW,
2980
    sizeof(SnowContext),
2981
    encode_init,
2982
    encode_frame,
2983
    encode_end,
2984
};
2985

    
2986

    
2987
#if 0
2988
#undef malloc
2989
#undef free
2990
#undef printf
2991

2992
int main(){
2993
    int width=256;
2994
    int height=256;
2995
    int buffer[2][width*height];
2996
    SnowContext s;
2997
    int i;
2998
    s.spatial_decomposition_count=6;
2999
    s.spatial_decomposition_type=1;
3000
    
3001
    printf("testing 5/3 DWT\n");
3002
    for(i=0; i<width*height; i++)
3003
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3004
    
3005
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3006
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3007
    
3008
    for(i=0; i<width*height; i++)
3009
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3010

3011
    printf("testing 9/7 DWT\n");
3012
    s.spatial_decomposition_type=0;
3013
    for(i=0; i<width*height; i++)
3014
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3015
    
3016
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3017
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3018
    
3019
    for(i=0; i<width*height; i++)
3020
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3021
        
3022
    printf("testing AC coder\n");
3023
    memset(s.header_state, 0, sizeof(s.header_state));
3024
    ff_init_cabac_encoder(&s.c, buffer[0], 256*256);
3025
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3026
        
3027
    for(i=-256; i<256; i++){
3028
START_TIMER
3029
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3030
STOP_TIMER("put_symbol")
3031
    }
3032
    put_cabac_terminate(&s.c, 1);
3033

3034
    memset(s.header_state, 0, sizeof(s.header_state));
3035
    ff_init_cabac_decoder(&s.c, buffer[0], 256*256);
3036
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3037
    
3038
    for(i=-256; i<256; i++){
3039
        int j;
3040
START_TIMER
3041
        j= get_symbol(&s.c, s.header_state, 1);
3042
STOP_TIMER("get_symbol")
3043
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3044
    }
3045
{
3046
int level, orientation, x, y;
3047
int64_t errors[8][4];
3048
int64_t g=0;
3049

3050
    memset(errors, 0, sizeof(errors));
3051
    s.spatial_decomposition_count=3;
3052
    s.spatial_decomposition_type=0;
3053
    for(level=0; level<s.spatial_decomposition_count; level++){
3054
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3055
            int w= width  >> (s.spatial_decomposition_count-level);
3056
            int h= height >> (s.spatial_decomposition_count-level);
3057
            int stride= width  << (s.spatial_decomposition_count-level);
3058
            DWTELEM *buf= buffer[0];
3059
            int64_t error=0;
3060

3061
            if(orientation&1) buf+=w;
3062
            if(orientation>1) buf+=stride>>1;
3063
            
3064
            memset(buffer[0], 0, sizeof(int)*width*height);
3065
            buf[w/2 + h/2*stride]= 256*256;
3066
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3067
            for(y=0; y<height; y++){
3068
                for(x=0; x<width; x++){
3069
                    int64_t d= buffer[0][x + y*width];
3070
                    error += d*d;
3071
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3072
                }
3073
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3074
            }
3075
            error= (int)(sqrt(error)+0.5);
3076
            errors[level][orientation]= error;
3077
            if(g) g=ff_gcd(g, error);
3078
            else g= error;
3079
        }
3080
    }
3081
    printf("static int const visual_weight[][4]={\n");
3082
    for(level=0; level<s.spatial_decomposition_count; level++){
3083
        printf("  {");
3084
        for(orientation=0; orientation<4; orientation++){
3085
            printf("%8lld,", errors[level][orientation]/g);
3086
        }
3087
        printf("},\n");
3088
    }
3089
    printf("};\n");
3090
    {
3091
            int level=2;
3092
            int orientation=3;
3093
            int w= width  >> (s.spatial_decomposition_count-level);
3094
            int h= height >> (s.spatial_decomposition_count-level);
3095
            int stride= width  << (s.spatial_decomposition_count-level);
3096
            DWTELEM *buf= buffer[0];
3097
            int64_t error=0;
3098

3099
            buf+=w;
3100
            buf+=stride>>1;
3101
            
3102
            memset(buffer[0], 0, sizeof(int)*width*height);
3103
#if 1
3104
            for(y=0; y<height; y++){
3105
                for(x=0; x<width; x++){
3106
                    int tab[4]={0,2,3,1};
3107
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3108
                }
3109
            }
3110
            ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3111
#else
3112
            for(y=0; y<h; y++){
3113
                for(x=0; x<w; x++){
3114
                    buf[x + y*stride  ]=169;
3115
                    buf[x + y*stride-w]=64;
3116
                }
3117
            }
3118
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3119
#endif
3120
            for(y=0; y<height; y++){
3121
                for(x=0; x<width; x++){
3122
                    int64_t d= buffer[0][x + y*width];
3123
                    error += d*d;
3124
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3125
                }
3126
                if(ABS(height/2-y)<9) printf("\n");
3127
            }
3128
    }
3129

    
3130
}
3131
    return 0;
3132
}
3133
#endif
3134