Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 2554db9b

History | View | Annotate | Download (106 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22

    
23
#include "rangecoder.h"
24
#define MID_STATE 128
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QROOT 8 
35
#define LOSSLESS_QLOG -128
36
#define FRAC_BITS 8
37

    
38
static const int8_t quant3[256]={
39
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
55
};
56
static const int8_t quant3b[256]={
57
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73
};
74
static const int8_t quant5[256]={
75
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
81
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
82
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
83
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
88
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
89
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
90
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
91
};
92
static const int8_t quant7[256]={
93
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
96
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
99
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
100
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
101
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
104
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
105
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
106
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
107
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
109
};
110
static const int8_t quant9[256]={
111
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
117
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
118
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
119
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
123
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
124
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
125
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
126
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
127
};
128
static const int8_t quant11[256]={
129
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
135
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
136
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
137
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
140
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
141
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
142
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
143
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
145
};
146
static const int8_t quant13[256]={
147
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
148
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
153
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
154
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
155
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
157
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
158
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
159
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
160
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
162
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
163
};
164

    
165
#define LOG2_OBMC_MAX 6
166
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
167
#if 0 //64*cubic
168
static const uint8_t obmc32[1024]={
169
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
170
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
171
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
172
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
173
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
174
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
175
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
176
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
177
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
178
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
179
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
180
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
181
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
182
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
183
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
184
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
185
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
186
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
187
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
188
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
189
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
191
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
192
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
193
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
194
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
196
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
197
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
198
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
199
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
200
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201
//error:0.000022
202
};
203
static const uint8_t obmc16[256]={
204
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
205
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
206
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
207
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
208
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
209
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
210
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
211
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
212
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
213
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
214
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
215
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
216
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
218
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
219
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
220
//error:0.000033
221
};
222
#elif 1 // 64*linear
223
static const uint8_t obmc32[1024]={
224
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
225
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
226
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
227
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
228
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
229
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
230
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
231
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
232
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
233
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
234
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
235
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
236
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
237
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
238
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
239
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
240
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
241
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
242
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
243
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
244
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
245
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
246
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
247
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
248
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
249
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
251
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
252
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
253
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
254
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
255
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
256
 //error:0.000020
257
};
258
static const uint8_t obmc16[256]={
259
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
260
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
261
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
262
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
263
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
264
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
265
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
266
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
267
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
268
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
269
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
270
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
271
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
272
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
273
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
274
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
275
//error:0.000015
276
};
277
#else //64*cos
278
static const uint8_t obmc32[1024]={
279
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
281
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
282
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
283
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
284
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
285
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
286
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
287
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
288
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
289
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
290
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
291
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
292
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
293
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
294
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
295
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
296
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
297
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
298
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
299
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
301
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
302
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
303
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
304
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
306
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
307
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
308
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
309
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
310
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311
//error:0.000022
312
};
313
static const uint8_t obmc16[256]={
314
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
315
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
316
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
317
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
318
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
319
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
320
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
321
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
322
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
323
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
324
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
325
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
326
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
328
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
329
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
330
//error:0.000022
331
};
332
#endif
333

    
334
//linear *64
335
static const uint8_t obmc8[64]={
336
 1, 3, 5, 7, 7, 5, 3, 1,
337
 3, 9,15,21,21,15, 9, 3,
338
 5,15,25,35,35,25,15, 5,
339
 7,21,35,49,49,35,21, 7,
340
 7,21,35,49,49,35,21, 7,
341
 5,15,25,35,35,25,15, 5,
342
 3, 9,15,21,21,15, 9, 3,
343
 1, 3, 5, 7, 7, 5, 3, 1,
344
//error:0.000000
345
};
346

    
347
//linear *64
348
static const uint8_t obmc4[16]={
349
 4,12,12, 4,
350
12,36,36,12,
351
12,36,36,12,
352
 4,12,12, 4,
353
//error:0.000000
354
};
355

    
356
static const uint8_t *obmc_tab[4]={
357
    obmc32, obmc16, obmc8, obmc4
358
};
359

    
360
typedef struct BlockNode{
361
    int16_t mx;
362
    int16_t my;
363
    uint8_t color[3];
364
    uint8_t type;
365
//#define TYPE_SPLIT    1
366
#define BLOCK_INTRA   1
367
//#define TYPE_NOCOLOR  4
368
    uint8_t level; //FIXME merge into type?
369
}BlockNode;
370

    
371
#define LOG2_MB_SIZE 4
372
#define MB_SIZE (1<<LOG2_MB_SIZE)
373

    
374
typedef struct SubBand{
375
    int level;
376
    int stride;
377
    int width;
378
    int height;
379
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
380
    DWTELEM *buf;
381
    int16_t *x;
382
    DWTELEM *coeff;
383
    struct SubBand *parent;
384
    uint8_t state[/*7*2*/ 7 + 512][32];
385
}SubBand;
386

    
387
typedef struct Plane{
388
    int width;
389
    int height;
390
    SubBand band[MAX_DECOMPOSITIONS][4];
391
}Plane;
392

    
393
typedef struct SnowContext{
394
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
395

    
396
    AVCodecContext *avctx;
397
    RangeCoder c;
398
    DSPContext dsp;
399
    AVFrame input_picture;
400
    AVFrame current_picture;
401
    AVFrame last_picture;
402
    AVFrame mconly_picture;
403
//     uint8_t q_context[16];
404
    uint8_t header_state[32];
405
    uint8_t block_state[128 + 32*128];
406
    int keyframe;
407
    int always_reset;
408
    int version;
409
    int spatial_decomposition_type;
410
    int temporal_decomposition_type;
411
    int spatial_decomposition_count;
412
    int temporal_decomposition_count;
413
    DWTELEM *spatial_dwt_buffer;
414
    int colorspace_type;
415
    int chroma_h_shift;
416
    int chroma_v_shift;
417
    int spatial_scalability;
418
    int qlog;
419
    int lambda;
420
    int lambda2;
421
    int mv_scale;
422
    int qbias;
423
#define QBIAS_SHIFT 3
424
    int b_width;
425
    int b_height;
426
    int block_max_depth;
427
    Plane plane[MAX_PLANES];
428
    BlockNode *block;
429

    
430
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
431
}SnowContext;
432

    
433
#ifdef        __sgi
434
// Avoid a name clash on SGI IRIX
435
#undef        qexp
436
#endif
437
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
438
static const uint8_t qexp[8]={
439
    128, 140, 152, 166, 181, 197, 215, 235
440
//   64,  70,  76,  83,  91,  99, 108, 117
441
//   32,  35,  38,  41,  45,  49,  54,  59
442
//   16,  17,  19,  21,  23,  25,  27,  29
443
//    8,   9,  10,  10,  11,  12,  13,  15
444
};
445

    
446
static inline int mirror(int v, int m){
447
    if     (v<0) return -v;
448
    else if(v>m) return 2*m-v;
449
    else         return v;
450
}
451

    
452
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
453
    int i;
454

    
455
    if(v){
456
        const int a= ABS(v);
457
        const int e= av_log2(a);
458
#if 1
459
        const int el= FFMIN(e, 10);   
460
        put_rac(c, state+0, 0);
461

    
462
        for(i=0; i<el; i++){
463
            put_rac(c, state+1+i, 1);  //1..10
464
        }
465
        for(; i<e; i++){
466
            put_rac(c, state+1+9, 1);  //1..10
467
        }
468
        put_rac(c, state+1+FFMIN(i,9), 0);
469

    
470
        for(i=e-1; i>=el; i--){
471
            put_rac(c, state+22+9, (a>>i)&1); //22..31
472
        }
473
        for(; i>=0; i--){
474
            put_rac(c, state+22+i, (a>>i)&1); //22..31
475
        }
476

    
477
        if(is_signed)
478
            put_rac(c, state+11 + el, v < 0); //11..21
479
#else
480
        
481
        put_rac(c, state+0, 0);
482
        if(e<=9){
483
            for(i=0; i<e; i++){
484
                put_rac(c, state+1+i, 1);  //1..10
485
            }
486
            put_rac(c, state+1+i, 0);
487

    
488
            for(i=e-1; i>=0; i--){
489
                put_rac(c, state+22+i, (a>>i)&1); //22..31
490
            }
491

    
492
            if(is_signed)
493
                put_rac(c, state+11 + e, v < 0); //11..21
494
        }else{
495
            for(i=0; i<e; i++){
496
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
497
            }
498
            put_rac(c, state+1+FFMIN(i,9), 0);
499

    
500
            for(i=e-1; i>=0; i--){
501
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
502
            }
503

    
504
            if(is_signed)
505
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
506
        }
507
#endif
508
    }else{
509
        put_rac(c, state+0, 1);
510
    }
511
}
512

    
513
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
514
    if(get_rac(c, state+0))
515
        return 0;
516
    else{
517
        int i, e, a;
518
        e= 0;
519
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
520
            e++;
521
        }
522

    
523
        a= 1;
524
        for(i=e-1; i>=0; i--){
525
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
526
        }
527

    
528
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
529
            return -a;
530
        else
531
            return a;
532
    }
533
}
534

    
535
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
536
    int i;
537
    int r= log2>=0 ? 1<<log2 : 1;
538

    
539
    assert(v>=0);
540
    assert(log2>=-4);
541

    
542
    while(v >= r){
543
        put_rac(c, state+4+log2, 1);
544
        v -= r;
545
        log2++;
546
        if(log2>0) r+=r;
547
    }
548
    put_rac(c, state+4+log2, 0);
549
    
550
    for(i=log2-1; i>=0; i--){
551
        put_rac(c, state+31-i, (v>>i)&1);
552
    }
553
}
554

    
555
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
556
    int i;
557
    int r= log2>=0 ? 1<<log2 : 1;
558
    int v=0;
559

    
560
    assert(log2>=-4);
561

    
562
    while(get_rac(c, state+4+log2)){
563
        v+= r;
564
        log2++;
565
        if(log2>0) r+=r;
566
    }
567
    
568
    for(i=log2-1; i>=0; i--){
569
        v+= get_rac(c, state+31-i)<<i;
570
    }
571

    
572
    return v;
573
}
574

    
575
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
576
    const int mirror_left= !highpass;
577
    const int mirror_right= (width&1) ^ highpass;
578
    const int w= (width>>1) - 1 + (highpass & width);
579
    int i;
580

    
581
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
582
    if(mirror_left){
583
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
584
        dst += dst_step;
585
        src += src_step;
586
    }
587
    
588
    for(i=0; i<w; i++){
589
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
590
    }
591
    
592
    if(mirror_right){
593
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
594
    }
595
}
596

    
597
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
598
    const int mirror_left= !highpass;
599
    const int mirror_right= (width&1) ^ highpass;
600
    const int w= (width>>1) - 1 + (highpass & width);
601
    int i;
602

    
603
    if(mirror_left){
604
        int r= 3*2*ref[0];
605
        r += r>>4;
606
        r += r>>8;
607
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
608
        dst += dst_step;
609
        src += src_step;
610
    }
611
    
612
    for(i=0; i<w; i++){
613
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
614
        r += r>>4;
615
        r += r>>8;
616
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
617
    }
618
    
619
    if(mirror_right){
620
        int r= 3*2*ref[w*ref_step];
621
        r += r>>4;
622
        r += r>>8;
623
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
624
    }
625
}
626

    
627

    
628
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
629
    int x, i;
630
    
631
    for(x=start; x<width; x+=2){
632
        int64_t sum=0;
633

    
634
        for(i=0; i<n; i++){
635
            int x2= x + 2*i - n + 1;
636
            if     (x2<     0) x2= -x2;
637
            else if(x2>=width) x2= 2*width-x2-2;
638
            sum += coeffs[i]*(int64_t)dst[x2];
639
        }
640
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
641
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
642
    }
643
}
644

    
645
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
646
    int x, y, i;
647
    for(y=start; y<height; y+=2){
648
        for(x=0; x<width; x++){
649
            int64_t sum=0;
650
    
651
            for(i=0; i<n; i++){
652
                int y2= y + 2*i - n + 1;
653
                if     (y2<      0) y2= -y2;
654
                else if(y2>=height) y2= 2*height-y2-2;
655
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
656
            }
657
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
658
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
659
        }
660
    }
661
}
662

    
663
#define SCALEX 1
664
#define LX0 0
665
#define LX1 1
666

    
667
#if 0 // more accurate 9/7
668
#define N1 2
669
#define SHIFT1 14
670
#define COEFFS1 (int[]){-25987,-25987}
671
#define N2 2
672
#define SHIFT2 19
673
#define COEFFS2 (int[]){-27777,-27777}
674
#define N3 2
675
#define SHIFT3 15
676
#define COEFFS3 (int[]){28931,28931}
677
#define N4 2
678
#define SHIFT4 15
679
#define COEFFS4 (int[]){14533,14533}
680
#elif 1 // 13/7 CRF
681
#define N1 4
682
#define SHIFT1 4
683
#define COEFFS1 (int[]){1,-9,-9,1}
684
#define N2 4
685
#define SHIFT2 4
686
#define COEFFS2 (int[]){-1,5,5,-1}
687
#define N3 0
688
#define SHIFT3 1
689
#define COEFFS3 NULL
690
#define N4 0
691
#define SHIFT4 1
692
#define COEFFS4 NULL
693
#elif 1 // 3/5
694
#define LX0 1
695
#define LX1 0
696
#define SCALEX 0.5
697
#define N1 2
698
#define SHIFT1 1
699
#define COEFFS1 (int[]){1,1}
700
#define N2 2
701
#define SHIFT2 2
702
#define COEFFS2 (int[]){-1,-1}
703
#define N3 0
704
#define SHIFT3 0
705
#define COEFFS3 NULL
706
#define N4 0
707
#define SHIFT4 0
708
#define COEFFS4 NULL
709
#elif 1 // 11/5 
710
#define N1 0
711
#define SHIFT1 1
712
#define COEFFS1 NULL
713
#define N2 2
714
#define SHIFT2 2
715
#define COEFFS2 (int[]){-1,-1}
716
#define N3 2
717
#define SHIFT3 0
718
#define COEFFS3 (int[]){-1,-1}
719
#define N4 4
720
#define SHIFT4 7
721
#define COEFFS4 (int[]){-5,29,29,-5}
722
#define SCALEX 4
723
#elif 1 // 9/7 CDF
724
#define N1 2
725
#define SHIFT1 7
726
#define COEFFS1 (int[]){-203,-203}
727
#define N2 2
728
#define SHIFT2 12
729
#define COEFFS2 (int[]){-217,-217}
730
#define N3 2
731
#define SHIFT3 7
732
#define COEFFS3 (int[]){113,113}
733
#define N4 2
734
#define SHIFT4 9
735
#define COEFFS4 (int[]){227,227}
736
#define SCALEX 1
737
#elif 1 // 7/5 CDF
738
#define N1 0
739
#define SHIFT1 1
740
#define COEFFS1 NULL
741
#define N2 2
742
#define SHIFT2 2
743
#define COEFFS2 (int[]){-1,-1}
744
#define N3 2
745
#define SHIFT3 0
746
#define COEFFS3 (int[]){-1,-1}
747
#define N4 2
748
#define SHIFT4 4
749
#define COEFFS4 (int[]){3,3}
750
#elif 1 // 9/7 MN
751
#define N1 4
752
#define SHIFT1 4
753
#define COEFFS1 (int[]){1,-9,-9,1}
754
#define N2 2
755
#define SHIFT2 2
756
#define COEFFS2 (int[]){1,1}
757
#define N3 0
758
#define SHIFT3 1
759
#define COEFFS3 NULL
760
#define N4 0
761
#define SHIFT4 1
762
#define COEFFS4 NULL
763
#else // 13/7 CRF
764
#define N1 4
765
#define SHIFT1 4
766
#define COEFFS1 (int[]){1,-9,-9,1}
767
#define N2 4
768
#define SHIFT2 4
769
#define COEFFS2 (int[]){-1,5,5,-1}
770
#define N3 0
771
#define SHIFT3 1
772
#define COEFFS3 NULL
773
#define N4 0
774
#define SHIFT4 1
775
#define COEFFS4 NULL
776
#endif
777
static void horizontal_decomposeX(DWTELEM *b, int width){
778
    DWTELEM temp[width];
779
    const int width2= width>>1;
780
    const int w2= (width+1)>>1;
781
    int A1,A2,A3,A4, x;
782

    
783
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
784
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
785
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
786
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
787
    
788
    for(x=0; x<width2; x++){
789
        temp[x   ]= b[2*x    ];
790
        temp[x+w2]= b[2*x + 1];
791
    }
792
    if(width&1)
793
        temp[x   ]= b[2*x    ];
794
    memcpy(b, temp, width*sizeof(int));
795
}
796

    
797
static void horizontal_composeX(DWTELEM *b, int width){
798
    DWTELEM temp[width];
799
    const int width2= width>>1;
800
    int A1,A2,A3,A4, x;
801
    const int w2= (width+1)>>1;
802

    
803
    memcpy(temp, b, width*sizeof(int));
804
    for(x=0; x<width2; x++){
805
        b[2*x    ]= temp[x   ];
806
        b[2*x + 1]= temp[x+w2];
807
    }
808
    if(width&1)
809
        b[2*x    ]= temp[x   ];
810

    
811
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
812
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
813
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
814
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
815
}
816

    
817
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
818
    int x, y;
819
  
820
    for(y=0; y<height; y++){
821
        for(x=0; x<width; x++){
822
            buffer[y*stride + x] *= SCALEX;
823
        }
824
    }
825

    
826
    for(y=0; y<height; y++){
827
        horizontal_decomposeX(buffer + y*stride, width);
828
    }
829
    
830
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
831
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
832
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
833
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
834
}
835

    
836
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
837
    int x, y;
838
  
839
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
840
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
841
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
842
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
843

    
844
    for(y=0; y<height; y++){
845
        horizontal_composeX(buffer + y*stride, width);
846
    }
847

    
848
    for(y=0; y<height; y++){
849
        for(x=0; x<width; x++){
850
            buffer[y*stride + x] /= SCALEX;
851
        }
852
    }
853
}
854

    
855
static void horizontal_decompose53i(DWTELEM *b, int width){
856
    DWTELEM temp[width];
857
    const int width2= width>>1;
858
    int A1,A2,A3,A4, x;
859
    const int w2= (width+1)>>1;
860

    
861
    for(x=0; x<width2; x++){
862
        temp[x   ]= b[2*x    ];
863
        temp[x+w2]= b[2*x + 1];
864
    }
865
    if(width&1)
866
        temp[x   ]= b[2*x    ];
867
#if 0
868
    A2= temp[1       ];
869
    A4= temp[0       ];
870
    A1= temp[0+width2];
871
    A1 -= (A2 + A4)>>1;
872
    A4 += (A1 + 1)>>1;
873
    b[0+width2] = A1;
874
    b[0       ] = A4;
875
    for(x=1; x+1<width2; x+=2){
876
        A3= temp[x+width2];
877
        A4= temp[x+1     ];
878
        A3 -= (A2 + A4)>>1;
879
        A2 += (A1 + A3 + 2)>>2;
880
        b[x+width2] = A3;
881
        b[x       ] = A2;
882

883
        A1= temp[x+1+width2];
884
        A2= temp[x+2       ];
885
        A1 -= (A2 + A4)>>1;
886
        A4 += (A1 + A3 + 2)>>2;
887
        b[x+1+width2] = A1;
888
        b[x+1       ] = A4;
889
    }
890
    A3= temp[width-1];
891
    A3 -= A2;
892
    A2 += (A1 + A3 + 2)>>2;
893
    b[width -1] = A3;
894
    b[width2-1] = A2;
895
#else        
896
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
897
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
898
#endif
899
}
900

    
901
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
902
    int i;
903
    
904
    for(i=0; i<width; i++){
905
        b1[i] -= (b0[i] + b2[i])>>1;
906
    }
907
}
908

    
909
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
910
    int i;
911
    
912
    for(i=0; i<width; i++){
913
        b1[i] += (b0[i] + b2[i] + 2)>>2;
914
    }
915
}
916

    
917
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
918
    int y;
919
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
920
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
921
  
922
    for(y=-2; y<height; y+=2){
923
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
924
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
925

    
926
{START_TIMER
927
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
928
        if(y+2 < height) horizontal_decompose53i(b3, width);
929
STOP_TIMER("horizontal_decompose53i")}
930
        
931
{START_TIMER
932
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
933
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
934
STOP_TIMER("vertical_decompose53i*")}
935
        
936
        b0=b2;
937
        b1=b3;
938
    }
939
}
940

    
941
#define lift5 lift
942
#if 1
943
#define W_AM 3
944
#define W_AO 0
945
#define W_AS 1
946

    
947
#define W_BM 1
948
#define W_BO 8
949
#define W_BS 4
950

    
951
#undef lift5
952
#define W_CM 9999
953
#define W_CO 2
954
#define W_CS 2
955

    
956
#define W_DM 15
957
#define W_DO 16
958
#define W_DS 5
959
#elif 0
960
#define W_AM 55
961
#define W_AO 16
962
#define W_AS 5
963

    
964
#define W_BM 3
965
#define W_BO 32
966
#define W_BS 6
967

    
968
#define W_CM 127
969
#define W_CO 64
970
#define W_CS 7
971

    
972
#define W_DM 7
973
#define W_DO 8
974
#define W_DS 4
975
#elif 0
976
#define W_AM 97
977
#define W_AO 32
978
#define W_AS 6
979

    
980
#define W_BM 63
981
#define W_BO 512
982
#define W_BS 10
983

    
984
#define W_CM 13
985
#define W_CO 8
986
#define W_CS 4
987

    
988
#define W_DM 15
989
#define W_DO 16
990
#define W_DS 5
991

    
992
#else
993

    
994
#define W_AM 203
995
#define W_AO 64
996
#define W_AS 7
997

    
998
#define W_BM 217
999
#define W_BO 2048
1000
#define W_BS 12
1001

    
1002
#define W_CM 113
1003
#define W_CO 64
1004
#define W_CS 7
1005

    
1006
#define W_DM 227
1007
#define W_DO 128
1008
#define W_DS 9
1009
#endif
1010
static void horizontal_decompose97i(DWTELEM *b, int width){
1011
    DWTELEM temp[width];
1012
    const int w2= (width+1)>>1;
1013

    
1014
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1015
    lift (temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1016
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1017
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1018
}
1019

    
1020

    
1021
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1022
    int i;
1023
    
1024
    for(i=0; i<width; i++){
1025
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1026
    }
1027
}
1028

    
1029
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1030
    int i;
1031
    
1032
    for(i=0; i<width; i++){
1033
#ifdef lift5
1034
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1035
#else
1036
        int r= 3*(b0[i] + b2[i]);
1037
        r+= r>>4;
1038
        r+= r>>8;
1039
        b1[i] += (r+W_CO)>>W_CS;
1040
#endif
1041
    }
1042
}
1043

    
1044
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1045
    int i;
1046
    
1047
    for(i=0; i<width; i++){
1048
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1049
    }
1050
}
1051

    
1052
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1053
    int i;
1054
    
1055
    for(i=0; i<width; i++){
1056
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1057
    }
1058
}
1059

    
1060
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1061
    int y;
1062
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1063
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1064
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1065
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1066
  
1067
    for(y=-4; y<height; y+=2){
1068
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1069
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1070

    
1071
{START_TIMER
1072
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1073
        if(y+4 < height) horizontal_decompose97i(b5, width);
1074
if(width>400){
1075
STOP_TIMER("horizontal_decompose97i")
1076
}}
1077
        
1078
{START_TIMER
1079
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1080
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1081
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1082
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1083

    
1084
if(width>400){
1085
STOP_TIMER("vertical_decompose97i")
1086
}}
1087
        
1088
        b0=b2;
1089
        b1=b3;
1090
        b2=b4;
1091
        b3=b5;
1092
    }
1093
}
1094

    
1095
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1096
    int level;
1097
    
1098
    for(level=0; level<decomposition_count; level++){
1099
        switch(type){
1100
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1101
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1102
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1103
        }
1104
    }
1105
}
1106

    
1107
static void horizontal_compose53i(DWTELEM *b, int width){
1108
    DWTELEM temp[width];
1109
    const int width2= width>>1;
1110
    const int w2= (width+1)>>1;
1111
    int A1,A2,A3,A4, x;
1112

    
1113
#if 0
1114
    A2= temp[1       ];
1115
    A4= temp[0       ];
1116
    A1= temp[0+width2];
1117
    A1 -= (A2 + A4)>>1;
1118
    A4 += (A1 + 1)>>1;
1119
    b[0+width2] = A1;
1120
    b[0       ] = A4;
1121
    for(x=1; x+1<width2; x+=2){
1122
        A3= temp[x+width2];
1123
        A4= temp[x+1     ];
1124
        A3 -= (A2 + A4)>>1;
1125
        A2 += (A1 + A3 + 2)>>2;
1126
        b[x+width2] = A3;
1127
        b[x       ] = A2;
1128

1129
        A1= temp[x+1+width2];
1130
        A2= temp[x+2       ];
1131
        A1 -= (A2 + A4)>>1;
1132
        A4 += (A1 + A3 + 2)>>2;
1133
        b[x+1+width2] = A1;
1134
        b[x+1       ] = A4;
1135
    }
1136
    A3= temp[width-1];
1137
    A3 -= A2;
1138
    A2 += (A1 + A3 + 2)>>2;
1139
    b[width -1] = A3;
1140
    b[width2-1] = A2;
1141
#else   
1142
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1143
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1144
#endif
1145
    for(x=0; x<width2; x++){
1146
        b[2*x    ]= temp[x   ];
1147
        b[2*x + 1]= temp[x+w2];
1148
    }
1149
    if(width&1)
1150
        b[2*x    ]= temp[x   ];
1151
}
1152

    
1153
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1154
    int i;
1155
    
1156
    for(i=0; i<width; i++){
1157
        b1[i] += (b0[i] + b2[i])>>1;
1158
    }
1159
}
1160

    
1161
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1162
    int i;
1163
    
1164
    for(i=0; i<width; i++){
1165
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1166
    }
1167
}
1168

    
1169
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1170
    int y;
1171
    DWTELEM *b0= buffer + mirror(-1-1, height-1)*stride;
1172
    DWTELEM *b1= buffer + mirror(-1  , height-1)*stride;
1173
  
1174
    for(y=-1; y<=height; y+=2){
1175
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1176
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1177

    
1178
{START_TIMER
1179
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1180
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1181
STOP_TIMER("vertical_compose53i*")}
1182

    
1183
{START_TIMER
1184
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1185
        if(b0 <= b2) horizontal_compose53i(b1, width);
1186
STOP_TIMER("horizontal_compose53i")}
1187

    
1188
        b0=b2;
1189
        b1=b3;
1190
    }
1191
}   
1192

    
1193
 
1194
static void horizontal_compose97i(DWTELEM *b, int width){
1195
    DWTELEM temp[width];
1196
    const int w2= (width+1)>>1;
1197

    
1198
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1199
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1200
    lift (b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1201
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1202
}
1203

    
1204
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1205
    int i;
1206
    
1207
    for(i=0; i<width; i++){
1208
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1209
    }
1210
}
1211

    
1212
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1213
    int i;
1214
    
1215
    for(i=0; i<width; i++){
1216
#ifdef lift5
1217
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1218
#else
1219
        int r= 3*(b0[i] + b2[i]);
1220
        r+= r>>4;
1221
        r+= r>>8;
1222
        b1[i] -= (r+W_CO)>>W_CS;
1223
#endif
1224
    }
1225
}
1226

    
1227
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1228
    int i;
1229
    
1230
    for(i=0; i<width; i++){
1231
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1232
    }
1233
}
1234

    
1235
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1236
    int i;
1237
    
1238
    for(i=0; i<width; i++){
1239
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1240
    }
1241
}
1242

    
1243
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1244
    int y;
1245
    DWTELEM *b0= buffer + mirror(-3-1, height-1)*stride;
1246
    DWTELEM *b1= buffer + mirror(-3  , height-1)*stride;
1247
    DWTELEM *b2= buffer + mirror(-3+1, height-1)*stride;
1248
    DWTELEM *b3= buffer + mirror(-3+2, height-1)*stride;
1249

    
1250
    for(y=-3; y<=height; y+=2){
1251
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1252
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1253

    
1254
        if(stride == width && y+4 < height && 0){ 
1255
            int x;
1256
            for(x=0; x<width/2; x++)
1257
                b5[x] += 64*2;
1258
            for(; x<width; x++)
1259
                b5[x] += 169*2;
1260
        }
1261
        
1262
{START_TIMER
1263
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1264
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1265
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1266
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1267
if(width>400){
1268
STOP_TIMER("vertical_compose97i")}}
1269

    
1270
{START_TIMER
1271
        if(y-1>=  0) horizontal_compose97i(b0, width);
1272
        if(b0 <= b2) horizontal_compose97i(b1, width);
1273
if(width>400 && b0 <= b2){
1274
STOP_TIMER("horizontal_compose97i")}}
1275
        
1276
        b0=b2;
1277
        b1=b3;
1278
        b2=b4;
1279
        b3=b5;
1280
    }
1281
}
1282

    
1283
void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1284
    int level;
1285

    
1286
    for(level=decomposition_count-1; level>=0; level--){
1287
        switch(type){
1288
        case 0: spatial_compose97i(buffer, width>>level, height>>level, stride<<level); break;
1289
        case 1: spatial_compose53i(buffer, width>>level, height>>level, stride<<level); break;
1290
        case 2: spatial_composeX  (buffer, width>>level, height>>level, stride<<level); break;
1291
        }
1292
    }
1293
}
1294

    
1295
static void encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1296
    const int w= b->width;
1297
    const int h= b->height;
1298
    int x, y;
1299

    
1300
    if(1){
1301
        int run=0;
1302
        int runs[w*h];
1303
        int run_index=0;
1304
                
1305
        for(y=0; y<h; y++){
1306
            for(x=0; x<w; x++){
1307
                int v, p=0;
1308
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1309
                v= src[x + y*stride];
1310

    
1311
                if(y){
1312
                    t= src[x + (y-1)*stride];
1313
                    if(x){
1314
                        lt= src[x - 1 + (y-1)*stride];
1315
                    }
1316
                    if(x + 1 < w){
1317
                        rt= src[x + 1 + (y-1)*stride];
1318
                    }
1319
                }
1320
                if(x){
1321
                    l= src[x - 1 + y*stride];
1322
                    /*if(x > 1){
1323
                        if(orientation==1) ll= src[y + (x-2)*stride];
1324
                        else               ll= src[x - 2 + y*stride];
1325
                    }*/
1326
                }
1327
                if(parent){
1328
                    int px= x>>1;
1329
                    int py= y>>1;
1330
                    if(px<b->parent->width && py<b->parent->height) 
1331
                        p= parent[px + py*2*stride];
1332
                }
1333
                if(!(/*ll|*/l|lt|t|rt|p)){
1334
                    if(v){
1335
                        runs[run_index++]= run;
1336
                        run=0;
1337
                    }else{
1338
                        run++;
1339
                    }
1340
                }
1341
            }
1342
        }
1343
        runs[run_index++]= run;
1344
        run_index=0;
1345
        run= runs[run_index++];
1346

    
1347
        put_symbol2(&s->c, b->state[1], run, 3);
1348
        
1349
        for(y=0; y<h; y++){
1350
            for(x=0; x<w; x++){
1351
                int v, p=0;
1352
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1353
                v= src[x + y*stride];
1354

    
1355
                if(y){
1356
                    t= src[x + (y-1)*stride];
1357
                    if(x){
1358
                        lt= src[x - 1 + (y-1)*stride];
1359
                    }
1360
                    if(x + 1 < w){
1361
                        rt= src[x + 1 + (y-1)*stride];
1362
                    }
1363
                }
1364
                if(x){
1365
                    l= src[x - 1 + y*stride];
1366
                    /*if(x > 1){
1367
                        if(orientation==1) ll= src[y + (x-2)*stride];
1368
                        else               ll= src[x - 2 + y*stride];
1369
                    }*/
1370
                }
1371
                if(parent){
1372
                    int px= x>>1;
1373
                    int py= y>>1;
1374
                    if(px<b->parent->width && py<b->parent->height) 
1375
                        p= parent[px + py*2*stride];
1376
                }
1377
                if(/*ll|*/l|lt|t|rt|p){
1378
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1379

    
1380
                    put_rac(&s->c, &b->state[0][context], !!v);
1381
                }else{
1382
                    if(!run){
1383
                        run= runs[run_index++];
1384

    
1385
                        put_symbol2(&s->c, b->state[1], run, 3);
1386
                        assert(v);
1387
                    }else{
1388
                        run--;
1389
                        assert(!v);
1390
                    }
1391
                }
1392
                if(v){
1393
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1394

    
1395
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1396
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1397
                }
1398
            }
1399
        }
1400
    }
1401
}
1402

    
1403
static void encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1404
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1405
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1406
    encode_subband_c0run(s, b, src, parent, stride, orientation);
1407
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1408
}
1409

    
1410
static inline void decode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1411
    const int w= b->width;
1412
    const int h= b->height;
1413
    int x,y;
1414
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
1415
    int qmul= qexp[qlog&7]<<(qlog>>3);
1416
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1417
    
1418
    START_TIMER
1419

    
1420
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1421
        qadd= 0;
1422
        qmul= 1<<QEXPSHIFT;
1423
    }
1424

    
1425
    if(1){
1426
        int run;
1427
        int index=0;
1428
        int prev_index=-1;
1429
        int prev2_index=0;
1430
        int parent_index= 0;
1431
        int prev_parent_index= 0;
1432
        
1433
        for(y=0; y<b->height; y++)
1434
            memset(&src[y*stride], 0, b->width*sizeof(DWTELEM));
1435

    
1436
        run= get_symbol2(&s->c, b->state[1], 3);
1437
        for(y=0; y<h; y++){
1438
            int v=0;
1439
            int lt=0, t=0, rt=0;
1440

    
1441
            if(y && b->x[prev_index] == 0){
1442
                rt= b->coeff[prev_index];
1443
            }
1444
            for(x=0; x<w; x++){
1445
                int p=0;
1446
                const int l= v;
1447
                
1448
                lt= t; t= rt;
1449

    
1450
                if(y){
1451
                    if(b->x[prev_index] <= x)
1452
                        prev_index++;
1453
                    if(b->x[prev_index] == x + 1)
1454
                        rt= b->coeff[prev_index];
1455
                    else
1456
                        rt=0;
1457
                }
1458
                if(parent){
1459
                    if(x>>1 > b->parent->x[parent_index]){
1460
                        parent_index++;
1461
                    }
1462
                    if(x>>1 == b->parent->x[parent_index]){
1463
                        p= b->parent->coeff[parent_index];
1464
                    }
1465
                }
1466
                if(/*ll|*/l|lt|t|rt|p){
1467
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1468

    
1469
                    v=get_rac(&s->c, &b->state[0][context]);
1470
                }else{
1471
                    if(!run){
1472
                        run= get_symbol2(&s->c, b->state[1], 3);
1473
                        v=1;
1474
                    }else{
1475
                        run--;
1476
                        v=0;
1477

    
1478
                        if(y && parent){
1479
                            int max_run;
1480

    
1481
                            max_run= FFMIN(run, b->x[prev_index] - x - 2);
1482
                            max_run= FFMIN(max_run, 2*b->parent->x[parent_index] - x - 1);
1483
                            x+= max_run;
1484
                            run-= max_run;
1485
                        }
1486
                    }
1487
                }
1488
                if(v){
1489
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1490
                    v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
1491
                    if(get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]])){
1492
                        src[x + y*stride]=-(( v*qmul + qadd)>>(QEXPSHIFT));
1493
                        v= -v;
1494
                    }else{
1495
                        src[x + y*stride]= (( v*qmul + qadd)>>(QEXPSHIFT));
1496
                    }
1497
                    b->x[index]=x; //FIXME interleave x/coeff
1498
                    b->coeff[index++]= v;
1499
                }
1500
            }
1501
            b->x[index++]= w+1; //end marker
1502
            prev_index= prev2_index;
1503
            prev2_index= index;
1504
            
1505
            if(parent){
1506
                while(b->parent->x[parent_index] != b->parent->width+1)
1507
                    parent_index++;
1508
                parent_index++;
1509
                if(y&1){
1510
                    prev_parent_index= parent_index;
1511
                }else{
1512
                    parent_index= prev_parent_index;
1513
                }
1514
            }
1515
        }
1516
        b->x[index++]= w+1; //end marker
1517
        if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
1518
            STOP_TIMER("decode_subband")
1519
        }
1520
        
1521
        return;
1522
    }
1523
}
1524

    
1525
static void reset_contexts(SnowContext *s){
1526
    int plane_index, level, orientation;
1527

    
1528
    for(plane_index=0; plane_index<3; plane_index++){
1529
        for(level=0; level<s->spatial_decomposition_count; level++){
1530
            for(orientation=level ? 1:0; orientation<4; orientation++){
1531
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1532
            }
1533
        }
1534
    }
1535
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1536
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1537
}
1538

    
1539
static int alloc_blocks(SnowContext *s){
1540
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1541
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1542
    
1543
    s->b_width = w;
1544
    s->b_height= h;
1545
    
1546
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1547
    return 0;
1548
}
1549

    
1550
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1551
    uint8_t *bytestream= d->bytestream;
1552
    uint8_t *bytestream_start= d->bytestream_start;
1553
    *d= *s;
1554
    d->bytestream= bytestream;
1555
    d->bytestream_start= bytestream_start;
1556
}
1557

    
1558
//near copy & paste from dsputil, FIXME
1559
static int pix_sum(uint8_t * pix, int line_size, int w)
1560
{
1561
    int s, i, j;
1562

    
1563
    s = 0;
1564
    for (i = 0; i < w; i++) {
1565
        for (j = 0; j < w; j++) {
1566
            s += pix[0];
1567
            pix ++;
1568
        }
1569
        pix += line_size - w;
1570
    }
1571
    return s;
1572
}
1573

    
1574
//near copy & paste from dsputil, FIXME
1575
static int pix_norm1(uint8_t * pix, int line_size, int w)
1576
{
1577
    int s, i, j;
1578
    uint32_t *sq = squareTbl + 256;
1579

    
1580
    s = 0;
1581
    for (i = 0; i < w; i++) {
1582
        for (j = 0; j < w; j ++) {
1583
            s += sq[pix[0]];
1584
            pix ++;
1585
        }
1586
        pix += line_size - w;
1587
    }
1588
    return s;
1589
}
1590

    
1591
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1592
    const int w= s->b_width << s->block_max_depth;
1593
    const int rem_depth= s->block_max_depth - level;
1594
    const int index= (x + y*w) << rem_depth;
1595
    const int block_w= 1<<rem_depth;
1596
    BlockNode block;
1597
    int i,j;
1598
    
1599
    block.color[0]= l;
1600
    block.color[1]= cb;
1601
    block.color[2]= cr;
1602
    block.mx= mx;
1603
    block.my= my;
1604
    block.type= type;
1605
    block.level= level;
1606

    
1607
    for(j=0; j<block_w; j++){
1608
        for(i=0; i<block_w; i++){
1609
            s->block[index + i + j*w]= block;
1610
        }
1611
    }
1612
}
1613

    
1614
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1615
    const int offset[3]= {
1616
          y*c->  stride + x,
1617
        ((y*c->uvstride + x)>>1),
1618
        ((y*c->uvstride + x)>>1),
1619
    };
1620
    int i;
1621
    for(i=0; i<3; i++){
1622
        c->src[0][i]= src [i];
1623
        c->ref[0][i]= ref [i] + offset[i];
1624
    }
1625
    assert(!ref_index);
1626
}
1627

    
1628
//FIXME copy&paste
1629
#define P_LEFT P[1]
1630
#define P_TOP P[2]
1631
#define P_TOPRIGHT P[3]
1632
#define P_MEDIAN P[4]
1633
#define P_MV1 P[9]
1634
#define FLAG_QPEL   1 //must be 1
1635

    
1636
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1637
    uint8_t p_buffer[1024];
1638
    uint8_t i_buffer[1024];
1639
    uint8_t p_state[sizeof(s->block_state)];
1640
    uint8_t i_state[sizeof(s->block_state)];
1641
    RangeCoder pc, ic;
1642
    uint8_t *pbbak= s->c.bytestream;
1643
    uint8_t *pbbak_start= s->c.bytestream_start;
1644
    int score, score2, iscore, i_len, p_len, block_s, sum;
1645
    const int w= s->b_width  << s->block_max_depth;
1646
    const int h= s->b_height << s->block_max_depth;
1647
    const int rem_depth= s->block_max_depth - level;
1648
    const int index= (x + y*w) << rem_depth;
1649
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1650
    static BlockNode null_block= { //FIXME add border maybe
1651
        .color= {128,128,128},
1652
        .mx= 0,
1653
        .my= 0,
1654
        .type= 0,
1655
        .level= 0,
1656
    };
1657
    int trx= (x+1)<<rem_depth;
1658
    int try= (y+1)<<rem_depth;
1659
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
1660
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
1661
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1662
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1663
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1664
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1665
    int pl = left->color[0];
1666
    int pcb= left->color[1];
1667
    int pcr= left->color[2];
1668
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
1669
    int pmy= mid_pred(left->my, top->my, tr->my);
1670
    int mx=0, my=0;
1671
    int l,cr,cb, i;
1672
    const int stride= s->current_picture.linesize[0];
1673
    const int uvstride= s->current_picture.linesize[1];
1674
    const int instride= s->input_picture.linesize[0];
1675
    const int uvinstride= s->input_picture.linesize[1];
1676
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
1677
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
1678
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
1679
    uint8_t current_mb[3][stride*block_w];
1680
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
1681
    int P[10][2];
1682
    int16_t last_mv[3][2];
1683
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1684
    const int shift= 1+qpel;
1685
    MotionEstContext *c= &s->m.me;
1686
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
1687
    int my_context= av_log2(2*ABS(left->my - top->my));
1688
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1689

    
1690
    assert(sizeof(s->block_state) >= 256);
1691
    if(s->keyframe){
1692
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
1693
        return 0;
1694
    }
1695

    
1696
    //FIXME optimize
1697
    for(i=0; i<block_w; i++)
1698
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
1699
    for(i=0; i<block_w>>1; i++)
1700
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
1701
    for(i=0; i<block_w>>1; i++)
1702
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
1703

    
1704
//    clip predictors / edge ?
1705

    
1706
    P_LEFT[0]= left->mx;
1707
    P_LEFT[1]= left->my;
1708
    P_TOP [0]= top->mx;
1709
    P_TOP [1]= top->my;
1710
    P_TOPRIGHT[0]= tr->mx;
1711
    P_TOPRIGHT[1]= tr->my;
1712
    
1713
    last_mv[0][0]= s->block[index].mx;
1714
    last_mv[0][1]= s->block[index].my;
1715
    last_mv[1][0]= right->mx;
1716
    last_mv[1][1]= right->my;
1717
    last_mv[2][0]= bottom->mx;
1718
    last_mv[2][1]= bottom->my;
1719
    
1720
    s->m.mb_stride=2;
1721
    s->m.mb_x= 
1722
    s->m.mb_y= 0;
1723
    s->m.me.skip= 0;
1724

    
1725
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
1726
    
1727
    assert(s->m.me.  stride ==   stride);
1728
    assert(s->m.me.uvstride == uvstride);
1729
    
1730
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1731
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1732
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1733
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1734
    
1735
    c->xmin = - x*block_w - 16+2;
1736
    c->ymin = - y*block_w - 16+2;
1737
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1738
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1739

    
1740
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
1741
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
1742
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
1743
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
1744
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1745
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1746
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1747

    
1748
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1749
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1750

    
1751
    if (!y) {
1752
        c->pred_x= P_LEFT[0];
1753
        c->pred_y= P_LEFT[1];
1754
    } else {
1755
        c->pred_x = P_MEDIAN[0];
1756
        c->pred_y = P_MEDIAN[1];
1757
    }
1758

    
1759
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
1760
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1761

    
1762
    assert(mx >= c->xmin);
1763
    assert(mx <= c->xmax);
1764
    assert(my >= c->ymin);
1765
    assert(my <= c->ymax);
1766
    
1767
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1768
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1769
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
1770
                             
1771
  //  subpel search
1772
    pc= s->c;
1773
    pc.bytestream_start=
1774
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1775
    memcpy(p_state, s->block_state, sizeof(s->block_state));
1776

    
1777
    if(level!=s->block_max_depth)
1778
        put_rac(&pc, &p_state[4 + s_context], 1);
1779
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1780
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
1781
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
1782
    p_len= pc.bytestream - pc.bytestream_start;
1783
    score += (s->lambda2*(p_len*8
1784
              + (pc.outstanding_count - s->c.outstanding_count)*8
1785
              + (-av_log2(pc.range)    + av_log2(s->c.range))
1786
             ))>>FF_LAMBDA_SHIFT;
1787

    
1788
    block_s= block_w*block_w;
1789
    sum = pix_sum(&current_mb[0][0], stride, block_w);
1790
    l= (sum + block_s/2)/block_s;
1791
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
1792
    
1793
    block_s= block_w*block_w>>2;
1794
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
1795
    cb= (sum + block_s/2)/block_s;
1796
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1797
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
1798
    cr= (sum + block_s/2)/block_s;
1799
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1800

    
1801
    ic= s->c;
1802
    ic.bytestream_start=
1803
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1804
    memcpy(i_state, s->block_state, sizeof(s->block_state));
1805
    if(level!=s->block_max_depth)
1806
        put_rac(&ic, &i_state[4 + s_context], 1);
1807
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1808
    put_symbol(&ic, &i_state[32],  l-pl , 1);
1809
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
1810
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
1811
    i_len= ic.bytestream - ic.bytestream_start;
1812
    iscore += (s->lambda2*(i_len*8
1813
              + (ic.outstanding_count - s->c.outstanding_count)*8
1814
              + (-av_log2(ic.range)    + av_log2(s->c.range))
1815
             ))>>FF_LAMBDA_SHIFT;
1816

    
1817
//    assert(score==256*256*256*64-1);
1818
    assert(iscore < 255*255*256 + s->lambda2*10);
1819
    assert(iscore >= 0);
1820
    assert(l>=0 && l<=255);
1821
    assert(pl>=0 && pl<=255);
1822

    
1823
    if(level==0){
1824
        int varc= iscore >> 8;
1825
        int vard= score >> 8;
1826
        if (vard <= 64 || vard < varc)
1827
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1828
        else
1829
            c->scene_change_score+= s->m.qscale;
1830
    }
1831
        
1832
    if(level!=s->block_max_depth){
1833
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
1834
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1835
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1836
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1837
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1838
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1839
    
1840
        if(score2 < score && score2 < iscore)
1841
            return score2;
1842
    }
1843
    
1844
    if(iscore < score){
1845
        memcpy(pbbak, i_buffer, i_len);
1846
        s->c= ic;
1847
        s->c.bytestream_start= pbbak_start;
1848
        s->c.bytestream= pbbak + i_len;
1849
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
1850
        memcpy(s->block_state, i_state, sizeof(s->block_state));
1851
        return iscore;
1852
    }else{
1853
        memcpy(pbbak, p_buffer, p_len);
1854
        s->c= pc;
1855
        s->c.bytestream_start= pbbak_start;
1856
        s->c.bytestream= pbbak + p_len;
1857
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
1858
        memcpy(s->block_state, p_state, sizeof(s->block_state));
1859
        return score;
1860
    }
1861
}
1862

    
1863
static void decode_q_branch(SnowContext *s, int level, int x, int y){
1864
    const int w= s->b_width << s->block_max_depth;
1865
    const int rem_depth= s->block_max_depth - level;
1866
    const int index= (x + y*w) << rem_depth;
1867
    static BlockNode null_block= { //FIXME add border maybe
1868
        .color= {128,128,128},
1869
        .mx= 0,
1870
        .my= 0,
1871
        .type= 0,
1872
        .level= 0,
1873
    };
1874
    int trx= (x+1)<<rem_depth;
1875
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
1876
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
1877
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1878
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1879
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1880
    
1881
    if(s->keyframe){
1882
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
1883
        return;
1884
    }
1885

    
1886
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1887
        int type;
1888
        int l = left->color[0];
1889
        int cb= left->color[1];
1890
        int cr= left->color[2];
1891
        int mx= mid_pred(left->mx, top->mx, tr->mx);
1892
        int my= mid_pred(left->my, top->my, tr->my);
1893
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
1894
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
1895
        
1896
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1897

    
1898
        if(type){
1899
            l += get_symbol(&s->c, &s->block_state[32], 1);
1900
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
1901
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
1902
        }else{
1903
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
1904
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
1905
        }
1906
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
1907
    }else{
1908
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1909
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1910
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1911
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1912
    }
1913
}
1914

    
1915
static void encode_blocks(SnowContext *s){
1916
    int x, y;
1917
    int w= s->b_width;
1918
    int h= s->b_height;
1919

    
1920
    for(y=0; y<h; y++){
1921
        for(x=0; x<w; x++){
1922
            encode_q_branch(s, 0, x, y);
1923
        }
1924
    }
1925
}
1926

    
1927
static void decode_blocks(SnowContext *s){
1928
    int x, y;
1929
    int w= s->b_width;
1930
    int h= s->b_height;
1931

    
1932
    for(y=0; y<h; y++){
1933
        for(x=0; x<w; x++){
1934
            decode_q_branch(s, 0, x, y);
1935
        }
1936
    }
1937
}
1938

    
1939
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1940
    int x, y;
1941
START_TIMER
1942
    for(y=0; y < b_h+5; y++){
1943
        for(x=0; x < b_w; x++){
1944
            int a0= src[x    ];
1945
            int a1= src[x + 1];
1946
            int a2= src[x + 2];
1947
            int a3= src[x + 3];
1948
            int a4= src[x + 4];
1949
            int a5= src[x + 5];
1950
//            int am= 9*(a1+a2) - (a0+a3);
1951
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1952
//            int am= 18*(a2+a3) - 2*(a1+a4);
1953
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1954
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
1955

    
1956
//            if(b_w==16) am= 8*(a1+a2);
1957

    
1958
            if(dx<8) tmp[x]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
1959
            else     tmp[x]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
1960

    
1961
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
1962
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
1963
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
1964
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
1965
        }
1966
        tmp += stride;
1967
        src += stride;
1968
    }
1969
    tmp -= (b_h+5)*stride;
1970
    
1971
    for(y=0; y < b_h; y++){
1972
        for(x=0; x < b_w; x++){
1973
            int a0= tmp[x + 0*stride];
1974
            int a1= tmp[x + 1*stride];
1975
            int a2= tmp[x + 2*stride];
1976
            int a3= tmp[x + 3*stride];
1977
            int a4= tmp[x + 4*stride];
1978
            int a5= tmp[x + 5*stride];
1979
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1980
//            int am= 18*(a2+a3) - 2*(a1+a4);
1981
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
1982
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
1983
            
1984
//            if(b_w==16) am= 8*(a1+a2);
1985

    
1986
            if(dy<8) dst[x]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
1987
            else     dst[x]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
1988

    
1989
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
1990
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
1991
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
1992
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
1993
        }
1994
        dst += stride;
1995
        tmp += stride;
1996
    }
1997
STOP_TIMER("mc_block")
1998
}
1999

    
2000
#define mca(dx,dy,b_w)\
2001
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2002
    uint8_t tmp[stride*(b_w+5)];\
2003
    assert(h==b_w);\
2004
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2005
}
2006

    
2007
mca( 0, 0,16)
2008
mca( 8, 0,16)
2009
mca( 0, 8,16)
2010
mca( 8, 8,16)
2011
mca( 0, 0,8)
2012
mca( 8, 0,8)
2013
mca( 0, 8,8)
2014
mca( 8, 8,8)
2015

    
2016
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2017
    if(block->type){
2018
        int x, y;
2019
        const int color= block->color[plane_index];
2020
        for(y=0; y < b_h; y++){
2021
            for(x=0; x < b_w; x++){
2022
                dst[x + y*stride]= color;
2023
            }
2024
        }
2025
    }else{
2026
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2027
        int mx= block->mx*scale;
2028
        int my= block->my*scale;
2029
        const int dx= mx&15;
2030
        const int dy= my&15;
2031
        sx += (mx>>4) - 2;
2032
        sy += (my>>4) - 2;
2033
        src += sx + sy*stride;
2034
        if(   (unsigned)sx >= w - b_w - 4
2035
           || (unsigned)sy >= h - b_h - 4){
2036
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2037
            src= tmp + MB_SIZE;
2038
        }
2039
        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2040
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2041
        else
2042
            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2043
    }
2044
}
2045

    
2046
static always_inline int same_block(BlockNode *a, BlockNode *b){
2047
    return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2048
}
2049

    
2050
//FIXME name clenup (b_w, block_w, b_width stuff)
2051
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2052
    const int b_width = s->b_width  << s->block_max_depth;
2053
    const int b_height= s->b_height << s->block_max_depth;
2054
    const int b_stride= b_width;
2055
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2056
    BlockNode *rt= lt+1;
2057
    BlockNode *lb= lt+b_stride;
2058
    BlockNode *rb= lb+1;
2059
    uint8_t *block[4]; 
2060
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2061
    int x,y;
2062

    
2063
    if(b_x<0){
2064
        lt= rt;
2065
        lb= rb;
2066
    }else if(b_x + 1 >= b_width){
2067
        rt= lt;
2068
        rb= lb;
2069
    }
2070
    if(b_y<0){
2071
        lt= lb;
2072
        rt= rb;
2073
    }else if(b_y + 1 >= b_height){
2074
        lb= lt;
2075
        rb= rt;
2076
    }
2077
        
2078
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2079
        obmc -= src_x;
2080
        b_w += src_x;
2081
        src_x=0;
2082
    }else if(src_x + b_w > w){
2083
        b_w = w - src_x;
2084
    }
2085
    if(src_y<0){
2086
        obmc -= src_y*obmc_stride;
2087
        b_h += src_y;
2088
        src_y=0;
2089
    }else if(src_y + b_h> h){
2090
        b_h = h - src_y;
2091
    }
2092
    
2093
    if(b_w<=0 || b_h<=0) return;
2094

    
2095
assert(src_stride > 7*MB_SIZE);
2096
    dst += src_x + src_y*dst_stride;
2097
    dst8+= src_x + src_y*src_stride;
2098
//    src += src_x + src_y*src_stride;
2099

    
2100
    block[0]= tmp+3*MB_SIZE;
2101
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2102

    
2103
    if(same_block(lt, rt)){
2104
        block[1]= block[0];
2105
    }else{
2106
        block[1]= tmp + 4*MB_SIZE;
2107
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2108
    }
2109
        
2110
    if(same_block(lt, lb)){
2111
        block[2]= block[0];
2112
    }else if(same_block(rt, lb)){
2113
        block[2]= block[1];
2114
    }else{
2115
        block[2]= tmp+5*MB_SIZE;
2116
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2117
    }
2118

    
2119
    if(same_block(lt, rb) ){
2120
        block[3]= block[0];
2121
    }else if(same_block(rt, rb)){
2122
        block[3]= block[1];
2123
    }else if(same_block(lb, rb)){
2124
        block[3]= block[2];
2125
    }else{
2126
        block[3]= tmp+6*MB_SIZE;
2127
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2128
    }
2129
#if 0
2130
    for(y=0; y<b_h; y++){
2131
        for(x=0; x<b_w; x++){
2132
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2133
            if(add) dst[x + y*dst_stride] += v;
2134
            else    dst[x + y*dst_stride] -= v;
2135
        }
2136
    }
2137
    for(y=0; y<b_h; y++){
2138
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2139
        for(x=0; x<b_w; x++){
2140
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2141
            if(add) dst[x + y*dst_stride] += v;
2142
            else    dst[x + y*dst_stride] -= v;
2143
        }
2144
    }
2145
    for(y=0; y<b_h; y++){
2146
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2147
        for(x=0; x<b_w; x++){
2148
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2149
            if(add) dst[x + y*dst_stride] += v;
2150
            else    dst[x + y*dst_stride] -= v;
2151
        }
2152
    }
2153
    for(y=0; y<b_h; y++){
2154
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2155
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2156
        for(x=0; x<b_w; x++){
2157
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2158
            if(add) dst[x + y*dst_stride] += v;
2159
            else    dst[x + y*dst_stride] -= v;
2160
        }
2161
    }
2162
#else
2163
    for(y=0; y<b_h; y++){
2164
        //FIXME ugly missue of obmc_stride
2165
        uint8_t *obmc1= obmc + y*obmc_stride;
2166
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2167
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2168
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2169
        for(x=0; x<b_w; x++){
2170
            int v=   obmc1[x] * block[3][x + y*src_stride]
2171
                    +obmc2[x] * block[2][x + y*src_stride]
2172
                    +obmc3[x] * block[1][x + y*src_stride]
2173
                    +obmc4[x] * block[0][x + y*src_stride];
2174
            
2175
            v <<= 8 - LOG2_OBMC_MAX;
2176
            if(FRAC_BITS != 8){
2177
                v += 1<<(7 - FRAC_BITS);
2178
                v >>= 8 - FRAC_BITS;
2179
            }
2180
            if(add){
2181
                v += dst[x + y*dst_stride];
2182
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2183
                if(v&(~255)) v= ~(v>>31);
2184
                dst8[x + y*src_stride] = v;
2185
            }else{
2186
                dst[x + y*dst_stride] -= v;
2187
            }
2188
        }
2189
    }
2190
#endif
2191
}
2192

    
2193
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2194
    Plane *p= &s->plane[plane_index];
2195
    const int mb_w= s->b_width  << s->block_max_depth;
2196
    const int mb_h= s->b_height << s->block_max_depth;
2197
    int x, y, mb_x, mb_y;
2198
    int block_size = MB_SIZE >> s->block_max_depth;
2199
    int block_w    = plane_index ? block_size/2 : block_size;
2200
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2201
    int obmc_stride= plane_index ? block_size : 2*block_size;
2202
    int ref_stride= s->current_picture.linesize[plane_index];
2203
    uint8_t *ref  = s->last_picture.data[plane_index];
2204
    uint8_t *dst8= s->current_picture.data[plane_index];
2205
    int w= p->width;
2206
    int h= p->height;
2207
    START_TIMER
2208
    
2209
    if(s->keyframe || (s->avctx->debug&512)){
2210
        if(add){
2211
            for(y=0; y<h; y++){
2212
                for(x=0; x<w; x++){
2213
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2214
                    v >>= FRAC_BITS;
2215
                    if(v&(~255)) v= ~(v>>31);
2216
                    dst8[x + y*ref_stride]= v;
2217
                }
2218
            }
2219
        }else{
2220
            for(y=0; y<h; y++){
2221
                for(x=0; x<w; x++){
2222
                    buf[x + y*w]-= 128<<FRAC_BITS;
2223
                }
2224
            }
2225
        }
2226

    
2227
        return;
2228
    }
2229
    
2230
    for(mb_y=0; mb_y<=mb_h; mb_y++){
2231
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2232
            START_TIMER
2233

    
2234
            add_yblock(s, buf, dst8, ref, obmc, 
2235
                       block_w*mb_x - block_w/2,
2236
                       block_w*mb_y - block_w/2,
2237
                       block_w, block_w,
2238
                       w, h,
2239
                       w, ref_stride, obmc_stride,
2240
                       mb_x - 1, mb_y - 1,
2241
                       add, plane_index);
2242
            
2243
            STOP_TIMER("add_yblock")
2244
        }
2245
    }
2246
    
2247
    STOP_TIMER("predict_plane")
2248
}
2249

    
2250
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2251
    const int level= b->level;
2252
    const int w= b->width;
2253
    const int h= b->height;
2254
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2255
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2256
    int x,y, thres1, thres2;
2257
    START_TIMER
2258

    
2259
    assert(QROOT==8);
2260

    
2261
    if(s->qlog == LOSSLESS_QLOG) return;
2262
 
2263
    bias= bias ? 0 : (3*qmul)>>3;
2264
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2265
    thres2= 2*thres1;
2266
    
2267
    if(!bias){
2268
        for(y=0; y<h; y++){
2269
            for(x=0; x<w; x++){
2270
                int i= src[x + y*stride];
2271
                
2272
                if((unsigned)(i+thres1) > thres2){
2273
                    if(i>=0){
2274
                        i<<= QEXPSHIFT;
2275
                        i/= qmul; //FIXME optimize
2276
                        src[x + y*stride]=  i;
2277
                    }else{
2278
                        i= -i;
2279
                        i<<= QEXPSHIFT;
2280
                        i/= qmul; //FIXME optimize
2281
                        src[x + y*stride]= -i;
2282
                    }
2283
                }else
2284
                    src[x + y*stride]= 0;
2285
            }
2286
        }
2287
    }else{
2288
        for(y=0; y<h; y++){
2289
            for(x=0; x<w; x++){
2290
                int i= src[x + y*stride]; 
2291
                
2292
                if((unsigned)(i+thres1) > thres2){
2293
                    if(i>=0){
2294
                        i<<= QEXPSHIFT;
2295
                        i= (i + bias) / qmul; //FIXME optimize
2296
                        src[x + y*stride]=  i;
2297
                    }else{
2298
                        i= -i;
2299
                        i<<= QEXPSHIFT;
2300
                        i= (i + bias) / qmul; //FIXME optimize
2301
                        src[x + y*stride]= -i;
2302
                    }
2303
                }else
2304
                    src[x + y*stride]= 0;
2305
            }
2306
        }
2307
    }
2308
    if(level+1 == s->spatial_decomposition_count){
2309
//        STOP_TIMER("quantize")
2310
    }
2311
}
2312

    
2313
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2314
    const int w= b->width;
2315
    const int h= b->height;
2316
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2317
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2318
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2319
    int x,y;
2320
    START_TIMER
2321
    
2322
    if(s->qlog == LOSSLESS_QLOG) return;
2323
    
2324
    assert(QROOT==8);
2325

    
2326
    for(y=0; y<h; y++){
2327
        for(x=0; x<w; x++){
2328
            int i= src[x + y*stride];
2329
            if(i<0){
2330
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2331
            }else if(i>0){
2332
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2333
            }
2334
        }
2335
    }
2336
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2337
        STOP_TIMER("dquant")
2338
    }
2339
}
2340

    
2341
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2342
    const int w= b->width;
2343
    const int h= b->height;
2344
    int x,y;
2345
    
2346
    for(y=h-1; y>=0; y--){
2347
        for(x=w-1; x>=0; x--){
2348
            int i= x + y*stride;
2349
            
2350
            if(x){
2351
                if(use_median){
2352
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2353
                    else  src[i] -= src[i - 1];
2354
                }else{
2355
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2356
                    else  src[i] -= src[i - 1];
2357
                }
2358
            }else{
2359
                if(y) src[i] -= src[i - stride];
2360
            }
2361
        }
2362
    }
2363
}
2364

    
2365
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2366
    const int w= b->width;
2367
    const int h= b->height;
2368
    int x,y;
2369
    
2370
    for(y=0; y<h; y++){
2371
        for(x=0; x<w; x++){
2372
            int i= x + y*stride;
2373
            
2374
            if(x){
2375
                if(use_median){
2376
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2377
                    else  src[i] += src[i - 1];
2378
                }else{
2379
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2380
                    else  src[i] += src[i - 1];
2381
                }
2382
            }else{
2383
                if(y) src[i] += src[i - stride];
2384
            }
2385
        }
2386
    }
2387
}
2388

    
2389
static void encode_header(SnowContext *s){
2390
    int plane_index, level, orientation;
2391
    uint8_t kstate[32]; 
2392
    
2393
    memset(kstate, MID_STATE, sizeof(kstate));   
2394

    
2395
    put_rac(&s->c, kstate, s->keyframe);
2396
    if(s->keyframe || s->always_reset)
2397
        reset_contexts(s);
2398
    if(s->keyframe){
2399
        put_symbol(&s->c, s->header_state, s->version, 0);
2400
        put_rac(&s->c, s->header_state, s->always_reset);
2401
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
2402
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
2403
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
2404
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
2405
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
2406
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
2407
        put_rac(&s->c, s->header_state, s->spatial_scalability);
2408
//        put_rac(&s->c, s->header_state, s->rate_scalability);
2409

    
2410
        for(plane_index=0; plane_index<2; plane_index++){
2411
            for(level=0; level<s->spatial_decomposition_count; level++){
2412
                for(orientation=level ? 1:0; orientation<4; orientation++){
2413
                    if(orientation==2) continue;
2414
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
2415
                }
2416
            }
2417
        }
2418
    }
2419
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
2420
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
2421
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
2422
    put_symbol(&s->c, s->header_state, s->qbias, 1);
2423
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
2424
}
2425

    
2426
static int decode_header(SnowContext *s){
2427
    int plane_index, level, orientation;
2428
    uint8_t kstate[32];
2429

    
2430
    memset(kstate, MID_STATE, sizeof(kstate));   
2431

    
2432
    s->keyframe= get_rac(&s->c, kstate);
2433
    if(s->keyframe || s->always_reset)
2434
        reset_contexts(s);
2435
    if(s->keyframe){
2436
        s->version= get_symbol(&s->c, s->header_state, 0);
2437
        if(s->version>0){
2438
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
2439
            return -1;
2440
        }
2441
        s->always_reset= get_rac(&s->c, s->header_state);
2442
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2443
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2444
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2445
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2446
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2447
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2448
        s->spatial_scalability= get_rac(&s->c, s->header_state);
2449
//        s->rate_scalability= get_rac(&s->c, s->header_state);
2450

    
2451
        for(plane_index=0; plane_index<3; plane_index++){
2452
            for(level=0; level<s->spatial_decomposition_count; level++){
2453
                for(orientation=level ? 1:0; orientation<4; orientation++){
2454
                    int q;
2455
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2456
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2457
                    else                    q= get_symbol(&s->c, s->header_state, 1);
2458
                    s->plane[plane_index].band[level][orientation].qlog= q;
2459
                }
2460
            }
2461
        }
2462
    }
2463
    
2464
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2465
    if(s->spatial_decomposition_type > 2){
2466
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2467
        return -1;
2468
    }
2469
    
2470
    s->qlog= get_symbol(&s->c, s->header_state, 1);
2471
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
2472
    s->qbias= get_symbol(&s->c, s->header_state, 1);
2473
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
2474

    
2475
    return 0;
2476
}
2477

    
2478
static int common_init(AVCodecContext *avctx){
2479
    SnowContext *s = avctx->priv_data;
2480
    int width, height;
2481
    int level, orientation, plane_index, dec;
2482

    
2483
    s->avctx= avctx;
2484
        
2485
    dsputil_init(&s->dsp, avctx);
2486

    
2487
#define mcf(dx,dy)\
2488
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
2489
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2490
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
2491
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
2492
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
2493
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
2494

    
2495
    mcf( 0, 0)
2496
    mcf( 4, 0)
2497
    mcf( 8, 0)
2498
    mcf(12, 0)
2499
    mcf( 0, 4)
2500
    mcf( 4, 4)
2501
    mcf( 8, 4)
2502
    mcf(12, 4)
2503
    mcf( 0, 8)
2504
    mcf( 4, 8)
2505
    mcf( 8, 8)
2506
    mcf(12, 8)
2507
    mcf( 0,12)
2508
    mcf( 4,12)
2509
    mcf( 8,12)
2510
    mcf(12,12)
2511

    
2512
#define mcfh(dx,dy)\
2513
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
2514
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2515
        mc_block_hpel ## dx ## dy ## 16;\
2516
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
2517
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
2518
        mc_block_hpel ## dx ## dy ## 8;
2519

    
2520
    mcfh(0, 0)
2521
    mcfh(8, 0)
2522
    mcfh(0, 8)
2523
    mcfh(8, 8)
2524
        
2525
    dec= s->spatial_decomposition_count= 5;
2526
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2527
    
2528
    s->chroma_h_shift= 1; //FIXME XXX
2529
    s->chroma_v_shift= 1;
2530
    
2531
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2532
    
2533
    width= s->avctx->width;
2534
    height= s->avctx->height;
2535

    
2536
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
2537
    
2538
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2539
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
2540
    
2541
    for(plane_index=0; plane_index<3; plane_index++){    
2542
        int w= s->avctx->width;
2543
        int h= s->avctx->height;
2544

    
2545
        if(plane_index){
2546
            w>>= s->chroma_h_shift;
2547
            h>>= s->chroma_v_shift;
2548
        }
2549
        s->plane[plane_index].width = w;
2550
        s->plane[plane_index].height= h;
2551
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
2552
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2553
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2554
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2555
                
2556
                b->buf= s->spatial_dwt_buffer;
2557
                b->level= level;
2558
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2559
                b->width = (w + !(orientation&1))>>1;
2560
                b->height= (h + !(orientation>1))>>1;
2561
                
2562
                if(orientation&1) b->buf += (w+1)>>1;
2563
                if(orientation>1) b->buf += b->stride>>1;
2564
                
2565
                if(level)
2566
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
2567
                b->x    = av_mallocz(((b->width+1) * b->height+1)*sizeof(int16_t));
2568
                b->coeff= av_mallocz(((b->width+1) * b->height+1)*sizeof(DWTELEM));
2569
            }
2570
            w= (w+1)>>1;
2571
            h= (h+1)>>1;
2572
        }
2573
    }
2574
    
2575
    reset_contexts(s);
2576
/*    
2577
    width= s->width= avctx->width;
2578
    height= s->height= avctx->height;
2579
    
2580
    assert(width && height);
2581
*/
2582
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2583
    
2584
    return 0;
2585
}
2586

    
2587

    
2588
static void calculate_vissual_weight(SnowContext *s, Plane *p){
2589
    int width = p->width;
2590
    int height= p->height;
2591
    int level, orientation, x, y;
2592

    
2593
    for(level=0; level<s->spatial_decomposition_count; level++){
2594
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
2595
            SubBand *b= &p->band[level][orientation];
2596
            DWTELEM *buf= b->buf;
2597
            int64_t error=0;
2598
            
2599
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
2600
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
2601
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
2602
            for(y=0; y<height; y++){
2603
                for(x=0; x<width; x++){
2604
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
2605
                    error += d*d;
2606
                }
2607
            }
2608

    
2609
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
2610
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
2611
        }
2612
    }
2613
}
2614

    
2615
static int encode_init(AVCodecContext *avctx)
2616
{
2617
    SnowContext *s = avctx->priv_data;
2618
    int plane_index;
2619

    
2620
    if(avctx->strict_std_compliance >= 0){
2621
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
2622
               "use vstrict=-1 to use it anyway\n");
2623
        return -1;
2624
    }
2625
 
2626
    common_init(avctx);
2627
    alloc_blocks(s);
2628
 
2629
    s->version=0;
2630
    
2631
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2632
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2633
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2634
    h263_encode_init(&s->m); //mv_penalty
2635

    
2636
    for(plane_index=0; plane_index<3; plane_index++){
2637
        calculate_vissual_weight(s, &s->plane[plane_index]);
2638
    }
2639
    
2640
    
2641
    avctx->coded_frame= &s->current_picture;
2642
    switch(avctx->pix_fmt){
2643
//    case PIX_FMT_YUV444P:
2644
//    case PIX_FMT_YUV422P:
2645
    case PIX_FMT_YUV420P:
2646
    case PIX_FMT_GRAY8:
2647
//    case PIX_FMT_YUV411P:
2648
//    case PIX_FMT_YUV410P:
2649
        s->colorspace_type= 0;
2650
        break;
2651
/*    case PIX_FMT_RGBA32:
2652
        s->colorspace= 1;
2653
        break;*/
2654
    default:
2655
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
2656
        return -1;
2657
    }
2658
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2659
    s->chroma_h_shift= 1;
2660
    s->chroma_v_shift= 1;
2661
    return 0;
2662
}
2663

    
2664
static int frame_start(SnowContext *s){
2665
   AVFrame tmp;
2666
   int w= s->avctx->width; //FIXME round up to x16 ?
2667
   int h= s->avctx->height;
2668

    
2669
    if(s->current_picture.data[0]){
2670
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
2671
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2672
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2673
    }
2674

    
2675
    tmp= s->last_picture;
2676
    s->last_picture= s->current_picture;
2677
    s->current_picture= tmp;
2678
    
2679
    s->current_picture.reference= 1;
2680
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2681
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2682
        return -1;
2683
    }
2684
    
2685
    return 0;
2686
}
2687

    
2688
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
2689
    SnowContext *s = avctx->priv_data;
2690
    RangeCoder * const c= &s->c;
2691
    AVFrame *pict = data;
2692
    const int width= s->avctx->width;
2693
    const int height= s->avctx->height;
2694
    int level, orientation, plane_index;
2695

    
2696
    ff_init_range_encoder(c, buf, buf_size);
2697
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2698
    
2699
    s->input_picture = *pict;
2700

    
2701
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
2702
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
2703
    
2704
    if(pict->quality){
2705
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
2706
        //<64 >60
2707
        s->qlog += 61;
2708
    }else{
2709
        s->qlog= LOSSLESS_QLOG;
2710
    }
2711

    
2712
    frame_start(s);
2713
    s->current_picture.key_frame= s->keyframe;
2714

    
2715
    if(pict->pict_type == P_TYPE){
2716
        int block_width = (width +15)>>4;
2717
        int block_height= (height+15)>>4;
2718
        int stride= s->current_picture.linesize[0];
2719
        
2720
        assert(s->current_picture.data[0]);
2721
        assert(s->last_picture.data[0]);
2722
     
2723
        s->m.avctx= s->avctx;
2724
        s->m.current_picture.data[0]= s->current_picture.data[0];
2725
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
2726
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
2727
        s->m.current_picture_ptr= &s->m.current_picture;
2728
        s->m.   last_picture_ptr= &s->m.   last_picture;
2729
        s->m.linesize=
2730
        s->m.   last_picture.linesize[0]=
2731
        s->m.    new_picture.linesize[0]=
2732
        s->m.current_picture.linesize[0]= stride;
2733
        s->m.uvlinesize= s->current_picture.linesize[1];
2734
        s->m.width = width;
2735
        s->m.height= height;
2736
        s->m.mb_width = block_width;
2737
        s->m.mb_height= block_height;
2738
        s->m.mb_stride=   s->m.mb_width+1;
2739
        s->m.b8_stride= 2*s->m.mb_width+1;
2740
        s->m.f_code=1;
2741
        s->m.pict_type= pict->pict_type;
2742
        s->m.me_method= s->avctx->me_method;
2743
        s->m.me.scene_change_score=0;
2744
        s->m.flags= s->avctx->flags;
2745
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
2746
        s->m.out_format= FMT_H263;
2747
        s->m.unrestricted_mv= 1;
2748

    
2749
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
2750
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
2751
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
2752

    
2753
        s->m.dsp= s->dsp; //move
2754
        ff_init_me(&s->m);
2755
    }
2756
    
2757
redo_frame:
2758
        
2759
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
2760

    
2761
    encode_header(s);
2762
    encode_blocks(s);
2763
      
2764
    for(plane_index=0; plane_index<3; plane_index++){
2765
        Plane *p= &s->plane[plane_index];
2766
        int w= p->width;
2767
        int h= p->height;
2768
        int x, y;
2769
//        int bits= put_bits_count(&s->c.pb);
2770

    
2771
        //FIXME optimize
2772
     if(pict->data[plane_index]) //FIXME gray hack
2773
        for(y=0; y<h; y++){
2774
            for(x=0; x<w; x++){
2775
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
2776
            }
2777
        }
2778
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
2779
        
2780
        if(   plane_index==0 
2781
           && pict->pict_type == P_TYPE 
2782
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
2783
            ff_init_range_encoder(c, buf, buf_size);
2784
            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2785
            pict->pict_type= FF_I_TYPE;
2786
            s->keyframe=1;
2787
            reset_contexts(s);
2788
            goto redo_frame;
2789
        }
2790
        
2791
        if(s->qlog == LOSSLESS_QLOG){
2792
            for(y=0; y<h; y++){
2793
                for(x=0; x<w; x++){
2794
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1)))>>FRAC_BITS;
2795
                }
2796
            }
2797
        }
2798
 
2799
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2800

    
2801
        for(level=0; level<s->spatial_decomposition_count; level++){
2802
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2803
                SubBand *b= &p->band[level][orientation];
2804
                
2805
                quantize(s, b, b->buf, b->stride, s->qbias);
2806
                if(orientation==0)
2807
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
2808
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2809
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
2810
                if(orientation==0)
2811
                    correlate(s, b, b->buf, b->stride, 1, 0);
2812
            }
2813
        }
2814
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
2815

    
2816
        for(level=0; level<s->spatial_decomposition_count; level++){
2817
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2818
                SubBand *b= &p->band[level][orientation];
2819

    
2820
                dequantize(s, b, b->buf, b->stride);
2821
            }
2822
        }
2823

    
2824
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2825
        if(s->qlog == LOSSLESS_QLOG){
2826
            for(y=0; y<h; y++){
2827
                for(x=0; x<w; x++){
2828
                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
2829
                }
2830
            }
2831
        }
2832
{START_TIMER
2833
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2834
STOP_TIMER("pred-conv")}
2835
        if(s->avctx->flags&CODEC_FLAG_PSNR){
2836
            int64_t error= 0;
2837
            
2838
    if(pict->data[plane_index]) //FIXME gray hack
2839
            for(y=0; y<h; y++){
2840
                for(x=0; x<w; x++){
2841
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
2842
                    error += d*d;
2843
                }
2844
            }
2845
            s->avctx->error[plane_index] += error;
2846
            s->current_picture.error[plane_index] = error;
2847
        }
2848
    }
2849

    
2850
    if(s->last_picture.data[0])
2851
        avctx->release_buffer(avctx, &s->last_picture);
2852

    
2853
    emms_c();
2854
    
2855
    return ff_rac_terminate(c);
2856
}
2857

    
2858
static void common_end(SnowContext *s){
2859
    int plane_index, level, orientation;
2860

    
2861
    av_freep(&s->spatial_dwt_buffer);
2862

    
2863
    av_freep(&s->m.me.scratchpad);    
2864
    av_freep(&s->m.me.map);
2865
    av_freep(&s->m.me.score_map);
2866
 
2867
    av_freep(&s->block);
2868

    
2869
    for(plane_index=0; plane_index<3; plane_index++){    
2870
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
2871
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2872
                SubBand *b= &s->plane[plane_index].band[level][orientation];
2873
                
2874
                av_freep(&b->x);
2875
                av_freep(&b->coeff);
2876
            }
2877
        }
2878
    }
2879
}
2880

    
2881
static int encode_end(AVCodecContext *avctx)
2882
{
2883
    SnowContext *s = avctx->priv_data;
2884

    
2885
    common_end(s);
2886

    
2887
    return 0;
2888
}
2889

    
2890
static int decode_init(AVCodecContext *avctx)
2891
{
2892
//    SnowContext *s = avctx->priv_data;
2893

    
2894
    common_init(avctx);
2895
    
2896
    return 0;
2897
}
2898

    
2899
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
2900
    SnowContext *s = avctx->priv_data;
2901
    RangeCoder * const c= &s->c;
2902
    int bytes_read;
2903
    AVFrame *picture = data;
2904
    int level, orientation, plane_index;
2905
    
2906

    
2907
    /* no supplementary picture */
2908
    if (buf_size == 0)
2909
        return 0;
2910

    
2911
    ff_init_range_decoder(c, buf, buf_size);
2912
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2913

    
2914
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2915
    decode_header(s);
2916
    if(!s->block) alloc_blocks(s);
2917

    
2918
    frame_start(s);
2919
    //keyframe flag dupliaction mess FIXME
2920
    if(avctx->debug&FF_DEBUG_PICT_INFO)
2921
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2922
    
2923
    decode_blocks(s);
2924

    
2925
    for(plane_index=0; plane_index<3; plane_index++){
2926
        Plane *p= &s->plane[plane_index];
2927
        int w= p->width;
2928
        int h= p->height;
2929
        int x, y;
2930
        
2931
if(s->avctx->debug&2048){
2932
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2933
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2934

    
2935
        for(y=0; y<h; y++){
2936
            for(x=0; x<w; x++){
2937
                int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2938
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2939
            }
2940
        }
2941
}
2942
        for(level=0; level<s->spatial_decomposition_count; level++){
2943
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
2944
                SubBand *b= &p->band[level][orientation];
2945

    
2946
                decode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
2947
                if(orientation==0){
2948
                    correlate(s, b, b->buf, b->stride, 1, 0);
2949
                    dequantize(s, b, b->buf, b->stride);
2950
                    assert(b->buf == s->spatial_dwt_buffer);
2951
                }
2952
            }
2953
        }
2954

    
2955
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
2956
        if(s->qlog == LOSSLESS_QLOG){
2957
            for(y=0; y<h; y++){
2958
                for(x=0; x<w; x++){
2959
                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
2960
                }
2961
            }
2962
        }
2963
{START_TIMER
2964
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
2965
STOP_TIMER("predict_plane conv2")}
2966
    }
2967
            
2968
    emms_c();
2969

    
2970
    if(s->last_picture.data[0])
2971
        avctx->release_buffer(avctx, &s->last_picture);
2972

    
2973
if(!(s->avctx->debug&2048))        
2974
    *picture= s->current_picture;
2975
else
2976
    *picture= s->mconly_picture;
2977
    
2978
    *data_size = sizeof(AVFrame);
2979
    
2980
    bytes_read= c->bytestream - c->bytestream_start;
2981
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
2982

    
2983
    return bytes_read;
2984
}
2985

    
2986
static int decode_end(AVCodecContext *avctx)
2987
{
2988
    SnowContext *s = avctx->priv_data;
2989

    
2990
    common_end(s);
2991

    
2992
    return 0;
2993
}
2994

    
2995
AVCodec snow_decoder = {
2996
    "snow",
2997
    CODEC_TYPE_VIDEO,
2998
    CODEC_ID_SNOW,
2999
    sizeof(SnowContext),
3000
    decode_init,
3001
    NULL,
3002
    decode_end,
3003
    decode_frame,
3004
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3005
    NULL
3006
};
3007

    
3008
AVCodec snow_encoder = {
3009
    "snow",
3010
    CODEC_TYPE_VIDEO,
3011
    CODEC_ID_SNOW,
3012
    sizeof(SnowContext),
3013
    encode_init,
3014
    encode_frame,
3015
    encode_end,
3016
};
3017

    
3018

    
3019
#if 0
3020
#undef malloc
3021
#undef free
3022
#undef printf
3023

3024
int main(){
3025
    int width=256;
3026
    int height=256;
3027
    int buffer[2][width*height];
3028
    SnowContext s;
3029
    int i;
3030
    s.spatial_decomposition_count=6;
3031
    s.spatial_decomposition_type=1;
3032
    
3033
    printf("testing 5/3 DWT\n");
3034
    for(i=0; i<width*height; i++)
3035
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3036
    
3037
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3038
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3039
    
3040
    for(i=0; i<width*height; i++)
3041
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3042

3043
    printf("testing 9/7 DWT\n");
3044
    s.spatial_decomposition_type=0;
3045
    for(i=0; i<width*height; i++)
3046
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3047
    
3048
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3049
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3050
    
3051
    for(i=0; i<width*height; i++)
3052
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3053
        
3054
    printf("testing AC coder\n");
3055
    memset(s.header_state, 0, sizeof(s.header_state));
3056
    ff_init_range_encoder(&s.c, buffer[0], 256*256);
3057
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3058
        
3059
    for(i=-256; i<256; i++){
3060
START_TIMER
3061
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3062
STOP_TIMER("put_symbol")
3063
    }
3064
    ff_rac_terminate(&s.c);
3065

3066
    memset(s.header_state, 0, sizeof(s.header_state));
3067
    ff_init_range_decoder(&s.c, buffer[0], 256*256);
3068
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3069
    
3070
    for(i=-256; i<256; i++){
3071
        int j;
3072
START_TIMER
3073
        j= get_symbol(&s.c, s.header_state, 1);
3074
STOP_TIMER("get_symbol")
3075
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3076
    }
3077
{
3078
int level, orientation, x, y;
3079
int64_t errors[8][4];
3080
int64_t g=0;
3081

3082
    memset(errors, 0, sizeof(errors));
3083
    s.spatial_decomposition_count=3;
3084
    s.spatial_decomposition_type=0;
3085
    for(level=0; level<s.spatial_decomposition_count; level++){
3086
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3087
            int w= width  >> (s.spatial_decomposition_count-level);
3088
            int h= height >> (s.spatial_decomposition_count-level);
3089
            int stride= width  << (s.spatial_decomposition_count-level);
3090
            DWTELEM *buf= buffer[0];
3091
            int64_t error=0;
3092

3093
            if(orientation&1) buf+=w;
3094
            if(orientation>1) buf+=stride>>1;
3095
            
3096
            memset(buffer[0], 0, sizeof(int)*width*height);
3097
            buf[w/2 + h/2*stride]= 256*256;
3098
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3099
            for(y=0; y<height; y++){
3100
                for(x=0; x<width; x++){
3101
                    int64_t d= buffer[0][x + y*width];
3102
                    error += d*d;
3103
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3104
                }
3105
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3106
            }
3107
            error= (int)(sqrt(error)+0.5);
3108
            errors[level][orientation]= error;
3109
            if(g) g=ff_gcd(g, error);
3110
            else g= error;
3111
        }
3112
    }
3113
    printf("static int const visual_weight[][4]={\n");
3114
    for(level=0; level<s.spatial_decomposition_count; level++){
3115
        printf("  {");
3116
        for(orientation=0; orientation<4; orientation++){
3117
            printf("%8lld,", errors[level][orientation]/g);
3118
        }
3119
        printf("},\n");
3120
    }
3121
    printf("};\n");
3122
    {
3123
            int level=2;
3124
            int orientation=3;
3125
            int w= width  >> (s.spatial_decomposition_count-level);
3126
            int h= height >> (s.spatial_decomposition_count-level);
3127
            int stride= width  << (s.spatial_decomposition_count-level);
3128
            DWTELEM *buf= buffer[0];
3129
            int64_t error=0;
3130

3131
            buf+=w;
3132
            buf+=stride>>1;
3133
            
3134
            memset(buffer[0], 0, sizeof(int)*width*height);
3135
#if 1
3136
            for(y=0; y<height; y++){
3137
                for(x=0; x<width; x++){
3138
                    int tab[4]={0,2,3,1};
3139
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3140
                }
3141
            }
3142
            ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3143
#else
3144
            for(y=0; y<h; y++){
3145
                for(x=0; x<w; x++){
3146
                    buf[x + y*stride  ]=169;
3147
                    buf[x + y*stride-w]=64;
3148
                }
3149
            }
3150
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3151
#endif
3152
            for(y=0; y<height; y++){
3153
                for(x=0; x<width; x++){
3154
                    int64_t d= buffer[0][x + y*width];
3155
                    error += d*d;
3156
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3157
                }
3158
                if(ABS(height/2-y)<9) printf("\n");
3159
            }
3160
    }
3161

    
3162
}
3163
    return 0;
3164
}
3165
#endif
3166