Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 565a45ac

History | View | Annotate | Download (131 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22

    
23
#include "rangecoder.h"
24
#define MID_STATE 128
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QROOT 8 
35
#define LOSSLESS_QLOG -128
36
#define FRAC_BITS 8
37

    
38
static const int8_t quant3[256]={
39
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
55
};
56
static const int8_t quant3b[256]={
57
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73
};
74
static const int8_t quant5[256]={
75
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
76
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
77
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
78
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
79
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
80
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
81
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
82
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
83
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
84
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
85
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
86
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
87
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
88
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
89
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
90
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
91
};
92
static const int8_t quant7[256]={
93
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
96
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
97
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
98
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
99
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
100
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
101
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
102
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
103
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
104
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
105
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
106
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
107
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
109
};
110
static const int8_t quant9[256]={
111
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
113
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
114
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
115
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
116
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
117
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
118
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
119
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
120
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
121
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
122
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
123
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
124
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
125
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
126
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
127
};
128
static const int8_t quant11[256]={
129
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
130
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
132
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
133
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
134
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
135
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
136
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
137
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
138
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
139
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
140
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
141
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
142
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
143
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
145
};
146
static const int8_t quant13[256]={
147
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
148
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
151
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
152
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
153
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
154
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
155
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
156
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
157
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
158
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
159
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
160
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
162
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
163
};
164

    
165
#define LOG2_OBMC_MAX 6
166
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
167
#if 0 //64*cubic
168
static const uint8_t obmc32[1024]={
169
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
170
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
171
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
172
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
173
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
174
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
175
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
176
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
177
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
178
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
179
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
180
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
181
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
182
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
183
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
184
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
185
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
186
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
187
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
188
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
189
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
191
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
192
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
193
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
194
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
196
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
197
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
198
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
199
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
200
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201
//error:0.000022
202
};
203
static const uint8_t obmc16[256]={
204
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
205
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
206
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
207
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
208
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
209
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
210
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
211
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
212
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
213
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
214
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
215
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
216
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
218
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
219
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
220
//error:0.000033
221
};
222
#elif 1 // 64*linear
223
static const uint8_t obmc32[1024]={
224
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
225
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
226
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
227
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
228
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
229
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
230
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
231
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
232
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
233
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
234
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
235
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
236
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
237
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
238
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
239
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
240
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
241
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
242
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
243
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
244
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
245
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
246
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
247
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
248
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
249
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
251
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
252
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
253
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
254
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
255
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
256
 //error:0.000020
257
};
258
static const uint8_t obmc16[256]={
259
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
260
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
261
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
262
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
263
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
264
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
265
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
266
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
267
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
268
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
269
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
270
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
271
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
272
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
273
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
274
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
275
//error:0.000015
276
};
277
#else //64*cos
278
static const uint8_t obmc32[1024]={
279
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
281
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
282
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
283
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
284
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
285
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
286
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
287
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
288
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
289
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
290
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
291
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
292
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
293
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
294
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
295
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
296
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
297
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
298
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
299
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
301
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
302
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
303
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
304
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
306
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
307
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
308
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
309
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
310
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
311
//error:0.000022
312
};
313
static const uint8_t obmc16[256]={
314
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
315
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
316
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
317
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
318
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
319
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
320
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
321
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
322
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
323
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
324
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
325
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
326
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
328
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
329
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
330
//error:0.000022
331
};
332
#endif
333

    
334
//linear *64
335
static const uint8_t obmc8[64]={
336
 1, 3, 5, 7, 7, 5, 3, 1,
337
 3, 9,15,21,21,15, 9, 3,
338
 5,15,25,35,35,25,15, 5,
339
 7,21,35,49,49,35,21, 7,
340
 7,21,35,49,49,35,21, 7,
341
 5,15,25,35,35,25,15, 5,
342
 3, 9,15,21,21,15, 9, 3,
343
 1, 3, 5, 7, 7, 5, 3, 1,
344
//error:0.000000
345
};
346

    
347
//linear *64
348
static const uint8_t obmc4[16]={
349
 4,12,12, 4,
350
12,36,36,12,
351
12,36,36,12,
352
 4,12,12, 4,
353
//error:0.000000
354
};
355

    
356
static const uint8_t *obmc_tab[4]={
357
    obmc32, obmc16, obmc8, obmc4
358
};
359

    
360
typedef struct BlockNode{
361
    int16_t mx;
362
    int16_t my;
363
    uint8_t color[3];
364
    uint8_t type;
365
//#define TYPE_SPLIT    1
366
#define BLOCK_INTRA   1
367
//#define TYPE_NOCOLOR  4
368
    uint8_t level; //FIXME merge into type?
369
}BlockNode;
370

    
371
#define LOG2_MB_SIZE 4
372
#define MB_SIZE (1<<LOG2_MB_SIZE)
373

    
374
typedef struct x_and_coeff{
375
    int16_t x;
376
    int16_t coeff;
377
} x_and_coeff;
378

    
379
typedef struct SubBand{
380
    int level;
381
    int stride;
382
    int width;
383
    int height;
384
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
385
    DWTELEM *buf;
386
    int buf_x_offset;
387
    int buf_y_offset;
388
    int stride_line; ///< Stride measured in lines, not pixels.
389
    x_and_coeff * x_coeff;
390
    struct SubBand *parent;
391
    uint8_t state[/*7*2*/ 7 + 512][32];
392
}SubBand;
393

    
394
typedef struct Plane{
395
    int width;
396
    int height;
397
    SubBand band[MAX_DECOMPOSITIONS][4];
398
}Plane;
399

    
400
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
401
typedef struct {
402
    DWTELEM * * line; ///< For use by idwt and predict_slices.
403
    DWTELEM * * data_stack; ///< Used for internal purposes.
404
    int data_stack_top;
405
    int line_count;
406
    int line_width;
407
    int data_count;
408
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
409
} slice_buffer;
410

    
411
typedef struct SnowContext{
412
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
413

    
414
    AVCodecContext *avctx;
415
    RangeCoder c;
416
    DSPContext dsp;
417
    AVFrame input_picture;
418
    AVFrame current_picture;
419
    AVFrame last_picture;
420
    AVFrame mconly_picture;
421
//     uint8_t q_context[16];
422
    uint8_t header_state[32];
423
    uint8_t block_state[128 + 32*128];
424
    int keyframe;
425
    int always_reset;
426
    int version;
427
    int spatial_decomposition_type;
428
    int temporal_decomposition_type;
429
    int spatial_decomposition_count;
430
    int temporal_decomposition_count;
431
    DWTELEM *spatial_dwt_buffer;
432
    int colorspace_type;
433
    int chroma_h_shift;
434
    int chroma_v_shift;
435
    int spatial_scalability;
436
    int qlog;
437
    int lambda;
438
    int lambda2;
439
    int mv_scale;
440
    int qbias;
441
#define QBIAS_SHIFT 3
442
    int b_width;
443
    int b_height;
444
    int block_max_depth;
445
    Plane plane[MAX_PLANES];
446
    BlockNode *block;
447
    slice_buffer sb;
448

    
449
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
450
}SnowContext;
451

    
452
typedef struct {
453
    DWTELEM *b0;
454
    DWTELEM *b1;
455
    DWTELEM *b2;
456
    DWTELEM *b3;
457
    int y;
458
} dwt_compose_t;
459

    
460
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
461
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
462

    
463
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
464
{
465
    int i;
466
  
467
    buf->base_buffer = base_buffer;
468
    buf->line_count = line_count;
469
    buf->line_width = line_width;
470
    buf->data_count = max_allocated_lines;
471
    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
472
    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
473
  
474
    for (i = 0; i < max_allocated_lines; i++)
475
    {
476
      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
477
    }
478
    
479
    buf->data_stack_top = max_allocated_lines - 1;
480
}
481

    
482
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
483
{
484
    int i;
485
    int offset;
486
    DWTELEM * buffer;
487
  
488
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);  
489
  
490
    assert(buf->data_stack_top >= 0);
491
//  assert(!buf->line[line]);
492
    if (buf->line[line])
493
        return buf->line[line];
494
    
495
    offset = buf->line_width * line;
496
    buffer = buf->data_stack[buf->data_stack_top];
497
    buf->data_stack_top--;
498
    buf->line[line] = buffer;
499
  
500
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
501
  
502
    return buffer;
503
}
504

    
505
static void slice_buffer_release(slice_buffer * buf, int line)
506
{
507
    int i;
508
    int offset;
509
    DWTELEM * buffer;
510

    
511
    assert(line >= 0 && line < buf->line_count);
512
    assert(buf->line[line]);
513

    
514
    offset = buf->line_width * line;
515
    buffer = buf->line[line];
516
    buf->data_stack_top++;
517
    buf->data_stack[buf->data_stack_top] = buffer;
518
    buf->line[line] = NULL;
519
  
520
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
521
}
522

    
523
static void slice_buffer_flush(slice_buffer * buf)
524
{
525
    int i;
526
    for (i = 0; i < buf->line_count; i++)
527
    {
528
        if (buf->line[i])
529
        {
530
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
531
            slice_buffer_release(buf, i);
532
        }
533
    }
534
}
535

    
536
static void slice_buffer_destroy(slice_buffer * buf)
537
{
538
    int i;
539
    slice_buffer_flush(buf);
540
  
541
    for (i = buf->data_count - 1; i >= 0; i--)
542
    {
543
        assert(buf->data_stack[i]);
544
        av_free(buf->data_stack[i]);
545
    }
546
    assert(buf->data_stack);
547
    av_free(buf->data_stack);
548
    assert(buf->line);
549
    av_free(buf->line);
550
}
551

    
552
#ifdef        __sgi
553
// Avoid a name clash on SGI IRIX
554
#undef        qexp
555
#endif
556
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
557
static const uint8_t qexp[8]={
558
    128, 140, 152, 166, 181, 197, 215, 235
559
//   64,  70,  76,  83,  91,  99, 108, 117
560
//   32,  35,  38,  41,  45,  49,  54,  59
561
//   16,  17,  19,  21,  23,  25,  27,  29
562
//    8,   9,  10,  10,  11,  12,  13,  15
563
};
564

    
565
static inline int mirror(int v, int m){
566
    if     (v<0) return -v;
567
    else if(v>m) return 2*m-v;
568
    else         return v;
569
}
570

    
571
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
572
    int i;
573

    
574
    if(v){
575
        const int a= ABS(v);
576
        const int e= av_log2(a);
577
#if 1
578
        const int el= FFMIN(e, 10);   
579
        put_rac(c, state+0, 0);
580

    
581
        for(i=0; i<el; i++){
582
            put_rac(c, state+1+i, 1);  //1..10
583
        }
584
        for(; i<e; i++){
585
            put_rac(c, state+1+9, 1);  //1..10
586
        }
587
        put_rac(c, state+1+FFMIN(i,9), 0);
588

    
589
        for(i=e-1; i>=el; i--){
590
            put_rac(c, state+22+9, (a>>i)&1); //22..31
591
        }
592
        for(; i>=0; i--){
593
            put_rac(c, state+22+i, (a>>i)&1); //22..31
594
        }
595

    
596
        if(is_signed)
597
            put_rac(c, state+11 + el, v < 0); //11..21
598
#else
599
        
600
        put_rac(c, state+0, 0);
601
        if(e<=9){
602
            for(i=0; i<e; i++){
603
                put_rac(c, state+1+i, 1);  //1..10
604
            }
605
            put_rac(c, state+1+i, 0);
606

    
607
            for(i=e-1; i>=0; i--){
608
                put_rac(c, state+22+i, (a>>i)&1); //22..31
609
            }
610

    
611
            if(is_signed)
612
                put_rac(c, state+11 + e, v < 0); //11..21
613
        }else{
614
            for(i=0; i<e; i++){
615
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
616
            }
617
            put_rac(c, state+1+FFMIN(i,9), 0);
618

    
619
            for(i=e-1; i>=0; i--){
620
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
621
            }
622

    
623
            if(is_signed)
624
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
625
        }
626
#endif
627
    }else{
628
        put_rac(c, state+0, 1);
629
    }
630
}
631

    
632
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
633
    if(get_rac(c, state+0))
634
        return 0;
635
    else{
636
        int i, e, a;
637
        e= 0;
638
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
639
            e++;
640
        }
641

    
642
        a= 1;
643
        for(i=e-1; i>=0; i--){
644
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
645
        }
646

    
647
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
648
            return -a;
649
        else
650
            return a;
651
    }
652
}
653

    
654
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
655
    int i;
656
    int r= log2>=0 ? 1<<log2 : 1;
657

    
658
    assert(v>=0);
659
    assert(log2>=-4);
660

    
661
    while(v >= r){
662
        put_rac(c, state+4+log2, 1);
663
        v -= r;
664
        log2++;
665
        if(log2>0) r+=r;
666
    }
667
    put_rac(c, state+4+log2, 0);
668
    
669
    for(i=log2-1; i>=0; i--){
670
        put_rac(c, state+31-i, (v>>i)&1);
671
    }
672
}
673

    
674
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
675
    int i;
676
    int r= log2>=0 ? 1<<log2 : 1;
677
    int v=0;
678

    
679
    assert(log2>=-4);
680

    
681
    while(get_rac(c, state+4+log2)){
682
        v+= r;
683
        log2++;
684
        if(log2>0) r+=r;
685
    }
686
    
687
    for(i=log2-1; i>=0; i--){
688
        v+= get_rac(c, state+31-i)<<i;
689
    }
690

    
691
    return v;
692
}
693

    
694
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
695
    const int mirror_left= !highpass;
696
    const int mirror_right= (width&1) ^ highpass;
697
    const int w= (width>>1) - 1 + (highpass & width);
698
    int i;
699

    
700
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
701
    if(mirror_left){
702
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
703
        dst += dst_step;
704
        src += src_step;
705
    }
706
    
707
    for(i=0; i<w; i++){
708
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
709
    }
710
    
711
    if(mirror_right){
712
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
713
    }
714
}
715

    
716
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
717
    const int mirror_left= !highpass;
718
    const int mirror_right= (width&1) ^ highpass;
719
    const int w= (width>>1) - 1 + (highpass & width);
720
    int i;
721

    
722
    if(mirror_left){
723
        int r= 3*2*ref[0];
724
        r += r>>4;
725
        r += r>>8;
726
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
727
        dst += dst_step;
728
        src += src_step;
729
    }
730
    
731
    for(i=0; i<w; i++){
732
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
733
        r += r>>4;
734
        r += r>>8;
735
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
736
    }
737
    
738
    if(mirror_right){
739
        int r= 3*2*ref[w*ref_step];
740
        r += r>>4;
741
        r += r>>8;
742
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
743
    }
744
}
745

    
746

    
747
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
748
    int x, i;
749
    
750
    for(x=start; x<width; x+=2){
751
        int64_t sum=0;
752

    
753
        for(i=0; i<n; i++){
754
            int x2= x + 2*i - n + 1;
755
            if     (x2<     0) x2= -x2;
756
            else if(x2>=width) x2= 2*width-x2-2;
757
            sum += coeffs[i]*(int64_t)dst[x2];
758
        }
759
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
760
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
761
    }
762
}
763

    
764
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
765
    int x, y, i;
766
    for(y=start; y<height; y+=2){
767
        for(x=0; x<width; x++){
768
            int64_t sum=0;
769
    
770
            for(i=0; i<n; i++){
771
                int y2= y + 2*i - n + 1;
772
                if     (y2<      0) y2= -y2;
773
                else if(y2>=height) y2= 2*height-y2-2;
774
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
775
            }
776
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
777
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
778
        }
779
    }
780
}
781

    
782
#define SCALEX 1
783
#define LX0 0
784
#define LX1 1
785

    
786
#if 0 // more accurate 9/7
787
#define N1 2
788
#define SHIFT1 14
789
#define COEFFS1 (int[]){-25987,-25987}
790
#define N2 2
791
#define SHIFT2 19
792
#define COEFFS2 (int[]){-27777,-27777}
793
#define N3 2
794
#define SHIFT3 15
795
#define COEFFS3 (int[]){28931,28931}
796
#define N4 2
797
#define SHIFT4 15
798
#define COEFFS4 (int[]){14533,14533}
799
#elif 1 // 13/7 CRF
800
#define N1 4
801
#define SHIFT1 4
802
#define COEFFS1 (int[]){1,-9,-9,1}
803
#define N2 4
804
#define SHIFT2 4
805
#define COEFFS2 (int[]){-1,5,5,-1}
806
#define N3 0
807
#define SHIFT3 1
808
#define COEFFS3 NULL
809
#define N4 0
810
#define SHIFT4 1
811
#define COEFFS4 NULL
812
#elif 1 // 3/5
813
#define LX0 1
814
#define LX1 0
815
#define SCALEX 0.5
816
#define N1 2
817
#define SHIFT1 1
818
#define COEFFS1 (int[]){1,1}
819
#define N2 2
820
#define SHIFT2 2
821
#define COEFFS2 (int[]){-1,-1}
822
#define N3 0
823
#define SHIFT3 0
824
#define COEFFS3 NULL
825
#define N4 0
826
#define SHIFT4 0
827
#define COEFFS4 NULL
828
#elif 1 // 11/5 
829
#define N1 0
830
#define SHIFT1 1
831
#define COEFFS1 NULL
832
#define N2 2
833
#define SHIFT2 2
834
#define COEFFS2 (int[]){-1,-1}
835
#define N3 2
836
#define SHIFT3 0
837
#define COEFFS3 (int[]){-1,-1}
838
#define N4 4
839
#define SHIFT4 7
840
#define COEFFS4 (int[]){-5,29,29,-5}
841
#define SCALEX 4
842
#elif 1 // 9/7 CDF
843
#define N1 2
844
#define SHIFT1 7
845
#define COEFFS1 (int[]){-203,-203}
846
#define N2 2
847
#define SHIFT2 12
848
#define COEFFS2 (int[]){-217,-217}
849
#define N3 2
850
#define SHIFT3 7
851
#define COEFFS3 (int[]){113,113}
852
#define N4 2
853
#define SHIFT4 9
854
#define COEFFS4 (int[]){227,227}
855
#define SCALEX 1
856
#elif 1 // 7/5 CDF
857
#define N1 0
858
#define SHIFT1 1
859
#define COEFFS1 NULL
860
#define N2 2
861
#define SHIFT2 2
862
#define COEFFS2 (int[]){-1,-1}
863
#define N3 2
864
#define SHIFT3 0
865
#define COEFFS3 (int[]){-1,-1}
866
#define N4 2
867
#define SHIFT4 4
868
#define COEFFS4 (int[]){3,3}
869
#elif 1 // 9/7 MN
870
#define N1 4
871
#define SHIFT1 4
872
#define COEFFS1 (int[]){1,-9,-9,1}
873
#define N2 2
874
#define SHIFT2 2
875
#define COEFFS2 (int[]){1,1}
876
#define N3 0
877
#define SHIFT3 1
878
#define COEFFS3 NULL
879
#define N4 0
880
#define SHIFT4 1
881
#define COEFFS4 NULL
882
#else // 13/7 CRF
883
#define N1 4
884
#define SHIFT1 4
885
#define COEFFS1 (int[]){1,-9,-9,1}
886
#define N2 4
887
#define SHIFT2 4
888
#define COEFFS2 (int[]){-1,5,5,-1}
889
#define N3 0
890
#define SHIFT3 1
891
#define COEFFS3 NULL
892
#define N4 0
893
#define SHIFT4 1
894
#define COEFFS4 NULL
895
#endif
896
static void horizontal_decomposeX(DWTELEM *b, int width){
897
    DWTELEM temp[width];
898
    const int width2= width>>1;
899
    const int w2= (width+1)>>1;
900
    int A1,A2,A3,A4, x;
901

    
902
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
903
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
904
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
905
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
906
    
907
    for(x=0; x<width2; x++){
908
        temp[x   ]= b[2*x    ];
909
        temp[x+w2]= b[2*x + 1];
910
    }
911
    if(width&1)
912
        temp[x   ]= b[2*x    ];
913
    memcpy(b, temp, width*sizeof(int));
914
}
915

    
916
static void horizontal_composeX(DWTELEM *b, int width){
917
    DWTELEM temp[width];
918
    const int width2= width>>1;
919
    int A1,A2,A3,A4, x;
920
    const int w2= (width+1)>>1;
921

    
922
    memcpy(temp, b, width*sizeof(int));
923
    for(x=0; x<width2; x++){
924
        b[2*x    ]= temp[x   ];
925
        b[2*x + 1]= temp[x+w2];
926
    }
927
    if(width&1)
928
        b[2*x    ]= temp[x   ];
929

    
930
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
931
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
932
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
933
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
934
}
935

    
936
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
937
    int x, y;
938
  
939
    for(y=0; y<height; y++){
940
        for(x=0; x<width; x++){
941
            buffer[y*stride + x] *= SCALEX;
942
        }
943
    }
944

    
945
    for(y=0; y<height; y++){
946
        horizontal_decomposeX(buffer + y*stride, width);
947
    }
948
    
949
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
950
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
951
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
952
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
953
}
954

    
955
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
956
    int x, y;
957
  
958
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
959
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
960
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
961
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
962

    
963
    for(y=0; y<height; y++){
964
        horizontal_composeX(buffer + y*stride, width);
965
    }
966

    
967
    for(y=0; y<height; y++){
968
        for(x=0; x<width; x++){
969
            buffer[y*stride + x] /= SCALEX;
970
        }
971
    }
972
}
973

    
974
static void horizontal_decompose53i(DWTELEM *b, int width){
975
    DWTELEM temp[width];
976
    const int width2= width>>1;
977
    int A1,A2,A3,A4, x;
978
    const int w2= (width+1)>>1;
979

    
980
    for(x=0; x<width2; x++){
981
        temp[x   ]= b[2*x    ];
982
        temp[x+w2]= b[2*x + 1];
983
    }
984
    if(width&1)
985
        temp[x   ]= b[2*x    ];
986
#if 0
987
    A2= temp[1       ];
988
    A4= temp[0       ];
989
    A1= temp[0+width2];
990
    A1 -= (A2 + A4)>>1;
991
    A4 += (A1 + 1)>>1;
992
    b[0+width2] = A1;
993
    b[0       ] = A4;
994
    for(x=1; x+1<width2; x+=2){
995
        A3= temp[x+width2];
996
        A4= temp[x+1     ];
997
        A3 -= (A2 + A4)>>1;
998
        A2 += (A1 + A3 + 2)>>2;
999
        b[x+width2] = A3;
1000
        b[x       ] = A2;
1001

1002
        A1= temp[x+1+width2];
1003
        A2= temp[x+2       ];
1004
        A1 -= (A2 + A4)>>1;
1005
        A4 += (A1 + A3 + 2)>>2;
1006
        b[x+1+width2] = A1;
1007
        b[x+1       ] = A4;
1008
    }
1009
    A3= temp[width-1];
1010
    A3 -= A2;
1011
    A2 += (A1 + A3 + 2)>>2;
1012
    b[width -1] = A3;
1013
    b[width2-1] = A2;
1014
#else        
1015
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1016
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
1017
#endif
1018
}
1019

    
1020
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1021
    int i;
1022
    
1023
    for(i=0; i<width; i++){
1024
        b1[i] -= (b0[i] + b2[i])>>1;
1025
    }
1026
}
1027

    
1028
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1029
    int i;
1030
    
1031
    for(i=0; i<width; i++){
1032
        b1[i] += (b0[i] + b2[i] + 2)>>2;
1033
    }
1034
}
1035

    
1036
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1037
    int y;
1038
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1039
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
1040
  
1041
    for(y=-2; y<height; y+=2){
1042
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1043
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1044

    
1045
{START_TIMER
1046
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
1047
        if(y+2 < height) horizontal_decompose53i(b3, width);
1048
STOP_TIMER("horizontal_decompose53i")}
1049
        
1050
{START_TIMER
1051
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1052
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1053
STOP_TIMER("vertical_decompose53i*")}
1054
        
1055
        b0=b2;
1056
        b1=b3;
1057
    }
1058
}
1059

    
1060
#define lift5 lift
1061
#if 1
1062
#define W_AM 3
1063
#define W_AO 0
1064
#define W_AS 1
1065

    
1066
#define W_BM 1
1067
#define W_BO 8
1068
#define W_BS 4
1069

    
1070
#undef lift5
1071
#define W_CM 9999
1072
#define W_CO 2
1073
#define W_CS 2
1074

    
1075
#define W_DM 15
1076
#define W_DO 16
1077
#define W_DS 5
1078
#elif 0
1079
#define W_AM 55
1080
#define W_AO 16
1081
#define W_AS 5
1082

    
1083
#define W_BM 3
1084
#define W_BO 32
1085
#define W_BS 6
1086

    
1087
#define W_CM 127
1088
#define W_CO 64
1089
#define W_CS 7
1090

    
1091
#define W_DM 7
1092
#define W_DO 8
1093
#define W_DS 4
1094
#elif 0
1095
#define W_AM 97
1096
#define W_AO 32
1097
#define W_AS 6
1098

    
1099
#define W_BM 63
1100
#define W_BO 512
1101
#define W_BS 10
1102

    
1103
#define W_CM 13
1104
#define W_CO 8
1105
#define W_CS 4
1106

    
1107
#define W_DM 15
1108
#define W_DO 16
1109
#define W_DS 5
1110

    
1111
#else
1112

    
1113
#define W_AM 203
1114
#define W_AO 64
1115
#define W_AS 7
1116

    
1117
#define W_BM 217
1118
#define W_BO 2048
1119
#define W_BS 12
1120

    
1121
#define W_CM 113
1122
#define W_CO 64
1123
#define W_CS 7
1124

    
1125
#define W_DM 227
1126
#define W_DO 128
1127
#define W_DS 9
1128
#endif
1129
static void horizontal_decompose97i(DWTELEM *b, int width){
1130
    DWTELEM temp[width];
1131
    const int w2= (width+1)>>1;
1132

    
1133
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1134
    lift (temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1135
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1136
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1137
}
1138

    
1139

    
1140
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1141
    int i;
1142
    
1143
    for(i=0; i<width; i++){
1144
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1145
    }
1146
}
1147

    
1148
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1149
    int i;
1150
    
1151
    for(i=0; i<width; i++){
1152
#ifdef lift5
1153
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1154
#else
1155
        int r= 3*(b0[i] + b2[i]);
1156
        r+= r>>4;
1157
        r+= r>>8;
1158
        b1[i] += (r+W_CO)>>W_CS;
1159
#endif
1160
    }
1161
}
1162

    
1163
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1164
    int i;
1165
    
1166
    for(i=0; i<width; i++){
1167
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1168
    }
1169
}
1170

    
1171
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1172
    int i;
1173
    
1174
    for(i=0; i<width; i++){
1175
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1176
    }
1177
}
1178

    
1179
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1180
    int y;
1181
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1182
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1183
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1184
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1185
  
1186
    for(y=-4; y<height; y+=2){
1187
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1188
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1189

    
1190
{START_TIMER
1191
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1192
        if(y+4 < height) horizontal_decompose97i(b5, width);
1193
if(width>400){
1194
STOP_TIMER("horizontal_decompose97i")
1195
}}
1196
        
1197
{START_TIMER
1198
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1199
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1200
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1201
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1202

    
1203
if(width>400){
1204
STOP_TIMER("vertical_decompose97i")
1205
}}
1206
        
1207
        b0=b2;
1208
        b1=b3;
1209
        b2=b4;
1210
        b3=b5;
1211
    }
1212
}
1213

    
1214
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1215
    int level;
1216
    
1217
    for(level=0; level<decomposition_count; level++){
1218
        switch(type){
1219
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1220
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1221
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1222
        }
1223
    }
1224
}
1225

    
1226
static void horizontal_compose53i(DWTELEM *b, int width){
1227
    DWTELEM temp[width];
1228
    const int width2= width>>1;
1229
    const int w2= (width+1)>>1;
1230
    int A1,A2,A3,A4, x;
1231

    
1232
#if 0
1233
    A2= temp[1       ];
1234
    A4= temp[0       ];
1235
    A1= temp[0+width2];
1236
    A1 -= (A2 + A4)>>1;
1237
    A4 += (A1 + 1)>>1;
1238
    b[0+width2] = A1;
1239
    b[0       ] = A4;
1240
    for(x=1; x+1<width2; x+=2){
1241
        A3= temp[x+width2];
1242
        A4= temp[x+1     ];
1243
        A3 -= (A2 + A4)>>1;
1244
        A2 += (A1 + A3 + 2)>>2;
1245
        b[x+width2] = A3;
1246
        b[x       ] = A2;
1247

1248
        A1= temp[x+1+width2];
1249
        A2= temp[x+2       ];
1250
        A1 -= (A2 + A4)>>1;
1251
        A4 += (A1 + A3 + 2)>>2;
1252
        b[x+1+width2] = A1;
1253
        b[x+1       ] = A4;
1254
    }
1255
    A3= temp[width-1];
1256
    A3 -= A2;
1257
    A2 += (A1 + A3 + 2)>>2;
1258
    b[width -1] = A3;
1259
    b[width2-1] = A2;
1260
#else   
1261
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1262
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1263
#endif
1264
    for(x=0; x<width2; x++){
1265
        b[2*x    ]= temp[x   ];
1266
        b[2*x + 1]= temp[x+w2];
1267
    }
1268
    if(width&1)
1269
        b[2*x    ]= temp[x   ];
1270
}
1271

    
1272
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1273
    int i;
1274
    
1275
    for(i=0; i<width; i++){
1276
        b1[i] += (b0[i] + b2[i])>>1;
1277
    }
1278
}
1279

    
1280
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1281
    int i;
1282
    
1283
    for(i=0; i<width; i++){
1284
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1285
    }
1286
}
1287

    
1288
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1289
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1290
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1291
    cs->y = -1;
1292
}
1293

    
1294
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1295
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1296
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1297
    cs->y = -1;
1298
}
1299

    
1300
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1301
    int y= cs->y;
1302
    int mirror0 = mirror(y-1, height-1);
1303
    int mirror1 = mirror(y  , height-1);
1304
    int mirror2 = mirror(y+1, height-1);
1305
    int mirror3 = mirror(y+2, height-1);
1306
    
1307
    DWTELEM *b0= cs->b0;
1308
    DWTELEM *b1= cs->b1;
1309
    DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1310
    DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1311

    
1312
{START_TIMER
1313
        if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1314
        if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1315
STOP_TIMER("vertical_compose53i*")}
1316

    
1317
{START_TIMER
1318
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1319
        if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1320
STOP_TIMER("horizontal_compose53i")}
1321

    
1322
    cs->b0 = b2;
1323
    cs->b1 = b3;
1324
    cs->y += 2;
1325
}
1326

    
1327
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1328
    int y= cs->y;
1329
    DWTELEM *b0= cs->b0;
1330
    DWTELEM *b1= cs->b1;
1331
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1332
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1333

    
1334
{START_TIMER
1335
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1336
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1337
STOP_TIMER("vertical_compose53i*")}
1338

    
1339
{START_TIMER
1340
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1341
        if(b0 <= b2) horizontal_compose53i(b1, width);
1342
STOP_TIMER("horizontal_compose53i")}
1343

    
1344
    cs->b0 = b2;
1345
    cs->b1 = b3;
1346
    cs->y += 2;
1347
}
1348

    
1349
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1350
    dwt_compose_t cs;
1351
    spatial_compose53i_init(&cs, buffer, height, stride);
1352
    while(cs.y <= height)
1353
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1354
}   
1355

    
1356
 
1357
static void horizontal_compose97i(DWTELEM *b, int width){
1358
    DWTELEM temp[width];
1359
    const int w2= (width+1)>>1;
1360

    
1361
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1362
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1363
    lift (b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1364
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1365
}
1366

    
1367
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1368
    int i;
1369
    
1370
    for(i=0; i<width; i++){
1371
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1372
    }
1373
}
1374

    
1375
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1376
    int i;
1377
    
1378
    for(i=0; i<width; i++){
1379
#ifdef lift5
1380
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1381
#else
1382
        int r= 3*(b0[i] + b2[i]);
1383
        r+= r>>4;
1384
        r+= r>>8;
1385
        b1[i] -= (r+W_CO)>>W_CS;
1386
#endif
1387
    }
1388
}
1389

    
1390
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1391
    int i;
1392
    
1393
    for(i=0; i<width; i++){
1394
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1395
    }
1396
}
1397

    
1398
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1399
    int i;
1400
    
1401
    for(i=0; i<width; i++){
1402
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1403
    }
1404
}
1405

    
1406
static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1407
    int i;
1408
    
1409
    for(i=0; i<width; i++){
1410
        int r;
1411
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1412
#ifdef lift5
1413
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1414
#else
1415
        r= 3*(b2[i] + b4[i]);
1416
        r+= r>>4;
1417
        r+= r>>8;
1418
        b3[i] -= (r+W_CO)>>W_CS;
1419
#endif
1420
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1421
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1422
    }
1423
}
1424

    
1425
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1426
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1427
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1428
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1429
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1430
    cs->y = -3;
1431
}
1432

    
1433
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1434
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1435
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1436
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1437
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1438
    cs->y = -3;
1439
}
1440

    
1441
static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1442
    int y = cs->y;
1443
    
1444
    int mirror0 = mirror(y - 1, height - 1);
1445
    int mirror1 = mirror(y + 0, height - 1);
1446
    int mirror2 = mirror(y + 1, height - 1);
1447
    int mirror3 = mirror(y + 2, height - 1);
1448
    int mirror4 = mirror(y + 3, height - 1);
1449
    int mirror5 = mirror(y + 4, height - 1);
1450
    DWTELEM *b0= cs->b0;
1451
    DWTELEM *b1= cs->b1;
1452
    DWTELEM *b2= cs->b2;
1453
    DWTELEM *b3= cs->b3;
1454
    DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1455
    DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1456
        
1457
{START_TIMER
1458
    if(y>0 && y+4<height){
1459
        vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1460
    }else{
1461
        if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1462
        if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1463
        if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1464
        if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1465
    }
1466
if(width>400){
1467
STOP_TIMER("vertical_compose97i")}}
1468

    
1469
{START_TIMER
1470
        if(y-1>=  0) horizontal_compose97i(b0, width);
1471
        if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1472
if(width>400 && mirror0 <= mirror2){
1473
STOP_TIMER("horizontal_compose97i")}}
1474

    
1475
    cs->b0=b2;
1476
    cs->b1=b3;
1477
    cs->b2=b4;
1478
    cs->b3=b5;
1479
    cs->y += 2;
1480
}
1481

    
1482
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1483
    int y = cs->y;
1484
    DWTELEM *b0= cs->b0;
1485
    DWTELEM *b1= cs->b1;
1486
    DWTELEM *b2= cs->b2;
1487
    DWTELEM *b3= cs->b3;
1488
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1489
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1490

    
1491
        if(stride == width && y+4 < height && 0){ 
1492
            int x;
1493
            for(x=0; x<width/2; x++)
1494
                b5[x] += 64*2;
1495
            for(; x<width; x++)
1496
                b5[x] += 169*2;
1497
        }
1498
        
1499
{START_TIMER
1500
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1501
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1502
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1503
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1504
if(width>400){
1505
STOP_TIMER("vertical_compose97i")}}
1506

    
1507
{START_TIMER
1508
        if(y-1>=  0) horizontal_compose97i(b0, width);
1509
        if(b0 <= b2) horizontal_compose97i(b1, width);
1510
if(width>400 && b0 <= b2){
1511
STOP_TIMER("horizontal_compose97i")}}
1512

    
1513
    cs->b0=b2;
1514
    cs->b1=b3;
1515
    cs->b2=b4;
1516
    cs->b3=b5;
1517
    cs->y += 2;
1518
}
1519

    
1520
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1521
    dwt_compose_t cs;
1522
    spatial_compose97i_init(&cs, buffer, height, stride);
1523
    while(cs.y <= height)
1524
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1525
}
1526

    
1527
void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1528
    int level;
1529
    for(level=decomposition_count-1; level>=0; level--){
1530
        switch(type){
1531
        case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1532
        case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1533
        /* not slicified yet */
1534
        case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1535
          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1536
        }
1537
    }
1538
}
1539

    
1540
void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1541
    int level;
1542
    for(level=decomposition_count-1; level>=0; level--){
1543
        switch(type){
1544
        case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1545
        case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1546
        /* not slicified yet */
1547
        case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1548
        }
1549
    }
1550
}
1551

    
1552
void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1553
    const int support = type==1 ? 3 : 5;
1554
    int level;
1555
    if(type==2) return;
1556

    
1557
    for(level=decomposition_count-1; level>=0; level--){
1558
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1559
            switch(type){
1560
            case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1561
                    break;
1562
            case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1563
                    break;
1564
            case 2: break;
1565
            }
1566
        }
1567
    }
1568
}
1569

    
1570
void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1571
    const int support = type==1 ? 3 : 5;
1572
    int level;
1573
    if(type==2) return;
1574

    
1575
    for(level=decomposition_count-1; level>=0; level--){
1576
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1577
            switch(type){
1578
            case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1579
                    break;
1580
            case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1581
                    break;
1582
            case 2: break;
1583
            }
1584
        }
1585
    }
1586
}
1587

    
1588
void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1589
    if(type==2){
1590
        int level;
1591
        for(level=decomposition_count-1; level>=0; level--)
1592
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
1593
    }else{
1594
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1595
        int y;
1596
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1597
        for(y=0; y<height; y+=4)
1598
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1599
    }
1600
}
1601

    
1602
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1603
    const int w= b->width;
1604
    const int h= b->height;
1605
    int x, y;
1606

    
1607
    if(1){
1608
        int run=0;
1609
        int runs[w*h];
1610
        int run_index=0;
1611
                
1612
        for(y=0; y<h; y++){
1613
            for(x=0; x<w; x++){
1614
                int v, p=0;
1615
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1616
                v= src[x + y*stride];
1617

    
1618
                if(y){
1619
                    t= src[x + (y-1)*stride];
1620
                    if(x){
1621
                        lt= src[x - 1 + (y-1)*stride];
1622
                    }
1623
                    if(x + 1 < w){
1624
                        rt= src[x + 1 + (y-1)*stride];
1625
                    }
1626
                }
1627
                if(x){
1628
                    l= src[x - 1 + y*stride];
1629
                    /*if(x > 1){
1630
                        if(orientation==1) ll= src[y + (x-2)*stride];
1631
                        else               ll= src[x - 2 + y*stride];
1632
                    }*/
1633
                }
1634
                if(parent){
1635
                    int px= x>>1;
1636
                    int py= y>>1;
1637
                    if(px<b->parent->width && py<b->parent->height) 
1638
                        p= parent[px + py*2*stride];
1639
                }
1640
                if(!(/*ll|*/l|lt|t|rt|p)){
1641
                    if(v){
1642
                        runs[run_index++]= run;
1643
                        run=0;
1644
                    }else{
1645
                        run++;
1646
                    }
1647
                }
1648
            }
1649
        }
1650
        runs[run_index++]= run;
1651
        run_index=0;
1652
        run= runs[run_index++];
1653

    
1654
        put_symbol2(&s->c, b->state[1], run, 3);
1655
        
1656
        for(y=0; y<h; y++){
1657
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1658
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1659
                return -1;
1660
            }
1661
            for(x=0; x<w; x++){
1662
                int v, p=0;
1663
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1664
                v= src[x + y*stride];
1665

    
1666
                if(y){
1667
                    t= src[x + (y-1)*stride];
1668
                    if(x){
1669
                        lt= src[x - 1 + (y-1)*stride];
1670
                    }
1671
                    if(x + 1 < w){
1672
                        rt= src[x + 1 + (y-1)*stride];
1673
                    }
1674
                }
1675
                if(x){
1676
                    l= src[x - 1 + y*stride];
1677
                    /*if(x > 1){
1678
                        if(orientation==1) ll= src[y + (x-2)*stride];
1679
                        else               ll= src[x - 2 + y*stride];
1680
                    }*/
1681
                }
1682
                if(parent){
1683
                    int px= x>>1;
1684
                    int py= y>>1;
1685
                    if(px<b->parent->width && py<b->parent->height) 
1686
                        p= parent[px + py*2*stride];
1687
                }
1688
                if(/*ll|*/l|lt|t|rt|p){
1689
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1690

    
1691
                    put_rac(&s->c, &b->state[0][context], !!v);
1692
                }else{
1693
                    if(!run){
1694
                        run= runs[run_index++];
1695

    
1696
                        put_symbol2(&s->c, b->state[1], run, 3);
1697
                        assert(v);
1698
                    }else{
1699
                        run--;
1700
                        assert(!v);
1701
                    }
1702
                }
1703
                if(v){
1704
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1705

    
1706
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1707
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]], v<0);
1708
                }
1709
            }
1710
        }
1711
    }
1712
    return 0;
1713
}
1714

    
1715
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1716
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1717
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1718
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1719
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1720
}
1721

    
1722
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1723
    const int w= b->width;
1724
    const int h= b->height;
1725
    int x,y;
1726
    
1727
    if(1){
1728
        int run;
1729
        int index=0;
1730
        int prev_index=-1;
1731
        int prev2_index=0;
1732
        int parent_index= 0;
1733
        int prev_parent_index= 0;
1734

    
1735
        run= get_symbol2(&s->c, b->state[1], 3);
1736
        for(y=0; y<h; y++){
1737
            int v=0;
1738
            int lt=0, t=0, rt=0;
1739

    
1740
            if(y && b->x_coeff[prev_index].x == 0){
1741
                rt= b->x_coeff[prev_index].coeff;
1742
            }
1743
            for(x=0; x<w; x++){
1744
                int p=0;
1745
                const int l= v;
1746
                
1747
                lt= t; t= rt;
1748

    
1749
                if(y){
1750
                    if(b->x_coeff[prev_index].x <= x)
1751
                        prev_index++;
1752
                    if(b->x_coeff[prev_index].x == x + 1)
1753
                        rt= b->x_coeff[prev_index].coeff;
1754
                    else
1755
                        rt=0;
1756
                }
1757
                if(parent){
1758
                    if(x>>1 > parent->x_coeff[parent_index].x){
1759
                        parent_index++;
1760
                    }
1761
                    if(x>>1 == parent->x_coeff[parent_index].x){
1762
                        p= parent->x_coeff[parent_index].coeff;
1763
                    }
1764
                }
1765
                if(/*ll|*/l|lt|t|rt|p){
1766
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1767

    
1768
                    v=get_rac(&s->c, &b->state[0][context]);
1769
                }else{
1770
                    if(!run){
1771
                        run= get_symbol2(&s->c, b->state[1], 3);
1772
                        v=1;
1773
                    }else{
1774
                        run--;
1775
                        v=0;
1776

    
1777
                        if(y && parent){
1778
                            int max_run;
1779

    
1780
                            max_run= FFMIN(run, b->x_coeff[prev_index].x - x - 2);
1781
                            max_run= FFMIN(max_run, 2*parent->x_coeff[parent_index].x - x - 1);
1782
                            x+= max_run;
1783
                            run-= max_run;
1784
                        }
1785
                    }
1786
                }
1787
                if(v){
1788
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1789
                    v= get_symbol2(&s->c, b->state[context + 2], context-4) + 1;
1790
                    if(get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3b[l&0xFF] + 3*quant3b[t&0xFF]]))
1791
                        v *= -1;
1792
                    b->x_coeff[index].x=x;
1793
                    b->x_coeff[index++].coeff= v;
1794
                }
1795
            }
1796
            b->x_coeff[index++].x= w+1; //end marker
1797
            prev_index= prev2_index;
1798
            prev2_index= index;
1799
            
1800
            if(parent){
1801
                if(y&1){
1802
                    while(parent->x_coeff[parent_index].x != parent->width+1)
1803
                        parent_index++;
1804
                    parent_index++;
1805
                    prev_parent_index= parent_index;
1806
                }else{
1807
                    parent_index= prev_parent_index;
1808
                }
1809
            }
1810
        }
1811

    
1812
        b->x_coeff[index++].x= w+1; //end marker
1813
    }
1814
}
1815

    
1816
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1817
    const int w= b->width;
1818
    int x,y;
1819
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
1820
    int qmul= qexp[qlog&7]<<(qlog>>3);
1821
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1822
    int new_index = 0;
1823
    
1824
    START_TIMER
1825

    
1826
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1827
        qadd= 0;
1828
        qmul= 1<<QEXPSHIFT;
1829
    }
1830

    
1831
    /* If we are on the second or later slice, restore our index. */
1832
    if (start_y != 0)
1833
        new_index = save_state[0];
1834

    
1835
        
1836
    for(y=start_y; y<h; y++){
1837
        int x = 0;
1838
        int v;
1839
        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1840
        memset(line, 0, b->width*sizeof(DWTELEM));
1841
        v = b->x_coeff[new_index].coeff;
1842
        x = b->x_coeff[new_index++].x;
1843
        while(x < w)
1844
        {
1845
            if (v < 0)
1846
                line[x] = -(( -v*qmul + qadd)>>(QEXPSHIFT));
1847
            else
1848
                line[x] = (( v*qmul + qadd)>>(QEXPSHIFT));
1849
            v = b->x_coeff[new_index].coeff;
1850
            x = b->x_coeff[new_index++].x;
1851
        }
1852
    }
1853
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1854
        STOP_TIMER("decode_subband")
1855
    }
1856
        
1857
    /* Save our variables for the next slice. */
1858
    save_state[0] = new_index;
1859
        
1860
    return;
1861
}
1862

    
1863
static void reset_contexts(SnowContext *s){
1864
    int plane_index, level, orientation;
1865

    
1866
    for(plane_index=0; plane_index<3; plane_index++){
1867
        for(level=0; level<s->spatial_decomposition_count; level++){
1868
            for(orientation=level ? 1:0; orientation<4; orientation++){
1869
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1870
            }
1871
        }
1872
    }
1873
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1874
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1875
}
1876

    
1877
static int alloc_blocks(SnowContext *s){
1878
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1879
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1880
    
1881
    s->b_width = w;
1882
    s->b_height= h;
1883
    
1884
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1885
    return 0;
1886
}
1887

    
1888
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1889
    uint8_t *bytestream= d->bytestream;
1890
    uint8_t *bytestream_start= d->bytestream_start;
1891
    *d= *s;
1892
    d->bytestream= bytestream;
1893
    d->bytestream_start= bytestream_start;
1894
}
1895

    
1896
//near copy & paste from dsputil, FIXME
1897
static int pix_sum(uint8_t * pix, int line_size, int w)
1898
{
1899
    int s, i, j;
1900

    
1901
    s = 0;
1902
    for (i = 0; i < w; i++) {
1903
        for (j = 0; j < w; j++) {
1904
            s += pix[0];
1905
            pix ++;
1906
        }
1907
        pix += line_size - w;
1908
    }
1909
    return s;
1910
}
1911

    
1912
//near copy & paste from dsputil, FIXME
1913
static int pix_norm1(uint8_t * pix, int line_size, int w)
1914
{
1915
    int s, i, j;
1916
    uint32_t *sq = squareTbl + 256;
1917

    
1918
    s = 0;
1919
    for (i = 0; i < w; i++) {
1920
        for (j = 0; j < w; j ++) {
1921
            s += sq[pix[0]];
1922
            pix ++;
1923
        }
1924
        pix += line_size - w;
1925
    }
1926
    return s;
1927
}
1928

    
1929
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1930
    const int w= s->b_width << s->block_max_depth;
1931
    const int rem_depth= s->block_max_depth - level;
1932
    const int index= (x + y*w) << rem_depth;
1933
    const int block_w= 1<<rem_depth;
1934
    BlockNode block;
1935
    int i,j;
1936
    
1937
    block.color[0]= l;
1938
    block.color[1]= cb;
1939
    block.color[2]= cr;
1940
    block.mx= mx;
1941
    block.my= my;
1942
    block.type= type;
1943
    block.level= level;
1944

    
1945
    for(j=0; j<block_w; j++){
1946
        for(i=0; i<block_w; i++){
1947
            s->block[index + i + j*w]= block;
1948
        }
1949
    }
1950
}
1951

    
1952
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1953
    const int offset[3]= {
1954
          y*c->  stride + x,
1955
        ((y*c->uvstride + x)>>1),
1956
        ((y*c->uvstride + x)>>1),
1957
    };
1958
    int i;
1959
    for(i=0; i<3; i++){
1960
        c->src[0][i]= src [i];
1961
        c->ref[0][i]= ref [i] + offset[i];
1962
    }
1963
    assert(!ref_index);
1964
}
1965

    
1966
//FIXME copy&paste
1967
#define P_LEFT P[1]
1968
#define P_TOP P[2]
1969
#define P_TOPRIGHT P[3]
1970
#define P_MEDIAN P[4]
1971
#define P_MV1 P[9]
1972
#define FLAG_QPEL   1 //must be 1
1973

    
1974
static int encode_q_branch(SnowContext *s, int level, int x, int y){
1975
    uint8_t p_buffer[1024];
1976
    uint8_t i_buffer[1024];
1977
    uint8_t p_state[sizeof(s->block_state)];
1978
    uint8_t i_state[sizeof(s->block_state)];
1979
    RangeCoder pc, ic;
1980
    uint8_t *pbbak= s->c.bytestream;
1981
    uint8_t *pbbak_start= s->c.bytestream_start;
1982
    int score, score2, iscore, i_len, p_len, block_s, sum;
1983
    const int w= s->b_width  << s->block_max_depth;
1984
    const int h= s->b_height << s->block_max_depth;
1985
    const int rem_depth= s->block_max_depth - level;
1986
    const int index= (x + y*w) << rem_depth;
1987
    const int block_w= 1<<(LOG2_MB_SIZE - level);
1988
    static BlockNode null_block= { //FIXME add border maybe
1989
        .color= {128,128,128},
1990
        .mx= 0,
1991
        .my= 0,
1992
        .type= 0,
1993
        .level= 0,
1994
    };
1995
    int trx= (x+1)<<rem_depth;
1996
    int try= (y+1)<<rem_depth;
1997
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
1998
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
1999
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2000
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2001
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2002
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2003
    int pl = left->color[0];
2004
    int pcb= left->color[1];
2005
    int pcr= left->color[2];
2006
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2007
    int pmy= mid_pred(left->my, top->my, tr->my);
2008
    int mx=0, my=0;
2009
    int l,cr,cb, i;
2010
    const int stride= s->current_picture.linesize[0];
2011
    const int uvstride= s->current_picture.linesize[1];
2012
    const int instride= s->input_picture.linesize[0];
2013
    const int uvinstride= s->input_picture.linesize[1];
2014
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
2015
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2016
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2017
    uint8_t current_mb[3][stride*block_w];
2018
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2019
    int P[10][2];
2020
    int16_t last_mv[3][2];
2021
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2022
    const int shift= 1+qpel;
2023
    MotionEstContext *c= &s->m.me;
2024
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2025
    int my_context= av_log2(2*ABS(left->my - top->my));
2026
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2027

    
2028
    assert(sizeof(s->block_state) >= 256);
2029
    if(s->keyframe){
2030
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2031
        return 0;
2032
    }
2033

    
2034
    //FIXME optimize
2035
    for(i=0; i<block_w; i++)
2036
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
2037
    for(i=0; i<block_w>>1; i++)
2038
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2039
    for(i=0; i<block_w>>1; i++)
2040
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2041

    
2042
//    clip predictors / edge ?
2043

    
2044
    P_LEFT[0]= left->mx;
2045
    P_LEFT[1]= left->my;
2046
    P_TOP [0]= top->mx;
2047
    P_TOP [1]= top->my;
2048
    P_TOPRIGHT[0]= tr->mx;
2049
    P_TOPRIGHT[1]= tr->my;
2050
    
2051
    last_mv[0][0]= s->block[index].mx;
2052
    last_mv[0][1]= s->block[index].my;
2053
    last_mv[1][0]= right->mx;
2054
    last_mv[1][1]= right->my;
2055
    last_mv[2][0]= bottom->mx;
2056
    last_mv[2][1]= bottom->my;
2057
    
2058
    s->m.mb_stride=2;
2059
    s->m.mb_x= 
2060
    s->m.mb_y= 0;
2061
    s->m.me.skip= 0;
2062

    
2063
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2064
    
2065
    assert(s->m.me.  stride ==   stride);
2066
    assert(s->m.me.uvstride == uvstride);
2067
    
2068
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2069
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2070
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2071
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2072
    
2073
    c->xmin = - x*block_w - 16+2;
2074
    c->ymin = - y*block_w - 16+2;
2075
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2076
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2077

    
2078
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2079
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
2080
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2081
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2082
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2083
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2084
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2085

    
2086
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2087
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2088

    
2089
    if (!y) {
2090
        c->pred_x= P_LEFT[0];
2091
        c->pred_y= P_LEFT[1];
2092
    } else {
2093
        c->pred_x = P_MEDIAN[0];
2094
        c->pred_y = P_MEDIAN[1];
2095
    }
2096

    
2097
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
2098
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2099

    
2100
    assert(mx >= c->xmin);
2101
    assert(mx <= c->xmax);
2102
    assert(my >= c->ymin);
2103
    assert(my <= c->ymax);
2104
    
2105
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2106
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2107
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2108
                             
2109
  //  subpel search
2110
    pc= s->c;
2111
    pc.bytestream_start=
2112
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2113
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2114

    
2115
    if(level!=s->block_max_depth)
2116
        put_rac(&pc, &p_state[4 + s_context], 1);
2117
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2118
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2119
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2120
    p_len= pc.bytestream - pc.bytestream_start;
2121
    score += (s->lambda2*(p_len*8
2122
              + (pc.outstanding_count - s->c.outstanding_count)*8
2123
              + (-av_log2(pc.range)    + av_log2(s->c.range))
2124
             ))>>FF_LAMBDA_SHIFT;
2125

    
2126
    block_s= block_w*block_w;
2127
    sum = pix_sum(&current_mb[0][0], stride, block_w);
2128
    l= (sum + block_s/2)/block_s;
2129
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2130
    
2131
    block_s= block_w*block_w>>2;
2132
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2133
    cb= (sum + block_s/2)/block_s;
2134
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2135
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2136
    cr= (sum + block_s/2)/block_s;
2137
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2138

    
2139
    ic= s->c;
2140
    ic.bytestream_start=
2141
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2142
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2143
    if(level!=s->block_max_depth)
2144
        put_rac(&ic, &i_state[4 + s_context], 1);
2145
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2146
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2147
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2148
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2149
    i_len= ic.bytestream - ic.bytestream_start;
2150
    iscore += (s->lambda2*(i_len*8
2151
              + (ic.outstanding_count - s->c.outstanding_count)*8
2152
              + (-av_log2(ic.range)    + av_log2(s->c.range))
2153
             ))>>FF_LAMBDA_SHIFT;
2154

    
2155
//    assert(score==256*256*256*64-1);
2156
    assert(iscore < 255*255*256 + s->lambda2*10);
2157
    assert(iscore >= 0);
2158
    assert(l>=0 && l<=255);
2159
    assert(pl>=0 && pl<=255);
2160

    
2161
    if(level==0){
2162
        int varc= iscore >> 8;
2163
        int vard= score >> 8;
2164
        if (vard <= 64 || vard < varc)
2165
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2166
        else
2167
            c->scene_change_score+= s->m.qscale;
2168
    }
2169
        
2170
    if(level!=s->block_max_depth){
2171
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2172
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2173
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2174
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2175
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2176
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2177
    
2178
        if(score2 < score && score2 < iscore)
2179
            return score2;
2180
    }
2181
    
2182
    if(iscore < score){
2183
        memcpy(pbbak, i_buffer, i_len);
2184
        s->c= ic;
2185
        s->c.bytestream_start= pbbak_start;
2186
        s->c.bytestream= pbbak + i_len;
2187
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2188
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2189
        return iscore;
2190
    }else{
2191
        memcpy(pbbak, p_buffer, p_len);
2192
        s->c= pc;
2193
        s->c.bytestream_start= pbbak_start;
2194
        s->c.bytestream= pbbak + p_len;
2195
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2196
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2197
        return score;
2198
    }
2199
}
2200

    
2201
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2202
    const int w= s->b_width << s->block_max_depth;
2203
    const int rem_depth= s->block_max_depth - level;
2204
    const int index= (x + y*w) << rem_depth;
2205
    static BlockNode null_block= { //FIXME add border maybe
2206
        .color= {128,128,128},
2207
        .mx= 0,
2208
        .my= 0,
2209
        .type= 0,
2210
        .level= 0,
2211
    };
2212
    int trx= (x+1)<<rem_depth;
2213
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2214
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2215
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2216
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2217
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2218
    
2219
    if(s->keyframe){
2220
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2221
        return;
2222
    }
2223

    
2224
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2225
        int type;
2226
        int l = left->color[0];
2227
        int cb= left->color[1];
2228
        int cr= left->color[2];
2229
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2230
        int my= mid_pred(left->my, top->my, tr->my);
2231
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2232
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2233
        
2234
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2235

    
2236
        if(type){
2237
            l += get_symbol(&s->c, &s->block_state[32], 1);
2238
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2239
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2240
        }else{
2241
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2242
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2243
        }
2244
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2245
    }else{
2246
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2247
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2248
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2249
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2250
    }
2251
}
2252

    
2253
static void encode_blocks(SnowContext *s){
2254
    int x, y;
2255
    int w= s->b_width;
2256
    int h= s->b_height;
2257

    
2258
    for(y=0; y<h; y++){
2259
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2260
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2261
            return;
2262
        }
2263
        for(x=0; x<w; x++){
2264
            encode_q_branch(s, 0, x, y);
2265
        }
2266
    }
2267
}
2268

    
2269
static void decode_blocks(SnowContext *s){
2270
    int x, y;
2271
    int w= s->b_width;
2272
    int h= s->b_height;
2273

    
2274
    for(y=0; y<h; y++){
2275
        for(x=0; x<w; x++){
2276
            decode_q_branch(s, 0, x, y);
2277
        }
2278
    }
2279
}
2280

    
2281
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2282
    int x, y;
2283
START_TIMER
2284
    for(y=0; y < b_h+5; y++){
2285
        for(x=0; x < b_w; x++){
2286
            int a0= src[x    ];
2287
            int a1= src[x + 1];
2288
            int a2= src[x + 2];
2289
            int a3= src[x + 3];
2290
            int a4= src[x + 4];
2291
            int a5= src[x + 5];
2292
//            int am= 9*(a1+a2) - (a0+a3);
2293
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2294
//            int am= 18*(a2+a3) - 2*(a1+a4);
2295
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2296
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2297

    
2298
//            if(b_w==16) am= 8*(a1+a2);
2299

    
2300
            if(dx<8) tmp[x]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2301
            else     tmp[x]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2302

    
2303
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2304
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2305
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2306
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2307
        }
2308
        tmp += stride;
2309
        src += stride;
2310
    }
2311
    tmp -= (b_h+5)*stride;
2312
    
2313
    for(y=0; y < b_h; y++){
2314
        for(x=0; x < b_w; x++){
2315
            int a0= tmp[x + 0*stride];
2316
            int a1= tmp[x + 1*stride];
2317
            int a2= tmp[x + 2*stride];
2318
            int a3= tmp[x + 3*stride];
2319
            int a4= tmp[x + 4*stride];
2320
            int a5= tmp[x + 5*stride];
2321
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2322
//            int am= 18*(a2+a3) - 2*(a1+a4);
2323
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2324
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2325
            
2326
//            if(b_w==16) am= 8*(a1+a2);
2327

    
2328
            if(dy<8) dst[x]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2329
            else     dst[x]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2330

    
2331
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2332
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2333
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2334
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2335
        }
2336
        dst += stride;
2337
        tmp += stride;
2338
    }
2339
STOP_TIMER("mc_block")
2340
}
2341

    
2342
#define mca(dx,dy,b_w)\
2343
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2344
    uint8_t tmp[stride*(b_w+5)];\
2345
    assert(h==b_w);\
2346
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2347
}
2348

    
2349
mca( 0, 0,16)
2350
mca( 8, 0,16)
2351
mca( 0, 8,16)
2352
mca( 8, 8,16)
2353
mca( 0, 0,8)
2354
mca( 8, 0,8)
2355
mca( 0, 8,8)
2356
mca( 8, 8,8)
2357

    
2358
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2359
    if(block->type){
2360
        int x, y;
2361
        const int color= block->color[plane_index];
2362
        for(y=0; y < b_h; y++){
2363
            for(x=0; x < b_w; x++){
2364
                dst[x + y*stride]= color;
2365
            }
2366
        }
2367
    }else{
2368
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2369
        int mx= block->mx*scale;
2370
        int my= block->my*scale;
2371
        const int dx= mx&15;
2372
        const int dy= my&15;
2373
        sx += (mx>>4) - 2;
2374
        sy += (my>>4) - 2;
2375
        src += sx + sy*stride;
2376
        if(   (unsigned)sx >= w - b_w - 4
2377
           || (unsigned)sy >= h - b_h - 4){
2378
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2379
            src= tmp + MB_SIZE;
2380
        }
2381
        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2382
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2383
        else
2384
            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2385
    }
2386
}
2387

    
2388
static always_inline int same_block(BlockNode *a, BlockNode *b){
2389
    return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2390
}
2391

    
2392
//FIXME name clenup (b_w, block_w, b_width stuff)
2393
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2394
    DWTELEM * dst = NULL;
2395
    const int b_width = s->b_width  << s->block_max_depth;
2396
    const int b_height= s->b_height << s->block_max_depth;
2397
    const int b_stride= b_width;
2398
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2399
    BlockNode *rt= lt+1;
2400
    BlockNode *lb= lt+b_stride;
2401
    BlockNode *rb= lb+1;
2402
    uint8_t *block[4]; 
2403
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2404
    int x,y;
2405

    
2406
    if(b_x<0){
2407
        lt= rt;
2408
        lb= rb;
2409
    }else if(b_x + 1 >= b_width){
2410
        rt= lt;
2411
        rb= lb;
2412
    }
2413
    if(b_y<0){
2414
        lt= lb;
2415
        rt= rb;
2416
    }else if(b_y + 1 >= b_height){
2417
        lb= lt;
2418
        rb= rt;
2419
    }
2420
        
2421
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2422
        obmc -= src_x;
2423
        b_w += src_x;
2424
        src_x=0;
2425
    }else if(src_x + b_w > w){
2426
        b_w = w - src_x;
2427
    }
2428
    if(src_y<0){
2429
        obmc -= src_y*obmc_stride;
2430
        b_h += src_y;
2431
        src_y=0;
2432
    }else if(src_y + b_h> h){
2433
        b_h = h - src_y;
2434
    }
2435
    
2436
    if(b_w<=0 || b_h<=0) return;
2437

    
2438
assert(src_stride > 7*MB_SIZE);
2439
//    old_dst += src_x + src_y*dst_stride;
2440
    dst8+= src_x + src_y*src_stride;
2441
//    src += src_x + src_y*src_stride;
2442

    
2443
    block[0]= tmp+3*MB_SIZE;
2444
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2445

    
2446
    if(same_block(lt, rt)){
2447
        block[1]= block[0];
2448
    }else{
2449
        block[1]= tmp + 4*MB_SIZE;
2450
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2451
    }
2452
        
2453
    if(same_block(lt, lb)){
2454
        block[2]= block[0];
2455
    }else if(same_block(rt, lb)){
2456
        block[2]= block[1];
2457
    }else{
2458
        block[2]= tmp+5*MB_SIZE;
2459
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2460
    }
2461

    
2462
    if(same_block(lt, rb) ){
2463
        block[3]= block[0];
2464
    }else if(same_block(rt, rb)){
2465
        block[3]= block[1];
2466
    }else if(same_block(lb, rb)){
2467
        block[3]= block[2];
2468
    }else{
2469
        block[3]= tmp+6*MB_SIZE;
2470
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2471
    }
2472
#if 0
2473
    for(y=0; y<b_h; y++){
2474
        for(x=0; x<b_w; x++){
2475
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2476
            if(add) dst[x + y*dst_stride] += v;
2477
            else    dst[x + y*dst_stride] -= v;
2478
        }
2479
    }
2480
    for(y=0; y<b_h; y++){
2481
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2482
        for(x=0; x<b_w; x++){
2483
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2484
            if(add) dst[x + y*dst_stride] += v;
2485
            else    dst[x + y*dst_stride] -= v;
2486
        }
2487
    }
2488
    for(y=0; y<b_h; y++){
2489
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2490
        for(x=0; x<b_w; x++){
2491
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2492
            if(add) dst[x + y*dst_stride] += v;
2493
            else    dst[x + y*dst_stride] -= v;
2494
        }
2495
    }
2496
    for(y=0; y<b_h; y++){
2497
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2498
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2499
        for(x=0; x<b_w; x++){
2500
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2501
            if(add) dst[x + y*dst_stride] += v;
2502
            else    dst[x + y*dst_stride] -= v;
2503
        }
2504
    }
2505
#else
2506
{
2507

    
2508
    START_TIMER
2509
    
2510
    int block_index = 0;
2511
    for(y=0; y<b_h; y++){
2512
        //FIXME ugly missue of obmc_stride
2513
        uint8_t *obmc1= obmc + y*obmc_stride;
2514
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2515
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2516
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2517
        dst = slice_buffer_get_line(sb, src_y + y);
2518
        for(x=0; x<b_w; x++){
2519
            int v=   obmc1[x] * block[3][x + y*src_stride]
2520
                    +obmc2[x] * block[2][x + y*src_stride]
2521
                    +obmc3[x] * block[1][x + y*src_stride]
2522
                    +obmc4[x] * block[0][x + y*src_stride];
2523

    
2524
            v <<= 8 - LOG2_OBMC_MAX;
2525
            if(FRAC_BITS != 8){
2526
                v += 1<<(7 - FRAC_BITS);
2527
                v >>= 8 - FRAC_BITS;
2528
            }
2529
            if(add){
2530
//                v += old_dst[x + y*dst_stride];
2531
                v += dst[x + src_x];
2532
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2533
                if(v&(~255)) v= ~(v>>31);
2534
                dst8[x + y*src_stride] = v;
2535
            }else{
2536
//                old_dst[x + y*dst_stride] -= v;
2537
                dst[x + src_x] -= v;
2538
            }
2539
        }
2540
    }
2541
        STOP_TIMER("Inner add y block")
2542
}
2543
#endif
2544
}
2545

    
2546
//FIXME name clenup (b_w, block_w, b_width stuff)
2547
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2548
    const int b_width = s->b_width  << s->block_max_depth;
2549
    const int b_height= s->b_height << s->block_max_depth;
2550
    const int b_stride= b_width;
2551
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2552
    BlockNode *rt= lt+1;
2553
    BlockNode *lb= lt+b_stride;
2554
    BlockNode *rb= lb+1;
2555
    uint8_t *block[4]; 
2556
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2557
    int x,y;
2558

    
2559
    if(b_x<0){
2560
        lt= rt;
2561
        lb= rb;
2562
    }else if(b_x + 1 >= b_width){
2563
        rt= lt;
2564
        rb= lb;
2565
    }
2566
    if(b_y<0){
2567
        lt= lb;
2568
        rt= rb;
2569
    }else if(b_y + 1 >= b_height){
2570
        lb= lt;
2571
        rb= rt;
2572
    }
2573
        
2574
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2575
        obmc -= src_x;
2576
        b_w += src_x;
2577
        src_x=0;
2578
    }else if(src_x + b_w > w){
2579
        b_w = w - src_x;
2580
    }
2581
    if(src_y<0){
2582
        obmc -= src_y*obmc_stride;
2583
        b_h += src_y;
2584
        src_y=0;
2585
    }else if(src_y + b_h> h){
2586
        b_h = h - src_y;
2587
    }
2588
    
2589
    if(b_w<=0 || b_h<=0) return;
2590

    
2591
assert(src_stride > 7*MB_SIZE);
2592
    dst += src_x + src_y*dst_stride;
2593
    dst8+= src_x + src_y*src_stride;
2594
//    src += src_x + src_y*src_stride;
2595

    
2596
    block[0]= tmp+3*MB_SIZE;
2597
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2598

    
2599
    if(same_block(lt, rt)){
2600
        block[1]= block[0];
2601
    }else{
2602
        block[1]= tmp + 4*MB_SIZE;
2603
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2604
    }
2605
        
2606
    if(same_block(lt, lb)){
2607
        block[2]= block[0];
2608
    }else if(same_block(rt, lb)){
2609
        block[2]= block[1];
2610
    }else{
2611
        block[2]= tmp+5*MB_SIZE;
2612
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2613
    }
2614

    
2615
    if(same_block(lt, rb) ){
2616
        block[3]= block[0];
2617
    }else if(same_block(rt, rb)){
2618
        block[3]= block[1];
2619
    }else if(same_block(lb, rb)){
2620
        block[3]= block[2];
2621
    }else{
2622
        block[3]= tmp+6*MB_SIZE;
2623
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2624
    }
2625
#if 0
2626
    for(y=0; y<b_h; y++){
2627
        for(x=0; x<b_w; x++){
2628
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2629
            if(add) dst[x + y*dst_stride] += v;
2630
            else    dst[x + y*dst_stride] -= v;
2631
        }
2632
    }
2633
    for(y=0; y<b_h; y++){
2634
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2635
        for(x=0; x<b_w; x++){
2636
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2637
            if(add) dst[x + y*dst_stride] += v;
2638
            else    dst[x + y*dst_stride] -= v;
2639
        }
2640
    }
2641
    for(y=0; y<b_h; y++){
2642
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2643
        for(x=0; x<b_w; x++){
2644
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2645
            if(add) dst[x + y*dst_stride] += v;
2646
            else    dst[x + y*dst_stride] -= v;
2647
        }
2648
    }
2649
    for(y=0; y<b_h; y++){
2650
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2651
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2652
        for(x=0; x<b_w; x++){
2653
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2654
            if(add) dst[x + y*dst_stride] += v;
2655
            else    dst[x + y*dst_stride] -= v;
2656
        }
2657
    }
2658
#else
2659
    for(y=0; y<b_h; y++){
2660
        //FIXME ugly missue of obmc_stride
2661
        uint8_t *obmc1= obmc + y*obmc_stride;
2662
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2663
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2664
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2665
        for(x=0; x<b_w; x++){
2666
            int v=   obmc1[x] * block[3][x + y*src_stride]
2667
                    +obmc2[x] * block[2][x + y*src_stride]
2668
                    +obmc3[x] * block[1][x + y*src_stride]
2669
                    +obmc4[x] * block[0][x + y*src_stride];
2670
            
2671
            v <<= 8 - LOG2_OBMC_MAX;
2672
            if(FRAC_BITS != 8){
2673
                v += 1<<(7 - FRAC_BITS);
2674
                v >>= 8 - FRAC_BITS;
2675
            }
2676
            if(add){
2677
                v += dst[x + y*dst_stride];
2678
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2679
                if(v&(~255)) v= ~(v>>31);
2680
                dst8[x + y*src_stride] = v;
2681
            }else{
2682
                dst[x + y*dst_stride] -= v;
2683
            }
2684
        }
2685
    }
2686
#endif
2687
}
2688

    
2689
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2690
    Plane *p= &s->plane[plane_index];
2691
    const int mb_w= s->b_width  << s->block_max_depth;
2692
    const int mb_h= s->b_height << s->block_max_depth;
2693
    int x, y, mb_x;
2694
    int block_size = MB_SIZE >> s->block_max_depth;
2695
    int block_w    = plane_index ? block_size/2 : block_size;
2696
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2697
    int obmc_stride= plane_index ? block_size : 2*block_size;
2698
    int ref_stride= s->current_picture.linesize[plane_index];
2699
    uint8_t *ref  = s->last_picture.data[plane_index];
2700
    uint8_t *dst8= s->current_picture.data[plane_index];
2701
    int w= p->width;
2702
    int h= p->height;
2703
    START_TIMER
2704
    
2705
    if(s->keyframe || (s->avctx->debug&512)){
2706
        if(mb_y==mb_h)
2707
            return;
2708

    
2709
        if(add){
2710
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++)
2711
            {
2712
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2713
                DWTELEM * line = sb->line[y];
2714
                for(x=0; x<w; x++)
2715
                {
2716
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2717
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2718
                    v >>= FRAC_BITS;
2719
                    if(v&(~255)) v= ~(v>>31);
2720
                    dst8[x + y*ref_stride]= v;
2721
                }
2722
            }
2723
        }else{
2724
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++)
2725
            {
2726
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2727
                DWTELEM * line = sb->line[y];
2728
                for(x=0; x<w; x++)
2729
                {
2730
                    line[x] -= 128 << FRAC_BITS;
2731
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2732
                }
2733
            }
2734
        }
2735

    
2736
        return;
2737
    }
2738
    
2739
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2740
            START_TIMER
2741

    
2742
            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, 
2743
                       block_w*mb_x - block_w/2,
2744
                       block_w*mb_y - block_w/2,
2745
                       block_w, block_w,
2746
                       w, h,
2747
                       w, ref_stride, obmc_stride,
2748
                       mb_x - 1, mb_y - 1,
2749
                       add, plane_index);
2750
            
2751
            STOP_TIMER("add_yblock")
2752
        }
2753
    
2754
    STOP_TIMER("predict_slice")
2755
}
2756

    
2757
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2758
    Plane *p= &s->plane[plane_index];
2759
    const int mb_w= s->b_width  << s->block_max_depth;
2760
    const int mb_h= s->b_height << s->block_max_depth;
2761
    int x, y, mb_x;
2762
    int block_size = MB_SIZE >> s->block_max_depth;
2763
    int block_w    = plane_index ? block_size/2 : block_size;
2764
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2765
    int obmc_stride= plane_index ? block_size : 2*block_size;
2766
    int ref_stride= s->current_picture.linesize[plane_index];
2767
    uint8_t *ref  = s->last_picture.data[plane_index];
2768
    uint8_t *dst8= s->current_picture.data[plane_index];
2769
    int w= p->width;
2770
    int h= p->height;
2771
    START_TIMER
2772
    
2773
    if(s->keyframe || (s->avctx->debug&512)){
2774
        if(mb_y==mb_h)
2775
            return;
2776

    
2777
        if(add){
2778
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
2779
                for(x=0; x<w; x++){
2780
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2781
                    v >>= FRAC_BITS;
2782
                    if(v&(~255)) v= ~(v>>31);
2783
                    dst8[x + y*ref_stride]= v;
2784
                }
2785
            }
2786
        }else{
2787
            for(y=block_w*mb_y; y<block_w*(mb_y+1); y++){
2788
                for(x=0; x<w; x++){
2789
                    buf[x + y*w]-= 128<<FRAC_BITS;
2790
                }
2791
            }
2792
        }
2793

    
2794
        return;
2795
    }
2796
    
2797
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2798
            START_TIMER
2799

    
2800
            add_yblock(s, buf, dst8, ref, obmc, 
2801
                       block_w*mb_x - block_w/2,
2802
                       block_w*mb_y - block_w/2,
2803
                       block_w, block_w,
2804
                       w, h,
2805
                       w, ref_stride, obmc_stride,
2806
                       mb_x - 1, mb_y - 1,
2807
                       add, plane_index);
2808
            
2809
            STOP_TIMER("add_yblock")
2810
        }
2811
    
2812
    STOP_TIMER("predict_slice")
2813
}
2814

    
2815
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2816
    const int mb_h= s->b_height << s->block_max_depth;
2817
    int mb_y;
2818
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2819
        predict_slice(s, buf, plane_index, add, mb_y);
2820
}
2821

    
2822
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2823
    const int level= b->level;
2824
    const int w= b->width;
2825
    const int h= b->height;
2826
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2827
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2828
    int x,y, thres1, thres2;
2829
    START_TIMER
2830

    
2831
    assert(QROOT==8);
2832

    
2833
    if(s->qlog == LOSSLESS_QLOG) return;
2834
 
2835
    bias= bias ? 0 : (3*qmul)>>3;
2836
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2837
    thres2= 2*thres1;
2838
    
2839
    if(!bias){
2840
        for(y=0; y<h; y++){
2841
            for(x=0; x<w; x++){
2842
                int i= src[x + y*stride];
2843
                
2844
                if((unsigned)(i+thres1) > thres2){
2845
                    if(i>=0){
2846
                        i<<= QEXPSHIFT;
2847
                        i/= qmul; //FIXME optimize
2848
                        src[x + y*stride]=  i;
2849
                    }else{
2850
                        i= -i;
2851
                        i<<= QEXPSHIFT;
2852
                        i/= qmul; //FIXME optimize
2853
                        src[x + y*stride]= -i;
2854
                    }
2855
                }else
2856
                    src[x + y*stride]= 0;
2857
            }
2858
        }
2859
    }else{
2860
        for(y=0; y<h; y++){
2861
            for(x=0; x<w; x++){
2862
                int i= src[x + y*stride]; 
2863
                
2864
                if((unsigned)(i+thres1) > thres2){
2865
                    if(i>=0){
2866
                        i<<= QEXPSHIFT;
2867
                        i= (i + bias) / qmul; //FIXME optimize
2868
                        src[x + y*stride]=  i;
2869
                    }else{
2870
                        i= -i;
2871
                        i<<= QEXPSHIFT;
2872
                        i= (i + bias) / qmul; //FIXME optimize
2873
                        src[x + y*stride]= -i;
2874
                    }
2875
                }else
2876
                    src[x + y*stride]= 0;
2877
            }
2878
        }
2879
    }
2880
    if(level+1 == s->spatial_decomposition_count){
2881
//        STOP_TIMER("quantize")
2882
    }
2883
}
2884

    
2885
static void dequantize_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride){
2886
    const int w= b->width;
2887
    const int h= b->height;
2888
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2889
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2890
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2891
    int x,y;
2892
    START_TIMER
2893
    
2894
    if(s->qlog == LOSSLESS_QLOG) return;
2895
    
2896
    assert(QROOT==8);
2897
    
2898
    for(y=0; y<h; y++){
2899
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2900
        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2901
        for(x=0; x<w; x++){
2902
            int i= line[x];
2903
            if(i<0){
2904
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2905
            }else if(i>0){
2906
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2907
            }
2908
        }
2909
    }
2910
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2911
        STOP_TIMER("dquant")
2912
    }
2913
}
2914

    
2915
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2916
    const int w= b->width;
2917
    const int h= b->height;
2918
    const int qlog= clip(s->qlog + b->qlog, 0, 128);
2919
    const int qmul= qexp[qlog&7]<<(qlog>>3);
2920
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2921
    int x,y;
2922
    START_TIMER
2923
    
2924
    if(s->qlog == LOSSLESS_QLOG) return;
2925
    
2926
    assert(QROOT==8);
2927

    
2928
    for(y=0; y<h; y++){
2929
        for(x=0; x<w; x++){
2930
            int i= src[x + y*stride];
2931
            if(i<0){
2932
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2933
            }else if(i>0){
2934
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2935
            }
2936
        }
2937
    }
2938
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2939
        STOP_TIMER("dquant")
2940
    }
2941
}
2942

    
2943
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2944
    const int w= b->width;
2945
    const int h= b->height;
2946
    int x,y;
2947
    
2948
    for(y=h-1; y>=0; y--){
2949
        for(x=w-1; x>=0; x--){
2950
            int i= x + y*stride;
2951
            
2952
            if(x){
2953
                if(use_median){
2954
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
2955
                    else  src[i] -= src[i - 1];
2956
                }else{
2957
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
2958
                    else  src[i] -= src[i - 1];
2959
                }
2960
            }else{
2961
                if(y) src[i] -= src[i - stride];
2962
            }
2963
        }
2964
    }
2965
}
2966

    
2967
static void correlate_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2968
    const int w= b->width;
2969
    const int h= b->height;
2970
    int x,y;
2971
    
2972
//    START_TIMER
2973
    
2974
    DWTELEM * line;
2975
    DWTELEM * prev;
2976
    
2977
    for(y=0; y<h; y++){
2978
        prev = line;
2979
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2980
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2981
        for(x=0; x<w; x++){
2982
            if(x){
2983
                if(use_median){
2984
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
2985
                    else  line[x] += line[x - 1];
2986
                }else{
2987
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
2988
                    else  line[x] += line[x - 1];
2989
                }
2990
            }else{
2991
                if(y) line[x] += prev[x];
2992
            }
2993
        }
2994
    }
2995
    
2996
//    STOP_TIMER("correlate")
2997
}
2998

    
2999
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3000
    const int w= b->width;
3001
    const int h= b->height;
3002
    int x,y;
3003
    
3004
    for(y=0; y<h; y++){
3005
        for(x=0; x<w; x++){
3006
            int i= x + y*stride;
3007
            
3008
            if(x){
3009
                if(use_median){
3010
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3011
                    else  src[i] += src[i - 1];
3012
                }else{
3013
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3014
                    else  src[i] += src[i - 1];
3015
                }
3016
            }else{
3017
                if(y) src[i] += src[i - stride];
3018
            }
3019
        }
3020
    }
3021
}
3022

    
3023
static void encode_header(SnowContext *s){
3024
    int plane_index, level, orientation;
3025
    uint8_t kstate[32]; 
3026
    
3027
    memset(kstate, MID_STATE, sizeof(kstate));   
3028

    
3029
    put_rac(&s->c, kstate, s->keyframe);
3030
    if(s->keyframe || s->always_reset)
3031
        reset_contexts(s);
3032
    if(s->keyframe){
3033
        put_symbol(&s->c, s->header_state, s->version, 0);
3034
        put_rac(&s->c, s->header_state, s->always_reset);
3035
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3036
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3037
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3038
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3039
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3040
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3041
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3042
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3043

    
3044
        for(plane_index=0; plane_index<2; plane_index++){
3045
            for(level=0; level<s->spatial_decomposition_count; level++){
3046
                for(orientation=level ? 1:0; orientation<4; orientation++){
3047
                    if(orientation==2) continue;
3048
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3049
                }
3050
            }
3051
        }
3052
    }
3053
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3054
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
3055
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
3056
    put_symbol(&s->c, s->header_state, s->qbias, 1);
3057
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3058
}
3059

    
3060
static int decode_header(SnowContext *s){
3061
    int plane_index, level, orientation;
3062
    uint8_t kstate[32];
3063

    
3064
    memset(kstate, MID_STATE, sizeof(kstate));   
3065

    
3066
    s->keyframe= get_rac(&s->c, kstate);
3067
    if(s->keyframe || s->always_reset)
3068
        reset_contexts(s);
3069
    if(s->keyframe){
3070
        s->version= get_symbol(&s->c, s->header_state, 0);
3071
        if(s->version>0){
3072
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3073
            return -1;
3074
        }
3075
        s->always_reset= get_rac(&s->c, s->header_state);
3076
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3077
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3078
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3079
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3080
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3081
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3082
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3083
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3084

    
3085
        for(plane_index=0; plane_index<3; plane_index++){
3086
            for(level=0; level<s->spatial_decomposition_count; level++){
3087
                for(orientation=level ? 1:0; orientation<4; orientation++){
3088
                    int q;
3089
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3090
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3091
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3092
                    s->plane[plane_index].band[level][orientation].qlog= q;
3093
                }
3094
            }
3095
        }
3096
    }
3097
    
3098
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3099
    if(s->spatial_decomposition_type > 2){
3100
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3101
        return -1;
3102
    }
3103
    
3104
    s->qlog= get_symbol(&s->c, s->header_state, 1);
3105
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3106
    s->qbias= get_symbol(&s->c, s->header_state, 1);
3107
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3108

    
3109
    return 0;
3110
}
3111

    
3112
static int common_init(AVCodecContext *avctx){
3113
    SnowContext *s = avctx->priv_data;
3114
    int width, height;
3115
    int level, orientation, plane_index, dec;
3116

    
3117
    s->avctx= avctx;
3118
        
3119
    dsputil_init(&s->dsp, avctx);
3120

    
3121
#define mcf(dx,dy)\
3122
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3123
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3124
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3125
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3126
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3127
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3128

    
3129
    mcf( 0, 0)
3130
    mcf( 4, 0)
3131
    mcf( 8, 0)
3132
    mcf(12, 0)
3133
    mcf( 0, 4)
3134
    mcf( 4, 4)
3135
    mcf( 8, 4)
3136
    mcf(12, 4)
3137
    mcf( 0, 8)
3138
    mcf( 4, 8)
3139
    mcf( 8, 8)
3140
    mcf(12, 8)
3141
    mcf( 0,12)
3142
    mcf( 4,12)
3143
    mcf( 8,12)
3144
    mcf(12,12)
3145

    
3146
#define mcfh(dx,dy)\
3147
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3148
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3149
        mc_block_hpel ## dx ## dy ## 16;\
3150
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3151
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3152
        mc_block_hpel ## dx ## dy ## 8;
3153

    
3154
    mcfh(0, 0)
3155
    mcfh(8, 0)
3156
    mcfh(0, 8)
3157
    mcfh(8, 8)
3158
        
3159
    dec= s->spatial_decomposition_count= 5;
3160
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3161
    
3162
    s->chroma_h_shift= 1; //FIXME XXX
3163
    s->chroma_v_shift= 1;
3164
    
3165
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3166
    
3167
    width= s->avctx->width;
3168
    height= s->avctx->height;
3169

    
3170
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3171
    
3172
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3173
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3174
    
3175
    for(plane_index=0; plane_index<3; plane_index++){    
3176
        int w= s->avctx->width;
3177
        int h= s->avctx->height;
3178

    
3179
        if(plane_index){
3180
            w>>= s->chroma_h_shift;
3181
            h>>= s->chroma_v_shift;
3182
        }
3183
        s->plane[plane_index].width = w;
3184
        s->plane[plane_index].height= h;
3185
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3186
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3187
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3188
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3189
                
3190
                b->buf= s->spatial_dwt_buffer;
3191
                b->level= level;
3192
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3193
                b->width = (w + !(orientation&1))>>1;
3194
                b->height= (h + !(orientation>1))>>1;
3195
                
3196
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3197
                b->buf_x_offset = 0;
3198
                b->buf_y_offset = 0;
3199
                
3200
                if(orientation&1){
3201
                    b->buf += (w+1)>>1;
3202
                    b->buf_x_offset = (w+1)>>1;
3203
                }
3204
                if(orientation>1){
3205
                    b->buf += b->stride>>1;
3206
                    b->buf_y_offset = b->stride_line >> 1;
3207
                }
3208
                
3209
                if(level)
3210
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3211
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3212
            }
3213
            w= (w+1)>>1;
3214
            h= (h+1)>>1;
3215
        }
3216
    }
3217
    
3218
    reset_contexts(s);
3219
/*    
3220
    width= s->width= avctx->width;
3221
    height= s->height= avctx->height;
3222
    
3223
    assert(width && height);
3224
*/
3225
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3226
    
3227
    return 0;
3228
}
3229

    
3230

    
3231
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3232
    int width = p->width;
3233
    int height= p->height;
3234
    int level, orientation, x, y;
3235

    
3236
    for(level=0; level<s->spatial_decomposition_count; level++){
3237
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3238
            SubBand *b= &p->band[level][orientation];
3239
            DWTELEM *buf= b->buf;
3240
            int64_t error=0;
3241
            
3242
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3243
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
3244
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3245
            for(y=0; y<height; y++){
3246
                for(x=0; x<width; x++){
3247
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
3248
                    error += d*d;
3249
                }
3250
            }
3251

    
3252
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3253
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3254
        }
3255
    }
3256
}
3257

    
3258
static int encode_init(AVCodecContext *avctx)
3259
{
3260
    SnowContext *s = avctx->priv_data;
3261
    int plane_index;
3262

    
3263
    if(avctx->strict_std_compliance >= 0){
3264
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
3265
               "use vstrict=-1 / -strict -1 to use it anyway\n");
3266
        return -1;
3267
    }
3268
 
3269
    common_init(avctx);
3270
    alloc_blocks(s);
3271
 
3272
    s->version=0;
3273
    
3274
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3275
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3276
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3277
    h263_encode_init(&s->m); //mv_penalty
3278

    
3279
    for(plane_index=0; plane_index<3; plane_index++){
3280
        calculate_vissual_weight(s, &s->plane[plane_index]);
3281
    }
3282
    
3283
    
3284
    avctx->coded_frame= &s->current_picture;
3285
    switch(avctx->pix_fmt){
3286
//    case PIX_FMT_YUV444P:
3287
//    case PIX_FMT_YUV422P:
3288
    case PIX_FMT_YUV420P:
3289
    case PIX_FMT_GRAY8:
3290
//    case PIX_FMT_YUV411P:
3291
//    case PIX_FMT_YUV410P:
3292
        s->colorspace_type= 0;
3293
        break;
3294
/*    case PIX_FMT_RGBA32:
3295
        s->colorspace= 1;
3296
        break;*/
3297
    default:
3298
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3299
        return -1;
3300
    }
3301
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3302
    s->chroma_h_shift= 1;
3303
    s->chroma_v_shift= 1;
3304
    return 0;
3305
}
3306

    
3307
static int frame_start(SnowContext *s){
3308
   AVFrame tmp;
3309
   int w= s->avctx->width; //FIXME round up to x16 ?
3310
   int h= s->avctx->height;
3311

    
3312
    if(s->current_picture.data[0]){
3313
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
3314
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3315
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3316
    }
3317

    
3318
    tmp= s->last_picture;
3319
    s->last_picture= s->current_picture;
3320
    s->current_picture= tmp;
3321
    
3322
    s->current_picture.reference= 1;
3323
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3324
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3325
        return -1;
3326
    }
3327
    
3328
    return 0;
3329
}
3330

    
3331
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3332
    SnowContext *s = avctx->priv_data;
3333
    RangeCoder * const c= &s->c;
3334
    AVFrame *pict = data;
3335
    const int width= s->avctx->width;
3336
    const int height= s->avctx->height;
3337
    int level, orientation, plane_index;
3338

    
3339
    ff_init_range_encoder(c, buf, buf_size);
3340
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3341
    
3342
    s->input_picture = *pict;
3343

    
3344
    s->keyframe=avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3345
    pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3346
    
3347
    if(pict->quality){
3348
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3349
        //<64 >60
3350
        s->qlog += 61;
3351
    }else{
3352
        s->qlog= LOSSLESS_QLOG;
3353
    }
3354

    
3355
    frame_start(s);
3356
    s->current_picture.key_frame= s->keyframe;
3357

    
3358
    if(pict->pict_type == P_TYPE){
3359
        int block_width = (width +15)>>4;
3360
        int block_height= (height+15)>>4;
3361
        int stride= s->current_picture.linesize[0];
3362
        
3363
        assert(s->current_picture.data[0]);
3364
        assert(s->last_picture.data[0]);
3365
     
3366
        s->m.avctx= s->avctx;
3367
        s->m.current_picture.data[0]= s->current_picture.data[0];
3368
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
3369
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3370
        s->m.current_picture_ptr= &s->m.current_picture;
3371
        s->m.   last_picture_ptr= &s->m.   last_picture;
3372
        s->m.linesize=
3373
        s->m.   last_picture.linesize[0]=
3374
        s->m.    new_picture.linesize[0]=
3375
        s->m.current_picture.linesize[0]= stride;
3376
        s->m.uvlinesize= s->current_picture.linesize[1];
3377
        s->m.width = width;
3378
        s->m.height= height;
3379
        s->m.mb_width = block_width;
3380
        s->m.mb_height= block_height;
3381
        s->m.mb_stride=   s->m.mb_width+1;
3382
        s->m.b8_stride= 2*s->m.mb_width+1;
3383
        s->m.f_code=1;
3384
        s->m.pict_type= pict->pict_type;
3385
        s->m.me_method= s->avctx->me_method;
3386
        s->m.me.scene_change_score=0;
3387
        s->m.flags= s->avctx->flags;
3388
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3389
        s->m.out_format= FMT_H263;
3390
        s->m.unrestricted_mv= 1;
3391

    
3392
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3393
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3394
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3395

    
3396
        s->m.dsp= s->dsp; //move
3397
        ff_init_me(&s->m);
3398
    }
3399
    
3400
redo_frame:
3401
        
3402
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3403

    
3404
    encode_header(s);
3405
    encode_blocks(s);
3406
      
3407
    for(plane_index=0; plane_index<3; plane_index++){
3408
        Plane *p= &s->plane[plane_index];
3409
        int w= p->width;
3410
        int h= p->height;
3411
        int x, y;
3412
//        int bits= put_bits_count(&s->c.pb);
3413

    
3414
        //FIXME optimize
3415
     if(pict->data[plane_index]) //FIXME gray hack
3416
        for(y=0; y<h; y++){
3417
            for(x=0; x<w; x++){
3418
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3419
            }
3420
        }
3421
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3422
        
3423
        if(   plane_index==0 
3424
           && pict->pict_type == P_TYPE 
3425
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3426
            ff_init_range_encoder(c, buf, buf_size);
3427
            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3428
            pict->pict_type= FF_I_TYPE;
3429
            s->keyframe=1;
3430
            reset_contexts(s);
3431
            goto redo_frame;
3432
        }
3433
        
3434
        if(s->qlog == LOSSLESS_QLOG){
3435
            for(y=0; y<h; y++){
3436
                for(x=0; x<w; x++){
3437
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1)))>>FRAC_BITS;
3438
                }
3439
            }
3440
        }
3441
 
3442
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3443

    
3444
        for(level=0; level<s->spatial_decomposition_count; level++){
3445
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3446
                SubBand *b= &p->band[level][orientation];
3447
                
3448
                quantize(s, b, b->buf, b->stride, s->qbias);
3449
                if(orientation==0)
3450
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3451
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3452
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
3453
                if(orientation==0)
3454
                    correlate(s, b, b->buf, b->stride, 1, 0);
3455
            }
3456
        }
3457
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3458

    
3459
        for(level=0; level<s->spatial_decomposition_count; level++){
3460
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3461
                SubBand *b= &p->band[level][orientation];
3462

    
3463
                dequantize(s, b, b->buf, b->stride);
3464
            }
3465
        }
3466

    
3467
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3468
        if(s->qlog == LOSSLESS_QLOG){
3469
            for(y=0; y<h; y++){
3470
                for(x=0; x<w; x++){
3471
                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
3472
                }
3473
            }
3474
        }
3475
{START_TIMER
3476
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3477
STOP_TIMER("pred-conv")}
3478
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3479
            int64_t error= 0;
3480
            
3481
    if(pict->data[plane_index]) //FIXME gray hack
3482
            for(y=0; y<h; y++){
3483
                for(x=0; x<w; x++){
3484
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3485
                    error += d*d;
3486
                }
3487
            }
3488
            s->avctx->error[plane_index] += error;
3489
            s->current_picture.error[plane_index] = error;
3490
        }
3491
    }
3492

    
3493
    if(s->last_picture.data[0])
3494
        avctx->release_buffer(avctx, &s->last_picture);
3495

    
3496
    emms_c();
3497
    
3498
    return ff_rac_terminate(c);
3499
}
3500

    
3501
static void common_end(SnowContext *s){
3502
    int plane_index, level, orientation;
3503

    
3504
    av_freep(&s->spatial_dwt_buffer);
3505

    
3506
    av_freep(&s->m.me.scratchpad);    
3507
    av_freep(&s->m.me.map);
3508
    av_freep(&s->m.me.score_map);
3509
 
3510
    av_freep(&s->block);
3511

    
3512
    for(plane_index=0; plane_index<3; plane_index++){    
3513
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3514
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3515
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3516
                
3517
                av_freep(&b->x_coeff);
3518
            }
3519
        }
3520
    }
3521
}
3522

    
3523
static int encode_end(AVCodecContext *avctx)
3524
{
3525
    SnowContext *s = avctx->priv_data;
3526

    
3527
    common_end(s);
3528

    
3529
    return 0;
3530
}
3531

    
3532
static int decode_init(AVCodecContext *avctx)
3533
{
3534
    SnowContext *s = avctx->priv_data;
3535
    int block_size;
3536

    
3537
    common_init(avctx);
3538
    
3539
    block_size = MB_SIZE >> s->block_max_depth;
3540
    /* FIXME block_size * 2 is determined empirically. block_size * 1.5 is definitely needed, but I (Robert) cannot figure out why more than that is needed. Perhaps there is a bug, or perhaps I overlooked some demands that are placed on the buffer. */
3541
    /* FIXME The formula is WRONG. For height > 480, the buffer will overflow. */
3542
    /* FIXME For now, I will use a full frame of lines. Fortunately, this should not materially effect cache performance because lines are allocated using a stack, so if in fact only 50 out of 496 lines are needed at a time, the other 446 will sit allocated but never accessed. */
3543
//    slice_buffer_init(s->plane[0].sb, s->plane[0].height, (block_size * 2) + (s->spatial_decomposition_count * s->spatial_decomposition_count), s->plane[0].width, s->spatial_dwt_buffer);
3544
    slice_buffer_init(&s->sb, s->plane[0].height, s->plane[0].height, s->plane[0].width, s->spatial_dwt_buffer);
3545
    
3546
    return 0;
3547
}
3548

    
3549
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3550
    SnowContext *s = avctx->priv_data;
3551
    RangeCoder * const c= &s->c;
3552
    int bytes_read;
3553
    AVFrame *picture = data;
3554
    int level, orientation, plane_index;
3555

    
3556
    ff_init_range_decoder(c, buf, buf_size);
3557
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3558

    
3559
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3560
    decode_header(s);
3561
    if(!s->block) alloc_blocks(s);
3562

    
3563
    frame_start(s);
3564
    //keyframe flag dupliaction mess FIXME
3565
    if(avctx->debug&FF_DEBUG_PICT_INFO)
3566
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3567
    
3568
    decode_blocks(s);
3569

    
3570
    for(plane_index=0; plane_index<3; plane_index++){
3571
        Plane *p= &s->plane[plane_index];
3572
        int w= p->width;
3573
        int h= p->height;
3574
        int x, y;
3575
        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
3576
        SubBand * correlate_band;
3577
        
3578
if(s->avctx->debug&2048){
3579
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3580
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3581

    
3582
        for(y=0; y<h; y++){
3583
            for(x=0; x<w; x++){
3584
                int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
3585
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3586
            }
3587
        }
3588
}
3589

    
3590
{   START_TIMER
3591
    for(level=0; level<s->spatial_decomposition_count; level++){
3592
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3593
            SubBand *b= &p->band[level][orientation];
3594
            unpack_coeffs(s, b, b->parent, orientation);
3595
        }
3596
    }
3597
    STOP_TIMER("unpack coeffs");
3598
}
3599
        
3600
        /* Handle level 0, orientation 0 specially. It is particularly resistant to slicing but fortunately quite small, so process it in one pass. */
3601
        correlate_band = &p->band[0][0];
3602
        decode_subband_slice_buffered(s, correlate_band, &s->sb, 0, correlate_band->height, decode_state[0][0]);
3603
        correlate_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0);
3604
        dequantize_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride);
3605

    
3606
{START_TIMER
3607
    const int mb_h= s->b_height << s->block_max_depth;
3608
    const int block_size = MB_SIZE >> s->block_max_depth;
3609
    const int block_w    = plane_index ? block_size/2 : block_size;
3610
    int mb_y;
3611
    dwt_compose_t cs[MAX_DECOMPOSITIONS];
3612
    int yd=0, yq=0;
3613
    int y;
3614
    int end_y;
3615

    
3616
    ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
3617
    for(mb_y=0; mb_y<=mb_h; mb_y++){
3618
        
3619
        const int slice_starty = block_w*mb_y;
3620
        const int slice_h = block_w*(mb_y+1);
3621

    
3622
        {        
3623
        START_TIMER
3624
        for(level=0; level<s->spatial_decomposition_count; level++){
3625
            for(orientation=level ? 1 : 1; orientation<4; orientation++){
3626
                SubBand *b= &p->band[level][orientation];
3627
                int start_y;
3628
                int end_y;
3629
                int our_mb_start = mb_y;
3630
                int our_mb_end = (mb_y + 1);
3631
                start_y = FFMIN(b->height, (mb_y ? ((block_w * our_mb_start - 4) >> (s->spatial_decomposition_count - level)) + 5 : 0));
3632
                end_y = FFMIN(b->height, (((block_w * our_mb_end - 4) >> (s->spatial_decomposition_count - level)) + 5));
3633
                    
3634
                if (start_y != end_y)
3635
                    decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
3636
            }
3637
        }
3638
        STOP_TIMER("decode_subband_slice");
3639
        }
3640
        
3641
{   START_TIMER
3642
        for(; yd<slice_h; yd+=4){
3643
            ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
3644
        }
3645
    STOP_TIMER("idwt slice");}
3646

    
3647
        
3648
        if(s->qlog == LOSSLESS_QLOG){
3649
            for(; yq<slice_h && yq<h; yq++){
3650
                DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
3651
                for(x=0; x<w; x++){
3652
                    line[x] <<= FRAC_BITS;
3653
                }
3654
            }
3655
        }
3656

    
3657
        predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
3658
        
3659
        /* Nasty hack based empirically on how predict_slice_buffered() hits the buffer. */
3660
        /* FIXME If possible, make predict_slice fit into the slice. As of now, it works on some previous lines (up to slice_height / 2) if the condition on the next line is false. */
3661
        if (s->keyframe || (s->avctx->debug&512)){
3662
            y = FFMIN(p->height, slice_starty);
3663
            end_y = FFMIN(p->height, slice_h);
3664
        }
3665
        else{
3666
            y = FFMAX(0, FFMIN(p->height, slice_starty - (block_w >> 1)));
3667
            end_y = FFMAX(0, FFMIN(p->height, slice_h - (block_w >> 1)));
3668
        }
3669
        while(y < end_y)
3670
            slice_buffer_release(&s->sb, y++);
3671
    }
3672
    
3673
    slice_buffer_flush(&s->sb);
3674
    
3675
STOP_TIMER("idwt + predict_slices")}
3676
    }
3677
            
3678
    emms_c();
3679

    
3680
    if(s->last_picture.data[0])
3681
        avctx->release_buffer(avctx, &s->last_picture);
3682

    
3683
if(!(s->avctx->debug&2048))        
3684
    *picture= s->current_picture;
3685
else
3686
    *picture= s->mconly_picture;
3687
    
3688
    *data_size = sizeof(AVFrame);
3689
    
3690
    bytes_read= c->bytestream - c->bytestream_start;
3691
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
3692

    
3693
    return bytes_read;
3694
}
3695

    
3696
static int decode_end(AVCodecContext *avctx)
3697
{
3698
    SnowContext *s = avctx->priv_data;
3699

    
3700
    slice_buffer_destroy(&s->sb);
3701
    
3702
    common_end(s);
3703

    
3704
    return 0;
3705
}
3706

    
3707
AVCodec snow_decoder = {
3708
    "snow",
3709
    CODEC_TYPE_VIDEO,
3710
    CODEC_ID_SNOW,
3711
    sizeof(SnowContext),
3712
    decode_init,
3713
    NULL,
3714
    decode_end,
3715
    decode_frame,
3716
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3717
    NULL
3718
};
3719

    
3720
#ifdef CONFIG_ENCODERS
3721
AVCodec snow_encoder = {
3722
    "snow",
3723
    CODEC_TYPE_VIDEO,
3724
    CODEC_ID_SNOW,
3725
    sizeof(SnowContext),
3726
    encode_init,
3727
    encode_frame,
3728
    encode_end,
3729
};
3730
#endif
3731

    
3732

    
3733
#if 0
3734
#undef malloc
3735
#undef free
3736
#undef printf
3737

3738
int main(){
3739
    int width=256;
3740
    int height=256;
3741
    int buffer[2][width*height];
3742
    SnowContext s;
3743
    int i;
3744
    s.spatial_decomposition_count=6;
3745
    s.spatial_decomposition_type=1;
3746
    
3747
    printf("testing 5/3 DWT\n");
3748
    for(i=0; i<width*height; i++)
3749
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3750
    
3751
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3752
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3753
    
3754
    for(i=0; i<width*height; i++)
3755
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3756

3757
    printf("testing 9/7 DWT\n");
3758
    s.spatial_decomposition_type=0;
3759
    for(i=0; i<width*height; i++)
3760
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3761
    
3762
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3763
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3764
    
3765
    for(i=0; i<width*height; i++)
3766
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3767
        
3768
    printf("testing AC coder\n");
3769
    memset(s.header_state, 0, sizeof(s.header_state));
3770
    ff_init_range_encoder(&s.c, buffer[0], 256*256);
3771
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3772
        
3773
    for(i=-256; i<256; i++){
3774
START_TIMER
3775
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3776
STOP_TIMER("put_symbol")
3777
    }
3778
    ff_rac_terminate(&s.c);
3779

3780
    memset(s.header_state, 0, sizeof(s.header_state));
3781
    ff_init_range_decoder(&s.c, buffer[0], 256*256);
3782
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3783
    
3784
    for(i=-256; i<256; i++){
3785
        int j;
3786
START_TIMER
3787
        j= get_symbol(&s.c, s.header_state, 1);
3788
STOP_TIMER("get_symbol")
3789
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3790
    }
3791
{
3792
int level, orientation, x, y;
3793
int64_t errors[8][4];
3794
int64_t g=0;
3795

3796
    memset(errors, 0, sizeof(errors));
3797
    s.spatial_decomposition_count=3;
3798
    s.spatial_decomposition_type=0;
3799
    for(level=0; level<s.spatial_decomposition_count; level++){
3800
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3801
            int w= width  >> (s.spatial_decomposition_count-level);
3802
            int h= height >> (s.spatial_decomposition_count-level);
3803
            int stride= width  << (s.spatial_decomposition_count-level);
3804
            DWTELEM *buf= buffer[0];
3805
            int64_t error=0;
3806

3807
            if(orientation&1) buf+=w;
3808
            if(orientation>1) buf+=stride>>1;
3809
            
3810
            memset(buffer[0], 0, sizeof(int)*width*height);
3811
            buf[w/2 + h/2*stride]= 256*256;
3812
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3813
            for(y=0; y<height; y++){
3814
                for(x=0; x<width; x++){
3815
                    int64_t d= buffer[0][x + y*width];
3816
                    error += d*d;
3817
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3818
                }
3819
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3820
            }
3821
            error= (int)(sqrt(error)+0.5);
3822
            errors[level][orientation]= error;
3823
            if(g) g=ff_gcd(g, error);
3824
            else g= error;
3825
        }
3826
    }
3827
    printf("static int const visual_weight[][4]={\n");
3828
    for(level=0; level<s.spatial_decomposition_count; level++){
3829
        printf("  {");
3830
        for(orientation=0; orientation<4; orientation++){
3831
            printf("%8lld,", errors[level][orientation]/g);
3832
        }
3833
        printf("},\n");
3834
    }
3835
    printf("};\n");
3836
    {
3837
            int level=2;
3838
            int orientation=3;
3839
            int w= width  >> (s.spatial_decomposition_count-level);
3840
            int h= height >> (s.spatial_decomposition_count-level);
3841
            int stride= width  << (s.spatial_decomposition_count-level);
3842
            DWTELEM *buf= buffer[0];
3843
            int64_t error=0;
3844

3845
            buf+=w;
3846
            buf+=stride>>1;
3847
            
3848
            memset(buffer[0], 0, sizeof(int)*width*height);
3849
#if 1
3850
            for(y=0; y<height; y++){
3851
                for(x=0; x<width; x++){
3852
                    int tab[4]={0,2,3,1};
3853
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3854
                }
3855
            }
3856
            ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3857
#else
3858
            for(y=0; y<h; y++){
3859
                for(x=0; x<w; x++){
3860
                    buf[x + y*stride  ]=169;
3861
                    buf[x + y*stride-w]=64;
3862
                }
3863
            }
3864
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3865
#endif
3866
            for(y=0; y<height; y++){
3867
                for(x=0; x<width; x++){
3868
                    int64_t d= buffer[0][x + y*width];
3869
                    error += d*d;
3870
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3871
                }
3872
                if(ABS(height/2-y)<9) printf("\n");
3873
            }
3874
    }
3875

    
3876
}
3877
    return 0;
3878
}
3879
#endif
3880