Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 2cd34043

History | View | Annotate | Download (134 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22

    
23
#include "rangecoder.h"
24
#define MID_STATE 128
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QSHIFT 5
35
#define QROOT (1<<QSHIFT)
36
#define LOSSLESS_QLOG -128
37
#define FRAC_BITS 8
38

    
39
static const int8_t quant3[256]={
40
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56
};
57
static const int8_t quant3b[256]={
58
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74
};
75
static const int8_t quant3bA[256]={
76
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92
};
93
static const int8_t quant5[256]={
94
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110
};
111
static const int8_t quant7[256]={
112
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128
};
129
static const int8_t quant9[256]={
130
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146
};
147
static const int8_t quant11[256]={
148
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164
};
165
static const int8_t quant13[256]={
166
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182
};
183

    
184
#define LOG2_OBMC_MAX 6
185
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
186
#if 0 //64*cubic
187
static const uint8_t obmc32[1024]={
188
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220
//error:0.000022
221
};
222
static const uint8_t obmc16[256]={
223
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239
//error:0.000033
240
};
241
#elif 1 // 64*linear
242
static const uint8_t obmc32[1024]={
243
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275
 //error:0.000020
276
};
277
static const uint8_t obmc16[256]={
278
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294
//error:0.000015
295
};
296
#else //64*cos
297
static const uint8_t obmc32[1024]={
298
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330
//error:0.000022
331
};
332
static const uint8_t obmc16[256]={
333
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349
//error:0.000022
350
};
351
#endif
352

    
353
//linear *64
354
static const uint8_t obmc8[64]={
355
 1, 3, 5, 7, 7, 5, 3, 1,
356
 3, 9,15,21,21,15, 9, 3,
357
 5,15,25,35,35,25,15, 5,
358
 7,21,35,49,49,35,21, 7,
359
 7,21,35,49,49,35,21, 7,
360
 5,15,25,35,35,25,15, 5,
361
 3, 9,15,21,21,15, 9, 3,
362
 1, 3, 5, 7, 7, 5, 3, 1,
363
//error:0.000000
364
};
365

    
366
//linear *64
367
static const uint8_t obmc4[16]={
368
 4,12,12, 4,
369
12,36,36,12,
370
12,36,36,12,
371
 4,12,12, 4,
372
//error:0.000000
373
};
374

    
375
static const uint8_t *obmc_tab[4]={
376
    obmc32, obmc16, obmc8, obmc4
377
};
378

    
379
typedef struct BlockNode{
380
    int16_t mx;
381
    int16_t my;
382
    uint8_t color[3];
383
    uint8_t type;
384
//#define TYPE_SPLIT    1
385
#define BLOCK_INTRA   1
386
//#define TYPE_NOCOLOR  4
387
    uint8_t level; //FIXME merge into type?
388
}BlockNode;
389

    
390
#define LOG2_MB_SIZE 4
391
#define MB_SIZE (1<<LOG2_MB_SIZE)
392

    
393
typedef struct x_and_coeff{
394
    int16_t x;
395
    uint16_t coeff;
396
} x_and_coeff;
397

    
398
typedef struct SubBand{
399
    int level;
400
    int stride;
401
    int width;
402
    int height;
403
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
404
    DWTELEM *buf;
405
    int buf_x_offset;
406
    int buf_y_offset;
407
    int stride_line; ///< Stride measured in lines, not pixels.
408
    x_and_coeff * x_coeff;
409
    struct SubBand *parent;
410
    uint8_t state[/*7*2*/ 7 + 512][32];
411
}SubBand;
412

    
413
typedef struct Plane{
414
    int width;
415
    int height;
416
    SubBand band[MAX_DECOMPOSITIONS][4];
417
}Plane;
418

    
419
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
420
typedef struct {
421
    DWTELEM * * line; ///< For use by idwt and predict_slices.
422
    DWTELEM * * data_stack; ///< Used for internal purposes.
423
    int data_stack_top;
424
    int line_count;
425
    int line_width;
426
    int data_count;
427
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
428
} slice_buffer;
429

    
430
typedef struct SnowContext{
431
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
432

    
433
    AVCodecContext *avctx;
434
    RangeCoder c;
435
    DSPContext dsp;
436
    AVFrame input_picture;
437
    AVFrame current_picture;
438
    AVFrame last_picture;
439
    AVFrame mconly_picture;
440
//     uint8_t q_context[16];
441
    uint8_t header_state[32];
442
    uint8_t block_state[128 + 32*128];
443
    int keyframe;
444
    int always_reset;
445
    int version;
446
    int spatial_decomposition_type;
447
    int temporal_decomposition_type;
448
    int spatial_decomposition_count;
449
    int temporal_decomposition_count;
450
    DWTELEM *spatial_dwt_buffer;
451
    int colorspace_type;
452
    int chroma_h_shift;
453
    int chroma_v_shift;
454
    int spatial_scalability;
455
    int qlog;
456
    int lambda;
457
    int lambda2;
458
    int mv_scale;
459
    int qbias;
460
#define QBIAS_SHIFT 3
461
    int b_width;
462
    int b_height;
463
    int block_max_depth;
464
    Plane plane[MAX_PLANES];
465
    BlockNode *block;
466
    slice_buffer sb;
467

    
468
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
469
}SnowContext;
470

    
471
typedef struct {
472
    DWTELEM *b0;
473
    DWTELEM *b1;
474
    DWTELEM *b2;
475
    DWTELEM *b3;
476
    int y;
477
} dwt_compose_t;
478

    
479
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
480
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
481

    
482
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
483
{
484
    int i;
485
  
486
    buf->base_buffer = base_buffer;
487
    buf->line_count = line_count;
488
    buf->line_width = line_width;
489
    buf->data_count = max_allocated_lines;
490
    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
491
    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
492
  
493
    for (i = 0; i < max_allocated_lines; i++)
494
    {
495
      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
496
    }
497
    
498
    buf->data_stack_top = max_allocated_lines - 1;
499
}
500

    
501
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
502
{
503
    int i;
504
    int offset;
505
    DWTELEM * buffer;
506
  
507
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);  
508
  
509
    assert(buf->data_stack_top >= 0);
510
//  assert(!buf->line[line]);
511
    if (buf->line[line])
512
        return buf->line[line];
513
    
514
    offset = buf->line_width * line;
515
    buffer = buf->data_stack[buf->data_stack_top];
516
    buf->data_stack_top--;
517
    buf->line[line] = buffer;
518
  
519
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
520
  
521
    return buffer;
522
}
523

    
524
static void slice_buffer_release(slice_buffer * buf, int line)
525
{
526
    int i;
527
    int offset;
528
    DWTELEM * buffer;
529

    
530
    assert(line >= 0 && line < buf->line_count);
531
    assert(buf->line[line]);
532

    
533
    offset = buf->line_width * line;
534
    buffer = buf->line[line];
535
    buf->data_stack_top++;
536
    buf->data_stack[buf->data_stack_top] = buffer;
537
    buf->line[line] = NULL;
538
  
539
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
540
}
541

    
542
static void slice_buffer_flush(slice_buffer * buf)
543
{
544
    int i;
545
    for (i = 0; i < buf->line_count; i++)
546
    {
547
        if (buf->line[i])
548
        {
549
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
550
            slice_buffer_release(buf, i);
551
        }
552
    }
553
}
554

    
555
static void slice_buffer_destroy(slice_buffer * buf)
556
{
557
    int i;
558
    slice_buffer_flush(buf);
559
  
560
    for (i = buf->data_count - 1; i >= 0; i--)
561
    {
562
        assert(buf->data_stack[i]);
563
        av_free(buf->data_stack[i]);
564
    }
565
    assert(buf->data_stack);
566
    av_free(buf->data_stack);
567
    assert(buf->line);
568
    av_free(buf->line);
569
}
570

    
571
#ifdef        __sgi
572
// Avoid a name clash on SGI IRIX
573
#undef        qexp
574
#endif
575
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
576
static uint8_t qexp[QROOT];
577

    
578
static inline int mirror(int v, int m){
579
    if     (v<0) return -v;
580
    else if(v>m) return 2*m-v;
581
    else         return v;
582
}
583

    
584
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
585
    int i;
586

    
587
    if(v){
588
        const int a= ABS(v);
589
        const int e= av_log2(a);
590
#if 1
591
        const int el= FFMIN(e, 10);   
592
        put_rac(c, state+0, 0);
593

    
594
        for(i=0; i<el; i++){
595
            put_rac(c, state+1+i, 1);  //1..10
596
        }
597
        for(; i<e; i++){
598
            put_rac(c, state+1+9, 1);  //1..10
599
        }
600
        put_rac(c, state+1+FFMIN(i,9), 0);
601

    
602
        for(i=e-1; i>=el; i--){
603
            put_rac(c, state+22+9, (a>>i)&1); //22..31
604
        }
605
        for(; i>=0; i--){
606
            put_rac(c, state+22+i, (a>>i)&1); //22..31
607
        }
608

    
609
        if(is_signed)
610
            put_rac(c, state+11 + el, v < 0); //11..21
611
#else
612
        
613
        put_rac(c, state+0, 0);
614
        if(e<=9){
615
            for(i=0; i<e; i++){
616
                put_rac(c, state+1+i, 1);  //1..10
617
            }
618
            put_rac(c, state+1+i, 0);
619

    
620
            for(i=e-1; i>=0; i--){
621
                put_rac(c, state+22+i, (a>>i)&1); //22..31
622
            }
623

    
624
            if(is_signed)
625
                put_rac(c, state+11 + e, v < 0); //11..21
626
        }else{
627
            for(i=0; i<e; i++){
628
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
629
            }
630
            put_rac(c, state+1+FFMIN(i,9), 0);
631

    
632
            for(i=e-1; i>=0; i--){
633
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
634
            }
635

    
636
            if(is_signed)
637
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
638
        }
639
#endif
640
    }else{
641
        put_rac(c, state+0, 1);
642
    }
643
}
644

    
645
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
646
    if(get_rac(c, state+0))
647
        return 0;
648
    else{
649
        int i, e, a;
650
        e= 0;
651
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
652
            e++;
653
        }
654

    
655
        a= 1;
656
        for(i=e-1; i>=0; i--){
657
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
658
        }
659

    
660
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
661
            return -a;
662
        else
663
            return a;
664
    }
665
}
666

    
667
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
668
    int i;
669
    int r= log2>=0 ? 1<<log2 : 1;
670

    
671
    assert(v>=0);
672
    assert(log2>=-4);
673

    
674
    while(v >= r){
675
        put_rac(c, state+4+log2, 1);
676
        v -= r;
677
        log2++;
678
        if(log2>0) r+=r;
679
    }
680
    put_rac(c, state+4+log2, 0);
681
    
682
    for(i=log2-1; i>=0; i--){
683
        put_rac(c, state+31-i, (v>>i)&1);
684
    }
685
}
686

    
687
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
688
    int i;
689
    int r= log2>=0 ? 1<<log2 : 1;
690
    int v=0;
691

    
692
    assert(log2>=-4);
693

    
694
    while(get_rac(c, state+4+log2)){
695
        v+= r;
696
        log2++;
697
        if(log2>0) r+=r;
698
    }
699
    
700
    for(i=log2-1; i>=0; i--){
701
        v+= get_rac(c, state+31-i)<<i;
702
    }
703

    
704
    return v;
705
}
706

    
707
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
708
    const int mirror_left= !highpass;
709
    const int mirror_right= (width&1) ^ highpass;
710
    const int w= (width>>1) - 1 + (highpass & width);
711
    int i;
712

    
713
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
714
    if(mirror_left){
715
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
716
        dst += dst_step;
717
        src += src_step;
718
    }
719
    
720
    for(i=0; i<w; i++){
721
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
722
    }
723
    
724
    if(mirror_right){
725
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
726
    }
727
}
728

    
729
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
730
    const int mirror_left= !highpass;
731
    const int mirror_right= (width&1) ^ highpass;
732
    const int w= (width>>1) - 1 + (highpass & width);
733
    int i;
734

    
735
    if(mirror_left){
736
        int r= 3*2*ref[0];
737
        r += r>>4;
738
        r += r>>8;
739
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
740
        dst += dst_step;
741
        src += src_step;
742
    }
743
    
744
    for(i=0; i<w; i++){
745
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
746
        r += r>>4;
747
        r += r>>8;
748
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
749
    }
750
    
751
    if(mirror_right){
752
        int r= 3*2*ref[w*ref_step];
753
        r += r>>4;
754
        r += r>>8;
755
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
756
    }
757
}
758

    
759
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
760
    const int mirror_left= !highpass;
761
    const int mirror_right= (width&1) ^ highpass;
762
    const int w= (width>>1) - 1 + (highpass & width);
763
    int i;
764

    
765
    assert(shift == 4);
766
#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
767
    if(mirror_left){
768
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
769
        dst += dst_step;
770
        src += src_step;
771
    }
772
    
773
    for(i=0; i<w; i++){
774
        dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
775
    }
776
    
777
    if(mirror_right){
778
        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
779
    }
780
}
781

    
782

    
783
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
784
    int x, i;
785
    
786
    for(x=start; x<width; x+=2){
787
        int64_t sum=0;
788

    
789
        for(i=0; i<n; i++){
790
            int x2= x + 2*i - n + 1;
791
            if     (x2<     0) x2= -x2;
792
            else if(x2>=width) x2= 2*width-x2-2;
793
            sum += coeffs[i]*(int64_t)dst[x2];
794
        }
795
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
796
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
797
    }
798
}
799

    
800
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
801
    int x, y, i;
802
    for(y=start; y<height; y+=2){
803
        for(x=0; x<width; x++){
804
            int64_t sum=0;
805
    
806
            for(i=0; i<n; i++){
807
                int y2= y + 2*i - n + 1;
808
                if     (y2<      0) y2= -y2;
809
                else if(y2>=height) y2= 2*height-y2-2;
810
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
811
            }
812
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
813
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
814
        }
815
    }
816
}
817

    
818
#define SCALEX 1
819
#define LX0 0
820
#define LX1 1
821

    
822
#if 0 // more accurate 9/7
823
#define N1 2
824
#define SHIFT1 14
825
#define COEFFS1 (int[]){-25987,-25987}
826
#define N2 2
827
#define SHIFT2 19
828
#define COEFFS2 (int[]){-27777,-27777}
829
#define N3 2
830
#define SHIFT3 15
831
#define COEFFS3 (int[]){28931,28931}
832
#define N4 2
833
#define SHIFT4 15
834
#define COEFFS4 (int[]){14533,14533}
835
#elif 1 // 13/7 CRF
836
#define N1 4
837
#define SHIFT1 4
838
#define COEFFS1 (int[]){1,-9,-9,1}
839
#define N2 4
840
#define SHIFT2 4
841
#define COEFFS2 (int[]){-1,5,5,-1}
842
#define N3 0
843
#define SHIFT3 1
844
#define COEFFS3 NULL
845
#define N4 0
846
#define SHIFT4 1
847
#define COEFFS4 NULL
848
#elif 1 // 3/5
849
#define LX0 1
850
#define LX1 0
851
#define SCALEX 0.5
852
#define N1 2
853
#define SHIFT1 1
854
#define COEFFS1 (int[]){1,1}
855
#define N2 2
856
#define SHIFT2 2
857
#define COEFFS2 (int[]){-1,-1}
858
#define N3 0
859
#define SHIFT3 0
860
#define COEFFS3 NULL
861
#define N4 0
862
#define SHIFT4 0
863
#define COEFFS4 NULL
864
#elif 1 // 11/5 
865
#define N1 0
866
#define SHIFT1 1
867
#define COEFFS1 NULL
868
#define N2 2
869
#define SHIFT2 2
870
#define COEFFS2 (int[]){-1,-1}
871
#define N3 2
872
#define SHIFT3 0
873
#define COEFFS3 (int[]){-1,-1}
874
#define N4 4
875
#define SHIFT4 7
876
#define COEFFS4 (int[]){-5,29,29,-5}
877
#define SCALEX 4
878
#elif 1 // 9/7 CDF
879
#define N1 2
880
#define SHIFT1 7
881
#define COEFFS1 (int[]){-203,-203}
882
#define N2 2
883
#define SHIFT2 12
884
#define COEFFS2 (int[]){-217,-217}
885
#define N3 2
886
#define SHIFT3 7
887
#define COEFFS3 (int[]){113,113}
888
#define N4 2
889
#define SHIFT4 9
890
#define COEFFS4 (int[]){227,227}
891
#define SCALEX 1
892
#elif 1 // 7/5 CDF
893
#define N1 0
894
#define SHIFT1 1
895
#define COEFFS1 NULL
896
#define N2 2
897
#define SHIFT2 2
898
#define COEFFS2 (int[]){-1,-1}
899
#define N3 2
900
#define SHIFT3 0
901
#define COEFFS3 (int[]){-1,-1}
902
#define N4 2
903
#define SHIFT4 4
904
#define COEFFS4 (int[]){3,3}
905
#elif 1 // 9/7 MN
906
#define N1 4
907
#define SHIFT1 4
908
#define COEFFS1 (int[]){1,-9,-9,1}
909
#define N2 2
910
#define SHIFT2 2
911
#define COEFFS2 (int[]){1,1}
912
#define N3 0
913
#define SHIFT3 1
914
#define COEFFS3 NULL
915
#define N4 0
916
#define SHIFT4 1
917
#define COEFFS4 NULL
918
#else // 13/7 CRF
919
#define N1 4
920
#define SHIFT1 4
921
#define COEFFS1 (int[]){1,-9,-9,1}
922
#define N2 4
923
#define SHIFT2 4
924
#define COEFFS2 (int[]){-1,5,5,-1}
925
#define N3 0
926
#define SHIFT3 1
927
#define COEFFS3 NULL
928
#define N4 0
929
#define SHIFT4 1
930
#define COEFFS4 NULL
931
#endif
932
static void horizontal_decomposeX(DWTELEM *b, int width){
933
    DWTELEM temp[width];
934
    const int width2= width>>1;
935
    const int w2= (width+1)>>1;
936
    int A1,A2,A3,A4, x;
937

    
938
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
939
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
940
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
941
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
942
    
943
    for(x=0; x<width2; x++){
944
        temp[x   ]= b[2*x    ];
945
        temp[x+w2]= b[2*x + 1];
946
    }
947
    if(width&1)
948
        temp[x   ]= b[2*x    ];
949
    memcpy(b, temp, width*sizeof(int));
950
}
951

    
952
static void horizontal_composeX(DWTELEM *b, int width){
953
    DWTELEM temp[width];
954
    const int width2= width>>1;
955
    int A1,A2,A3,A4, x;
956
    const int w2= (width+1)>>1;
957

    
958
    memcpy(temp, b, width*sizeof(int));
959
    for(x=0; x<width2; x++){
960
        b[2*x    ]= temp[x   ];
961
        b[2*x + 1]= temp[x+w2];
962
    }
963
    if(width&1)
964
        b[2*x    ]= temp[x   ];
965

    
966
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
967
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
968
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
969
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
970
}
971

    
972
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
973
    int x, y;
974
  
975
    for(y=0; y<height; y++){
976
        for(x=0; x<width; x++){
977
            buffer[y*stride + x] *= SCALEX;
978
        }
979
    }
980

    
981
    for(y=0; y<height; y++){
982
        horizontal_decomposeX(buffer + y*stride, width);
983
    }
984
    
985
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
986
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
987
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
988
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
989
}
990

    
991
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
992
    int x, y;
993
  
994
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
995
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
996
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
997
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
998

    
999
    for(y=0; y<height; y++){
1000
        horizontal_composeX(buffer + y*stride, width);
1001
    }
1002

    
1003
    for(y=0; y<height; y++){
1004
        for(x=0; x<width; x++){
1005
            buffer[y*stride + x] /= SCALEX;
1006
        }
1007
    }
1008
}
1009

    
1010
static void horizontal_decompose53i(DWTELEM *b, int width){
1011
    DWTELEM temp[width];
1012
    const int width2= width>>1;
1013
    int A1,A2,A3,A4, x;
1014
    const int w2= (width+1)>>1;
1015

    
1016
    for(x=0; x<width2; x++){
1017
        temp[x   ]= b[2*x    ];
1018
        temp[x+w2]= b[2*x + 1];
1019
    }
1020
    if(width&1)
1021
        temp[x   ]= b[2*x    ];
1022
#if 0
1023
    A2= temp[1       ];
1024
    A4= temp[0       ];
1025
    A1= temp[0+width2];
1026
    A1 -= (A2 + A4)>>1;
1027
    A4 += (A1 + 1)>>1;
1028
    b[0+width2] = A1;
1029
    b[0       ] = A4;
1030
    for(x=1; x+1<width2; x+=2){
1031
        A3= temp[x+width2];
1032
        A4= temp[x+1     ];
1033
        A3 -= (A2 + A4)>>1;
1034
        A2 += (A1 + A3 + 2)>>2;
1035
        b[x+width2] = A3;
1036
        b[x       ] = A2;
1037

1038
        A1= temp[x+1+width2];
1039
        A2= temp[x+2       ];
1040
        A1 -= (A2 + A4)>>1;
1041
        A4 += (A1 + A3 + 2)>>2;
1042
        b[x+1+width2] = A1;
1043
        b[x+1       ] = A4;
1044
    }
1045
    A3= temp[width-1];
1046
    A3 -= A2;
1047
    A2 += (A1 + A3 + 2)>>2;
1048
    b[width -1] = A3;
1049
    b[width2-1] = A2;
1050
#else        
1051
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1052
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
1053
#endif
1054
}
1055

    
1056
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1057
    int i;
1058
    
1059
    for(i=0; i<width; i++){
1060
        b1[i] -= (b0[i] + b2[i])>>1;
1061
    }
1062
}
1063

    
1064
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1065
    int i;
1066
    
1067
    for(i=0; i<width; i++){
1068
        b1[i] += (b0[i] + b2[i] + 2)>>2;
1069
    }
1070
}
1071

    
1072
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1073
    int y;
1074
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1075
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
1076
  
1077
    for(y=-2; y<height; y+=2){
1078
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1079
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1080

    
1081
{START_TIMER
1082
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
1083
        if(y+2 < height) horizontal_decompose53i(b3, width);
1084
STOP_TIMER("horizontal_decompose53i")}
1085
        
1086
{START_TIMER
1087
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1088
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1089
STOP_TIMER("vertical_decompose53i*")}
1090
        
1091
        b0=b2;
1092
        b1=b3;
1093
    }
1094
}
1095

    
1096
#define liftS lift
1097
#define lift5 lift
1098
#if 1
1099
#define W_AM 3
1100
#define W_AO 0
1101
#define W_AS 1
1102

    
1103
#undef liftS
1104
#define W_BM 1
1105
#define W_BO 8
1106
#define W_BS 4
1107

    
1108
#define W_CM 1
1109
#define W_CO 0
1110
#define W_CS 0
1111

    
1112
#define W_DM 3
1113
#define W_DO 4
1114
#define W_DS 3
1115
#elif 0
1116
#define W_AM 55
1117
#define W_AO 16
1118
#define W_AS 5
1119

    
1120
#define W_BM 3
1121
#define W_BO 32
1122
#define W_BS 6
1123

    
1124
#define W_CM 127
1125
#define W_CO 64
1126
#define W_CS 7
1127

    
1128
#define W_DM 7
1129
#define W_DO 8
1130
#define W_DS 4
1131
#elif 0
1132
#define W_AM 97
1133
#define W_AO 32
1134
#define W_AS 6
1135

    
1136
#define W_BM 63
1137
#define W_BO 512
1138
#define W_BS 10
1139

    
1140
#define W_CM 13
1141
#define W_CO 8
1142
#define W_CS 4
1143

    
1144
#define W_DM 15
1145
#define W_DO 16
1146
#define W_DS 5
1147

    
1148
#else
1149

    
1150
#define W_AM 203
1151
#define W_AO 64
1152
#define W_AS 7
1153

    
1154
#define W_BM 217
1155
#define W_BO 2048
1156
#define W_BS 12
1157

    
1158
#define W_CM 113
1159
#define W_CO 64
1160
#define W_CS 7
1161

    
1162
#define W_DM 227
1163
#define W_DO 128
1164
#define W_DS 9
1165
#endif
1166
static void horizontal_decompose97i(DWTELEM *b, int width){
1167
    DWTELEM temp[width];
1168
    const int w2= (width+1)>>1;
1169

    
1170
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1171
    liftS(temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1172
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1173
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1174
}
1175

    
1176

    
1177
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1178
    int i;
1179
    
1180
    for(i=0; i<width; i++){
1181
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1182
    }
1183
}
1184

    
1185
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1186
    int i;
1187
    
1188
    for(i=0; i<width; i++){
1189
#ifdef lift5
1190
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1191
#else
1192
        int r= 3*(b0[i] + b2[i]);
1193
        r+= r>>4;
1194
        r+= r>>8;
1195
        b1[i] += (r+W_CO)>>W_CS;
1196
#endif
1197
    }
1198
}
1199

    
1200
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1201
    int i;
1202
    
1203
    for(i=0; i<width; i++){
1204
#ifdef liftS
1205
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1206
#else
1207
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1208
#endif
1209
    }
1210
}
1211

    
1212
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1213
    int i;
1214
    
1215
    for(i=0; i<width; i++){
1216
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1217
    }
1218
}
1219

    
1220
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1221
    int y;
1222
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1223
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1224
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1225
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1226
  
1227
    for(y=-4; y<height; y+=2){
1228
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1229
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1230

    
1231
{START_TIMER
1232
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1233
        if(y+4 < height) horizontal_decompose97i(b5, width);
1234
if(width>400){
1235
STOP_TIMER("horizontal_decompose97i")
1236
}}
1237
        
1238
{START_TIMER
1239
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1240
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1241
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1242
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1243

    
1244
if(width>400){
1245
STOP_TIMER("vertical_decompose97i")
1246
}}
1247
        
1248
        b0=b2;
1249
        b1=b3;
1250
        b2=b4;
1251
        b3=b5;
1252
    }
1253
}
1254

    
1255
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1256
    int level;
1257
    
1258
    for(level=0; level<decomposition_count; level++){
1259
        switch(type){
1260
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1261
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1262
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1263
        }
1264
    }
1265
}
1266

    
1267
static void horizontal_compose53i(DWTELEM *b, int width){
1268
    DWTELEM temp[width];
1269
    const int width2= width>>1;
1270
    const int w2= (width+1)>>1;
1271
    int A1,A2,A3,A4, x;
1272

    
1273
#if 0
1274
    A2= temp[1       ];
1275
    A4= temp[0       ];
1276
    A1= temp[0+width2];
1277
    A1 -= (A2 + A4)>>1;
1278
    A4 += (A1 + 1)>>1;
1279
    b[0+width2] = A1;
1280
    b[0       ] = A4;
1281
    for(x=1; x+1<width2; x+=2){
1282
        A3= temp[x+width2];
1283
        A4= temp[x+1     ];
1284
        A3 -= (A2 + A4)>>1;
1285
        A2 += (A1 + A3 + 2)>>2;
1286
        b[x+width2] = A3;
1287
        b[x       ] = A2;
1288

1289
        A1= temp[x+1+width2];
1290
        A2= temp[x+2       ];
1291
        A1 -= (A2 + A4)>>1;
1292
        A4 += (A1 + A3 + 2)>>2;
1293
        b[x+1+width2] = A1;
1294
        b[x+1       ] = A4;
1295
    }
1296
    A3= temp[width-1];
1297
    A3 -= A2;
1298
    A2 += (A1 + A3 + 2)>>2;
1299
    b[width -1] = A3;
1300
    b[width2-1] = A2;
1301
#else   
1302
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1303
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1304
#endif
1305
    for(x=0; x<width2; x++){
1306
        b[2*x    ]= temp[x   ];
1307
        b[2*x + 1]= temp[x+w2];
1308
    }
1309
    if(width&1)
1310
        b[2*x    ]= temp[x   ];
1311
}
1312

    
1313
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1314
    int i;
1315
    
1316
    for(i=0; i<width; i++){
1317
        b1[i] += (b0[i] + b2[i])>>1;
1318
    }
1319
}
1320

    
1321
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1322
    int i;
1323
    
1324
    for(i=0; i<width; i++){
1325
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1326
    }
1327
}
1328

    
1329
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1330
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1331
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1332
    cs->y = -1;
1333
}
1334

    
1335
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1336
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1337
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1338
    cs->y = -1;
1339
}
1340

    
1341
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1342
    int y= cs->y;
1343
    int mirror0 = mirror(y-1, height-1);
1344
    int mirror1 = mirror(y  , height-1);
1345
    int mirror2 = mirror(y+1, height-1);
1346
    int mirror3 = mirror(y+2, height-1);
1347
    
1348
    DWTELEM *b0= cs->b0;
1349
    DWTELEM *b1= cs->b1;
1350
    DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1351
    DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1352

    
1353
{START_TIMER
1354
        if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1355
        if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1356
STOP_TIMER("vertical_compose53i*")}
1357

    
1358
{START_TIMER
1359
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1360
        if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1361
STOP_TIMER("horizontal_compose53i")}
1362

    
1363
    cs->b0 = b2;
1364
    cs->b1 = b3;
1365
    cs->y += 2;
1366
}
1367

    
1368
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1369
    int y= cs->y;
1370
    DWTELEM *b0= cs->b0;
1371
    DWTELEM *b1= cs->b1;
1372
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1373
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1374

    
1375
{START_TIMER
1376
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1377
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1378
STOP_TIMER("vertical_compose53i*")}
1379

    
1380
{START_TIMER
1381
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1382
        if(b0 <= b2) horizontal_compose53i(b1, width);
1383
STOP_TIMER("horizontal_compose53i")}
1384

    
1385
    cs->b0 = b2;
1386
    cs->b1 = b3;
1387
    cs->y += 2;
1388
}
1389

    
1390
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1391
    dwt_compose_t cs;
1392
    spatial_compose53i_init(&cs, buffer, height, stride);
1393
    while(cs.y <= height)
1394
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1395
}   
1396

    
1397
 
1398
static void horizontal_compose97i(DWTELEM *b, int width){
1399
    DWTELEM temp[width];
1400
    const int w2= (width+1)>>1;
1401

    
1402
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1403
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1404
    liftS(b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1405
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1406
}
1407

    
1408
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1409
    int i;
1410
    
1411
    for(i=0; i<width; i++){
1412
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1413
    }
1414
}
1415

    
1416
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1417
    int i;
1418
    
1419
    for(i=0; i<width; i++){
1420
#ifdef lift5
1421
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1422
#else
1423
        int r= 3*(b0[i] + b2[i]);
1424
        r+= r>>4;
1425
        r+= r>>8;
1426
        b1[i] -= (r+W_CO)>>W_CS;
1427
#endif
1428
    }
1429
}
1430

    
1431
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1432
    int i;
1433
    
1434
    for(i=0; i<width; i++){
1435
#ifdef liftS
1436
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1437
#else
1438
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1439
#endif
1440
    }
1441
}
1442

    
1443
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1444
    int i;
1445
    
1446
    for(i=0; i<width; i++){
1447
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1448
    }
1449
}
1450

    
1451
static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1452
    int i;
1453
    
1454
    for(i=0; i<width; i++){
1455
        int r;
1456
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1457
#ifdef lift5
1458
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1459
#else
1460
        r= 3*(b2[i] + b4[i]);
1461
        r+= r>>4;
1462
        r+= r>>8;
1463
        b3[i] -= (r+W_CO)>>W_CS;
1464
#endif
1465
#ifdef liftS
1466
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1467
#else
1468
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1469
#endif
1470
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1471
    }
1472
}
1473

    
1474
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1475
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1476
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1477
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1478
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1479
    cs->y = -3;
1480
}
1481

    
1482
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1483
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1484
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1485
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1486
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1487
    cs->y = -3;
1488
}
1489

    
1490
static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1491
    int y = cs->y;
1492
    
1493
    int mirror0 = mirror(y - 1, height - 1);
1494
    int mirror1 = mirror(y + 0, height - 1);
1495
    int mirror2 = mirror(y + 1, height - 1);
1496
    int mirror3 = mirror(y + 2, height - 1);
1497
    int mirror4 = mirror(y + 3, height - 1);
1498
    int mirror5 = mirror(y + 4, height - 1);
1499
    DWTELEM *b0= cs->b0;
1500
    DWTELEM *b1= cs->b1;
1501
    DWTELEM *b2= cs->b2;
1502
    DWTELEM *b3= cs->b3;
1503
    DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1504
    DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1505
        
1506
{START_TIMER
1507
    if(y>0 && y+4<height){
1508
        vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1509
    }else{
1510
        if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1511
        if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1512
        if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1513
        if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1514
    }
1515
if(width>400){
1516
STOP_TIMER("vertical_compose97i")}}
1517

    
1518
{START_TIMER
1519
        if(y-1>=  0) horizontal_compose97i(b0, width);
1520
        if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1521
if(width>400 && mirror0 <= mirror2){
1522
STOP_TIMER("horizontal_compose97i")}}
1523

    
1524
    cs->b0=b2;
1525
    cs->b1=b3;
1526
    cs->b2=b4;
1527
    cs->b3=b5;
1528
    cs->y += 2;
1529
}
1530

    
1531
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1532
    int y = cs->y;
1533
    DWTELEM *b0= cs->b0;
1534
    DWTELEM *b1= cs->b1;
1535
    DWTELEM *b2= cs->b2;
1536
    DWTELEM *b3= cs->b3;
1537
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1538
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1539

    
1540
        if(stride == width && y+4 < height && 0){ 
1541
            int x;
1542
            for(x=0; x<width/2; x++)
1543
                b5[x] += 64*2;
1544
            for(; x<width; x++)
1545
                b5[x] += 169*2;
1546
        }
1547
        
1548
{START_TIMER
1549
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1550
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1551
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1552
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1553
if(width>400){
1554
STOP_TIMER("vertical_compose97i")}}
1555

    
1556
{START_TIMER
1557
        if(y-1>=  0) horizontal_compose97i(b0, width);
1558
        if(b0 <= b2) horizontal_compose97i(b1, width);
1559
if(width>400 && b0 <= b2){
1560
STOP_TIMER("horizontal_compose97i")}}
1561

    
1562
    cs->b0=b2;
1563
    cs->b1=b3;
1564
    cs->b2=b4;
1565
    cs->b3=b5;
1566
    cs->y += 2;
1567
}
1568

    
1569
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1570
    dwt_compose_t cs;
1571
    spatial_compose97i_init(&cs, buffer, height, stride);
1572
    while(cs.y <= height)
1573
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1574
}
1575

    
1576
void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1577
    int level;
1578
    for(level=decomposition_count-1; level>=0; level--){
1579
        switch(type){
1580
        case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1581
        case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1582
        /* not slicified yet */
1583
        case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1584
          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1585
        }
1586
    }
1587
}
1588

    
1589
void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1590
    int level;
1591
    for(level=decomposition_count-1; level>=0; level--){
1592
        switch(type){
1593
        case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1594
        case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1595
        /* not slicified yet */
1596
        case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1597
        }
1598
    }
1599
}
1600

    
1601
void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1602
    const int support = type==1 ? 3 : 5;
1603
    int level;
1604
    if(type==2) return;
1605

    
1606
    for(level=decomposition_count-1; level>=0; level--){
1607
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1608
            switch(type){
1609
            case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1610
                    break;
1611
            case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1612
                    break;
1613
            case 2: break;
1614
            }
1615
        }
1616
    }
1617
}
1618

    
1619
void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1620
    const int support = type==1 ? 3 : 5;
1621
    int level;
1622
    if(type==2) return;
1623

    
1624
    for(level=decomposition_count-1; level>=0; level--){
1625
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1626
            switch(type){
1627
            case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1628
                    break;
1629
            case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1630
                    break;
1631
            case 2: break;
1632
            }
1633
        }
1634
    }
1635
}
1636

    
1637
void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1638
    if(type==2){
1639
        int level;
1640
        for(level=decomposition_count-1; level>=0; level--)
1641
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
1642
    }else{
1643
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1644
        int y;
1645
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1646
        for(y=0; y<height; y+=4)
1647
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1648
    }
1649
}
1650

    
1651
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1652
    const int w= b->width;
1653
    const int h= b->height;
1654
    int x, y;
1655

    
1656
    if(1){
1657
        int run=0;
1658
        int runs[w*h];
1659
        int run_index=0;
1660
                
1661
        for(y=0; y<h; y++){
1662
            for(x=0; x<w; x++){
1663
                int v, p=0;
1664
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1665
                v= src[x + y*stride];
1666

    
1667
                if(y){
1668
                    t= src[x + (y-1)*stride];
1669
                    if(x){
1670
                        lt= src[x - 1 + (y-1)*stride];
1671
                    }
1672
                    if(x + 1 < w){
1673
                        rt= src[x + 1 + (y-1)*stride];
1674
                    }
1675
                }
1676
                if(x){
1677
                    l= src[x - 1 + y*stride];
1678
                    /*if(x > 1){
1679
                        if(orientation==1) ll= src[y + (x-2)*stride];
1680
                        else               ll= src[x - 2 + y*stride];
1681
                    }*/
1682
                }
1683
                if(parent){
1684
                    int px= x>>1;
1685
                    int py= y>>1;
1686
                    if(px<b->parent->width && py<b->parent->height) 
1687
                        p= parent[px + py*2*stride];
1688
                }
1689
                if(!(/*ll|*/l|lt|t|rt|p)){
1690
                    if(v){
1691
                        runs[run_index++]= run;
1692
                        run=0;
1693
                    }else{
1694
                        run++;
1695
                    }
1696
                }
1697
            }
1698
        }
1699
        runs[run_index++]= run;
1700
        run_index=0;
1701
        run= runs[run_index++];
1702

    
1703
        put_symbol2(&s->c, b->state[1], run, 3);
1704
        
1705
        for(y=0; y<h; y++){
1706
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1707
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1708
                return -1;
1709
            }
1710
            for(x=0; x<w; x++){
1711
                int v, p=0;
1712
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1713
                v= src[x + y*stride];
1714

    
1715
                if(y){
1716
                    t= src[x + (y-1)*stride];
1717
                    if(x){
1718
                        lt= src[x - 1 + (y-1)*stride];
1719
                    }
1720
                    if(x + 1 < w){
1721
                        rt= src[x + 1 + (y-1)*stride];
1722
                    }
1723
                }
1724
                if(x){
1725
                    l= src[x - 1 + y*stride];
1726
                    /*if(x > 1){
1727
                        if(orientation==1) ll= src[y + (x-2)*stride];
1728
                        else               ll= src[x - 2 + y*stride];
1729
                    }*/
1730
                }
1731
                if(parent){
1732
                    int px= x>>1;
1733
                    int py= y>>1;
1734
                    if(px<b->parent->width && py<b->parent->height) 
1735
                        p= parent[px + py*2*stride];
1736
                }
1737
                if(/*ll|*/l|lt|t|rt|p){
1738
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1739

    
1740
                    put_rac(&s->c, &b->state[0][context], !!v);
1741
                }else{
1742
                    if(!run){
1743
                        run= runs[run_index++];
1744

    
1745
                        put_symbol2(&s->c, b->state[1], run, 3);
1746
                        assert(v);
1747
                    }else{
1748
                        run--;
1749
                        assert(!v);
1750
                    }
1751
                }
1752
                if(v){
1753
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1754
                    int l2= 2*ABS(l) + (l<0);
1755
                    int t2= 2*ABS(t) + (t<0);
1756

    
1757
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1758
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1759
                }
1760
            }
1761
        }
1762
    }
1763
    return 0;
1764
}
1765

    
1766
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
1767
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1768
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1769
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1770
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1771
}
1772

    
1773
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1774
    const int w= b->width;
1775
    const int h= b->height;
1776
    int x,y;
1777
    
1778
    if(1){
1779
        int run;
1780
        x_and_coeff *xc= b->x_coeff;
1781
        x_and_coeff *prev_xc= NULL;
1782
        x_and_coeff *prev2_xc= xc;
1783
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1784
        x_and_coeff *prev_parent_xc= parent_xc;
1785

    
1786
        run= get_symbol2(&s->c, b->state[1], 3);
1787
        for(y=0; y<h; y++){
1788
            int v=0;
1789
            int lt=0, t=0, rt=0;
1790

    
1791
            if(y && prev_xc->x == 0){
1792
                rt= prev_xc->coeff;
1793
            }
1794
            for(x=0; x<w; x++){
1795
                int p=0;
1796
                const int l= v;
1797
                
1798
                lt= t; t= rt;
1799

    
1800
                if(y){
1801
                    if(prev_xc->x <= x)
1802
                        prev_xc++;
1803
                    if(prev_xc->x == x + 1)
1804
                        rt= prev_xc->coeff;
1805
                    else
1806
                        rt=0;
1807
                }
1808
                if(parent_xc){
1809
                    if(x>>1 > parent_xc->x){
1810
                        parent_xc++;
1811
                    }
1812
                    if(x>>1 == parent_xc->x){
1813
                        p= parent_xc->coeff;
1814
                    }
1815
                }
1816
                if(/*ll|*/l|lt|t|rt|p){
1817
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1818

    
1819
                    v=get_rac(&s->c, &b->state[0][context]);
1820
                    if(v){
1821
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1822
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1823
                        
1824
                        xc->x=x;
1825
                        (xc++)->coeff= v;
1826
                    }
1827
                }else{
1828
                    if(!run){
1829
                        run= get_symbol2(&s->c, b->state[1], 3);
1830
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1831
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1832
                        
1833
                        xc->x=x;
1834
                        (xc++)->coeff= v;
1835
                    }else{
1836
                        int max_run;
1837
                        run--;
1838
                        v=0;
1839

    
1840
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1841
                        else  max_run= FFMIN(run, w-x-1);
1842
                        if(parent_xc)
1843
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1844
                        x+= max_run;
1845
                        run-= max_run;
1846
                    }
1847
                }
1848
            }
1849
            (xc++)->x= w+1; //end marker
1850
            prev_xc= prev2_xc;
1851
            prev2_xc= xc;
1852
            
1853
            if(parent_xc){
1854
                if(y&1){
1855
                    while(parent_xc->x != parent->width+1)
1856
                        parent_xc++;
1857
                    parent_xc++;
1858
                    prev_parent_xc= parent_xc;
1859
                }else{
1860
                    parent_xc= prev_parent_xc;
1861
                }
1862
            }
1863
        }
1864

    
1865
        (xc++)->x= w+1; //end marker
1866
    }
1867
}
1868

    
1869
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1870
    const int w= b->width;
1871
    int x,y;
1872
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1873
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1874
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1875
    int new_index = 0;
1876
    
1877
    START_TIMER
1878

    
1879
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1880
        qadd= 0;
1881
        qmul= 1<<QEXPSHIFT;
1882
    }
1883

    
1884
    /* If we are on the second or later slice, restore our index. */
1885
    if (start_y != 0)
1886
        new_index = save_state[0];
1887

    
1888
        
1889
    for(y=start_y; y<h; y++){
1890
        int x = 0;
1891
        int v;
1892
        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1893
        memset(line, 0, b->width*sizeof(DWTELEM));
1894
        v = b->x_coeff[new_index].coeff;
1895
        x = b->x_coeff[new_index++].x;
1896
        while(x < w)
1897
        {
1898
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1899
            register int u= -(v&1);
1900
            line[x] = (t^u) - u;
1901

    
1902
            v = b->x_coeff[new_index].coeff;
1903
            x = b->x_coeff[new_index++].x;
1904
        }
1905
    }
1906
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1907
        STOP_TIMER("decode_subband")
1908
    }
1909
        
1910
    /* Save our variables for the next slice. */
1911
    save_state[0] = new_index;
1912
        
1913
    return;
1914
}
1915

    
1916
static void reset_contexts(SnowContext *s){
1917
    int plane_index, level, orientation;
1918

    
1919
    for(plane_index=0; plane_index<3; plane_index++){
1920
        for(level=0; level<s->spatial_decomposition_count; level++){
1921
            for(orientation=level ? 1:0; orientation<4; orientation++){
1922
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1923
            }
1924
        }
1925
    }
1926
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1927
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1928
}
1929

    
1930
static int alloc_blocks(SnowContext *s){
1931
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1932
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1933
    
1934
    s->b_width = w;
1935
    s->b_height= h;
1936
    
1937
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1938
    return 0;
1939
}
1940

    
1941
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1942
    uint8_t *bytestream= d->bytestream;
1943
    uint8_t *bytestream_start= d->bytestream_start;
1944
    *d= *s;
1945
    d->bytestream= bytestream;
1946
    d->bytestream_start= bytestream_start;
1947
}
1948

    
1949
//near copy & paste from dsputil, FIXME
1950
static int pix_sum(uint8_t * pix, int line_size, int w)
1951
{
1952
    int s, i, j;
1953

    
1954
    s = 0;
1955
    for (i = 0; i < w; i++) {
1956
        for (j = 0; j < w; j++) {
1957
            s += pix[0];
1958
            pix ++;
1959
        }
1960
        pix += line_size - w;
1961
    }
1962
    return s;
1963
}
1964

    
1965
//near copy & paste from dsputil, FIXME
1966
static int pix_norm1(uint8_t * pix, int line_size, int w)
1967
{
1968
    int s, i, j;
1969
    uint32_t *sq = squareTbl + 256;
1970

    
1971
    s = 0;
1972
    for (i = 0; i < w; i++) {
1973
        for (j = 0; j < w; j ++) {
1974
            s += sq[pix[0]];
1975
            pix ++;
1976
        }
1977
        pix += line_size - w;
1978
    }
1979
    return s;
1980
}
1981

    
1982
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1983
    const int w= s->b_width << s->block_max_depth;
1984
    const int rem_depth= s->block_max_depth - level;
1985
    const int index= (x + y*w) << rem_depth;
1986
    const int block_w= 1<<rem_depth;
1987
    BlockNode block;
1988
    int i,j;
1989
    
1990
    block.color[0]= l;
1991
    block.color[1]= cb;
1992
    block.color[2]= cr;
1993
    block.mx= mx;
1994
    block.my= my;
1995
    block.type= type;
1996
    block.level= level;
1997

    
1998
    for(j=0; j<block_w; j++){
1999
        for(i=0; i<block_w; i++){
2000
            s->block[index + i + j*w]= block;
2001
        }
2002
    }
2003
}
2004

    
2005
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2006
    const int offset[3]= {
2007
          y*c->  stride + x,
2008
        ((y*c->uvstride + x)>>1),
2009
        ((y*c->uvstride + x)>>1),
2010
    };
2011
    int i;
2012
    for(i=0; i<3; i++){
2013
        c->src[0][i]= src [i];
2014
        c->ref[0][i]= ref [i] + offset[i];
2015
    }
2016
    assert(!ref_index);
2017
}
2018

    
2019
//FIXME copy&paste
2020
#define P_LEFT P[1]
2021
#define P_TOP P[2]
2022
#define P_TOPRIGHT P[3]
2023
#define P_MEDIAN P[4]
2024
#define P_MV1 P[9]
2025
#define FLAG_QPEL   1 //must be 1
2026

    
2027
static int encode_q_branch(SnowContext *s, int level, int x, int y){
2028
    uint8_t p_buffer[1024];
2029
    uint8_t i_buffer[1024];
2030
    uint8_t p_state[sizeof(s->block_state)];
2031
    uint8_t i_state[sizeof(s->block_state)];
2032
    RangeCoder pc, ic;
2033
    uint8_t *pbbak= s->c.bytestream;
2034
    uint8_t *pbbak_start= s->c.bytestream_start;
2035
    int score, score2, iscore, i_len, p_len, block_s, sum;
2036
    const int w= s->b_width  << s->block_max_depth;
2037
    const int h= s->b_height << s->block_max_depth;
2038
    const int rem_depth= s->block_max_depth - level;
2039
    const int index= (x + y*w) << rem_depth;
2040
    const int block_w= 1<<(LOG2_MB_SIZE - level);
2041
    static BlockNode null_block= { //FIXME add border maybe
2042
        .color= {128,128,128},
2043
        .mx= 0,
2044
        .my= 0,
2045
        .type= 0,
2046
        .level= 0,
2047
    };
2048
    int trx= (x+1)<<rem_depth;
2049
    int try= (y+1)<<rem_depth;
2050
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2051
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2052
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2053
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2054
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2055
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2056
    int pl = left->color[0];
2057
    int pcb= left->color[1];
2058
    int pcr= left->color[2];
2059
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2060
    int pmy= mid_pred(left->my, top->my, tr->my);
2061
    int mx=0, my=0;
2062
    int l,cr,cb, i;
2063
    const int stride= s->current_picture.linesize[0];
2064
    const int uvstride= s->current_picture.linesize[1];
2065
    const int instride= s->input_picture.linesize[0];
2066
    const int uvinstride= s->input_picture.linesize[1];
2067
    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
2068
    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
2069
    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
2070
    uint8_t current_mb[3][stride*block_w];
2071
    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
2072
    int P[10][2];
2073
    int16_t last_mv[3][2];
2074
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2075
    const int shift= 1+qpel;
2076
    MotionEstContext *c= &s->m.me;
2077
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2078
    int my_context= av_log2(2*ABS(left->my - top->my));
2079
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2080

    
2081
    assert(sizeof(s->block_state) >= 256);
2082
    if(s->keyframe){
2083
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2084
        return 0;
2085
    }
2086

    
2087
    //FIXME optimize
2088
    for(i=0; i<block_w; i++)
2089
        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
2090
    for(i=0; i<block_w>>1; i++)
2091
        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
2092
    for(i=0; i<block_w>>1; i++)
2093
        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
2094

    
2095
//    clip predictors / edge ?
2096

    
2097
    P_LEFT[0]= left->mx;
2098
    P_LEFT[1]= left->my;
2099
    P_TOP [0]= top->mx;
2100
    P_TOP [1]= top->my;
2101
    P_TOPRIGHT[0]= tr->mx;
2102
    P_TOPRIGHT[1]= tr->my;
2103
    
2104
    last_mv[0][0]= s->block[index].mx;
2105
    last_mv[0][1]= s->block[index].my;
2106
    last_mv[1][0]= right->mx;
2107
    last_mv[1][1]= right->my;
2108
    last_mv[2][0]= bottom->mx;
2109
    last_mv[2][1]= bottom->my;
2110
    
2111
    s->m.mb_stride=2;
2112
    s->m.mb_x= 
2113
    s->m.mb_y= 0;
2114
    s->m.me.skip= 0;
2115

    
2116
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2117
    
2118
    assert(s->m.me.  stride ==   stride);
2119
    assert(s->m.me.uvstride == uvstride);
2120
    
2121
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2122
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2123
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2124
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2125
    
2126
    c->xmin = - x*block_w - 16+2;
2127
    c->ymin = - y*block_w - 16+2;
2128
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2129
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2130

    
2131
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2132
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
2133
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2134
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2135
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2136
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2137
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2138

    
2139
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2140
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2141

    
2142
    if (!y) {
2143
        c->pred_x= P_LEFT[0];
2144
        c->pred_y= P_LEFT[1];
2145
    } else {
2146
        c->pred_x = P_MEDIAN[0];
2147
        c->pred_y = P_MEDIAN[1];
2148
    }
2149

    
2150
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
2151
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2152

    
2153
    assert(mx >= c->xmin);
2154
    assert(mx <= c->xmax);
2155
    assert(my >= c->ymin);
2156
    assert(my <= c->ymax);
2157
    
2158
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2159
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2160
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2161
                             
2162
  //  subpel search
2163
    pc= s->c;
2164
    pc.bytestream_start=
2165
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2166
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2167

    
2168
    if(level!=s->block_max_depth)
2169
        put_rac(&pc, &p_state[4 + s_context], 1);
2170
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2171
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2172
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2173
    p_len= pc.bytestream - pc.bytestream_start;
2174
    score += (s->lambda2*(p_len*8
2175
              + (pc.outstanding_count - s->c.outstanding_count)*8
2176
              + (-av_log2(pc.range)    + av_log2(s->c.range))
2177
             ))>>FF_LAMBDA_SHIFT;
2178

    
2179
    block_s= block_w*block_w;
2180
    sum = pix_sum(&current_mb[0][0], stride, block_w);
2181
    l= (sum + block_s/2)/block_s;
2182
    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
2183
    
2184
    block_s= block_w*block_w>>2;
2185
    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
2186
    cb= (sum + block_s/2)/block_s;
2187
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2188
    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
2189
    cr= (sum + block_s/2)/block_s;
2190
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2191

    
2192
    ic= s->c;
2193
    ic.bytestream_start=
2194
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2195
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2196
    if(level!=s->block_max_depth)
2197
        put_rac(&ic, &i_state[4 + s_context], 1);
2198
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2199
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2200
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2201
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2202
    i_len= ic.bytestream - ic.bytestream_start;
2203
    iscore += (s->lambda2*(i_len*8
2204
              + (ic.outstanding_count - s->c.outstanding_count)*8
2205
              + (-av_log2(ic.range)    + av_log2(s->c.range))
2206
             ))>>FF_LAMBDA_SHIFT;
2207

    
2208
//    assert(score==256*256*256*64-1);
2209
    assert(iscore < 255*255*256 + s->lambda2*10);
2210
    assert(iscore >= 0);
2211
    assert(l>=0 && l<=255);
2212
    assert(pl>=0 && pl<=255);
2213

    
2214
    if(level==0){
2215
        int varc= iscore >> 8;
2216
        int vard= score >> 8;
2217
        if (vard <= 64 || vard < varc)
2218
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2219
        else
2220
            c->scene_change_score+= s->m.qscale;
2221
    }
2222
        
2223
    if(level!=s->block_max_depth){
2224
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2225
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2226
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2227
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2228
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2229
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2230
    
2231
        if(score2 < score && score2 < iscore)
2232
            return score2;
2233
    }
2234
    
2235
    if(iscore < score){
2236
        memcpy(pbbak, i_buffer, i_len);
2237
        s->c= ic;
2238
        s->c.bytestream_start= pbbak_start;
2239
        s->c.bytestream= pbbak + i_len;
2240
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2241
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2242
        return iscore;
2243
    }else{
2244
        memcpy(pbbak, p_buffer, p_len);
2245
        s->c= pc;
2246
        s->c.bytestream_start= pbbak_start;
2247
        s->c.bytestream= pbbak + p_len;
2248
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2249
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2250
        return score;
2251
    }
2252
}
2253

    
2254
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2255
    const int w= s->b_width << s->block_max_depth;
2256
    const int rem_depth= s->block_max_depth - level;
2257
    const int index= (x + y*w) << rem_depth;
2258
    static BlockNode null_block= { //FIXME add border maybe
2259
        .color= {128,128,128},
2260
        .mx= 0,
2261
        .my= 0,
2262
        .type= 0,
2263
        .level= 0,
2264
    };
2265
    int trx= (x+1)<<rem_depth;
2266
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2267
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2268
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2269
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2270
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2271
    
2272
    if(s->keyframe){
2273
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2274
        return;
2275
    }
2276

    
2277
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2278
        int type;
2279
        int l = left->color[0];
2280
        int cb= left->color[1];
2281
        int cr= left->color[2];
2282
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2283
        int my= mid_pred(left->my, top->my, tr->my);
2284
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2285
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2286
        
2287
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2288

    
2289
        if(type){
2290
            l += get_symbol(&s->c, &s->block_state[32], 1);
2291
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2292
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2293
        }else{
2294
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2295
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2296
        }
2297
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2298
    }else{
2299
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2300
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2301
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2302
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2303
    }
2304
}
2305

    
2306
static void encode_blocks(SnowContext *s){
2307
    int x, y;
2308
    int w= s->b_width;
2309
    int h= s->b_height;
2310

    
2311
    for(y=0; y<h; y++){
2312
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2313
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2314
            return;
2315
        }
2316
        for(x=0; x<w; x++){
2317
            encode_q_branch(s, 0, x, y);
2318
        }
2319
    }
2320
}
2321

    
2322
static void decode_blocks(SnowContext *s){
2323
    int x, y;
2324
    int w= s->b_width;
2325
    int h= s->b_height;
2326

    
2327
    for(y=0; y<h; y++){
2328
        for(x=0; x<w; x++){
2329
            decode_q_branch(s, 0, x, y);
2330
        }
2331
    }
2332
}
2333

    
2334
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2335
    int x, y;
2336
START_TIMER
2337
    for(y=0; y < b_h+5; y++){
2338
        for(x=0; x < b_w; x++){
2339
            int a0= src[x    ];
2340
            int a1= src[x + 1];
2341
            int a2= src[x + 2];
2342
            int a3= src[x + 3];
2343
            int a4= src[x + 4];
2344
            int a5= src[x + 5];
2345
//            int am= 9*(a1+a2) - (a0+a3);
2346
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2347
//            int am= 18*(a2+a3) - 2*(a1+a4);
2348
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2349
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2350

    
2351
//            if(b_w==16) am= 8*(a1+a2);
2352

    
2353
            if(dx<8) tmp[x]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2354
            else     tmp[x]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2355

    
2356
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2357
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2358
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2359
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2360
        }
2361
        tmp += stride;
2362
        src += stride;
2363
    }
2364
    tmp -= (b_h+5)*stride;
2365
    
2366
    for(y=0; y < b_h; y++){
2367
        for(x=0; x < b_w; x++){
2368
            int a0= tmp[x + 0*stride];
2369
            int a1= tmp[x + 1*stride];
2370
            int a2= tmp[x + 2*stride];
2371
            int a3= tmp[x + 3*stride];
2372
            int a4= tmp[x + 4*stride];
2373
            int a5= tmp[x + 5*stride];
2374
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2375
//            int am= 18*(a2+a3) - 2*(a1+a4);
2376
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2377
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2378
            
2379
//            if(b_w==16) am= 8*(a1+a2);
2380

    
2381
            if(dy<8) dst[x]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2382
            else     dst[x]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2383

    
2384
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2385
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2386
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2387
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2388
        }
2389
        dst += stride;
2390
        tmp += stride;
2391
    }
2392
STOP_TIMER("mc_block")
2393
}
2394

    
2395
#define mca(dx,dy,b_w)\
2396
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2397
    uint8_t tmp[stride*(b_w+5)];\
2398
    assert(h==b_w);\
2399
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2400
}
2401

    
2402
mca( 0, 0,16)
2403
mca( 8, 0,16)
2404
mca( 0, 8,16)
2405
mca( 8, 8,16)
2406
mca( 0, 0,8)
2407
mca( 8, 0,8)
2408
mca( 0, 8,8)
2409
mca( 8, 8,8)
2410

    
2411
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2412
    if(block->type){
2413
        int x, y;
2414
        const int color= block->color[plane_index];
2415
        for(y=0; y < b_h; y++){
2416
            for(x=0; x < b_w; x++){
2417
                dst[x + y*stride]= color;
2418
            }
2419
        }
2420
    }else{
2421
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2422
        int mx= block->mx*scale;
2423
        int my= block->my*scale;
2424
        const int dx= mx&15;
2425
        const int dy= my&15;
2426
        sx += (mx>>4) - 2;
2427
        sy += (my>>4) - 2;
2428
        src += sx + sy*stride;
2429
        if(   (unsigned)sx >= w - b_w - 4
2430
           || (unsigned)sy >= h - b_h - 4){
2431
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2432
            src= tmp + MB_SIZE;
2433
        }
2434
        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2435
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2436
        else
2437
            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2438
    }
2439
}
2440

    
2441
static always_inline int same_block(BlockNode *a, BlockNode *b){
2442
    return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
2443
}
2444

    
2445
//FIXME name clenup (b_w, block_w, b_width stuff)
2446
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2447
    DWTELEM * dst = NULL;
2448
    const int b_width = s->b_width  << s->block_max_depth;
2449
    const int b_height= s->b_height << s->block_max_depth;
2450
    const int b_stride= b_width;
2451
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2452
    BlockNode *rt= lt+1;
2453
    BlockNode *lb= lt+b_stride;
2454
    BlockNode *rb= lb+1;
2455
    uint8_t *block[4]; 
2456
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2457
    int x,y;
2458

    
2459
    if(b_x<0){
2460
        lt= rt;
2461
        lb= rb;
2462
    }else if(b_x + 1 >= b_width){
2463
        rt= lt;
2464
        rb= lb;
2465
    }
2466
    if(b_y<0){
2467
        lt= lb;
2468
        rt= rb;
2469
    }else if(b_y + 1 >= b_height){
2470
        lb= lt;
2471
        rb= rt;
2472
    }
2473
        
2474
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2475
        obmc -= src_x;
2476
        b_w += src_x;
2477
        src_x=0;
2478
    }else if(src_x + b_w > w){
2479
        b_w = w - src_x;
2480
    }
2481
    if(src_y<0){
2482
        obmc -= src_y*obmc_stride;
2483
        b_h += src_y;
2484
        src_y=0;
2485
    }else if(src_y + b_h> h){
2486
        b_h = h - src_y;
2487
    }
2488
    
2489
    if(b_w<=0 || b_h<=0) return;
2490

    
2491
assert(src_stride > 7*MB_SIZE);
2492
//    old_dst += src_x + src_y*dst_stride;
2493
    dst8+= src_x + src_y*src_stride;
2494
//    src += src_x + src_y*src_stride;
2495

    
2496
    block[0]= tmp+3*MB_SIZE;
2497
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2498

    
2499
    if(same_block(lt, rt)){
2500
        block[1]= block[0];
2501
    }else{
2502
        block[1]= tmp + 4*MB_SIZE;
2503
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2504
    }
2505
        
2506
    if(same_block(lt, lb)){
2507
        block[2]= block[0];
2508
    }else if(same_block(rt, lb)){
2509
        block[2]= block[1];
2510
    }else{
2511
        block[2]= tmp+5*MB_SIZE;
2512
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2513
    }
2514

    
2515
    if(same_block(lt, rb) ){
2516
        block[3]= block[0];
2517
    }else if(same_block(rt, rb)){
2518
        block[3]= block[1];
2519
    }else if(same_block(lb, rb)){
2520
        block[3]= block[2];
2521
    }else{
2522
        block[3]= tmp+6*MB_SIZE;
2523
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2524
    }
2525
#if 0
2526
    for(y=0; y<b_h; y++){
2527
        for(x=0; x<b_w; x++){
2528
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2529
            if(add) dst[x + y*dst_stride] += v;
2530
            else    dst[x + y*dst_stride] -= v;
2531
        }
2532
    }
2533
    for(y=0; y<b_h; y++){
2534
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2535
        for(x=0; x<b_w; x++){
2536
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2537
            if(add) dst[x + y*dst_stride] += v;
2538
            else    dst[x + y*dst_stride] -= v;
2539
        }
2540
    }
2541
    for(y=0; y<b_h; y++){
2542
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2543
        for(x=0; x<b_w; x++){
2544
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2545
            if(add) dst[x + y*dst_stride] += v;
2546
            else    dst[x + y*dst_stride] -= v;
2547
        }
2548
    }
2549
    for(y=0; y<b_h; y++){
2550
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2551
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2552
        for(x=0; x<b_w; x++){
2553
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2554
            if(add) dst[x + y*dst_stride] += v;
2555
            else    dst[x + y*dst_stride] -= v;
2556
        }
2557
    }
2558
#else
2559
{
2560

    
2561
    START_TIMER
2562
    
2563
    int block_index = 0;
2564
    for(y=0; y<b_h; y++){
2565
        //FIXME ugly missue of obmc_stride
2566
        uint8_t *obmc1= obmc + y*obmc_stride;
2567
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2568
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2569
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2570
        dst = slice_buffer_get_line(sb, src_y + y);
2571
        for(x=0; x<b_w; x++){
2572
            int v=   obmc1[x] * block[3][x + y*src_stride]
2573
                    +obmc2[x] * block[2][x + y*src_stride]
2574
                    +obmc3[x] * block[1][x + y*src_stride]
2575
                    +obmc4[x] * block[0][x + y*src_stride];
2576

    
2577
            v <<= 8 - LOG2_OBMC_MAX;
2578
            if(FRAC_BITS != 8){
2579
                v += 1<<(7 - FRAC_BITS);
2580
                v >>= 8 - FRAC_BITS;
2581
            }
2582
            if(add){
2583
//                v += old_dst[x + y*dst_stride];
2584
                v += dst[x + src_x];
2585
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2586
                if(v&(~255)) v= ~(v>>31);
2587
                dst8[x + y*src_stride] = v;
2588
            }else{
2589
//                old_dst[x + y*dst_stride] -= v;
2590
                dst[x + src_x] -= v;
2591
            }
2592
        }
2593
    }
2594
        STOP_TIMER("Inner add y block")
2595
}
2596
#endif
2597
}
2598

    
2599
//FIXME name clenup (b_w, block_w, b_width stuff)
2600
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2601
    const int b_width = s->b_width  << s->block_max_depth;
2602
    const int b_height= s->b_height << s->block_max_depth;
2603
    const int b_stride= b_width;
2604
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2605
    BlockNode *rt= lt+1;
2606
    BlockNode *lb= lt+b_stride;
2607
    BlockNode *rb= lb+1;
2608
    uint8_t *block[4]; 
2609
    uint8_t tmp[src_stride*(b_h+5)]; //FIXME align
2610
    int x,y;
2611

    
2612
    if(b_x<0){
2613
        lt= rt;
2614
        lb= rb;
2615
    }else if(b_x + 1 >= b_width){
2616
        rt= lt;
2617
        rb= lb;
2618
    }
2619
    if(b_y<0){
2620
        lt= lb;
2621
        rt= rb;
2622
    }else if(b_y + 1 >= b_height){
2623
        lb= lt;
2624
        rb= rt;
2625
    }
2626
        
2627
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2628
        obmc -= src_x;
2629
        b_w += src_x;
2630
        src_x=0;
2631
    }else if(src_x + b_w > w){
2632
        b_w = w - src_x;
2633
    }
2634
    if(src_y<0){
2635
        obmc -= src_y*obmc_stride;
2636
        b_h += src_y;
2637
        src_y=0;
2638
    }else if(src_y + b_h> h){
2639
        b_h = h - src_y;
2640
    }
2641
    
2642
    if(b_w<=0 || b_h<=0) return;
2643

    
2644
assert(src_stride > 7*MB_SIZE);
2645
    dst += src_x + src_y*dst_stride;
2646
    dst8+= src_x + src_y*src_stride;
2647
//    src += src_x + src_y*src_stride;
2648

    
2649
    block[0]= tmp+3*MB_SIZE;
2650
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
2651

    
2652
    if(same_block(lt, rt)){
2653
        block[1]= block[0];
2654
    }else{
2655
        block[1]= tmp + 4*MB_SIZE;
2656
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2657
    }
2658
        
2659
    if(same_block(lt, lb)){
2660
        block[2]= block[0];
2661
    }else if(same_block(rt, lb)){
2662
        block[2]= block[1];
2663
    }else{
2664
        block[2]= tmp+5*MB_SIZE;
2665
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2666
    }
2667

    
2668
    if(same_block(lt, rb) ){
2669
        block[3]= block[0];
2670
    }else if(same_block(rt, rb)){
2671
        block[3]= block[1];
2672
    }else if(same_block(lb, rb)){
2673
        block[3]= block[2];
2674
    }else{
2675
        block[3]= tmp+6*MB_SIZE;
2676
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2677
    }
2678
#if 0
2679
    for(y=0; y<b_h; y++){
2680
        for(x=0; x<b_w; x++){
2681
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2682
            if(add) dst[x + y*dst_stride] += v;
2683
            else    dst[x + y*dst_stride] -= v;
2684
        }
2685
    }
2686
    for(y=0; y<b_h; y++){
2687
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2688
        for(x=0; x<b_w; x++){
2689
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2690
            if(add) dst[x + y*dst_stride] += v;
2691
            else    dst[x + y*dst_stride] -= v;
2692
        }
2693
    }
2694
    for(y=0; y<b_h; y++){
2695
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2696
        for(x=0; x<b_w; x++){
2697
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2698
            if(add) dst[x + y*dst_stride] += v;
2699
            else    dst[x + y*dst_stride] -= v;
2700
        }
2701
    }
2702
    for(y=0; y<b_h; y++){
2703
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2704
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2705
        for(x=0; x<b_w; x++){
2706
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2707
            if(add) dst[x + y*dst_stride] += v;
2708
            else    dst[x + y*dst_stride] -= v;
2709
        }
2710
    }
2711
#else
2712
    for(y=0; y<b_h; y++){
2713
        //FIXME ugly missue of obmc_stride
2714
        uint8_t *obmc1= obmc + y*obmc_stride;
2715
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2716
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2717
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2718
        for(x=0; x<b_w; x++){
2719
            int v=   obmc1[x] * block[3][x + y*src_stride]
2720
                    +obmc2[x] * block[2][x + y*src_stride]
2721
                    +obmc3[x] * block[1][x + y*src_stride]
2722
                    +obmc4[x] * block[0][x + y*src_stride];
2723
            
2724
            v <<= 8 - LOG2_OBMC_MAX;
2725
            if(FRAC_BITS != 8){
2726
                v += 1<<(7 - FRAC_BITS);
2727
                v >>= 8 - FRAC_BITS;
2728
            }
2729
            if(add){
2730
                v += dst[x + y*dst_stride];
2731
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2732
                if(v&(~255)) v= ~(v>>31);
2733
                dst8[x + y*src_stride] = v;
2734
            }else{
2735
                dst[x + y*dst_stride] -= v;
2736
            }
2737
        }
2738
    }
2739
#endif
2740
}
2741

    
2742
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2743
    Plane *p= &s->plane[plane_index];
2744
    const int mb_w= s->b_width  << s->block_max_depth;
2745
    const int mb_h= s->b_height << s->block_max_depth;
2746
    int x, y, mb_x;
2747
    int block_size = MB_SIZE >> s->block_max_depth;
2748
    int block_w    = plane_index ? block_size/2 : block_size;
2749
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2750
    int obmc_stride= plane_index ? block_size : 2*block_size;
2751
    int ref_stride= s->current_picture.linesize[plane_index];
2752
    uint8_t *ref  = s->last_picture.data[plane_index];
2753
    uint8_t *dst8= s->current_picture.data[plane_index];
2754
    int w= p->width;
2755
    int h= p->height;
2756
    START_TIMER
2757
    
2758
    if(s->keyframe || (s->avctx->debug&512)){
2759
        if(mb_y==mb_h)
2760
            return;
2761

    
2762
        if(add){
2763
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2764
            {
2765
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2766
                DWTELEM * line = sb->line[y];
2767
                for(x=0; x<w; x++)
2768
                {
2769
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2770
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2771
                    v >>= FRAC_BITS;
2772
                    if(v&(~255)) v= ~(v>>31);
2773
                    dst8[x + y*ref_stride]= v;
2774
                }
2775
            }
2776
        }else{
2777
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2778
            {
2779
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2780
                DWTELEM * line = sb->line[y];
2781
                for(x=0; x<w; x++)
2782
                {
2783
                    line[x] -= 128 << FRAC_BITS;
2784
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2785
                }
2786
            }
2787
        }
2788

    
2789
        return;
2790
    }
2791
    
2792
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2793
            START_TIMER
2794

    
2795
            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, 
2796
                       block_w*mb_x - block_w/2,
2797
                       block_w*mb_y - block_w/2,
2798
                       block_w, block_w,
2799
                       w, h,
2800
                       w, ref_stride, obmc_stride,
2801
                       mb_x - 1, mb_y - 1,
2802
                       add, plane_index);
2803
            
2804
            STOP_TIMER("add_yblock")
2805
        }
2806
    
2807
    STOP_TIMER("predict_slice")
2808
}
2809

    
2810
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2811
    Plane *p= &s->plane[plane_index];
2812
    const int mb_w= s->b_width  << s->block_max_depth;
2813
    const int mb_h= s->b_height << s->block_max_depth;
2814
    int x, y, mb_x;
2815
    int block_size = MB_SIZE >> s->block_max_depth;
2816
    int block_w    = plane_index ? block_size/2 : block_size;
2817
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2818
    int obmc_stride= plane_index ? block_size : 2*block_size;
2819
    int ref_stride= s->current_picture.linesize[plane_index];
2820
    uint8_t *ref  = s->last_picture.data[plane_index];
2821
    uint8_t *dst8= s->current_picture.data[plane_index];
2822
    int w= p->width;
2823
    int h= p->height;
2824
    START_TIMER
2825
    
2826
    if(s->keyframe || (s->avctx->debug&512)){
2827
        if(mb_y==mb_h)
2828
            return;
2829

    
2830
        if(add){
2831
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2832
                for(x=0; x<w; x++){
2833
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2834
                    v >>= FRAC_BITS;
2835
                    if(v&(~255)) v= ~(v>>31);
2836
                    dst8[x + y*ref_stride]= v;
2837
                }
2838
            }
2839
        }else{
2840
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2841
                for(x=0; x<w; x++){
2842
                    buf[x + y*w]-= 128<<FRAC_BITS;
2843
                }
2844
            }
2845
        }
2846

    
2847
        return;
2848
    }
2849
    
2850
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2851
            START_TIMER
2852

    
2853
            add_yblock(s, buf, dst8, ref, obmc, 
2854
                       block_w*mb_x - block_w/2,
2855
                       block_w*mb_y - block_w/2,
2856
                       block_w, block_w,
2857
                       w, h,
2858
                       w, ref_stride, obmc_stride,
2859
                       mb_x - 1, mb_y - 1,
2860
                       add, plane_index);
2861
            
2862
            STOP_TIMER("add_yblock")
2863
        }
2864
    
2865
    STOP_TIMER("predict_slice")
2866
}
2867

    
2868
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2869
    const int mb_h= s->b_height << s->block_max_depth;
2870
    int mb_y;
2871
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2872
        predict_slice(s, buf, plane_index, add, mb_y);
2873
}
2874

    
2875
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
2876
    const int level= b->level;
2877
    const int w= b->width;
2878
    const int h= b->height;
2879
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2880
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2881
    int x,y, thres1, thres2;
2882
    START_TIMER
2883

    
2884
    if(s->qlog == LOSSLESS_QLOG) return;
2885
 
2886
    bias= bias ? 0 : (3*qmul)>>3;
2887
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
2888
    thres2= 2*thres1;
2889
    
2890
    if(!bias){
2891
        for(y=0; y<h; y++){
2892
            for(x=0; x<w; x++){
2893
                int i= src[x + y*stride];
2894
                
2895
                if((unsigned)(i+thres1) > thres2){
2896
                    if(i>=0){
2897
                        i<<= QEXPSHIFT;
2898
                        i/= qmul; //FIXME optimize
2899
                        src[x + y*stride]=  i;
2900
                    }else{
2901
                        i= -i;
2902
                        i<<= QEXPSHIFT;
2903
                        i/= qmul; //FIXME optimize
2904
                        src[x + y*stride]= -i;
2905
                    }
2906
                }else
2907
                    src[x + y*stride]= 0;
2908
            }
2909
        }
2910
    }else{
2911
        for(y=0; y<h; y++){
2912
            for(x=0; x<w; x++){
2913
                int i= src[x + y*stride]; 
2914
                
2915
                if((unsigned)(i+thres1) > thres2){
2916
                    if(i>=0){
2917
                        i<<= QEXPSHIFT;
2918
                        i= (i + bias) / qmul; //FIXME optimize
2919
                        src[x + y*stride]=  i;
2920
                    }else{
2921
                        i= -i;
2922
                        i<<= QEXPSHIFT;
2923
                        i= (i + bias) / qmul; //FIXME optimize
2924
                        src[x + y*stride]= -i;
2925
                    }
2926
                }else
2927
                    src[x + y*stride]= 0;
2928
            }
2929
        }
2930
    }
2931
    if(level+1 == s->spatial_decomposition_count){
2932
//        STOP_TIMER("quantize")
2933
    }
2934
}
2935

    
2936
static void dequantize_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride){
2937
    const int w= b->width;
2938
    const int h= b->height;
2939
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2940
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2941
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2942
    int x,y;
2943
    START_TIMER
2944
    
2945
    if(s->qlog == LOSSLESS_QLOG) return;
2946
    
2947
    for(y=0; y<h; y++){
2948
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2949
        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2950
        for(x=0; x<w; x++){
2951
            int i= line[x];
2952
            if(i<0){
2953
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2954
            }else if(i>0){
2955
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2956
            }
2957
        }
2958
    }
2959
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2960
        STOP_TIMER("dquant")
2961
    }
2962
}
2963

    
2964
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
2965
    const int w= b->width;
2966
    const int h= b->height;
2967
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
2968
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2969
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2970
    int x,y;
2971
    START_TIMER
2972
    
2973
    if(s->qlog == LOSSLESS_QLOG) return;
2974
    
2975
    for(y=0; y<h; y++){
2976
        for(x=0; x<w; x++){
2977
            int i= src[x + y*stride];
2978
            if(i<0){
2979
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2980
            }else if(i>0){
2981
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2982
            }
2983
        }
2984
    }
2985
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
2986
        STOP_TIMER("dquant")
2987
    }
2988
}
2989

    
2990
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
2991
    const int w= b->width;
2992
    const int h= b->height;
2993
    int x,y;
2994
    
2995
    for(y=h-1; y>=0; y--){
2996
        for(x=w-1; x>=0; x--){
2997
            int i= x + y*stride;
2998
            
2999
            if(x){
3000
                if(use_median){
3001
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3002
                    else  src[i] -= src[i - 1];
3003
                }else{
3004
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3005
                    else  src[i] -= src[i - 1];
3006
                }
3007
            }else{
3008
                if(y) src[i] -= src[i - stride];
3009
            }
3010
        }
3011
    }
3012
}
3013

    
3014
static void correlate_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3015
    const int w= b->width;
3016
    const int h= b->height;
3017
    int x,y;
3018
    
3019
//    START_TIMER
3020
    
3021
    DWTELEM * line;
3022
    DWTELEM * prev;
3023
    
3024
    for(y=0; y<h; y++){
3025
        prev = line;
3026
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3027
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3028
        for(x=0; x<w; x++){
3029
            if(x){
3030
                if(use_median){
3031
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3032
                    else  line[x] += line[x - 1];
3033
                }else{
3034
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3035
                    else  line[x] += line[x - 1];
3036
                }
3037
            }else{
3038
                if(y) line[x] += prev[x];
3039
            }
3040
        }
3041
    }
3042
    
3043
//    STOP_TIMER("correlate")
3044
}
3045

    
3046
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3047
    const int w= b->width;
3048
    const int h= b->height;
3049
    int x,y;
3050
    
3051
    for(y=0; y<h; y++){
3052
        for(x=0; x<w; x++){
3053
            int i= x + y*stride;
3054
            
3055
            if(x){
3056
                if(use_median){
3057
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3058
                    else  src[i] += src[i - 1];
3059
                }else{
3060
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3061
                    else  src[i] += src[i - 1];
3062
                }
3063
            }else{
3064
                if(y) src[i] += src[i - stride];
3065
            }
3066
        }
3067
    }
3068
}
3069

    
3070
static void encode_header(SnowContext *s){
3071
    int plane_index, level, orientation;
3072
    uint8_t kstate[32]; 
3073
    
3074
    memset(kstate, MID_STATE, sizeof(kstate));   
3075

    
3076
    put_rac(&s->c, kstate, s->keyframe);
3077
    if(s->keyframe || s->always_reset)
3078
        reset_contexts(s);
3079
    if(s->keyframe){
3080
        put_symbol(&s->c, s->header_state, s->version, 0);
3081
        put_rac(&s->c, s->header_state, s->always_reset);
3082
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3083
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3084
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3085
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3086
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3087
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3088
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3089
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3090

    
3091
        for(plane_index=0; plane_index<2; plane_index++){
3092
            for(level=0; level<s->spatial_decomposition_count; level++){
3093
                for(orientation=level ? 1:0; orientation<4; orientation++){
3094
                    if(orientation==2) continue;
3095
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3096
                }
3097
            }
3098
        }
3099
    }
3100
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3101
    put_symbol(&s->c, s->header_state, s->qlog, 1); 
3102
    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
3103
    put_symbol(&s->c, s->header_state, s->qbias, 1);
3104
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3105
}
3106

    
3107
static int decode_header(SnowContext *s){
3108
    int plane_index, level, orientation;
3109
    uint8_t kstate[32];
3110

    
3111
    memset(kstate, MID_STATE, sizeof(kstate));   
3112

    
3113
    s->keyframe= get_rac(&s->c, kstate);
3114
    if(s->keyframe || s->always_reset)
3115
        reset_contexts(s);
3116
    if(s->keyframe){
3117
        s->version= get_symbol(&s->c, s->header_state, 0);
3118
        if(s->version>0){
3119
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3120
            return -1;
3121
        }
3122
        s->always_reset= get_rac(&s->c, s->header_state);
3123
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3124
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3125
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3126
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3127
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3128
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3129
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3130
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3131

    
3132
        for(plane_index=0; plane_index<3; plane_index++){
3133
            for(level=0; level<s->spatial_decomposition_count; level++){
3134
                for(orientation=level ? 1:0; orientation<4; orientation++){
3135
                    int q;
3136
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3137
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3138
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3139
                    s->plane[plane_index].band[level][orientation].qlog= q;
3140
                }
3141
            }
3142
        }
3143
    }
3144
    
3145
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3146
    if(s->spatial_decomposition_type > 2){
3147
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3148
        return -1;
3149
    }
3150
    
3151
    s->qlog= get_symbol(&s->c, s->header_state, 1);
3152
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3153
    s->qbias= get_symbol(&s->c, s->header_state, 1);
3154
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3155

    
3156
    return 0;
3157
}
3158

    
3159
static void init_qexp(){
3160
    int i;
3161
    double v=128;
3162

    
3163
    for(i=0; i<QROOT; i++){
3164
        qexp[i]= lrintf(v);
3165
        v *= pow(2, 1.0 / QROOT); 
3166
    }
3167
}
3168

    
3169
static int common_init(AVCodecContext *avctx){
3170
    SnowContext *s = avctx->priv_data;
3171
    int width, height;
3172
    int level, orientation, plane_index, dec;
3173

    
3174
    s->avctx= avctx;
3175
        
3176
    dsputil_init(&s->dsp, avctx);
3177

    
3178
#define mcf(dx,dy)\
3179
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3180
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3181
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3182
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3183
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3184
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3185

    
3186
    mcf( 0, 0)
3187
    mcf( 4, 0)
3188
    mcf( 8, 0)
3189
    mcf(12, 0)
3190
    mcf( 0, 4)
3191
    mcf( 4, 4)
3192
    mcf( 8, 4)
3193
    mcf(12, 4)
3194
    mcf( 0, 8)
3195
    mcf( 4, 8)
3196
    mcf( 8, 8)
3197
    mcf(12, 8)
3198
    mcf( 0,12)
3199
    mcf( 4,12)
3200
    mcf( 8,12)
3201
    mcf(12,12)
3202

    
3203
#define mcfh(dx,dy)\
3204
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3205
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3206
        mc_block_hpel ## dx ## dy ## 16;\
3207
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3208
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3209
        mc_block_hpel ## dx ## dy ## 8;
3210

    
3211
    mcfh(0, 0)
3212
    mcfh(8, 0)
3213
    mcfh(0, 8)
3214
    mcfh(8, 8)
3215

    
3216
    if(!qexp[0])
3217
        init_qexp();
3218

    
3219
    dec= s->spatial_decomposition_count= 5;
3220
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3221
    
3222
    s->chroma_h_shift= 1; //FIXME XXX
3223
    s->chroma_v_shift= 1;
3224
    
3225
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3226
    
3227
    width= s->avctx->width;
3228
    height= s->avctx->height;
3229

    
3230
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3231
    
3232
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3233
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3234
    
3235
    for(plane_index=0; plane_index<3; plane_index++){    
3236
        int w= s->avctx->width;
3237
        int h= s->avctx->height;
3238

    
3239
        if(plane_index){
3240
            w>>= s->chroma_h_shift;
3241
            h>>= s->chroma_v_shift;
3242
        }
3243
        s->plane[plane_index].width = w;
3244
        s->plane[plane_index].height= h;
3245
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3246
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3247
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3248
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3249
                
3250
                b->buf= s->spatial_dwt_buffer;
3251
                b->level= level;
3252
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3253
                b->width = (w + !(orientation&1))>>1;
3254
                b->height= (h + !(orientation>1))>>1;
3255
                
3256
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3257
                b->buf_x_offset = 0;
3258
                b->buf_y_offset = 0;
3259
                
3260
                if(orientation&1){
3261
                    b->buf += (w+1)>>1;
3262
                    b->buf_x_offset = (w+1)>>1;
3263
                }
3264
                if(orientation>1){
3265
                    b->buf += b->stride>>1;
3266
                    b->buf_y_offset = b->stride_line >> 1;
3267
                }
3268
                
3269
                if(level)
3270
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3271
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3272
            }
3273
            w= (w+1)>>1;
3274
            h= (h+1)>>1;
3275
        }
3276
    }
3277
    
3278
    reset_contexts(s);
3279
/*    
3280
    width= s->width= avctx->width;
3281
    height= s->height= avctx->height;
3282
    
3283
    assert(width && height);
3284
*/
3285
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3286
    
3287
    return 0;
3288
}
3289

    
3290

    
3291
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3292
    int width = p->width;
3293
    int height= p->height;
3294
    int level, orientation, x, y;
3295

    
3296
    for(level=0; level<s->spatial_decomposition_count; level++){
3297
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3298
            SubBand *b= &p->band[level][orientation];
3299
            DWTELEM *buf= b->buf;
3300
            int64_t error=0;
3301
            
3302
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3303
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
3304
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3305
            for(y=0; y<height; y++){
3306
                for(x=0; x<width; x++){
3307
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
3308
                    error += d*d;
3309
                }
3310
            }
3311

    
3312
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3313
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3314
        }
3315
    }
3316
}
3317

    
3318
static int encode_init(AVCodecContext *avctx)
3319
{
3320
    SnowContext *s = avctx->priv_data;
3321
    int plane_index;
3322

    
3323
    if(avctx->strict_std_compliance >= 0){
3324
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it wont be decodeable with future versions!!!\n"
3325
               "use vstrict=-1 / -strict -1 to use it anyway\n");
3326
        return -1;
3327
    }
3328
 
3329
    common_init(avctx);
3330
    alloc_blocks(s);
3331
 
3332
    s->version=0;
3333
    
3334
    s->m.avctx   = avctx;
3335
    s->m.flags   = avctx->flags;
3336
    s->m.bit_rate= avctx->bit_rate;
3337

    
3338
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3339
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3340
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3341
    h263_encode_init(&s->m); //mv_penalty
3342

    
3343
    if(avctx->flags&CODEC_FLAG_PASS1){
3344
        if(!avctx->stats_out)
3345
            avctx->stats_out = av_mallocz(256);
3346
    }
3347
    if(avctx->flags&CODEC_FLAG_PASS2){
3348
        if(ff_rate_control_init(&s->m) < 0)
3349
            return -1;
3350
    }
3351

    
3352
    for(plane_index=0; plane_index<3; plane_index++){
3353
        calculate_vissual_weight(s, &s->plane[plane_index]);
3354
    }
3355
    
3356
    
3357
    avctx->coded_frame= &s->current_picture;
3358
    switch(avctx->pix_fmt){
3359
//    case PIX_FMT_YUV444P:
3360
//    case PIX_FMT_YUV422P:
3361
    case PIX_FMT_YUV420P:
3362
    case PIX_FMT_GRAY8:
3363
//    case PIX_FMT_YUV411P:
3364
//    case PIX_FMT_YUV410P:
3365
        s->colorspace_type= 0;
3366
        break;
3367
/*    case PIX_FMT_RGBA32:
3368
        s->colorspace= 1;
3369
        break;*/
3370
    default:
3371
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3372
        return -1;
3373
    }
3374
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3375
    s->chroma_h_shift= 1;
3376
    s->chroma_v_shift= 1;
3377
    return 0;
3378
}
3379

    
3380
static int frame_start(SnowContext *s){
3381
   AVFrame tmp;
3382
   int w= s->avctx->width; //FIXME round up to x16 ?
3383
   int h= s->avctx->height;
3384

    
3385
    if(s->current_picture.data[0]){
3386
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
3387
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3388
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3389
    }
3390

    
3391
    tmp= s->last_picture;
3392
    s->last_picture= s->current_picture;
3393
    s->current_picture= tmp;
3394
    
3395
    s->current_picture.reference= 1;
3396
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3397
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3398
        return -1;
3399
    }
3400
    
3401
    return 0;
3402
}
3403

    
3404
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3405
    SnowContext *s = avctx->priv_data;
3406
    RangeCoder * const c= &s->c;
3407
    AVFrame *pict = data;
3408
    const int width= s->avctx->width;
3409
    const int height= s->avctx->height;
3410
    int level, orientation, plane_index;
3411

    
3412
    ff_init_range_encoder(c, buf, buf_size);
3413
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3414
    
3415
    s->input_picture = *pict;
3416

    
3417
    if(avctx->flags&CODEC_FLAG_PASS2){
3418
        s->m.pict_type =
3419
        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3420
        s->keyframe= pict->pict_type==FF_I_TYPE;
3421
        s->m.picture_number= avctx->frame_number;
3422
        pict->quality= ff_rate_estimate_qscale(&s->m);
3423
    }else{
3424
        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3425
        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3426
    }
3427
    
3428
    if(pict->quality){
3429
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3430
        //<64 >60
3431
        s->qlog += 61*QROOT/8;
3432
    }else{
3433
        s->qlog= LOSSLESS_QLOG;
3434
    }
3435

    
3436
    frame_start(s);
3437
    s->current_picture.key_frame= s->keyframe;
3438

    
3439
    s->m.current_picture_ptr= &s->m.current_picture;
3440
    if(pict->pict_type == P_TYPE){
3441
        int block_width = (width +15)>>4;
3442
        int block_height= (height+15)>>4;
3443
        int stride= s->current_picture.linesize[0];
3444
        
3445
        assert(s->current_picture.data[0]);
3446
        assert(s->last_picture.data[0]);
3447
     
3448
        s->m.avctx= s->avctx;
3449
        s->m.current_picture.data[0]= s->current_picture.data[0];
3450
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
3451
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3452
        s->m.   last_picture_ptr= &s->m.   last_picture;
3453
        s->m.linesize=
3454
        s->m.   last_picture.linesize[0]=
3455
        s->m.    new_picture.linesize[0]=
3456
        s->m.current_picture.linesize[0]= stride;
3457
        s->m.uvlinesize= s->current_picture.linesize[1];
3458
        s->m.width = width;
3459
        s->m.height= height;
3460
        s->m.mb_width = block_width;
3461
        s->m.mb_height= block_height;
3462
        s->m.mb_stride=   s->m.mb_width+1;
3463
        s->m.b8_stride= 2*s->m.mb_width+1;
3464
        s->m.f_code=1;
3465
        s->m.pict_type= pict->pict_type;
3466
        s->m.me_method= s->avctx->me_method;
3467
        s->m.me.scene_change_score=0;
3468
        s->m.flags= s->avctx->flags;
3469
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3470
        s->m.out_format= FMT_H263;
3471
        s->m.unrestricted_mv= 1;
3472

    
3473
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3474
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3475
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3476

    
3477
        s->m.dsp= s->dsp; //move
3478
        ff_init_me(&s->m);
3479
    }
3480
    
3481
redo_frame:
3482
        
3483
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3484

    
3485
    encode_header(s);
3486
    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3487
    encode_blocks(s);
3488
    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3489
      
3490
    for(plane_index=0; plane_index<3; plane_index++){
3491
        Plane *p= &s->plane[plane_index];
3492
        int w= p->width;
3493
        int h= p->height;
3494
        int x, y;
3495
//        int bits= put_bits_count(&s->c.pb);
3496

    
3497
        //FIXME optimize
3498
     if(pict->data[plane_index]) //FIXME gray hack
3499
        for(y=0; y<h; y++){
3500
            for(x=0; x<w; x++){
3501
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3502
            }
3503
        }
3504
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3505
        
3506
        if(   plane_index==0 
3507
           && pict->pict_type == P_TYPE 
3508
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3509
            ff_init_range_encoder(c, buf, buf_size);
3510
            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3511
            pict->pict_type= FF_I_TYPE;
3512
            s->keyframe=1;
3513
            reset_contexts(s);
3514
            goto redo_frame;
3515
        }
3516
        
3517
        if(s->qlog == LOSSLESS_QLOG){
3518
            for(y=0; y<h; y++){
3519
                for(x=0; x<w; x++){
3520
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3521
                }
3522
            }
3523
        }
3524
 
3525
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3526

    
3527
        for(level=0; level<s->spatial_decomposition_count; level++){
3528
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3529
                SubBand *b= &p->band[level][orientation];
3530
                
3531
                quantize(s, b, b->buf, b->stride, s->qbias);
3532
                if(orientation==0)
3533
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3534
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3535
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
3536
                if(orientation==0)
3537
                    correlate(s, b, b->buf, b->stride, 1, 0);
3538
            }
3539
        }
3540
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3541

    
3542
        for(level=0; level<s->spatial_decomposition_count; level++){
3543
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3544
                SubBand *b= &p->band[level][orientation];
3545

    
3546
                dequantize(s, b, b->buf, b->stride);
3547
            }
3548
        }
3549

    
3550
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3551
        if(s->qlog == LOSSLESS_QLOG){
3552
            for(y=0; y<h; y++){
3553
                for(x=0; x<w; x++){
3554
                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
3555
                }
3556
            }
3557
        }
3558
{START_TIMER
3559
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3560
STOP_TIMER("pred-conv")}
3561
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3562
            int64_t error= 0;
3563
            
3564
    if(pict->data[plane_index]) //FIXME gray hack
3565
            for(y=0; y<h; y++){
3566
                for(x=0; x<w; x++){
3567
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3568
                    error += d*d;
3569
                }
3570
            }
3571
            s->avctx->error[plane_index] += error;
3572
            s->current_picture.error[plane_index] = error;
3573
        }
3574
    }
3575

    
3576
    if(s->last_picture.data[0])
3577
        avctx->release_buffer(avctx, &s->last_picture);
3578

    
3579
    s->current_picture.coded_picture_number = avctx->frame_number;
3580
    s->current_picture.pict_type = pict->pict_type;
3581
    s->current_picture.quality = pict->quality;
3582
    if(avctx->flags&CODEC_FLAG_PASS1){
3583
        s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
3584
        s->m.current_picture.display_picture_number =
3585
        s->m.current_picture.coded_picture_number = avctx->frame_number;
3586
        s->m.pict_type = pict->pict_type;
3587
        s->m.current_picture.quality = pict->quality;
3588
        ff_write_pass1_stats(&s->m);
3589
    }
3590
    if(avctx->flags&CODEC_FLAG_PASS2){
3591
        s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3592
    }
3593

    
3594
    emms_c();
3595
    
3596
    return ff_rac_terminate(c);
3597
}
3598

    
3599
static void common_end(SnowContext *s){
3600
    int plane_index, level, orientation;
3601

    
3602
    av_freep(&s->spatial_dwt_buffer);
3603

    
3604
    av_freep(&s->m.me.scratchpad);    
3605
    av_freep(&s->m.me.map);
3606
    av_freep(&s->m.me.score_map);
3607
 
3608
    av_freep(&s->block);
3609

    
3610
    for(plane_index=0; plane_index<3; plane_index++){    
3611
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3612
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3613
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3614
                
3615
                av_freep(&b->x_coeff);
3616
            }
3617
        }
3618
    }
3619
}
3620

    
3621
static int encode_end(AVCodecContext *avctx)
3622
{
3623
    SnowContext *s = avctx->priv_data;
3624

    
3625
    common_end(s);
3626
    av_free(avctx->stats_out);
3627

    
3628
    return 0;
3629
}
3630

    
3631
static int decode_init(AVCodecContext *avctx)
3632
{
3633
    SnowContext *s = avctx->priv_data;
3634
    int block_size;
3635

    
3636
    common_init(avctx);
3637
    
3638
    block_size = MB_SIZE >> s->block_max_depth;
3639
    /* FIXME block_size * 2 is determined empirically. block_size * 1.5 is definitely needed, but I (Robert) cannot figure out why more than that is needed. Perhaps there is a bug, or perhaps I overlooked some demands that are placed on the buffer. */
3640
    /* FIXME The formula is WRONG. For height > 480, the buffer will overflow. */
3641
    /* FIXME For now, I will use a full frame of lines. Fortunately, this should not materially effect cache performance because lines are allocated using a stack, so if in fact only 50 out of 496 lines are needed at a time, the other 446 will sit allocated but never accessed. */
3642
//    slice_buffer_init(s->plane[0].sb, s->plane[0].height, (block_size * 2) + (s->spatial_decomposition_count * s->spatial_decomposition_count), s->plane[0].width, s->spatial_dwt_buffer);
3643
    slice_buffer_init(&s->sb, s->plane[0].height, s->plane[0].height, s->plane[0].width, s->spatial_dwt_buffer);
3644
    
3645
    return 0;
3646
}
3647

    
3648
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
3649
    SnowContext *s = avctx->priv_data;
3650
    RangeCoder * const c= &s->c;
3651
    int bytes_read;
3652
    AVFrame *picture = data;
3653
    int level, orientation, plane_index;
3654

    
3655
    ff_init_range_decoder(c, buf, buf_size);
3656
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3657

    
3658
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
3659
    decode_header(s);
3660
    if(!s->block) alloc_blocks(s);
3661

    
3662
    frame_start(s);
3663
    //keyframe flag dupliaction mess FIXME
3664
    if(avctx->debug&FF_DEBUG_PICT_INFO)
3665
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
3666
    
3667
    decode_blocks(s);
3668

    
3669
    for(plane_index=0; plane_index<3; plane_index++){
3670
        Plane *p= &s->plane[plane_index];
3671
        int w= p->width;
3672
        int h= p->height;
3673
        int x, y;
3674
        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
3675
        SubBand * correlate_band;
3676
        
3677
if(s->avctx->debug&2048){
3678
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
3679
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3680

    
3681
        for(y=0; y<h; y++){
3682
            for(x=0; x<w; x++){
3683
                int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
3684
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
3685
            }
3686
        }
3687
}
3688

    
3689
{   START_TIMER
3690
    for(level=0; level<s->spatial_decomposition_count; level++){
3691
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3692
            SubBand *b= &p->band[level][orientation];
3693
            unpack_coeffs(s, b, b->parent, orientation);
3694
        }
3695
    }
3696
    STOP_TIMER("unpack coeffs");
3697
}
3698
        
3699
        /* Handle level 0, orientation 0 specially. It is particularly resistant to slicing but fortunately quite small, so process it in one pass. */
3700
        correlate_band = &p->band[0][0];
3701
        decode_subband_slice_buffered(s, correlate_band, &s->sb, 0, correlate_band->height, decode_state[0][0]);
3702
        correlate_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0);
3703
        dequantize_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride);
3704

    
3705
{START_TIMER
3706
    const int mb_h= s->b_height << s->block_max_depth;
3707
    const int block_size = MB_SIZE >> s->block_max_depth;
3708
    const int block_w    = plane_index ? block_size/2 : block_size;
3709
    int mb_y;
3710
    dwt_compose_t cs[MAX_DECOMPOSITIONS];
3711
    int yd=0, yq=0;
3712
    int y;
3713
    int end_y;
3714

    
3715
    ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
3716
    for(mb_y=0; mb_y<=mb_h; mb_y++){
3717
        
3718
        const int slice_starty = block_w*mb_y;
3719
        const int slice_h = block_w*(mb_y+1);
3720

    
3721
        {        
3722
        START_TIMER
3723
        for(level=0; level<s->spatial_decomposition_count; level++){
3724
            for(orientation=level ? 1 : 1; orientation<4; orientation++){
3725
                SubBand *b= &p->band[level][orientation];
3726
                int start_y;
3727
                int end_y;
3728
                int our_mb_start = mb_y;
3729
                int our_mb_end = (mb_y + 1);
3730
                start_y = FFMIN(b->height, (mb_y ? ((block_w * our_mb_start - 4) >> (s->spatial_decomposition_count - level)) + 5 : 0));
3731
                end_y = FFMIN(b->height, (((block_w * our_mb_end - 4) >> (s->spatial_decomposition_count - level)) + 5));
3732
                    
3733
                if (start_y != end_y)
3734
                    decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
3735
            }
3736
        }
3737
        STOP_TIMER("decode_subband_slice");
3738
        }
3739
        
3740
{   START_TIMER
3741
        for(; yd<slice_h; yd+=4){
3742
            ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
3743
        }
3744
    STOP_TIMER("idwt slice");}
3745

    
3746
        
3747
        if(s->qlog == LOSSLESS_QLOG){
3748
            for(; yq<slice_h && yq<h; yq++){
3749
                DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
3750
                for(x=0; x<w; x++){
3751
                    line[x] <<= FRAC_BITS;
3752
                }
3753
            }
3754
        }
3755

    
3756
        predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
3757
        
3758
        /* Nasty hack based empirically on how predict_slice_buffered() hits the buffer. */
3759
        /* FIXME If possible, make predict_slice fit into the slice. As of now, it works on some previous lines (up to slice_height / 2) if the condition on the next line is false. */
3760
        if (s->keyframe || (s->avctx->debug&512)){
3761
            y = FFMIN(p->height, slice_starty);
3762
            end_y = FFMIN(p->height, slice_h);
3763
        }
3764
        else{
3765
            y = FFMAX(0, FFMIN(p->height, slice_starty - (block_w >> 1)));
3766
            end_y = FFMAX(0, FFMIN(p->height, slice_h - (block_w >> 1)));
3767
        }
3768
        while(y < end_y)
3769
            slice_buffer_release(&s->sb, y++);
3770
    }
3771
    
3772
    slice_buffer_flush(&s->sb);
3773
    
3774
STOP_TIMER("idwt + predict_slices")}
3775
    }
3776
            
3777
    emms_c();
3778

    
3779
    if(s->last_picture.data[0])
3780
        avctx->release_buffer(avctx, &s->last_picture);
3781

    
3782
if(!(s->avctx->debug&2048))        
3783
    *picture= s->current_picture;
3784
else
3785
    *picture= s->mconly_picture;
3786
    
3787
    *data_size = sizeof(AVFrame);
3788
    
3789
    bytes_read= c->bytestream - c->bytestream_start;
3790
    if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
3791

    
3792
    return bytes_read;
3793
}
3794

    
3795
static int decode_end(AVCodecContext *avctx)
3796
{
3797
    SnowContext *s = avctx->priv_data;
3798

    
3799
    slice_buffer_destroy(&s->sb);
3800
    
3801
    common_end(s);
3802

    
3803
    return 0;
3804
}
3805

    
3806
AVCodec snow_decoder = {
3807
    "snow",
3808
    CODEC_TYPE_VIDEO,
3809
    CODEC_ID_SNOW,
3810
    sizeof(SnowContext),
3811
    decode_init,
3812
    NULL,
3813
    decode_end,
3814
    decode_frame,
3815
    0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
3816
    NULL
3817
};
3818

    
3819
#ifdef CONFIG_ENCODERS
3820
AVCodec snow_encoder = {
3821
    "snow",
3822
    CODEC_TYPE_VIDEO,
3823
    CODEC_ID_SNOW,
3824
    sizeof(SnowContext),
3825
    encode_init,
3826
    encode_frame,
3827
    encode_end,
3828
};
3829
#endif
3830

    
3831

    
3832
#if 0
3833
#undef malloc
3834
#undef free
3835
#undef printf
3836

3837
int main(){
3838
    int width=256;
3839
    int height=256;
3840
    int buffer[2][width*height];
3841
    SnowContext s;
3842
    int i;
3843
    s.spatial_decomposition_count=6;
3844
    s.spatial_decomposition_type=1;
3845
    
3846
    printf("testing 5/3 DWT\n");
3847
    for(i=0; i<width*height; i++)
3848
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3849
    
3850
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3851
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3852
    
3853
    for(i=0; i<width*height; i++)
3854
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3855

3856
    printf("testing 9/7 DWT\n");
3857
    s.spatial_decomposition_type=0;
3858
    for(i=0; i<width*height; i++)
3859
        buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
3860
    
3861
    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3862
    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3863
    
3864
    for(i=0; i<width*height; i++)
3865
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
3866
        
3867
    printf("testing AC coder\n");
3868
    memset(s.header_state, 0, sizeof(s.header_state));
3869
    ff_init_range_encoder(&s.c, buffer[0], 256*256);
3870
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3871
        
3872
    for(i=-256; i<256; i++){
3873
START_TIMER
3874
        put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
3875
STOP_TIMER("put_symbol")
3876
    }
3877
    ff_rac_terminate(&s.c);
3878

3879
    memset(s.header_state, 0, sizeof(s.header_state));
3880
    ff_init_range_decoder(&s.c, buffer[0], 256*256);
3881
    ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
3882
    
3883
    for(i=-256; i<256; i++){
3884
        int j;
3885
START_TIMER
3886
        j= get_symbol(&s.c, s.header_state, 1);
3887
STOP_TIMER("get_symbol")
3888
        if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
3889
    }
3890
{
3891
int level, orientation, x, y;
3892
int64_t errors[8][4];
3893
int64_t g=0;
3894

3895
    memset(errors, 0, sizeof(errors));
3896
    s.spatial_decomposition_count=3;
3897
    s.spatial_decomposition_type=0;
3898
    for(level=0; level<s.spatial_decomposition_count; level++){
3899
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3900
            int w= width  >> (s.spatial_decomposition_count-level);
3901
            int h= height >> (s.spatial_decomposition_count-level);
3902
            int stride= width  << (s.spatial_decomposition_count-level);
3903
            DWTELEM *buf= buffer[0];
3904
            int64_t error=0;
3905

3906
            if(orientation&1) buf+=w;
3907
            if(orientation>1) buf+=stride>>1;
3908
            
3909
            memset(buffer[0], 0, sizeof(int)*width*height);
3910
            buf[w/2 + h/2*stride]= 256*256;
3911
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3912
            for(y=0; y<height; y++){
3913
                for(x=0; x<width; x++){
3914
                    int64_t d= buffer[0][x + y*width];
3915
                    error += d*d;
3916
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
3917
                }
3918
                if(ABS(height/2-y)<9 && level==2) printf("\n");
3919
            }
3920
            error= (int)(sqrt(error)+0.5);
3921
            errors[level][orientation]= error;
3922
            if(g) g=ff_gcd(g, error);
3923
            else g= error;
3924
        }
3925
    }
3926
    printf("static int const visual_weight[][4]={\n");
3927
    for(level=0; level<s.spatial_decomposition_count; level++){
3928
        printf("  {");
3929
        for(orientation=0; orientation<4; orientation++){
3930
            printf("%8lld,", errors[level][orientation]/g);
3931
        }
3932
        printf("},\n");
3933
    }
3934
    printf("};\n");
3935
    {
3936
            int level=2;
3937
            int orientation=3;
3938
            int w= width  >> (s.spatial_decomposition_count-level);
3939
            int h= height >> (s.spatial_decomposition_count-level);
3940
            int stride= width  << (s.spatial_decomposition_count-level);
3941
            DWTELEM *buf= buffer[0];
3942
            int64_t error=0;
3943

3944
            buf+=w;
3945
            buf+=stride>>1;
3946
            
3947
            memset(buffer[0], 0, sizeof(int)*width*height);
3948
#if 1
3949
            for(y=0; y<height; y++){
3950
                for(x=0; x<width; x++){
3951
                    int tab[4]={0,2,3,1};
3952
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
3953
                }
3954
            }
3955
            ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3956
#else
3957
            for(y=0; y<h; y++){
3958
                for(x=0; x<w; x++){
3959
                    buf[x + y*stride  ]=169;
3960
                    buf[x + y*stride-w]=64;
3961
                }
3962
            }
3963
            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3964
#endif
3965
            for(y=0; y<height; y++){
3966
                for(x=0; x<width; x++){
3967
                    int64_t d= buffer[0][x + y*width];
3968
                    error += d*d;
3969
                    if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
3970
                }
3971
                if(ABS(height/2-y)<9) printf("\n");
3972
            }
3973
    }
3974

    
3975
}
3976
    return 0;
3977
}
3978
#endif
3979