Statistics
| Branch: | Revision:

ffmpeg / libavcodec / snow.c @ 735f9f34

History | View | Annotate | Download (148 KB)

1
/*
2
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, write to the Free Software
16
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
17
 */
18

    
19
#include "avcodec.h"
20
#include "common.h"
21
#include "dsputil.h"
22

    
23
#include "rangecoder.h"
24
#define MID_STATE 128
25

    
26
#include "mpegvideo.h"
27

    
28
#undef NDEBUG
29
#include <assert.h>
30

    
31
#define MAX_DECOMPOSITIONS 8
32
#define MAX_PLANES 4
33
#define DWTELEM int
34
#define QSHIFT 5
35
#define QROOT (1<<QSHIFT)
36
#define LOSSLESS_QLOG -128
37
#define FRAC_BITS 8
38

    
39
static const int8_t quant3[256]={
40
 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
56
};
57
static const int8_t quant3b[256]={
58
 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65
 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
74
};
75
static const int8_t quant3bA[256]={
76
 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91
 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
92
};
93
static const int8_t quant5[256]={
94
 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
110
};
111
static const int8_t quant7[256]={
112
 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113
 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114
 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119
 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125
-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127
-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
128
};
129
static const int8_t quant9[256]={
130
 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131
 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145
-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
146
};
147
static const int8_t quant11[256]={
148
 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149
 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150
 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162
-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163
-4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
164
};
165
static const int8_t quant13[256]={
166
 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167
 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168
 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169
 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173
 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178
-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180
-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181
-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182
};
183

    
184
#define LOG2_OBMC_MAX 6
185
#define OBMC_MAX (1<<(LOG2_OBMC_MAX))
186
#if 0 //64*cubic
187
static const uint8_t obmc32[1024]={
188
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204
 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205
 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206
 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207
 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208
 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209
 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210
 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211
 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212
 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213
 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214
 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215
 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216
 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
220
//error:0.000022
221
};
222
static const uint8_t obmc16[256]={
223
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231
 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232
 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233
 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234
 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235
 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236
 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237
 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
239
//error:0.000033
240
};
241
#elif 1 // 64*linear
242
static const uint8_t obmc32[1024]={
243
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259
 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260
 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261
 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262
 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263
 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264
 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265
 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266
 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267
 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268
 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269
 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270
 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271
 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272
 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273
 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274
 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
275
 //error:0.000020
276
};
277
static const uint8_t obmc16[256]={
278
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286
 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287
 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288
 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289
 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290
 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291
 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292
 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293
 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
294
//error:0.000015
295
};
296
#else //64*cos
297
static const uint8_t obmc32[1024]={
298
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314
 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315
 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316
 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317
 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318
 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319
 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320
 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321
 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322
 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323
 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324
 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325
 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326
 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327
 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329
 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
330
//error:0.000022
331
};
332
static const uint8_t obmc16[256]={
333
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341
 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342
 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343
 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344
 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345
 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346
 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347
 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348
 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
349
//error:0.000022
350
};
351
#endif
352

    
353
//linear *64
354
static const uint8_t obmc8[64]={
355
 1, 3, 5, 7, 7, 5, 3, 1,
356
 3, 9,15,21,21,15, 9, 3,
357
 5,15,25,35,35,25,15, 5,
358
 7,21,35,49,49,35,21, 7,
359
 7,21,35,49,49,35,21, 7,
360
 5,15,25,35,35,25,15, 5,
361
 3, 9,15,21,21,15, 9, 3,
362
 1, 3, 5, 7, 7, 5, 3, 1,
363
//error:0.000000
364
};
365

    
366
//linear *64
367
static const uint8_t obmc4[16]={
368
 4,12,12, 4,
369
12,36,36,12,
370
12,36,36,12,
371
 4,12,12, 4,
372
//error:0.000000
373
};
374

    
375
static const uint8_t *obmc_tab[4]={
376
    obmc32, obmc16, obmc8, obmc4
377
};
378

    
379
typedef struct BlockNode{
380
    int16_t mx;
381
    int16_t my;
382
    uint8_t color[3];
383
    uint8_t type;
384
//#define TYPE_SPLIT    1
385
#define BLOCK_INTRA   1
386
#define BLOCK_OPT     2
387
//#define TYPE_NOCOLOR  4
388
    uint8_t level; //FIXME merge into type?
389
}BlockNode;
390

    
391
static const BlockNode null_block= { //FIXME add border maybe
392
    .color= {128,128,128},
393
    .mx= 0,
394
    .my= 0,
395
    .type= 0,
396
    .level= 0,
397
};
398

    
399
#define LOG2_MB_SIZE 4
400
#define MB_SIZE (1<<LOG2_MB_SIZE)
401

    
402
typedef struct x_and_coeff{
403
    int16_t x;
404
    uint16_t coeff;
405
} x_and_coeff;
406

    
407
typedef struct SubBand{
408
    int level;
409
    int stride;
410
    int width;
411
    int height;
412
    int qlog;                                   ///< log(qscale)/log[2^(1/6)]
413
    DWTELEM *buf;
414
    int buf_x_offset;
415
    int buf_y_offset;
416
    int stride_line; ///< Stride measured in lines, not pixels.
417
    x_and_coeff * x_coeff;
418
    struct SubBand *parent;
419
    uint8_t state[/*7*2*/ 7 + 512][32];
420
}SubBand;
421

    
422
typedef struct Plane{
423
    int width;
424
    int height;
425
    SubBand band[MAX_DECOMPOSITIONS][4];
426
}Plane;
427

    
428
/** Used to minimize the amount of memory used in order to optimize cache performance. **/
429
typedef struct {
430
    DWTELEM * * line; ///< For use by idwt and predict_slices.
431
    DWTELEM * * data_stack; ///< Used for internal purposes.
432
    int data_stack_top;
433
    int line_count;
434
    int line_width;
435
    int data_count;
436
    DWTELEM * base_buffer; ///< Buffer that this structure is caching.
437
} slice_buffer;
438

    
439
typedef struct SnowContext{
440
//    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
441

    
442
    AVCodecContext *avctx;
443
    RangeCoder c;
444
    DSPContext dsp;
445
    AVFrame new_picture;
446
    AVFrame input_picture;              ///< new_picture with the internal linesizes
447
    AVFrame current_picture;
448
    AVFrame last_picture;
449
    AVFrame mconly_picture;
450
//     uint8_t q_context[16];
451
    uint8_t header_state[32];
452
    uint8_t block_state[128 + 32*128];
453
    int keyframe;
454
    int always_reset;
455
    int version;
456
    int spatial_decomposition_type;
457
    int temporal_decomposition_type;
458
    int spatial_decomposition_count;
459
    int temporal_decomposition_count;
460
    DWTELEM *spatial_dwt_buffer;
461
    int colorspace_type;
462
    int chroma_h_shift;
463
    int chroma_v_shift;
464
    int spatial_scalability;
465
    int qlog;
466
    int lambda;
467
    int lambda2;
468
    int mv_scale;
469
    int qbias;
470
#define QBIAS_SHIFT 3
471
    int b_width;
472
    int b_height;
473
    int block_max_depth;
474
    Plane plane[MAX_PLANES];
475
    BlockNode *block;
476
#define ME_CACHE_SIZE 1024
477
    int me_cache[ME_CACHE_SIZE];
478
    int me_cache_generation;
479
    slice_buffer sb;
480

    
481
    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
482
}SnowContext;
483

    
484
typedef struct {
485
    DWTELEM *b0;
486
    DWTELEM *b1;
487
    DWTELEM *b2;
488
    DWTELEM *b3;
489
    int y;
490
} dwt_compose_t;
491

    
492
#define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
493
//#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
494

    
495
static void iterative_me(SnowContext *s);
496

    
497
static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
498
{
499
    int i;
500

    
501
    buf->base_buffer = base_buffer;
502
    buf->line_count = line_count;
503
    buf->line_width = line_width;
504
    buf->data_count = max_allocated_lines;
505
    buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
506
    buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
507

    
508
    for (i = 0; i < max_allocated_lines; i++)
509
    {
510
      buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
511
    }
512

    
513
    buf->data_stack_top = max_allocated_lines - 1;
514
}
515

    
516
static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
517
{
518
    int offset;
519
    DWTELEM * buffer;
520

    
521
//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
522

    
523
    assert(buf->data_stack_top >= 0);
524
//  assert(!buf->line[line]);
525
    if (buf->line[line])
526
        return buf->line[line];
527

    
528
    offset = buf->line_width * line;
529
    buffer = buf->data_stack[buf->data_stack_top];
530
    buf->data_stack_top--;
531
    buf->line[line] = buffer;
532

    
533
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
534

    
535
    return buffer;
536
}
537

    
538
static void slice_buffer_release(slice_buffer * buf, int line)
539
{
540
    int offset;
541
    DWTELEM * buffer;
542

    
543
    assert(line >= 0 && line < buf->line_count);
544
    assert(buf->line[line]);
545

    
546
    offset = buf->line_width * line;
547
    buffer = buf->line[line];
548
    buf->data_stack_top++;
549
    buf->data_stack[buf->data_stack_top] = buffer;
550
    buf->line[line] = NULL;
551

    
552
//  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
553
}
554

    
555
static void slice_buffer_flush(slice_buffer * buf)
556
{
557
    int i;
558
    for (i = 0; i < buf->line_count; i++)
559
    {
560
        if (buf->line[i])
561
        {
562
//      av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
563
            slice_buffer_release(buf, i);
564
        }
565
    }
566
}
567

    
568
static void slice_buffer_destroy(slice_buffer * buf)
569
{
570
    int i;
571
    slice_buffer_flush(buf);
572

    
573
    for (i = buf->data_count - 1; i >= 0; i--)
574
    {
575
        assert(buf->data_stack[i]);
576
        av_free(buf->data_stack[i]);
577
    }
578
    assert(buf->data_stack);
579
    av_free(buf->data_stack);
580
    assert(buf->line);
581
    av_free(buf->line);
582
}
583

    
584
#ifdef __sgi
585
// Avoid a name clash on SGI IRIX
586
#undef qexp
587
#endif
588
#define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
589
static uint8_t qexp[QROOT];
590

    
591
static inline int mirror(int v, int m){
592
    if     (v<0) return -v;
593
    else if(v>m) return 2*m-v;
594
    else         return v;
595
}
596

    
597
static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
598
    int i;
599

    
600
    if(v){
601
        const int a= ABS(v);
602
        const int e= av_log2(a);
603
#if 1
604
        const int el= FFMIN(e, 10);
605
        put_rac(c, state+0, 0);
606

    
607
        for(i=0; i<el; i++){
608
            put_rac(c, state+1+i, 1);  //1..10
609
        }
610
        for(; i<e; i++){
611
            put_rac(c, state+1+9, 1);  //1..10
612
        }
613
        put_rac(c, state+1+FFMIN(i,9), 0);
614

    
615
        for(i=e-1; i>=el; i--){
616
            put_rac(c, state+22+9, (a>>i)&1); //22..31
617
        }
618
        for(; i>=0; i--){
619
            put_rac(c, state+22+i, (a>>i)&1); //22..31
620
        }
621

    
622
        if(is_signed)
623
            put_rac(c, state+11 + el, v < 0); //11..21
624
#else
625

    
626
        put_rac(c, state+0, 0);
627
        if(e<=9){
628
            for(i=0; i<e; i++){
629
                put_rac(c, state+1+i, 1);  //1..10
630
            }
631
            put_rac(c, state+1+i, 0);
632

    
633
            for(i=e-1; i>=0; i--){
634
                put_rac(c, state+22+i, (a>>i)&1); //22..31
635
            }
636

    
637
            if(is_signed)
638
                put_rac(c, state+11 + e, v < 0); //11..21
639
        }else{
640
            for(i=0; i<e; i++){
641
                put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
642
            }
643
            put_rac(c, state+1+FFMIN(i,9), 0);
644

    
645
            for(i=e-1; i>=0; i--){
646
                put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
647
            }
648

    
649
            if(is_signed)
650
                put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
651
        }
652
#endif
653
    }else{
654
        put_rac(c, state+0, 1);
655
    }
656
}
657

    
658
static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
659
    if(get_rac(c, state+0))
660
        return 0;
661
    else{
662
        int i, e, a;
663
        e= 0;
664
        while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
665
            e++;
666
        }
667

    
668
        a= 1;
669
        for(i=e-1; i>=0; i--){
670
            a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
671
        }
672

    
673
        if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
674
            return -a;
675
        else
676
            return a;
677
    }
678
}
679

    
680
static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
681
    int i;
682
    int r= log2>=0 ? 1<<log2 : 1;
683

    
684
    assert(v>=0);
685
    assert(log2>=-4);
686

    
687
    while(v >= r){
688
        put_rac(c, state+4+log2, 1);
689
        v -= r;
690
        log2++;
691
        if(log2>0) r+=r;
692
    }
693
    put_rac(c, state+4+log2, 0);
694

    
695
    for(i=log2-1; i>=0; i--){
696
        put_rac(c, state+31-i, (v>>i)&1);
697
    }
698
}
699

    
700
static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
701
    int i;
702
    int r= log2>=0 ? 1<<log2 : 1;
703
    int v=0;
704

    
705
    assert(log2>=-4);
706

    
707
    while(get_rac(c, state+4+log2)){
708
        v+= r;
709
        log2++;
710
        if(log2>0) r+=r;
711
    }
712

    
713
    for(i=log2-1; i>=0; i--){
714
        v+= get_rac(c, state+31-i)<<i;
715
    }
716

    
717
    return v;
718
}
719

    
720
static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
721
    const int mirror_left= !highpass;
722
    const int mirror_right= (width&1) ^ highpass;
723
    const int w= (width>>1) - 1 + (highpass & width);
724
    int i;
725

    
726
#define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
727
    if(mirror_left){
728
        dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
729
        dst += dst_step;
730
        src += src_step;
731
    }
732

    
733
    for(i=0; i<w; i++){
734
        dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
735
    }
736

    
737
    if(mirror_right){
738
        dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
739
    }
740
}
741

    
742
static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
743
    const int mirror_left= !highpass;
744
    const int mirror_right= (width&1) ^ highpass;
745
    const int w= (width>>1) - 1 + (highpass & width);
746
    int i;
747

    
748
    if(mirror_left){
749
        int r= 3*2*ref[0];
750
        r += r>>4;
751
        r += r>>8;
752
        dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
753
        dst += dst_step;
754
        src += src_step;
755
    }
756

    
757
    for(i=0; i<w; i++){
758
        int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
759
        r += r>>4;
760
        r += r>>8;
761
        dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
762
    }
763

    
764
    if(mirror_right){
765
        int r= 3*2*ref[w*ref_step];
766
        r += r>>4;
767
        r += r>>8;
768
        dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
769
    }
770
}
771

    
772
static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
773
    const int mirror_left= !highpass;
774
    const int mirror_right= (width&1) ^ highpass;
775
    const int w= (width>>1) - 1 + (highpass & width);
776
    int i;
777

    
778
    assert(shift == 4);
779
#define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
780
    if(mirror_left){
781
        dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
782
        dst += dst_step;
783
        src += src_step;
784
    }
785

    
786
    for(i=0; i<w; i++){
787
        dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
788
    }
789

    
790
    if(mirror_right){
791
        dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
792
    }
793
}
794

    
795

    
796
static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
797
    int x, i;
798

    
799
    for(x=start; x<width; x+=2){
800
        int64_t sum=0;
801

    
802
        for(i=0; i<n; i++){
803
            int x2= x + 2*i - n + 1;
804
            if     (x2<     0) x2= -x2;
805
            else if(x2>=width) x2= 2*width-x2-2;
806
            sum += coeffs[i]*(int64_t)dst[x2];
807
        }
808
        if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
809
        else        dst[x] += (sum + (1<<shift)/2)>>shift;
810
    }
811
}
812

    
813
static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
814
    int x, y, i;
815
    for(y=start; y<height; y+=2){
816
        for(x=0; x<width; x++){
817
            int64_t sum=0;
818

    
819
            for(i=0; i<n; i++){
820
                int y2= y + 2*i - n + 1;
821
                if     (y2<      0) y2= -y2;
822
                else if(y2>=height) y2= 2*height-y2-2;
823
                sum += coeffs[i]*(int64_t)dst[x + y2*stride];
824
            }
825
            if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
826
            else        dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
827
        }
828
    }
829
}
830

    
831
#define SCALEX 1
832
#define LX0 0
833
#define LX1 1
834

    
835
#if 0 // more accurate 9/7
836
#define N1 2
837
#define SHIFT1 14
838
#define COEFFS1 (int[]){-25987,-25987}
839
#define N2 2
840
#define SHIFT2 19
841
#define COEFFS2 (int[]){-27777,-27777}
842
#define N3 2
843
#define SHIFT3 15
844
#define COEFFS3 (int[]){28931,28931}
845
#define N4 2
846
#define SHIFT4 15
847
#define COEFFS4 (int[]){14533,14533}
848
#elif 1 // 13/7 CRF
849
#define N1 4
850
#define SHIFT1 4
851
#define COEFFS1 (int[]){1,-9,-9,1}
852
#define N2 4
853
#define SHIFT2 4
854
#define COEFFS2 (int[]){-1,5,5,-1}
855
#define N3 0
856
#define SHIFT3 1
857
#define COEFFS3 NULL
858
#define N4 0
859
#define SHIFT4 1
860
#define COEFFS4 NULL
861
#elif 1 // 3/5
862
#define LX0 1
863
#define LX1 0
864
#define SCALEX 0.5
865
#define N1 2
866
#define SHIFT1 1
867
#define COEFFS1 (int[]){1,1}
868
#define N2 2
869
#define SHIFT2 2
870
#define COEFFS2 (int[]){-1,-1}
871
#define N3 0
872
#define SHIFT3 0
873
#define COEFFS3 NULL
874
#define N4 0
875
#define SHIFT4 0
876
#define COEFFS4 NULL
877
#elif 1 // 11/5
878
#define N1 0
879
#define SHIFT1 1
880
#define COEFFS1 NULL
881
#define N2 2
882
#define SHIFT2 2
883
#define COEFFS2 (int[]){-1,-1}
884
#define N3 2
885
#define SHIFT3 0
886
#define COEFFS3 (int[]){-1,-1}
887
#define N4 4
888
#define SHIFT4 7
889
#define COEFFS4 (int[]){-5,29,29,-5}
890
#define SCALEX 4
891
#elif 1 // 9/7 CDF
892
#define N1 2
893
#define SHIFT1 7
894
#define COEFFS1 (int[]){-203,-203}
895
#define N2 2
896
#define SHIFT2 12
897
#define COEFFS2 (int[]){-217,-217}
898
#define N3 2
899
#define SHIFT3 7
900
#define COEFFS3 (int[]){113,113}
901
#define N4 2
902
#define SHIFT4 9
903
#define COEFFS4 (int[]){227,227}
904
#define SCALEX 1
905
#elif 1 // 7/5 CDF
906
#define N1 0
907
#define SHIFT1 1
908
#define COEFFS1 NULL
909
#define N2 2
910
#define SHIFT2 2
911
#define COEFFS2 (int[]){-1,-1}
912
#define N3 2
913
#define SHIFT3 0
914
#define COEFFS3 (int[]){-1,-1}
915
#define N4 2
916
#define SHIFT4 4
917
#define COEFFS4 (int[]){3,3}
918
#elif 1 // 9/7 MN
919
#define N1 4
920
#define SHIFT1 4
921
#define COEFFS1 (int[]){1,-9,-9,1}
922
#define N2 2
923
#define SHIFT2 2
924
#define COEFFS2 (int[]){1,1}
925
#define N3 0
926
#define SHIFT3 1
927
#define COEFFS3 NULL
928
#define N4 0
929
#define SHIFT4 1
930
#define COEFFS4 NULL
931
#else // 13/7 CRF
932
#define N1 4
933
#define SHIFT1 4
934
#define COEFFS1 (int[]){1,-9,-9,1}
935
#define N2 4
936
#define SHIFT2 4
937
#define COEFFS2 (int[]){-1,5,5,-1}
938
#define N3 0
939
#define SHIFT3 1
940
#define COEFFS3 NULL
941
#define N4 0
942
#define SHIFT4 1
943
#define COEFFS4 NULL
944
#endif
945
static void horizontal_decomposeX(DWTELEM *b, int width){
946
    DWTELEM temp[width];
947
    const int width2= width>>1;
948
    const int w2= (width+1)>>1;
949
    int x;
950

    
951
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
952
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
953
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
954
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
955

    
956
    for(x=0; x<width2; x++){
957
        temp[x   ]= b[2*x    ];
958
        temp[x+w2]= b[2*x + 1];
959
    }
960
    if(width&1)
961
        temp[x   ]= b[2*x    ];
962
    memcpy(b, temp, width*sizeof(int));
963
}
964

    
965
static void horizontal_composeX(DWTELEM *b, int width){
966
    DWTELEM temp[width];
967
    const int width2= width>>1;
968
    int x;
969
    const int w2= (width+1)>>1;
970

    
971
    memcpy(temp, b, width*sizeof(int));
972
    for(x=0; x<width2; x++){
973
        b[2*x    ]= temp[x   ];
974
        b[2*x + 1]= temp[x+w2];
975
    }
976
    if(width&1)
977
        b[2*x    ]= temp[x   ];
978

    
979
    inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
980
    inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
981
    inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
982
    inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
983
}
984

    
985
static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
986
    int x, y;
987

    
988
    for(y=0; y<height; y++){
989
        for(x=0; x<width; x++){
990
            buffer[y*stride + x] *= SCALEX;
991
        }
992
    }
993

    
994
    for(y=0; y<height; y++){
995
        horizontal_decomposeX(buffer + y*stride, width);
996
    }
997

    
998
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
999
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1000
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1001
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
1002
}
1003

    
1004
static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1005
    int x, y;
1006

    
1007
    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1008
    inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1009
    inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1010
    inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1011

    
1012
    for(y=0; y<height; y++){
1013
        horizontal_composeX(buffer + y*stride, width);
1014
    }
1015

    
1016
    for(y=0; y<height; y++){
1017
        for(x=0; x<width; x++){
1018
            buffer[y*stride + x] /= SCALEX;
1019
        }
1020
    }
1021
}
1022

    
1023
static void horizontal_decompose53i(DWTELEM *b, int width){
1024
    DWTELEM temp[width];
1025
    const int width2= width>>1;
1026
    int x;
1027
    const int w2= (width+1)>>1;
1028

    
1029
    for(x=0; x<width2; x++){
1030
        temp[x   ]= b[2*x    ];
1031
        temp[x+w2]= b[2*x + 1];
1032
    }
1033
    if(width&1)
1034
        temp[x   ]= b[2*x    ];
1035
#if 0
1036
    {
1037
    int A1,A2,A3,A4;
1038
    A2= temp[1       ];
1039
    A4= temp[0       ];
1040
    A1= temp[0+width2];
1041
    A1 -= (A2 + A4)>>1;
1042
    A4 += (A1 + 1)>>1;
1043
    b[0+width2] = A1;
1044
    b[0       ] = A4;
1045
    for(x=1; x+1<width2; x+=2){
1046
        A3= temp[x+width2];
1047
        A4= temp[x+1     ];
1048
        A3 -= (A2 + A4)>>1;
1049
        A2 += (A1 + A3 + 2)>>2;
1050
        b[x+width2] = A3;
1051
        b[x       ] = A2;
1052

1053
        A1= temp[x+1+width2];
1054
        A2= temp[x+2       ];
1055
        A1 -= (A2 + A4)>>1;
1056
        A4 += (A1 + A3 + 2)>>2;
1057
        b[x+1+width2] = A1;
1058
        b[x+1       ] = A4;
1059
    }
1060
    A3= temp[width-1];
1061
    A3 -= A2;
1062
    A2 += (A1 + A3 + 2)>>2;
1063
    b[width -1] = A3;
1064
    b[width2-1] = A2;
1065
    }
1066
#else
1067
    lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1068
    lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
1069
#endif
1070
}
1071

    
1072
static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1073
    int i;
1074

    
1075
    for(i=0; i<width; i++){
1076
        b1[i] -= (b0[i] + b2[i])>>1;
1077
    }
1078
}
1079

    
1080
static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1081
    int i;
1082

    
1083
    for(i=0; i<width; i++){
1084
        b1[i] += (b0[i] + b2[i] + 2)>>2;
1085
    }
1086
}
1087

    
1088
static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1089
    int y;
1090
    DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1091
    DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
1092

    
1093
    for(y=-2; y<height; y+=2){
1094
        DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1095
        DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1096

    
1097
{START_TIMER
1098
        if(b1 <= b3)     horizontal_decompose53i(b2, width);
1099
        if(y+2 < height) horizontal_decompose53i(b3, width);
1100
STOP_TIMER("horizontal_decompose53i")}
1101

    
1102
{START_TIMER
1103
        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1104
        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1105
STOP_TIMER("vertical_decompose53i*")}
1106

    
1107
        b0=b2;
1108
        b1=b3;
1109
    }
1110
}
1111

    
1112
#define liftS lift
1113
#define lift5 lift
1114
#if 1
1115
#define W_AM 3
1116
#define W_AO 0
1117
#define W_AS 1
1118

    
1119
#undef liftS
1120
#define W_BM 1
1121
#define W_BO 8
1122
#define W_BS 4
1123

    
1124
#define W_CM 1
1125
#define W_CO 0
1126
#define W_CS 0
1127

    
1128
#define W_DM 3
1129
#define W_DO 4
1130
#define W_DS 3
1131
#elif 0
1132
#define W_AM 55
1133
#define W_AO 16
1134
#define W_AS 5
1135

    
1136
#define W_BM 3
1137
#define W_BO 32
1138
#define W_BS 6
1139

    
1140
#define W_CM 127
1141
#define W_CO 64
1142
#define W_CS 7
1143

    
1144
#define W_DM 7
1145
#define W_DO 8
1146
#define W_DS 4
1147
#elif 0
1148
#define W_AM 97
1149
#define W_AO 32
1150
#define W_AS 6
1151

    
1152
#define W_BM 63
1153
#define W_BO 512
1154
#define W_BS 10
1155

    
1156
#define W_CM 13
1157
#define W_CO 8
1158
#define W_CS 4
1159

    
1160
#define W_DM 15
1161
#define W_DO 16
1162
#define W_DS 5
1163

    
1164
#else
1165

    
1166
#define W_AM 203
1167
#define W_AO 64
1168
#define W_AS 7
1169

    
1170
#define W_BM 217
1171
#define W_BO 2048
1172
#define W_BS 12
1173

    
1174
#define W_CM 113
1175
#define W_CO 64
1176
#define W_CS 7
1177

    
1178
#define W_DM 227
1179
#define W_DO 128
1180
#define W_DS 9
1181
#endif
1182
static void horizontal_decompose97i(DWTELEM *b, int width){
1183
    DWTELEM temp[width];
1184
    const int w2= (width+1)>>1;
1185

    
1186
    lift (temp+w2, b    +1, b      , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1187
    liftS(temp   , b      , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1188
    lift5(b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
1189
    lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
1190
}
1191

    
1192

    
1193
static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1194
    int i;
1195

    
1196
    for(i=0; i<width; i++){
1197
        b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1198
    }
1199
}
1200

    
1201
static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1202
    int i;
1203

    
1204
    for(i=0; i<width; i++){
1205
#ifdef lift5
1206
        b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1207
#else
1208
        int r= 3*(b0[i] + b2[i]);
1209
        r+= r>>4;
1210
        r+= r>>8;
1211
        b1[i] += (r+W_CO)>>W_CS;
1212
#endif
1213
    }
1214
}
1215

    
1216
static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1217
    int i;
1218

    
1219
    for(i=0; i<width; i++){
1220
#ifdef liftS
1221
        b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1222
#else
1223
        b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1224
#endif
1225
    }
1226
}
1227

    
1228
static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1229
    int i;
1230

    
1231
    for(i=0; i<width; i++){
1232
        b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1233
    }
1234
}
1235

    
1236
static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1237
    int y;
1238
    DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1239
    DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
1240
    DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1241
    DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1242

    
1243
    for(y=-4; y<height; y+=2){
1244
        DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1245
        DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1246

    
1247
{START_TIMER
1248
        if(b3 <= b5)     horizontal_decompose97i(b4, width);
1249
        if(y+4 < height) horizontal_decompose97i(b5, width);
1250
if(width>400){
1251
STOP_TIMER("horizontal_decompose97i")
1252
}}
1253

    
1254
{START_TIMER
1255
        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1256
        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1257
        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1258
        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1259

    
1260
if(width>400){
1261
STOP_TIMER("vertical_decompose97i")
1262
}}
1263

    
1264
        b0=b2;
1265
        b1=b3;
1266
        b2=b4;
1267
        b3=b5;
1268
    }
1269
}
1270

    
1271
void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1272
    int level;
1273

    
1274
    for(level=0; level<decomposition_count; level++){
1275
        switch(type){
1276
        case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1277
        case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1278
        case 2: spatial_decomposeX  (buffer, width>>level, height>>level, stride<<level); break;
1279
        }
1280
    }
1281
}
1282

    
1283
static void horizontal_compose53i(DWTELEM *b, int width){
1284
    DWTELEM temp[width];
1285
    const int width2= width>>1;
1286
    const int w2= (width+1)>>1;
1287
    int x;
1288

    
1289
#if 0
1290
    int A1,A2,A3,A4;
1291
    A2= temp[1       ];
1292
    A4= temp[0       ];
1293
    A1= temp[0+width2];
1294
    A1 -= (A2 + A4)>>1;
1295
    A4 += (A1 + 1)>>1;
1296
    b[0+width2] = A1;
1297
    b[0       ] = A4;
1298
    for(x=1; x+1<width2; x+=2){
1299
        A3= temp[x+width2];
1300
        A4= temp[x+1     ];
1301
        A3 -= (A2 + A4)>>1;
1302
        A2 += (A1 + A3 + 2)>>2;
1303
        b[x+width2] = A3;
1304
        b[x       ] = A2;
1305

1306
        A1= temp[x+1+width2];
1307
        A2= temp[x+2       ];
1308
        A1 -= (A2 + A4)>>1;
1309
        A4 += (A1 + A3 + 2)>>2;
1310
        b[x+1+width2] = A1;
1311
        b[x+1       ] = A4;
1312
    }
1313
    A3= temp[width-1];
1314
    A3 -= A2;
1315
    A2 += (A1 + A3 + 2)>>2;
1316
    b[width -1] = A3;
1317
    b[width2-1] = A2;
1318
#else
1319
    lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1320
    lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1321
#endif
1322
    for(x=0; x<width2; x++){
1323
        b[2*x    ]= temp[x   ];
1324
        b[2*x + 1]= temp[x+w2];
1325
    }
1326
    if(width&1)
1327
        b[2*x    ]= temp[x   ];
1328
}
1329

    
1330
static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1331
    int i;
1332

    
1333
    for(i=0; i<width; i++){
1334
        b1[i] += (b0[i] + b2[i])>>1;
1335
    }
1336
}
1337

    
1338
static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1339
    int i;
1340

    
1341
    for(i=0; i<width; i++){
1342
        b1[i] -= (b0[i] + b2[i] + 2)>>2;
1343
    }
1344
}
1345

    
1346
static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1347
    cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1348
    cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1349
    cs->y = -1;
1350
}
1351

    
1352
static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1353
    cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1354
    cs->b1 = buffer + mirror(-1  , height-1)*stride;
1355
    cs->y = -1;
1356
}
1357

    
1358
static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1359
    int y= cs->y;
1360
    int mirror0 = mirror(y-1, height-1);
1361
    int mirror1 = mirror(y  , height-1);
1362
    int mirror2 = mirror(y+1, height-1);
1363
    int mirror3 = mirror(y+2, height-1);
1364

    
1365
    DWTELEM *b0= cs->b0;
1366
    DWTELEM *b1= cs->b1;
1367
    DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1368
    DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
1369

    
1370
{START_TIMER
1371
        if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1372
        if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1373
STOP_TIMER("vertical_compose53i*")}
1374

    
1375
{START_TIMER
1376
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1377
        if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1378
STOP_TIMER("horizontal_compose53i")}
1379

    
1380
    cs->b0 = b2;
1381
    cs->b1 = b3;
1382
    cs->y += 2;
1383
}
1384

    
1385
static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1386
    int y= cs->y;
1387
    DWTELEM *b0= cs->b0;
1388
    DWTELEM *b1= cs->b1;
1389
    DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1390
    DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1391

    
1392
{START_TIMER
1393
        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1394
        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1395
STOP_TIMER("vertical_compose53i*")}
1396

    
1397
{START_TIMER
1398
        if(y-1 >= 0) horizontal_compose53i(b0, width);
1399
        if(b0 <= b2) horizontal_compose53i(b1, width);
1400
STOP_TIMER("horizontal_compose53i")}
1401

    
1402
    cs->b0 = b2;
1403
    cs->b1 = b3;
1404
    cs->y += 2;
1405
}
1406

    
1407
static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1408
    dwt_compose_t cs;
1409
    spatial_compose53i_init(&cs, buffer, height, stride);
1410
    while(cs.y <= height)
1411
        spatial_compose53i_dy(&cs, buffer, width, height, stride);
1412
}
1413

    
1414

    
1415
static void horizontal_compose97i(DWTELEM *b, int width){
1416
    DWTELEM temp[width];
1417
    const int w2= (width+1)>>1;
1418

    
1419
    lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1420
    lift5(temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1421
    liftS(b      , temp   , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1422
    lift (b+1    , temp+w2, b      , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1423
}
1424

    
1425
static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1426
    int i;
1427

    
1428
    for(i=0; i<width; i++){
1429
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1430
    }
1431
}
1432

    
1433
static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1434
    int i;
1435

    
1436
    for(i=0; i<width; i++){
1437
#ifdef lift5
1438
        b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1439
#else
1440
        int r= 3*(b0[i] + b2[i]);
1441
        r+= r>>4;
1442
        r+= r>>8;
1443
        b1[i] -= (r+W_CO)>>W_CS;
1444
#endif
1445
    }
1446
}
1447

    
1448
static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1449
    int i;
1450

    
1451
    for(i=0; i<width; i++){
1452
#ifdef liftS
1453
        b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1454
#else
1455
        b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1456
#endif
1457
    }
1458
}
1459

    
1460
static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1461
    int i;
1462

    
1463
    for(i=0; i<width; i++){
1464
        b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1465
    }
1466
}
1467

    
1468
static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1469
    int i;
1470

    
1471
    for(i=0; i<width; i++){
1472
#ifndef lift5
1473
        int r;
1474
#endif
1475
        b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1476
#ifdef lift5
1477
        b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1478
#else
1479
        r= 3*(b2[i] + b4[i]);
1480
        r+= r>>4;
1481
        r+= r>>8;
1482
        b3[i] -= (r+W_CO)>>W_CS;
1483
#endif
1484
#ifdef liftS
1485
        b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1486
#else
1487
        b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1488
#endif
1489
        b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1490
    }
1491
}
1492

    
1493
static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1494
    cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1495
    cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1496
    cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1497
    cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1498
    cs->y = -3;
1499
}
1500

    
1501
static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1502
    cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1503
    cs->b1 = buffer + mirror(-3  , height-1)*stride;
1504
    cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1505
    cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1506
    cs->y = -3;
1507
}
1508

    
1509
static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1510
    int y = cs->y;
1511

    
1512
    int mirror0 = mirror(y - 1, height - 1);
1513
    int mirror1 = mirror(y + 0, height - 1);
1514
    int mirror2 = mirror(y + 1, height - 1);
1515
    int mirror3 = mirror(y + 2, height - 1);
1516
    int mirror4 = mirror(y + 3, height - 1);
1517
    int mirror5 = mirror(y + 4, height - 1);
1518
    DWTELEM *b0= cs->b0;
1519
    DWTELEM *b1= cs->b1;
1520
    DWTELEM *b2= cs->b2;
1521
    DWTELEM *b3= cs->b3;
1522
    DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1523
    DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
1524

    
1525
{START_TIMER
1526
    if(y>0 && y+4<height){
1527
        vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1528
    }else{
1529
        if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1530
        if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1531
        if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1532
        if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1533
    }
1534
if(width>400){
1535
STOP_TIMER("vertical_compose97i")}}
1536

    
1537
{START_TIMER
1538
        if(y-1>=  0) horizontal_compose97i(b0, width);
1539
        if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
1540
if(width>400 && mirror0 <= mirror2){
1541
STOP_TIMER("horizontal_compose97i")}}
1542

    
1543
    cs->b0=b2;
1544
    cs->b1=b3;
1545
    cs->b2=b4;
1546
    cs->b3=b5;
1547
    cs->y += 2;
1548
}
1549

    
1550
static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1551
    int y = cs->y;
1552
    DWTELEM *b0= cs->b0;
1553
    DWTELEM *b1= cs->b1;
1554
    DWTELEM *b2= cs->b2;
1555
    DWTELEM *b3= cs->b3;
1556
    DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1557
    DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1558

    
1559
        if(stride == width && y+4 < height && 0){
1560
            int x;
1561
            for(x=0; x<width/2; x++)
1562
                b5[x] += 64*2;
1563
            for(; x<width; x++)
1564
                b5[x] += 169*2;
1565
        }
1566

    
1567
{START_TIMER
1568
        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1569
        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1570
        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1571
        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1572
if(width>400){
1573
STOP_TIMER("vertical_compose97i")}}
1574

    
1575
{START_TIMER
1576
        if(y-1>=  0) horizontal_compose97i(b0, width);
1577
        if(b0 <= b2) horizontal_compose97i(b1, width);
1578
if(width>400 && b0 <= b2){
1579
STOP_TIMER("horizontal_compose97i")}}
1580

    
1581
    cs->b0=b2;
1582
    cs->b1=b3;
1583
    cs->b2=b4;
1584
    cs->b3=b5;
1585
    cs->y += 2;
1586
}
1587

    
1588
static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1589
    dwt_compose_t cs;
1590
    spatial_compose97i_init(&cs, buffer, height, stride);
1591
    while(cs.y <= height)
1592
        spatial_compose97i_dy(&cs, buffer, width, height, stride);
1593
}
1594

    
1595
void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1596
    int level;
1597
    for(level=decomposition_count-1; level>=0; level--){
1598
        switch(type){
1599
        case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1600
        case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1601
        /* not slicified yet */
1602
        case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1603
          av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1604
        }
1605
    }
1606
}
1607

    
1608
void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1609
    int level;
1610
    for(level=decomposition_count-1; level>=0; level--){
1611
        switch(type){
1612
        case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1613
        case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1614
        /* not slicified yet */
1615
        case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1616
        }
1617
    }
1618
}
1619

    
1620
void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1621
    const int support = type==1 ? 3 : 5;
1622
    int level;
1623
    if(type==2) return;
1624

    
1625
    for(level=decomposition_count-1; level>=0; level--){
1626
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1627
            switch(type){
1628
            case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1629
                    break;
1630
            case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1631
                    break;
1632
            case 2: break;
1633
            }
1634
        }
1635
    }
1636
}
1637

    
1638
void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1639
    const int support = type==1 ? 3 : 5;
1640
    int level;
1641
    if(type==2) return;
1642

    
1643
    for(level=decomposition_count-1; level>=0; level--){
1644
        while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1645
            switch(type){
1646
            case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1647
                    break;
1648
            case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1649
                    break;
1650
            case 2: break;
1651
            }
1652
        }
1653
    }
1654
}
1655

    
1656
void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1657
    if(type==2){
1658
        int level;
1659
        for(level=decomposition_count-1; level>=0; level--)
1660
            spatial_composeX  (buffer, width>>level, height>>level, stride<<level);
1661
    }else{
1662
        dwt_compose_t cs[MAX_DECOMPOSITIONS];
1663
        int y;
1664
        ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1665
        for(y=0; y<height; y+=4)
1666
            ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1667
    }
1668
}
1669

    
1670
static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1671
    const int w= b->width;
1672
    const int h= b->height;
1673
    int x, y;
1674

    
1675
    if(1){
1676
        int run=0;
1677
        int runs[w*h];
1678
        int run_index=0;
1679
        int max_index;
1680

    
1681
        for(y=0; y<h; y++){
1682
            for(x=0; x<w; x++){
1683
                int v, p=0;
1684
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1685
                v= src[x + y*stride];
1686

    
1687
                if(y){
1688
                    t= src[x + (y-1)*stride];
1689
                    if(x){
1690
                        lt= src[x - 1 + (y-1)*stride];
1691
                    }
1692
                    if(x + 1 < w){
1693
                        rt= src[x + 1 + (y-1)*stride];
1694
                    }
1695
                }
1696
                if(x){
1697
                    l= src[x - 1 + y*stride];
1698
                    /*if(x > 1){
1699
                        if(orientation==1) ll= src[y + (x-2)*stride];
1700
                        else               ll= src[x - 2 + y*stride];
1701
                    }*/
1702
                }
1703
                if(parent){
1704
                    int px= x>>1;
1705
                    int py= y>>1;
1706
                    if(px<b->parent->width && py<b->parent->height)
1707
                        p= parent[px + py*2*stride];
1708
                }
1709
                if(!(/*ll|*/l|lt|t|rt|p)){
1710
                    if(v){
1711
                        runs[run_index++]= run;
1712
                        run=0;
1713
                    }else{
1714
                        run++;
1715
                    }
1716
                }
1717
            }
1718
        }
1719
        max_index= run_index;
1720
        runs[run_index++]= run;
1721
        run_index=0;
1722
        run= runs[run_index++];
1723

    
1724
        put_symbol2(&s->c, b->state[30], max_index, 0);
1725
        if(run_index <= max_index)
1726
            put_symbol2(&s->c, b->state[1], run, 3);
1727

    
1728
        for(y=0; y<h; y++){
1729
            if(s->c.bytestream_end - s->c.bytestream < w*40){
1730
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1731
                return -1;
1732
            }
1733
            for(x=0; x<w; x++){
1734
                int v, p=0;
1735
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
1736
                v= src[x + y*stride];
1737

    
1738
                if(y){
1739
                    t= src[x + (y-1)*stride];
1740
                    if(x){
1741
                        lt= src[x - 1 + (y-1)*stride];
1742
                    }
1743
                    if(x + 1 < w){
1744
                        rt= src[x + 1 + (y-1)*stride];
1745
                    }
1746
                }
1747
                if(x){
1748
                    l= src[x - 1 + y*stride];
1749
                    /*if(x > 1){
1750
                        if(orientation==1) ll= src[y + (x-2)*stride];
1751
                        else               ll= src[x - 2 + y*stride];
1752
                    }*/
1753
                }
1754
                if(parent){
1755
                    int px= x>>1;
1756
                    int py= y>>1;
1757
                    if(px<b->parent->width && py<b->parent->height)
1758
                        p= parent[px + py*2*stride];
1759
                }
1760
                if(/*ll|*/l|lt|t|rt|p){
1761
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1762

    
1763
                    put_rac(&s->c, &b->state[0][context], !!v);
1764
                }else{
1765
                    if(!run){
1766
                        run= runs[run_index++];
1767

    
1768
                        if(run_index <= max_index)
1769
                            put_symbol2(&s->c, b->state[1], run, 3);
1770
                        assert(v);
1771
                    }else{
1772
                        run--;
1773
                        assert(!v);
1774
                    }
1775
                }
1776
                if(v){
1777
                    int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1778
                    int l2= 2*ABS(l) + (l<0);
1779
                    int t2= 2*ABS(t) + (t<0);
1780

    
1781
                    put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1782
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1783
                }
1784
            }
1785
        }
1786
    }
1787
    return 0;
1788
}
1789

    
1790
static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1791
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
1792
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
1793
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
1794
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
1795
}
1796

    
1797
static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1798
    const int w= b->width;
1799
    const int h= b->height;
1800
    int x,y;
1801

    
1802
    if(1){
1803
        int run, runs;
1804
        x_and_coeff *xc= b->x_coeff;
1805
        x_and_coeff *prev_xc= NULL;
1806
        x_and_coeff *prev2_xc= xc;
1807
        x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1808
        x_and_coeff *prev_parent_xc= parent_xc;
1809

    
1810
        runs= get_symbol2(&s->c, b->state[30], 0);
1811
        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1812
        else           run= INT_MAX;
1813

    
1814
        for(y=0; y<h; y++){
1815
            int v=0;
1816
            int lt=0, t=0, rt=0;
1817

    
1818
            if(y && prev_xc->x == 0){
1819
                rt= prev_xc->coeff;
1820
            }
1821
            for(x=0; x<w; x++){
1822
                int p=0;
1823
                const int l= v;
1824

    
1825
                lt= t; t= rt;
1826

    
1827
                if(y){
1828
                    if(prev_xc->x <= x)
1829
                        prev_xc++;
1830
                    if(prev_xc->x == x + 1)
1831
                        rt= prev_xc->coeff;
1832
                    else
1833
                        rt=0;
1834
                }
1835
                if(parent_xc){
1836
                    if(x>>1 > parent_xc->x){
1837
                        parent_xc++;
1838
                    }
1839
                    if(x>>1 == parent_xc->x){
1840
                        p= parent_xc->coeff;
1841
                    }
1842
                }
1843
                if(/*ll|*/l|lt|t|rt|p){
1844
                    int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1845

    
1846
                    v=get_rac(&s->c, &b->state[0][context]);
1847
                    if(v){
1848
                        v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1849
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1850

    
1851
                        xc->x=x;
1852
                        (xc++)->coeff= v;
1853
                    }
1854
                }else{
1855
                    if(!run){
1856
                        if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1857
                        else           run= INT_MAX;
1858
                        v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1859
                        v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1860

    
1861
                        xc->x=x;
1862
                        (xc++)->coeff= v;
1863
                    }else{
1864
                        int max_run;
1865
                        run--;
1866
                        v=0;
1867

    
1868
                        if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1869
                        else  max_run= FFMIN(run, w-x-1);
1870
                        if(parent_xc)
1871
                            max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1872
                        x+= max_run;
1873
                        run-= max_run;
1874
                    }
1875
                }
1876
            }
1877
            (xc++)->x= w+1; //end marker
1878
            prev_xc= prev2_xc;
1879
            prev2_xc= xc;
1880

    
1881
            if(parent_xc){
1882
                if(y&1){
1883
                    while(parent_xc->x != parent->width+1)
1884
                        parent_xc++;
1885
                    parent_xc++;
1886
                    prev_parent_xc= parent_xc;
1887
                }else{
1888
                    parent_xc= prev_parent_xc;
1889
                }
1890
            }
1891
        }
1892

    
1893
        (xc++)->x= w+1; //end marker
1894
    }
1895
}
1896

    
1897
static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1898
    const int w= b->width;
1899
    int y;
1900
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1901
    int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1902
    int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1903
    int new_index = 0;
1904

    
1905
    START_TIMER
1906

    
1907
    if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1908
        qadd= 0;
1909
        qmul= 1<<QEXPSHIFT;
1910
    }
1911

    
1912
    /* If we are on the second or later slice, restore our index. */
1913
    if (start_y != 0)
1914
        new_index = save_state[0];
1915

    
1916

    
1917
    for(y=start_y; y<h; y++){
1918
        int x = 0;
1919
        int v;
1920
        DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1921
        memset(line, 0, b->width*sizeof(DWTELEM));
1922
        v = b->x_coeff[new_index].coeff;
1923
        x = b->x_coeff[new_index++].x;
1924
        while(x < w)
1925
        {
1926
            register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1927
            register int u= -(v&1);
1928
            line[x] = (t^u) - u;
1929

    
1930
            v = b->x_coeff[new_index].coeff;
1931
            x = b->x_coeff[new_index++].x;
1932
        }
1933
    }
1934
    if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1935
        STOP_TIMER("decode_subband")
1936
    }
1937

    
1938
    /* Save our variables for the next slice. */
1939
    save_state[0] = new_index;
1940

    
1941
    return;
1942
}
1943

    
1944
static void reset_contexts(SnowContext *s){
1945
    int plane_index, level, orientation;
1946

    
1947
    for(plane_index=0; plane_index<3; plane_index++){
1948
        for(level=0; level<s->spatial_decomposition_count; level++){
1949
            for(orientation=level ? 1:0; orientation<4; orientation++){
1950
                memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1951
            }
1952
        }
1953
    }
1954
    memset(s->header_state, MID_STATE, sizeof(s->header_state));
1955
    memset(s->block_state, MID_STATE, sizeof(s->block_state));
1956
}
1957

    
1958
static int alloc_blocks(SnowContext *s){
1959
    int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1960
    int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1961

    
1962
    s->b_width = w;
1963
    s->b_height= h;
1964

    
1965
    s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1966
    return 0;
1967
}
1968

    
1969
static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1970
    uint8_t *bytestream= d->bytestream;
1971
    uint8_t *bytestream_start= d->bytestream_start;
1972
    *d= *s;
1973
    d->bytestream= bytestream;
1974
    d->bytestream_start= bytestream_start;
1975
}
1976

    
1977
//near copy & paste from dsputil, FIXME
1978
static int pix_sum(uint8_t * pix, int line_size, int w)
1979
{
1980
    int s, i, j;
1981

    
1982
    s = 0;
1983
    for (i = 0; i < w; i++) {
1984
        for (j = 0; j < w; j++) {
1985
            s += pix[0];
1986
            pix ++;
1987
        }
1988
        pix += line_size - w;
1989
    }
1990
    return s;
1991
}
1992

    
1993
//near copy & paste from dsputil, FIXME
1994
static int pix_norm1(uint8_t * pix, int line_size, int w)
1995
{
1996
    int s, i, j;
1997
    uint32_t *sq = squareTbl + 256;
1998

    
1999
    s = 0;
2000
    for (i = 0; i < w; i++) {
2001
        for (j = 0; j < w; j ++) {
2002
            s += sq[pix[0]];
2003
            pix ++;
2004
        }
2005
        pix += line_size - w;
2006
    }
2007
    return s;
2008
}
2009

    
2010
static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
2011
    const int w= s->b_width << s->block_max_depth;
2012
    const int rem_depth= s->block_max_depth - level;
2013
    const int index= (x + y*w) << rem_depth;
2014
    const int block_w= 1<<rem_depth;
2015
    BlockNode block;
2016
    int i,j;
2017

    
2018
    block.color[0]= l;
2019
    block.color[1]= cb;
2020
    block.color[2]= cr;
2021
    block.mx= mx;
2022
    block.my= my;
2023
    block.type= type;
2024
    block.level= level;
2025

    
2026
    for(j=0; j<block_w; j++){
2027
        for(i=0; i<block_w; i++){
2028
            s->block[index + i + j*w]= block;
2029
        }
2030
    }
2031
}
2032

    
2033
static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2034
    const int offset[3]= {
2035
          y*c->  stride + x,
2036
        ((y*c->uvstride + x)>>1),
2037
        ((y*c->uvstride + x)>>1),
2038
    };
2039
    int i;
2040
    for(i=0; i<3; i++){
2041
        c->src[0][i]= src [i];
2042
        c->ref[0][i]= ref [i] + offset[i];
2043
    }
2044
    assert(!ref_index);
2045
}
2046

    
2047
//FIXME copy&paste
2048
#define P_LEFT P[1]
2049
#define P_TOP P[2]
2050
#define P_TOPRIGHT P[3]
2051
#define P_MEDIAN P[4]
2052
#define P_MV1 P[9]
2053
#define FLAG_QPEL   1 //must be 1
2054

    
2055
static int encode_q_branch(SnowContext *s, int level, int x, int y){
2056
    uint8_t p_buffer[1024];
2057
    uint8_t i_buffer[1024];
2058
    uint8_t p_state[sizeof(s->block_state)];
2059
    uint8_t i_state[sizeof(s->block_state)];
2060
    RangeCoder pc, ic;
2061
    uint8_t *pbbak= s->c.bytestream;
2062
    uint8_t *pbbak_start= s->c.bytestream_start;
2063
    int score, score2, iscore, i_len, p_len, block_s, sum;
2064
    const int w= s->b_width  << s->block_max_depth;
2065
    const int h= s->b_height << s->block_max_depth;
2066
    const int rem_depth= s->block_max_depth - level;
2067
    const int index= (x + y*w) << rem_depth;
2068
    const int block_w= 1<<(LOG2_MB_SIZE - level);
2069
    int trx= (x+1)<<rem_depth;
2070
    int try= (y+1)<<rem_depth;
2071
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2072
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2073
    BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2074
    BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2075
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2076
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2077
    int pl = left->color[0];
2078
    int pcb= left->color[1];
2079
    int pcr= left->color[2];
2080
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2081
    int pmy= mid_pred(left->my, top->my, tr->my);
2082
    int mx=0, my=0;
2083
    int l,cr,cb;
2084
    const int stride= s->current_picture.linesize[0];
2085
    const int uvstride= s->current_picture.linesize[1];
2086
    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
2087
                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2088
                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2089
    int P[10][2];
2090
    int16_t last_mv[3][2];
2091
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2092
    const int shift= 1+qpel;
2093
    MotionEstContext *c= &s->m.me;
2094
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2095
    int my_context= av_log2(2*ABS(left->my - top->my));
2096
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2097

    
2098
    assert(sizeof(s->block_state) >= 256);
2099
    if(s->keyframe){
2100
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2101
        return 0;
2102
    }
2103

    
2104
//    clip predictors / edge ?
2105

    
2106
    P_LEFT[0]= left->mx;
2107
    P_LEFT[1]= left->my;
2108
    P_TOP [0]= top->mx;
2109
    P_TOP [1]= top->my;
2110
    P_TOPRIGHT[0]= tr->mx;
2111
    P_TOPRIGHT[1]= tr->my;
2112

    
2113
    last_mv[0][0]= s->block[index].mx;
2114
    last_mv[0][1]= s->block[index].my;
2115
    last_mv[1][0]= right->mx;
2116
    last_mv[1][1]= right->my;
2117
    last_mv[2][0]= bottom->mx;
2118
    last_mv[2][1]= bottom->my;
2119

    
2120
    s->m.mb_stride=2;
2121
    s->m.mb_x=
2122
    s->m.mb_y= 0;
2123
    s->m.me.skip= 0;
2124

    
2125
    init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2126

    
2127
    assert(s->m.me.  stride ==   stride);
2128
    assert(s->m.me.uvstride == uvstride);
2129

    
2130
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2131
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2132
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2133
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2134

    
2135
    c->xmin = - x*block_w - 16+2;
2136
    c->ymin = - y*block_w - 16+2;
2137
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2138
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2139

    
2140
    if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
2141
    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
2142
    if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
2143
    if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
2144
    if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2145
    if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2146
    if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2147

    
2148
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2149
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2150

    
2151
    if (!y) {
2152
        c->pred_x= P_LEFT[0];
2153
        c->pred_y= P_LEFT[1];
2154
    } else {
2155
        c->pred_x = P_MEDIAN[0];
2156
        c->pred_y = P_MEDIAN[1];
2157
    }
2158

    
2159
    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2160
                             (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2161

    
2162
    assert(mx >= c->xmin);
2163
    assert(mx <= c->xmax);
2164
    assert(my >= c->ymin);
2165
    assert(my <= c->ymax);
2166

    
2167
    score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2168
    score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2169
    //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2170

    
2171
  //  subpel search
2172
    pc= s->c;
2173
    pc.bytestream_start=
2174
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2175
    memcpy(p_state, s->block_state, sizeof(s->block_state));
2176

    
2177
    if(level!=s->block_max_depth)
2178
        put_rac(&pc, &p_state[4 + s_context], 1);
2179
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2180
    put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2181
    put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2182
    p_len= pc.bytestream - pc.bytestream_start;
2183
    score += (s->lambda2*(p_len*8
2184
              + (pc.outstanding_count - s->c.outstanding_count)*8
2185
              + (-av_log2(pc.range)    + av_log2(s->c.range))
2186
             ))>>FF_LAMBDA_SHIFT;
2187

    
2188
    block_s= block_w*block_w;
2189
    sum = pix_sum(current_data[0], stride, block_w);
2190
    l= (sum + block_s/2)/block_s;
2191
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2192

    
2193
    block_s= block_w*block_w>>2;
2194
    sum = pix_sum(current_data[1], uvstride, block_w>>1);
2195
    cb= (sum + block_s/2)/block_s;
2196
//    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2197
    sum = pix_sum(current_data[2], uvstride, block_w>>1);
2198
    cr= (sum + block_s/2)/block_s;
2199
//    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2200

    
2201
    ic= s->c;
2202
    ic.bytestream_start=
2203
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2204
    memcpy(i_state, s->block_state, sizeof(s->block_state));
2205
    if(level!=s->block_max_depth)
2206
        put_rac(&ic, &i_state[4 + s_context], 1);
2207
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2208
    put_symbol(&ic, &i_state[32],  l-pl , 1);
2209
    put_symbol(&ic, &i_state[64], cb-pcb, 1);
2210
    put_symbol(&ic, &i_state[96], cr-pcr, 1);
2211
    i_len= ic.bytestream - ic.bytestream_start;
2212
    iscore += (s->lambda2*(i_len*8
2213
              + (ic.outstanding_count - s->c.outstanding_count)*8
2214
              + (-av_log2(ic.range)    + av_log2(s->c.range))
2215
             ))>>FF_LAMBDA_SHIFT;
2216

    
2217
//    assert(score==256*256*256*64-1);
2218
    assert(iscore < 255*255*256 + s->lambda2*10);
2219
    assert(iscore >= 0);
2220
    assert(l>=0 && l<=255);
2221
    assert(pl>=0 && pl<=255);
2222

    
2223
    if(level==0){
2224
        int varc= iscore >> 8;
2225
        int vard= score >> 8;
2226
        if (vard <= 64 || vard < varc)
2227
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2228
        else
2229
            c->scene_change_score+= s->m.qscale;
2230
    }
2231

    
2232
    if(level!=s->block_max_depth){
2233
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
2234
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2235
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2236
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2237
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2238
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2239

    
2240
        if(score2 < score && score2 < iscore)
2241
            return score2;
2242
    }
2243

    
2244
    if(iscore < score){
2245
        memcpy(pbbak, i_buffer, i_len);
2246
        s->c= ic;
2247
        s->c.bytestream_start= pbbak_start;
2248
        s->c.bytestream= pbbak + i_len;
2249
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2250
        memcpy(s->block_state, i_state, sizeof(s->block_state));
2251
        return iscore;
2252
    }else{
2253
        memcpy(pbbak, p_buffer, p_len);
2254
        s->c= pc;
2255
        s->c.bytestream_start= pbbak_start;
2256
        s->c.bytestream= pbbak + p_len;
2257
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2258
        memcpy(s->block_state, p_state, sizeof(s->block_state));
2259
        return score;
2260
    }
2261
}
2262

    
2263
static always_inline int same_block(BlockNode *a, BlockNode *b){
2264
    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2265
        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2266
    }else{
2267
        return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
2268
    }
2269
}
2270

    
2271
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2272
    const int w= s->b_width  << s->block_max_depth;
2273
    const int rem_depth= s->block_max_depth - level;
2274
    const int index= (x + y*w) << rem_depth;
2275
    int trx= (x+1)<<rem_depth;
2276
    BlockNode *b= &s->block[index];
2277
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2278
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2279
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2280
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2281
    int pl = left->color[0];
2282
    int pcb= left->color[1];
2283
    int pcr= left->color[2];
2284
    int pmx= mid_pred(left->mx, top->mx, tr->mx);
2285
    int pmy= mid_pred(left->my, top->my, tr->my);
2286
    int mx_context= av_log2(2*ABS(left->mx - top->mx));
2287
    int my_context= av_log2(2*ABS(left->my - top->my));
2288
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2289

    
2290
    if(s->keyframe){
2291
        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2292
        return;
2293
    }
2294

    
2295
    if(level!=s->block_max_depth){
2296
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2297
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
2298
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2299
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2300
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2301
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2302
            return;
2303
        }else{
2304
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
2305
        }
2306
    }
2307
    if(b->type & BLOCK_INTRA){
2308
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2309
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2310
        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2311
        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2312
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
2313
    }else{
2314
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2315
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2316
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2317
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
2318
    }
2319
}
2320

    
2321
static void decode_q_branch(SnowContext *s, int level, int x, int y){
2322
    const int w= s->b_width << s->block_max_depth;
2323
    const int rem_depth= s->block_max_depth - level;
2324
    const int index= (x + y*w) << rem_depth;
2325
    int trx= (x+1)<<rem_depth;
2326
    BlockNode *left  = x ? &s->block[index-1] : &null_block;
2327
    BlockNode *top   = y ? &s->block[index-w] : &null_block;
2328
    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
2329
    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2330
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2331

    
2332
    if(s->keyframe){
2333
        set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2334
        return;
2335
    }
2336

    
2337
    if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2338
        int type;
2339
        int l = left->color[0];
2340
        int cb= left->color[1];
2341
        int cr= left->color[2];
2342
        int mx= mid_pred(left->mx, top->mx, tr->mx);
2343
        int my= mid_pred(left->my, top->my, tr->my);
2344
        int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2345
        int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2346

    
2347
        type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2348

    
2349
        if(type){
2350
            l += get_symbol(&s->c, &s->block_state[32], 1);
2351
            cb+= get_symbol(&s->c, &s->block_state[64], 1);
2352
            cr+= get_symbol(&s->c, &s->block_state[96], 1);
2353
        }else{
2354
            mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2355
            my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2356
        }
2357
        set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2358
    }else{
2359
        decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2360
        decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2361
        decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2362
        decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2363
    }
2364
}
2365

    
2366
static void encode_blocks(SnowContext *s){
2367
    int x, y;
2368
    int w= s->b_width;
2369
    int h= s->b_height;
2370

    
2371
    if(s->avctx->me_method == ME_ITER && !s->keyframe)
2372
        iterative_me(s);
2373

    
2374
    for(y=0; y<h; y++){
2375
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2376
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2377
            return;
2378
        }
2379
        for(x=0; x<w; x++){
2380
            if(s->avctx->me_method == ME_ITER)
2381
                encode_q_branch2(s, 0, x, y);
2382
            else
2383
                encode_q_branch (s, 0, x, y);
2384
        }
2385
    }
2386
}
2387

    
2388
static void decode_blocks(SnowContext *s){
2389
    int x, y;
2390
    int w= s->b_width;
2391
    int h= s->b_height;
2392

    
2393
    for(y=0; y<h; y++){
2394
        for(x=0; x<w; x++){
2395
            decode_q_branch(s, 0, x, y);
2396
        }
2397
    }
2398
}
2399

    
2400
static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2401
    int x, y;
2402
START_TIMER
2403
    for(y=0; y < b_h+5; y++){
2404
        for(x=0; x < b_w; x++){
2405
            int a0= src[x    ];
2406
            int a1= src[x + 1];
2407
            int a2= src[x + 2];
2408
            int a3= src[x + 3];
2409
            int a4= src[x + 4];
2410
            int a5= src[x + 5];
2411
//            int am= 9*(a1+a2) - (a0+a3);
2412
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2413
//            int am= 18*(a2+a3) - 2*(a1+a4);
2414
//             int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2415
//             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2416

    
2417
//            if(b_w==16) am= 8*(a1+a2);
2418

    
2419
            if(dx<8) am = (32*a2*( 8-dx) +    am* dx    + 128)>>8;
2420
            else     am = (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2421

    
2422
            /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2423
            if(am&(~255)) am= ~(am>>31);
2424

    
2425
            tmp[x] = am;
2426

    
2427
/*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
2428
            else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
2429
            else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
2430
            else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2431
        }
2432
        tmp += stride;
2433
        src += stride;
2434
    }
2435
    tmp -= (b_h+5)*stride;
2436

    
2437
    for(y=0; y < b_h; y++){
2438
        for(x=0; x < b_w; x++){
2439
            int a0= tmp[x + 0*stride];
2440
            int a1= tmp[x + 1*stride];
2441
            int a2= tmp[x + 2*stride];
2442
            int a3= tmp[x + 3*stride];
2443
            int a4= tmp[x + 4*stride];
2444
            int a5= tmp[x + 5*stride];
2445
            int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2446
//            int am= 18*(a2+a3) - 2*(a1+a4);
2447
/*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2448
            int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2449

    
2450
//            if(b_w==16) am= 8*(a1+a2);
2451

    
2452
            if(dy<8) am =  (32*a2*( 8-dy) +    am* dy    + 128)>>8;
2453
            else     am = (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2454

    
2455
            if(am&(~255)) am= ~(am>>31);
2456

    
2457
            dst[x] = am;
2458
/*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
2459
            else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
2460
            else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
2461
            else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2462
        }
2463
        dst += stride;
2464
        tmp += stride;
2465
    }
2466
STOP_TIMER("mc_block")
2467
}
2468

    
2469
#define mca(dx,dy,b_w)\
2470
static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2471
    uint8_t tmp[stride*(b_w+5)];\
2472
    assert(h==b_w);\
2473
    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2474
}
2475

    
2476
mca( 0, 0,16)
2477
mca( 8, 0,16)
2478
mca( 0, 8,16)
2479
mca( 8, 8,16)
2480
mca( 0, 0,8)
2481
mca( 8, 0,8)
2482
mca( 0, 8,8)
2483
mca( 8, 8,8)
2484

    
2485
static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2486
    if(block->type & BLOCK_INTRA){
2487
        int x, y;
2488
        const int color= block->color[plane_index];
2489
        for(y=0; y < b_h; y++){
2490
            for(x=0; x < b_w; x++){
2491
                dst[x + y*stride]= color;
2492
            }
2493
        }
2494
    }else{
2495
        const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
2496
        int mx= block->mx*scale;
2497
        int my= block->my*scale;
2498
        const int dx= mx&15;
2499
        const int dy= my&15;
2500
        sx += (mx>>4) - 2;
2501
        sy += (my>>4) - 2;
2502
        src += sx + sy*stride;
2503
        if(   (unsigned)sx >= w - b_w - 4
2504
           || (unsigned)sy >= h - b_h - 4){
2505
            ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2506
            src= tmp + MB_SIZE;
2507
        }
2508
        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2509
            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2510
        else
2511
            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2512
    }
2513
}
2514

    
2515
//FIXME name clenup (b_w, block_w, b_width stuff)
2516
static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2517
    DWTELEM * dst = NULL;
2518
    const int b_width = s->b_width  << s->block_max_depth;
2519
    const int b_height= s->b_height << s->block_max_depth;
2520
    const int b_stride= b_width;
2521
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2522
    BlockNode *rt= lt+1;
2523
    BlockNode *lb= lt+b_stride;
2524
    BlockNode *rb= lb+1;
2525
    uint8_t *block[4];
2526
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2527
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2528
    uint8_t *ptmp;
2529
    int x,y;
2530

    
2531
    if(b_x<0){
2532
        lt= rt;
2533
        lb= rb;
2534
    }else if(b_x + 1 >= b_width){
2535
        rt= lt;
2536
        rb= lb;
2537
    }
2538
    if(b_y<0){
2539
        lt= lb;
2540
        rt= rb;
2541
    }else if(b_y + 1 >= b_height){
2542
        lb= lt;
2543
        rb= rt;
2544
    }
2545

    
2546
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2547
        obmc -= src_x;
2548
        b_w += src_x;
2549
        src_x=0;
2550
    }else if(src_x + b_w > w){
2551
        b_w = w - src_x;
2552
    }
2553
    if(src_y<0){
2554
        obmc -= src_y*obmc_stride;
2555
        b_h += src_y;
2556
        src_y=0;
2557
    }else if(src_y + b_h> h){
2558
        b_h = h - src_y;
2559
    }
2560

    
2561
    if(b_w<=0 || b_h<=0) return;
2562

    
2563
assert(src_stride > 2*MB_SIZE + 5);
2564
//    old_dst += src_x + src_y*dst_stride;
2565
    dst8+= src_x + src_y*src_stride;
2566
//    src += src_x + src_y*src_stride;
2567

    
2568
    ptmp= tmp + 3*tmp_step;
2569
    block[0]= ptmp;
2570
    ptmp+=tmp_step;
2571
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2572

    
2573
    if(same_block(lt, rt)){
2574
        block[1]= block[0];
2575
    }else{
2576
        block[1]= ptmp;
2577
        ptmp+=tmp_step;
2578
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2579
    }
2580

    
2581
    if(same_block(lt, lb)){
2582
        block[2]= block[0];
2583
    }else if(same_block(rt, lb)){
2584
        block[2]= block[1];
2585
    }else{
2586
        block[2]= ptmp;
2587
        ptmp+=tmp_step;
2588
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2589
    }
2590

    
2591
    if(same_block(lt, rb) ){
2592
        block[3]= block[0];
2593
    }else if(same_block(rt, rb)){
2594
        block[3]= block[1];
2595
    }else if(same_block(lb, rb)){
2596
        block[3]= block[2];
2597
    }else{
2598
        block[3]= ptmp;
2599
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2600
    }
2601
#if 0
2602
    for(y=0; y<b_h; y++){
2603
        for(x=0; x<b_w; x++){
2604
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2605
            if(add) dst[x + y*dst_stride] += v;
2606
            else    dst[x + y*dst_stride] -= v;
2607
        }
2608
    }
2609
    for(y=0; y<b_h; y++){
2610
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2611
        for(x=0; x<b_w; x++){
2612
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2613
            if(add) dst[x + y*dst_stride] += v;
2614
            else    dst[x + y*dst_stride] -= v;
2615
        }
2616
    }
2617
    for(y=0; y<b_h; y++){
2618
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2619
        for(x=0; x<b_w; x++){
2620
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2621
            if(add) dst[x + y*dst_stride] += v;
2622
            else    dst[x + y*dst_stride] -= v;
2623
        }
2624
    }
2625
    for(y=0; y<b_h; y++){
2626
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2627
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2628
        for(x=0; x<b_w; x++){
2629
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2630
            if(add) dst[x + y*dst_stride] += v;
2631
            else    dst[x + y*dst_stride] -= v;
2632
        }
2633
    }
2634
#else
2635
{
2636

    
2637
    START_TIMER
2638

    
2639
    for(y=0; y<b_h; y++){
2640
        //FIXME ugly missue of obmc_stride
2641
        uint8_t *obmc1= obmc + y*obmc_stride;
2642
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2643
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2644
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2645
        dst = slice_buffer_get_line(sb, src_y + y);
2646
        for(x=0; x<b_w; x++){
2647
            int v=   obmc1[x] * block[3][x + y*src_stride]
2648
                    +obmc2[x] * block[2][x + y*src_stride]
2649
                    +obmc3[x] * block[1][x + y*src_stride]
2650
                    +obmc4[x] * block[0][x + y*src_stride];
2651

    
2652
            v <<= 8 - LOG2_OBMC_MAX;
2653
            if(FRAC_BITS != 8){
2654
                v += 1<<(7 - FRAC_BITS);
2655
                v >>= 8 - FRAC_BITS;
2656
            }
2657
            if(add){
2658
//                v += old_dst[x + y*dst_stride];
2659
                v += dst[x + src_x];
2660
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2661
                if(v&(~255)) v= ~(v>>31);
2662
                dst8[x + y*src_stride] = v;
2663
            }else{
2664
//                old_dst[x + y*dst_stride] -= v;
2665
                dst[x + src_x] -= v;
2666
            }
2667
        }
2668
    }
2669
        STOP_TIMER("Inner add y block")
2670
}
2671
#endif
2672
}
2673

    
2674
//FIXME name clenup (b_w, block_w, b_width stuff)
2675
static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2676
    const int b_width = s->b_width  << s->block_max_depth;
2677
    const int b_height= s->b_height << s->block_max_depth;
2678
    const int b_stride= b_width;
2679
    BlockNode *lt= &s->block[b_x + b_y*b_stride];
2680
    BlockNode *rt= lt+1;
2681
    BlockNode *lb= lt+b_stride;
2682
    BlockNode *rb= lb+1;
2683
    uint8_t *block[4];
2684
    int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2685
    uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2686
    uint8_t *ptmp;
2687
    int x,y;
2688

    
2689
    if(b_x<0){
2690
        lt= rt;
2691
        lb= rb;
2692
    }else if(b_x + 1 >= b_width){
2693
        rt= lt;
2694
        rb= lb;
2695
    }
2696
    if(b_y<0){
2697
        lt= lb;
2698
        rt= rb;
2699
    }else if(b_y + 1 >= b_height){
2700
        lb= lt;
2701
        rb= rt;
2702
    }
2703

    
2704
    if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2705
        obmc -= src_x;
2706
        b_w += src_x;
2707
        src_x=0;
2708
    }else if(src_x + b_w > w){
2709
        b_w = w - src_x;
2710
    }
2711
    if(src_y<0){
2712
        obmc -= src_y*obmc_stride;
2713
        b_h += src_y;
2714
        src_y=0;
2715
    }else if(src_y + b_h> h){
2716
        b_h = h - src_y;
2717
    }
2718

    
2719
    if(b_w<=0 || b_h<=0) return;
2720

    
2721
assert(src_stride > 2*MB_SIZE + 5);
2722
    dst += src_x + src_y*dst_stride;
2723
    dst8+= src_x + src_y*src_stride;
2724
//    src += src_x + src_y*src_stride;
2725

    
2726
    ptmp= tmp + 3*tmp_step;
2727
    block[0]= ptmp;
2728
    ptmp+=tmp_step;
2729
    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2730

    
2731
    if(same_block(lt, rt)){
2732
        block[1]= block[0];
2733
    }else{
2734
        block[1]= ptmp;
2735
        ptmp+=tmp_step;
2736
        pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2737
    }
2738

    
2739
    if(same_block(lt, lb)){
2740
        block[2]= block[0];
2741
    }else if(same_block(rt, lb)){
2742
        block[2]= block[1];
2743
    }else{
2744
        block[2]= ptmp;
2745
        ptmp+=tmp_step;
2746
        pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2747
    }
2748

    
2749
    if(same_block(lt, rb) ){
2750
        block[3]= block[0];
2751
    }else if(same_block(rt, rb)){
2752
        block[3]= block[1];
2753
    }else if(same_block(lb, rb)){
2754
        block[3]= block[2];
2755
    }else{
2756
        block[3]= ptmp;
2757
        pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2758
    }
2759
#if 0
2760
    for(y=0; y<b_h; y++){
2761
        for(x=0; x<b_w; x++){
2762
            int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2763
            if(add) dst[x + y*dst_stride] += v;
2764
            else    dst[x + y*dst_stride] -= v;
2765
        }
2766
    }
2767
    for(y=0; y<b_h; y++){
2768
        uint8_t *obmc2= obmc + (obmc_stride>>1);
2769
        for(x=0; x<b_w; x++){
2770
            int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2771
            if(add) dst[x + y*dst_stride] += v;
2772
            else    dst[x + y*dst_stride] -= v;
2773
        }
2774
    }
2775
    for(y=0; y<b_h; y++){
2776
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2777
        for(x=0; x<b_w; x++){
2778
            int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2779
            if(add) dst[x + y*dst_stride] += v;
2780
            else    dst[x + y*dst_stride] -= v;
2781
        }
2782
    }
2783
    for(y=0; y<b_h; y++){
2784
        uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2785
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2786
        for(x=0; x<b_w; x++){
2787
            int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2788
            if(add) dst[x + y*dst_stride] += v;
2789
            else    dst[x + y*dst_stride] -= v;
2790
        }
2791
    }
2792
#else
2793
    for(y=0; y<b_h; y++){
2794
        //FIXME ugly missue of obmc_stride
2795
        uint8_t *obmc1= obmc + y*obmc_stride;
2796
        uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2797
        uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2798
        uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2799
        for(x=0; x<b_w; x++){
2800
            int v=   obmc1[x] * block[3][x + y*src_stride]
2801
                    +obmc2[x] * block[2][x + y*src_stride]
2802
                    +obmc3[x] * block[1][x + y*src_stride]
2803
                    +obmc4[x] * block[0][x + y*src_stride];
2804

    
2805
            v <<= 8 - LOG2_OBMC_MAX;
2806
            if(FRAC_BITS != 8){
2807
                v += 1<<(7 - FRAC_BITS);
2808
                v >>= 8 - FRAC_BITS;
2809
            }
2810
            if(add){
2811
                v += dst[x + y*dst_stride];
2812
                v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2813
                if(v&(~255)) v= ~(v>>31);
2814
                dst8[x + y*src_stride] = v;
2815
            }else{
2816
                dst[x + y*dst_stride] -= v;
2817
            }
2818
        }
2819
    }
2820
#endif
2821
}
2822

    
2823
static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2824
    Plane *p= &s->plane[plane_index];
2825
    const int mb_w= s->b_width  << s->block_max_depth;
2826
    const int mb_h= s->b_height << s->block_max_depth;
2827
    int x, y, mb_x;
2828
    int block_size = MB_SIZE >> s->block_max_depth;
2829
    int block_w    = plane_index ? block_size/2 : block_size;
2830
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2831
    int obmc_stride= plane_index ? block_size : 2*block_size;
2832
    int ref_stride= s->current_picture.linesize[plane_index];
2833
    uint8_t *ref  = s->last_picture.data[plane_index];
2834
    uint8_t *dst8= s->current_picture.data[plane_index];
2835
    int w= p->width;
2836
    int h= p->height;
2837
    START_TIMER
2838

    
2839
    if(s->keyframe || (s->avctx->debug&512)){
2840
        if(mb_y==mb_h)
2841
            return;
2842

    
2843
        if(add){
2844
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2845
            {
2846
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2847
                DWTELEM * line = sb->line[y];
2848
                for(x=0; x<w; x++)
2849
                {
2850
//                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2851
                    int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2852
                    v >>= FRAC_BITS;
2853
                    if(v&(~255)) v= ~(v>>31);
2854
                    dst8[x + y*ref_stride]= v;
2855
                }
2856
            }
2857
        }else{
2858
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2859
            {
2860
//                DWTELEM * line = slice_buffer_get_line(sb, y);
2861
                DWTELEM * line = sb->line[y];
2862
                for(x=0; x<w; x++)
2863
                {
2864
                    line[x] -= 128 << FRAC_BITS;
2865
//                    buf[x + y*w]-= 128<<FRAC_BITS;
2866
                }
2867
            }
2868
        }
2869

    
2870
        return;
2871
    }
2872

    
2873
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2874
            START_TIMER
2875

    
2876
            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
2877
                       block_w*mb_x - block_w/2,
2878
                       block_w*mb_y - block_w/2,
2879
                       block_w, block_w,
2880
                       w, h,
2881
                       w, ref_stride, obmc_stride,
2882
                       mb_x - 1, mb_y - 1,
2883
                       add, plane_index);
2884

    
2885
            STOP_TIMER("add_yblock")
2886
        }
2887

    
2888
    STOP_TIMER("predict_slice")
2889
}
2890

    
2891
static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2892
    Plane *p= &s->plane[plane_index];
2893
    const int mb_w= s->b_width  << s->block_max_depth;
2894
    const int mb_h= s->b_height << s->block_max_depth;
2895
    int x, y, mb_x;
2896
    int block_size = MB_SIZE >> s->block_max_depth;
2897
    int block_w    = plane_index ? block_size/2 : block_size;
2898
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2899
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2900
    int ref_stride= s->current_picture.linesize[plane_index];
2901
    uint8_t *ref  = s->last_picture.data[plane_index];
2902
    uint8_t *dst8= s->current_picture.data[plane_index];
2903
    int w= p->width;
2904
    int h= p->height;
2905
    START_TIMER
2906

    
2907
    if(s->keyframe || (s->avctx->debug&512)){
2908
        if(mb_y==mb_h)
2909
            return;
2910

    
2911
        if(add){
2912
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2913
                for(x=0; x<w; x++){
2914
                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2915
                    v >>= FRAC_BITS;
2916
                    if(v&(~255)) v= ~(v>>31);
2917
                    dst8[x + y*ref_stride]= v;
2918
                }
2919
            }
2920
        }else{
2921
            for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2922
                for(x=0; x<w; x++){
2923
                    buf[x + y*w]-= 128<<FRAC_BITS;
2924
                }
2925
            }
2926
        }
2927

    
2928
        return;
2929
    }
2930

    
2931
        for(mb_x=0; mb_x<=mb_w; mb_x++){
2932
            START_TIMER
2933

    
2934
            add_yblock(s, buf, dst8, ref, obmc,
2935
                       block_w*mb_x - block_w/2,
2936
                       block_w*mb_y - block_w/2,
2937
                       block_w, block_w,
2938
                       w, h,
2939
                       w, ref_stride, obmc_stride,
2940
                       mb_x - 1, mb_y - 1,
2941
                       add, plane_index);
2942

    
2943
            STOP_TIMER("add_yblock")
2944
        }
2945

    
2946
    STOP_TIMER("predict_slice")
2947
}
2948

    
2949
static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2950
    const int mb_h= s->b_height << s->block_max_depth;
2951
    int mb_y;
2952
    for(mb_y=0; mb_y<=mb_h; mb_y++)
2953
        predict_slice(s, buf, plane_index, add, mb_y);
2954
}
2955

    
2956
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2957
    int i, x2, y2;
2958
    Plane *p= &s->plane[plane_index];
2959
    const int block_size = MB_SIZE >> s->block_max_depth;
2960
    const int block_w    = plane_index ? block_size/2 : block_size;
2961
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2962
    const int obmc_stride= plane_index ? block_size : 2*block_size;
2963
    const int ref_stride= s->current_picture.linesize[plane_index];
2964
    uint8_t *ref= s->   last_picture.data[plane_index];
2965
    uint8_t *dst= s->current_picture.data[plane_index];
2966
    uint8_t *src= s-> input_picture.data[plane_index];
2967
    const static DWTELEM zero_dst[4096]; //FIXME
2968
    const int b_stride = s->b_width << s->block_max_depth;
2969
    const int w= p->width;
2970
    const int h= p->height;
2971
    int index= mb_x + mb_y*b_stride;
2972
    BlockNode *b= &s->block[index];
2973
    BlockNode backup= *b;
2974
    int ab=0;
2975
    int aa=0;
2976

    
2977
    b->type|= BLOCK_INTRA;
2978
    b->color[plane_index]= 0;
2979

    
2980
    for(i=0; i<4; i++){
2981
        int mb_x2= mb_x + (i &1) - 1;
2982
        int mb_y2= mb_y + (i>>1) - 1;
2983
        int x= block_w*mb_x2 + block_w/2;
2984
        int y= block_w*mb_y2 + block_w/2;
2985

    
2986
        add_yblock(s, zero_dst, dst, ref, obmc, 
2987
                    x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index);
2988

    
2989
        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2990
            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2991
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2992
                int obmc_v= obmc[index];
2993
                if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2994
                if(x<0) obmc_v += obmc[index + block_w];
2995
                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2996
                if(x+block_w>w) obmc_v += obmc[index - block_w];
2997
                //FIXME precalc this or simplify it somehow else
2998

    
2999
                ab += (src[x2 + y2*ref_stride] - dst[x2 + y2*ref_stride]) * obmc_v;
3000
                aa += obmc_v * obmc_v; //FIXME precalclate this
3001
            }
3002
        }
3003
    }
3004
    *b= backup;
3005

    
3006
    return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
3007
}
3008

    
3009
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3010
    int i, y2;
3011
    Plane *p= &s->plane[plane_index];
3012
    const int block_size = MB_SIZE >> s->block_max_depth;
3013
    const int block_w    = plane_index ? block_size/2 : block_size;
3014
    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3015
    const int obmc_stride= plane_index ? block_size : 2*block_size;
3016
    const int ref_stride= s->current_picture.linesize[plane_index];
3017
    uint8_t *ref= s->   last_picture.data[plane_index];
3018
    uint8_t *dst= s->current_picture.data[plane_index];
3019
    uint8_t *src= s-> input_picture.data[plane_index];
3020
    const static DWTELEM zero_dst[4096]; //FIXME
3021
    const int b_stride = s->b_width << s->block_max_depth;
3022
    const int b_height = s->b_height<< s->block_max_depth;
3023
    const int w= p->width;
3024
    const int h= p->height;
3025
    int distortion= 0;
3026
    int rate= 0;
3027
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3028

    
3029
    for(i=0; i<4; i++){
3030
        int mb_x2= mb_x + (i &1) - 1;
3031
        int mb_y2= mb_y + (i>>1) - 1;
3032
        int x= block_w*mb_x2 + block_w/2;
3033
        int y= block_w*mb_y2 + block_w/2;
3034

    
3035
        add_yblock(s, zero_dst, dst, ref, obmc, 
3036
                    x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index);
3037

    
3038
        //FIXME find a cleaner/simpler way to skip the outside stuff
3039
        for(y2= y; y2<0; y2++)
3040
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3041
        for(y2= h; y2<y+block_w; y2++)
3042
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3043
        if(x<0){
3044
            for(y2= y; y2<y+block_w; y2++)
3045
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3046
        }
3047
        if(x+block_w > w){
3048
            for(y2= y; y2<y+block_w; y2++)
3049
                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3050
        }
3051

    
3052
        assert(block_w== 8 || block_w==16);
3053
        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3054
    }
3055

    
3056
    if(plane_index==0){
3057
        for(i=0; i<4; i++){
3058
/* ..RRr
3059
 * .RXx.
3060
 * rxx..
3061
 */
3062
            int x= mb_x + (i&1) - (i>>1);
3063
            int y= mb_y + (i>>1);
3064
            int index= x + y*b_stride;
3065
            BlockNode *b     = &s->block[index];
3066
            BlockNode *left  = x ? &s->block[index-1] : &null_block;
3067
            BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
3068
            BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
3069
            BlockNode *tr    = y && x+1<b_stride ? &s->block[index-b_stride+1] : tl;
3070
            int dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3071
            int dmy= b->my - mid_pred(left->my, top->my, tr->my);
3072
//        int mx_context= av_log2(2*ABS(left->mx - top->mx));
3073
//        int my_context= av_log2(2*ABS(left->my - top->my));
3074

    
3075
            if(x<0 || x>=b_stride || y>=b_height)
3076
                continue;
3077
/*
3078
1            0      0
3079
01X          1-2    1
3080
001XX        3-6    2-3
3081
0001XXX      7-14   4-7
3082
00001XXXX   15-30   8-15
3083
*/
3084
//FIXME try accurate rate
3085
//FIXME intra and inter predictors if surrounding blocks arent the same type
3086
            if(b->type & BLOCK_INTRA){
3087
                rate += 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3088
                            + av_log2(2*ABS(left->color[1] - b->color[1]))
3089
                            + av_log2(2*ABS(left->color[2] - b->color[2])));
3090
            }else
3091
                rate += 2*(1 + av_log2(2*ABS(dmx))
3092
                             + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
3093
        }
3094
    }
3095

    
3096
    return distortion + rate*penalty_factor;
3097
}
3098

    
3099
static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, int *best_rd){
3100
    const int b_stride= s->b_width << s->block_max_depth;
3101
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3102
    BlockNode backup= *block;
3103
    int rd, index, value;
3104

    
3105
    assert(mb_x>=0 && mb_y>=0);
3106
    assert(mb_x<b_stride);
3107

    
3108
    if(intra){
3109
        block->color[0] = p[0];
3110
        block->color[1] = p[1];
3111
        block->color[2] = p[2];
3112
        block->type |= BLOCK_INTRA;
3113
    }else{
3114
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3115
        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
3116
        if(s->me_cache[index] == value)
3117
            return 0;
3118
        s->me_cache[index]= value;
3119

    
3120
        block->mx= p[0];
3121
        block->my= p[1];
3122
        block->type &= ~BLOCK_INTRA;
3123
    }
3124

    
3125
    rd= get_block_rd(s, mb_x, mb_y, 0);
3126

    
3127
//FIXME chroma
3128
    if(rd < *best_rd){
3129
        *best_rd= rd;
3130
        return 1;
3131
    }else{
3132
        *block= backup;
3133
        return 0;
3134
    }
3135
}
3136

    
3137
static void iterative_me(SnowContext *s){
3138
    int pass, mb_x, mb_y;
3139
    const int b_width = s->b_width  << s->block_max_depth;
3140
    const int b_height= s->b_height << s->block_max_depth;
3141
    const int b_stride= b_width;
3142
    int color[3];
3143

    
3144
    for(pass=0; pass<50; pass++){
3145
        int change= 0;
3146

    
3147
        for(mb_y= 0; mb_y<b_height; mb_y++){
3148
            for(mb_x= 0; mb_x<b_width; mb_x++){
3149
                int dia_change, i, j;
3150
                int best_rd= INT_MAX;
3151
                BlockNode backup;
3152
                const int index= mb_x + mb_y * b_stride;
3153
                BlockNode *block= &s->block[index];
3154
                BlockNode *tb =                 mb_y          ? &s->block[index-b_stride  ] : &null_block;
3155
                BlockNode *lb = mb_x                          ? &s->block[index         -1] : &null_block;
3156
                BlockNode *rb = mb_x<b_width                  ? &s->block[index         +1] : &null_block;
3157
                BlockNode *bb =                 mb_y<b_height ? &s->block[index+b_stride  ] : &null_block;
3158
                BlockNode *tlb= mb_x         && mb_y          ? &s->block[index-b_stride-1] : &null_block;
3159
                BlockNode *trb= mb_x<b_width && mb_y          ? &s->block[index-b_stride+1] : &null_block;
3160
                BlockNode *blb= mb_x         && mb_y<b_height ? &s->block[index+b_stride-1] : &null_block;
3161
                BlockNode *brb= mb_x<b_width && mb_y<b_height ? &s->block[index+b_stride+1] : &null_block;
3162

    
3163
                if(pass && (block->type & BLOCK_OPT))
3164
                    continue;
3165
                block->type |= BLOCK_OPT;
3166

    
3167
                backup= *block;
3168

    
3169
                if(!s->me_cache_generation)
3170
                    memset(s->me_cache, 0, sizeof(s->me_cache));
3171
                s->me_cache_generation += 1<<22;
3172

    
3173
                // get previous score (cant be cached due to OBMC)
3174
                check_block(s, mb_x, mb_y, (int[2]){block->mx, block->my}, 0, &best_rd);
3175
                check_block(s, mb_x, mb_y, (int[2]){0, 0}, 0, &best_rd);
3176
                check_block(s, mb_x, mb_y, (int[2]){tb->mx, tb->my}, 0, &best_rd);
3177
                check_block(s, mb_x, mb_y, (int[2]){lb->mx, lb->my}, 0, &best_rd);
3178
                check_block(s, mb_x, mb_y, (int[2]){rb->mx, rb->my}, 0, &best_rd);
3179
                check_block(s, mb_x, mb_y, (int[2]){bb->mx, bb->my}, 0, &best_rd);
3180

    
3181
                /* fullpel ME */
3182
                //FIXME avoid subpel interpol / round to nearest integer
3183
                do{
3184
                    dia_change=0;
3185
                    for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3186
                        for(j=0; j<i; j++){
3187
                            dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+4*(i-j), block->my+(4*j)}, 0, &best_rd);
3188
                            dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx-4*(i-j), block->my-(4*j)}, 0, &best_rd);
3189
                            dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+4*(i-j), block->my-(4*j)}, 0, &best_rd);
3190
                            dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx-4*(i-j), block->my+(4*j)}, 0, &best_rd);
3191
                        }
3192
                    }
3193
                }while(dia_change);
3194
                /* subpel ME */
3195
                do{
3196
                    static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3197
                    dia_change=0;
3198
                    for(i=0; i<8; i++)
3199
                        dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+square[i][0], block->my+square[i][1]}, 0, &best_rd);
3200
                }while(dia_change);
3201
                //FIXME or try the standard 2 pass qpel or similar
3202

    
3203
                for(i=0; i<3; i++){
3204
                    color[i]= get_dc(s, mb_x, mb_y, i);
3205
                }
3206
                check_block(s, mb_x, mb_y, color, 1, &best_rd);
3207
                //FIXME RD style color selection
3208

    
3209
                if(!same_block(block, &backup)){
3210
                    if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3211
                    if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3212
                    if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3213
                    if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3214
                    if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3215
                    if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3216
                    if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3217
                    if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
3218
                    change ++;
3219
                }
3220
            }
3221
        }
3222
        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3223
        if(!change)
3224
            break;
3225
    }
3226
}
3227

    
3228
static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3229
    const int level= b->level;
3230
    const int w= b->width;
3231
    const int h= b->height;
3232
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3233
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3234
    int x,y, thres1, thres2;
3235
//    START_TIMER
3236

    
3237
    if(s->qlog == LOSSLESS_QLOG) return;
3238

    
3239
    bias= bias ? 0 : (3*qmul)>>3;
3240
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3241
    thres2= 2*thres1;
3242

    
3243
    if(!bias){
3244
        for(y=0; y<h; y++){
3245
            for(x=0; x<w; x++){
3246
                int i= src[x + y*stride];
3247

    
3248
                if((unsigned)(i+thres1) > thres2){
3249
                    if(i>=0){
3250
                        i<<= QEXPSHIFT;
3251
                        i/= qmul; //FIXME optimize
3252
                        src[x + y*stride]=  i;
3253
                    }else{
3254
                        i= -i;
3255
                        i<<= QEXPSHIFT;
3256
                        i/= qmul; //FIXME optimize
3257
                        src[x + y*stride]= -i;
3258
                    }
3259
                }else
3260
                    src[x + y*stride]= 0;
3261
            }
3262
        }
3263
    }else{
3264
        for(y=0; y<h; y++){
3265
            for(x=0; x<w; x++){
3266
                int i= src[x + y*stride];
3267

    
3268
                if((unsigned)(i+thres1) > thres2){
3269
                    if(i>=0){
3270
                        i<<= QEXPSHIFT;
3271
                        i= (i + bias) / qmul; //FIXME optimize
3272
                        src[x + y*stride]=  i;
3273
                    }else{
3274
                        i= -i;
3275
                        i<<= QEXPSHIFT;
3276
                        i= (i + bias) / qmul; //FIXME optimize
3277
                        src[x + y*stride]= -i;
3278
                    }
3279
                }else
3280
                    src[x + y*stride]= 0;
3281
            }
3282
        }
3283
    }
3284
    if(level+1 == s->spatial_decomposition_count){
3285
//        STOP_TIMER("quantize")
3286
    }
3287
}
3288

    
3289
static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3290
    const int w= b->width;
3291
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3292
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3293
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3294
    int x,y;
3295
    START_TIMER
3296

    
3297
    if(s->qlog == LOSSLESS_QLOG) return;
3298

    
3299
    for(y=start_y; y<end_y; y++){
3300
//        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3301
        DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3302
        for(x=0; x<w; x++){
3303
            int i= line[x];
3304
            if(i<0){
3305
                line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3306
            }else if(i>0){
3307
                line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3308
            }
3309
        }
3310
    }
3311
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3312
        STOP_TIMER("dquant")
3313
    }
3314
}
3315

    
3316
static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3317
    const int w= b->width;
3318
    const int h= b->height;
3319
    const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3320
    const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3321
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3322
    int x,y;
3323
    START_TIMER
3324

    
3325
    if(s->qlog == LOSSLESS_QLOG) return;
3326

    
3327
    for(y=0; y<h; y++){
3328
        for(x=0; x<w; x++){
3329
            int i= src[x + y*stride];
3330
            if(i<0){
3331
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3332
            }else if(i>0){
3333
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
3334
            }
3335
        }
3336
    }
3337
    if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3338
        STOP_TIMER("dquant")
3339
    }
3340
}
3341

    
3342
static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3343
    const int w= b->width;
3344
    const int h= b->height;
3345
    int x,y;
3346

    
3347
    for(y=h-1; y>=0; y--){
3348
        for(x=w-1; x>=0; x--){
3349
            int i= x + y*stride;
3350

    
3351
            if(x){
3352
                if(use_median){
3353
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3354
                    else  src[i] -= src[i - 1];
3355
                }else{
3356
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3357
                    else  src[i] -= src[i - 1];
3358
                }
3359
            }else{
3360
                if(y) src[i] -= src[i - stride];
3361
            }
3362
        }
3363
    }
3364
}
3365

    
3366
static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3367
    const int w= b->width;
3368
    int x,y;
3369

    
3370
//    START_TIMER
3371

    
3372
    DWTELEM * line;
3373
    DWTELEM * prev;
3374

    
3375
    if (start_y != 0)
3376
        line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3377

    
3378
    for(y=start_y; y<end_y; y++){
3379
        prev = line;
3380
//        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3381
        line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3382
        for(x=0; x<w; x++){
3383
            if(x){
3384
                if(use_median){
3385
                    if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3386
                    else  line[x] += line[x - 1];
3387
                }else{
3388
                    if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3389
                    else  line[x] += line[x - 1];
3390
                }
3391
            }else{
3392
                if(y) line[x] += prev[x];
3393
            }
3394
        }
3395
    }
3396

    
3397
//    STOP_TIMER("correlate")
3398
}
3399

    
3400
static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3401
    const int w= b->width;
3402
    const int h= b->height;
3403
    int x,y;
3404

    
3405
    for(y=0; y<h; y++){
3406
        for(x=0; x<w; x++){
3407
            int i= x + y*stride;
3408

    
3409
            if(x){
3410
                if(use_median){
3411
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3412
                    else  src[i] += src[i - 1];
3413
                }else{
3414
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3415
                    else  src[i] += src[i - 1];
3416
                }
3417
            }else{
3418
                if(y) src[i] += src[i - stride];
3419
            }
3420
        }
3421
    }
3422
}
3423

    
3424
static void encode_header(SnowContext *s){
3425
    int plane_index, level, orientation;
3426
    uint8_t kstate[32];
3427

    
3428
    memset(kstate, MID_STATE, sizeof(kstate));
3429

    
3430
    put_rac(&s->c, kstate, s->keyframe);
3431
    if(s->keyframe || s->always_reset)
3432
        reset_contexts(s);
3433
    if(s->keyframe){
3434
        put_symbol(&s->c, s->header_state, s->version, 0);
3435
        put_rac(&s->c, s->header_state, s->always_reset);
3436
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3437
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3438
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3439
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3440
        put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3441
        put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3442
        put_rac(&s->c, s->header_state, s->spatial_scalability);
3443
//        put_rac(&s->c, s->header_state, s->rate_scalability);
3444

    
3445
        for(plane_index=0; plane_index<2; plane_index++){
3446
            for(level=0; level<s->spatial_decomposition_count; level++){
3447
                for(orientation=level ? 1:0; orientation<4; orientation++){
3448
                    if(orientation==2) continue;
3449
                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3450
                }
3451
            }
3452
        }
3453
    }
3454
    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3455
    put_symbol(&s->c, s->header_state, s->qlog, 1);
3456
    put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3457
    put_symbol(&s->c, s->header_state, s->qbias, 1);
3458
    put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3459
}
3460

    
3461
static int decode_header(SnowContext *s){
3462
    int plane_index, level, orientation;
3463
    uint8_t kstate[32];
3464

    
3465
    memset(kstate, MID_STATE, sizeof(kstate));
3466

    
3467
    s->keyframe= get_rac(&s->c, kstate);
3468
    if(s->keyframe || s->always_reset)
3469
        reset_contexts(s);
3470
    if(s->keyframe){
3471
        s->version= get_symbol(&s->c, s->header_state, 0);
3472
        if(s->version>0){
3473
            av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3474
            return -1;
3475
        }
3476
        s->always_reset= get_rac(&s->c, s->header_state);
3477
        s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3478
        s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3479
        s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3480
        s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3481
        s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3482
        s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3483
        s->spatial_scalability= get_rac(&s->c, s->header_state);
3484
//        s->rate_scalability= get_rac(&s->c, s->header_state);
3485

    
3486
        for(plane_index=0; plane_index<3; plane_index++){
3487
            for(level=0; level<s->spatial_decomposition_count; level++){
3488
                for(orientation=level ? 1:0; orientation<4; orientation++){
3489
                    int q;
3490
                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3491
                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3492
                    else                    q= get_symbol(&s->c, s->header_state, 1);
3493
                    s->plane[plane_index].band[level][orientation].qlog= q;
3494
                }
3495
            }
3496
        }
3497
    }
3498

    
3499
    s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3500
    if(s->spatial_decomposition_type > 2){
3501
        av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3502
        return -1;
3503
    }
3504

    
3505
    s->qlog= get_symbol(&s->c, s->header_state, 1);
3506
    s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3507
    s->qbias= get_symbol(&s->c, s->header_state, 1);
3508
    s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3509
    if(s->block_max_depth > 1){
3510
        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3511
        s->block_max_depth= 0;
3512
        return -1;
3513
    }
3514

    
3515
    return 0;
3516
}
3517

    
3518
static void init_qexp(){
3519
    int i;
3520
    double v=128;
3521

    
3522
    for(i=0; i<QROOT; i++){
3523
        qexp[i]= lrintf(v);
3524
        v *= pow(2, 1.0 / QROOT);
3525
    }
3526
}
3527

    
3528
static int common_init(AVCodecContext *avctx){
3529
    SnowContext *s = avctx->priv_data;
3530
    int width, height;
3531
    int level, orientation, plane_index, dec;
3532

    
3533
    s->avctx= avctx;
3534

    
3535
    dsputil_init(&s->dsp, avctx);
3536

    
3537
#define mcf(dx,dy)\
3538
    s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
3539
    s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3540
        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3541
    s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
3542
    s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3543
        s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3544

    
3545
    mcf( 0, 0)
3546
    mcf( 4, 0)
3547
    mcf( 8, 0)
3548
    mcf(12, 0)
3549
    mcf( 0, 4)
3550
    mcf( 4, 4)
3551
    mcf( 8, 4)
3552
    mcf(12, 4)
3553
    mcf( 0, 8)
3554
    mcf( 4, 8)
3555
    mcf( 8, 8)
3556
    mcf(12, 8)
3557
    mcf( 0,12)
3558
    mcf( 4,12)
3559
    mcf( 8,12)
3560
    mcf(12,12)
3561

    
3562
#define mcfh(dx,dy)\
3563
    s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
3564
    s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3565
        mc_block_hpel ## dx ## dy ## 16;\
3566
    s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
3567
    s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3568
        mc_block_hpel ## dx ## dy ## 8;
3569

    
3570
    mcfh(0, 0)
3571
    mcfh(8, 0)
3572
    mcfh(0, 8)
3573
    mcfh(8, 8)
3574

    
3575
    if(!qexp[0])
3576
        init_qexp();
3577

    
3578
    dec= s->spatial_decomposition_count= 5;
3579
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3580

    
3581
    s->chroma_h_shift= 1; //FIXME XXX
3582
    s->chroma_v_shift= 1;
3583

    
3584
//    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3585

    
3586
    width= s->avctx->width;
3587
    height= s->avctx->height;
3588

    
3589
    s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3590

    
3591
    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3592
    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3593

    
3594
    for(plane_index=0; plane_index<3; plane_index++){
3595
        int w= s->avctx->width;
3596
        int h= s->avctx->height;
3597

    
3598
        if(plane_index){
3599
            w>>= s->chroma_h_shift;
3600
            h>>= s->chroma_v_shift;
3601
        }
3602
        s->plane[plane_index].width = w;
3603
        s->plane[plane_index].height= h;
3604
//av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3605
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3606
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3607
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3608

    
3609
                b->buf= s->spatial_dwt_buffer;
3610
                b->level= level;
3611
                b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3612
                b->width = (w + !(orientation&1))>>1;
3613
                b->height= (h + !(orientation>1))>>1;
3614

    
3615
                b->stride_line = 1 << (s->spatial_decomposition_count - level);
3616
                b->buf_x_offset = 0;
3617
                b->buf_y_offset = 0;
3618

    
3619
                if(orientation&1){
3620
                    b->buf += (w+1)>>1;
3621
                    b->buf_x_offset = (w+1)>>1;
3622
                }
3623
                if(orientation>1){
3624
                    b->buf += b->stride>>1;
3625
                    b->buf_y_offset = b->stride_line >> 1;
3626
                }
3627

    
3628
                if(level)
3629
                    b->parent= &s->plane[plane_index].band[level-1][orientation];
3630
                b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3631
            }
3632
            w= (w+1)>>1;
3633
            h= (h+1)>>1;
3634
        }
3635
    }
3636

    
3637
    reset_contexts(s);
3638
/*
3639
    width= s->width= avctx->width;
3640
    height= s->height= avctx->height;
3641

3642
    assert(width && height);
3643
*/
3644
    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3645

    
3646
    return 0;
3647
}
3648

    
3649

    
3650
static void calculate_vissual_weight(SnowContext *s, Plane *p){
3651
    int width = p->width;
3652
    int height= p->height;
3653
    int level, orientation, x, y;
3654

    
3655
    for(level=0; level<s->spatial_decomposition_count; level++){
3656
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
3657
            SubBand *b= &p->band[level][orientation];
3658
            DWTELEM *buf= b->buf;
3659
            int64_t error=0;
3660

    
3661
            memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3662
            buf[b->width/2 + b->height/2*b->stride]= 256*256;
3663
            ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3664
            for(y=0; y<height; y++){
3665
                for(x=0; x<width; x++){
3666
                    int64_t d= s->spatial_dwt_buffer[x + y*width];
3667
                    error += d*d;
3668
                }
3669
            }
3670

    
3671
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3672
//            av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3673
        }
3674
    }
3675
}
3676

    
3677
static int encode_init(AVCodecContext *avctx)
3678
{
3679
    SnowContext *s = avctx->priv_data;
3680
    int plane_index;
3681

    
3682
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3683
        av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3684
               "use vstrict=-2 / -strict -2 to use it anyway\n");
3685
        return -1;
3686
    }
3687

    
3688
    common_init(avctx);
3689
    alloc_blocks(s);
3690

    
3691
    s->version=0;
3692

    
3693
    s->m.avctx   = avctx;
3694
    s->m.flags   = avctx->flags;
3695
    s->m.bit_rate= avctx->bit_rate;
3696

    
3697
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3698
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3699
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3700
    h263_encode_init(&s->m); //mv_penalty
3701

    
3702
    if(avctx->flags&CODEC_FLAG_PASS1){
3703
        if(!avctx->stats_out)
3704
            avctx->stats_out = av_mallocz(256);
3705
    }
3706
    if(avctx->flags&CODEC_FLAG_PASS2){
3707
        if(ff_rate_control_init(&s->m) < 0)
3708
            return -1;
3709
    }
3710

    
3711
    for(plane_index=0; plane_index<3; plane_index++){
3712
        calculate_vissual_weight(s, &s->plane[plane_index]);
3713
    }
3714

    
3715

    
3716
    avctx->coded_frame= &s->current_picture;
3717
    switch(avctx->pix_fmt){
3718
//    case PIX_FMT_YUV444P:
3719
//    case PIX_FMT_YUV422P:
3720
    case PIX_FMT_YUV420P:
3721
    case PIX_FMT_GRAY8:
3722
//    case PIX_FMT_YUV411P:
3723
//    case PIX_FMT_YUV410P:
3724
        s->colorspace_type= 0;
3725
        break;
3726
/*    case PIX_FMT_RGBA32:
3727
        s->colorspace= 1;
3728
        break;*/
3729
    default:
3730
        av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3731
        return -1;
3732
    }
3733
//    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3734
    s->chroma_h_shift= 1;
3735
    s->chroma_v_shift= 1;
3736

    
3737
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3738
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3739

    
3740
    s->avctx->get_buffer(s->avctx, &s->input_picture);
3741

    
3742
    return 0;
3743
}
3744

    
3745
static int frame_start(SnowContext *s){
3746
   AVFrame tmp;
3747
   int w= s->avctx->width; //FIXME round up to x16 ?
3748
   int h= s->avctx->height;
3749

    
3750
    if(s->current_picture.data[0]){
3751
        draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
3752
        draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3753
        draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
3754
    }
3755

    
3756
    tmp= s->last_picture;
3757
    s->last_picture= s->current_picture;
3758
    s->current_picture= tmp;
3759

    
3760
    s->current_picture.reference= 1;
3761
    if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3762
        av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3763
        return -1;
3764
    }
3765

    
3766
    return 0;
3767
}
3768

    
3769
static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3770
    SnowContext *s = avctx->priv_data;
3771
    RangeCoder * const c= &s->c;
3772
    AVFrame *pict = data;
3773
    const int width= s->avctx->width;
3774
    const int height= s->avctx->height;
3775
    int level, orientation, plane_index, i, y;
3776

    
3777
    ff_init_range_encoder(c, buf, buf_size);
3778
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3779

    
3780
    for(i=0; i<3; i++){
3781
        int shift= !!i;
3782
        for(y=0; y<(height>>shift); y++)
3783
            memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3784
                   &pict->data[i][y * pict->linesize[i]],
3785
                   width>>shift);
3786
    }
3787
    s->new_picture = *pict;
3788

    
3789
    if(avctx->flags&CODEC_FLAG_PASS2){
3790
        s->m.pict_type =
3791
        pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3792
        s->keyframe= pict->pict_type==FF_I_TYPE;
3793
        s->m.picture_number= avctx->frame_number;
3794
        pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3795
    }else{
3796
        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3797
        pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3798
    }
3799

    
3800
    if(pict->quality){
3801
        s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3802
        //<64 >60
3803
        s->qlog += 61*QROOT/8;
3804
    }else{
3805
        s->qlog= LOSSLESS_QLOG;
3806
    }
3807

    
3808
    frame_start(s);
3809
    s->current_picture.key_frame= s->keyframe;
3810

    
3811
    s->m.current_picture_ptr= &s->m.current_picture;
3812
    if(pict->pict_type == P_TYPE){
3813
        int block_width = (width +15)>>4;
3814
        int block_height= (height+15)>>4;
3815
        int stride= s->current_picture.linesize[0];
3816

    
3817
        assert(s->current_picture.data[0]);
3818
        assert(s->last_picture.data[0]);
3819

    
3820
        s->m.avctx= s->avctx;
3821
        s->m.current_picture.data[0]= s->current_picture.data[0];
3822
        s->m.   last_picture.data[0]= s->   last_picture.data[0];
3823
        s->m.    new_picture.data[0]= s->  input_picture.data[0];
3824
        s->m.   last_picture_ptr= &s->m.   last_picture;
3825
        s->m.linesize=
3826
        s->m.   last_picture.linesize[0]=
3827
        s->m.    new_picture.linesize[0]=
3828
        s->m.current_picture.linesize[0]= stride;
3829
        s->m.uvlinesize= s->current_picture.linesize[1];
3830
        s->m.width = width;
3831
        s->m.height= height;
3832
        s->m.mb_width = block_width;
3833
        s->m.mb_height= block_height;
3834
        s->m.mb_stride=   s->m.mb_width+1;
3835
        s->m.b8_stride= 2*s->m.mb_width+1;
3836
        s->m.f_code=1;
3837
        s->m.pict_type= pict->pict_type;
3838
        s->m.me_method= s->avctx->me_method;
3839
        s->m.me.scene_change_score=0;
3840
        s->m.flags= s->avctx->flags;
3841
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3842
        s->m.out_format= FMT_H263;
3843
        s->m.unrestricted_mv= 1;
3844

    
3845
        s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3846
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3847
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3848

    
3849
        s->m.dsp= s->dsp; //move
3850
        ff_init_me(&s->m);
3851
        s->dsp= s->m.dsp;
3852
    }
3853

    
3854
redo_frame:
3855

    
3856
    s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
3857

    
3858
    encode_header(s);
3859
    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3860
    encode_blocks(s);
3861
    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3862

    
3863
    for(plane_index=0; plane_index<3; plane_index++){
3864
        Plane *p= &s->plane[plane_index];
3865
        int w= p->width;
3866
        int h= p->height;
3867
        int x, y;
3868
//        int bits= put_bits_count(&s->c.pb);
3869

    
3870
        //FIXME optimize
3871
     if(pict->data[plane_index]) //FIXME gray hack
3872
        for(y=0; y<h; y++){
3873
            for(x=0; x<w; x++){
3874
                s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3875
            }
3876
        }
3877
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3878

    
3879
        if(   plane_index==0
3880
           && pict->pict_type == P_TYPE
3881
           && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3882
            ff_init_range_encoder(c, buf, buf_size);
3883
            ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3884
            pict->pict_type= FF_I_TYPE;
3885
            s->keyframe=1;
3886
            reset_contexts(s);
3887
            goto redo_frame;
3888
        }
3889

    
3890
        if(s->qlog == LOSSLESS_QLOG){
3891
            for(y=0; y<h; y++){
3892
                for(x=0; x<w; x++){
3893
                    s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3894
                }
3895
            }
3896
        }
3897

    
3898
        ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3899

    
3900
        for(level=0; level<s->spatial_decomposition_count; level++){
3901
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3902
                SubBand *b= &p->band[level][orientation];
3903

    
3904
                quantize(s, b, b->buf, b->stride, s->qbias);
3905
                if(orientation==0)
3906
                    decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3907
                encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3908
                assert(b->parent==NULL || b->parent->stride == b->stride*2);
3909
                if(orientation==0)
3910
                    correlate(s, b, b->buf, b->stride, 1, 0);
3911
            }
3912
        }
3913
//        av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
3914

    
3915
        for(level=0; level<s->spatial_decomposition_count; level++){
3916
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3917
                SubBand *b= &p->band[level][orientation];
3918

    
3919
                dequantize(s, b, b->buf, b->stride);
3920
            }
3921
        }
3922

    
3923
        ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3924
        if(s->qlog == LOSSLESS_QLOG){
3925
            for(y=0; y<h; y++){
3926
                for(x=0; x<w; x++){
3927
                    s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
3928
                }
3929
            }
3930
        }
3931
{START_TIMER
3932
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3933
STOP_TIMER("pred-conv")}
3934
        if(s->avctx->flags&CODEC_FLAG_PSNR){
3935
            int64_t error= 0;
3936

    
3937
    if(pict->data[plane_index]) //FIXME gray hack
3938
            for(y=0; y<h; y++){
3939
                for(x=0; x<w; x++){
3940
                    int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3941
                    error += d*d;
3942
                }
3943
            }
3944
            s->avctx->error[plane_index] += error;
3945
            s->current_picture.error[plane_index] = error;
3946
        }
3947
    }
3948

    
3949
    if(s->last_picture.data[0])
3950
        avctx->release_buffer(avctx, &s->last_picture);
3951

    
3952
    s->current_picture.coded_picture_number = avctx->frame_number;
3953
    s->current_picture.pict_type = pict->pict_type;
3954
    s->current_picture.quality = pict->quality;
3955
    if(avctx->flags&CODEC_FLAG_PASS1){
3956
        s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
3957
        s->m.current_picture.display_picture_number =
3958
        s->m.current_picture.coded_picture_number = avctx->frame_number;
3959
        s->m.pict_type = pict->pict_type;
3960
        s->m.current_picture.quality = pict->quality;
3961
        ff_write_pass1_stats(&s->m);
3962
    }
3963
    if(avctx->flags&CODEC_FLAG_PASS2){
3964
        s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3965
    }
3966

    
3967
    emms_c();
3968

    
3969
    return ff_rac_terminate(c);
3970
}
3971

    
3972
static void common_end(SnowContext *s){
3973
    int plane_index, level, orientation;
3974

    
3975
    av_freep(&s->spatial_dwt_buffer);
3976

    
3977
    av_freep(&s->m.me.scratchpad);
3978
    av_freep(&s->m.me.map);
3979
    av_freep(&s->m.me.score_map);
3980

    
3981
    av_freep(&s->block);
3982

    
3983
    for(plane_index=0; plane_index<3; plane_index++){
3984
        for(level=s->spatial_decomposition_count-1; level>=0; level--){
3985
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
3986
                SubBand *b= &s->plane[plane_index].band[level][orientation];
3987

    
3988
                av_freep(&b->x_coeff);
3989
            }
3990
        }
3991
    }
3992
}
3993

    
3994
static int encode_end(AVCodecContext *avctx)
3995
{
3996
    SnowContext *s = avctx->priv_data;
3997

    
3998
    common_end(s);
3999
    av_free(avctx->stats_out);
4000

    
4001
    return 0;
4002
}
4003

    
4004
static int decode_init(AVCodecContext *avctx)
4005
{
4006
    SnowContext *s = avctx->priv_data;
4007
    int block_size;
4008

    
4009
    avctx->pix_fmt= PIX_FMT_YUV420P;
4010

    
4011
    common_init(avctx);
4012

    
4013
    block_size = MB_SIZE >> s->block_max_depth;
4014
    slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 2)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
4015

    
4016
    return 0;
4017
}
4018

    
4019
static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4020
    SnowContext *s = avctx->priv_data;
4021
    RangeCoder * const c= &s->c;
4022
    int bytes_read;
4023
    AVFrame *picture = data;
4024
    int level, orientation, plane_index;
4025

    
4026
    ff_init_range_decoder(c, buf, buf_size);
4027
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4028

    
4029
    s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4030
    decode_header(s);
4031
    if(!s->block) alloc_blocks(s);
4032

    
4033
    frame_start(s);
4034
    //keyframe flag dupliaction mess FIXME
4035
    if(avctx->debug&FF_DEBUG_PICT_INFO)
4036
        av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4037

    
4038
    decode_blocks(s);
4039

    
4040
    for(plane_index=0; plane_index<3; plane_index++){
4041
        Plane *p= &s->plane[plane_index];
4042
        int w= p->width;
4043
        int h= p->height;
4044
        int x, y;
4045
        int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4046

    
4047
if(s->avctx->debug&2048){
4048
        memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4049
        predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4050

    
4051
        for(y=0; y<h; y++){
4052
            for(x=0; x<w; x++){
4053
                int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4054
                s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4055
            }
4056
        }
4057
}
4058

    
4059
{   START_TIMER
4060
    for(level=0; level<s->spatial_decomposition_count; level++){
4061
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
4062
            SubBand *b= &p->band[level][orientation];
4063
            unpack_coeffs(s, b, b->parent, orientation);
4064
        }
4065
    }
4066
    STOP_TIMER("unpack coeffs");
4067
}
4068

    
4069
{START_TIMER
4070
    const int mb_h= s->b_height << s->block_max_depth;
4071
    const int block_size = MB_SIZE >> s->block_max_depth;
4072
    const int block_w    = plane_index ? block_size/2 : block_size;
4073
    int mb_y;
4074
    dwt_compose_t cs[MAX_DECOMPOSITIONS];
4075
    int yd=0, yq=0;
4076
    int y;
4077
    int end_y;
4078

    
4079
    ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4080
    for(mb_y=0; mb_y<=mb_h; mb_y++){
4081

    
4082
        int slice_starty = block_w*mb_y;
4083
        int slice_h = block_w*(mb_y+1);
4084
        if (!(s->keyframe || s->avctx->debug&512)){
4085
            slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4086
            slice_h -= (block_w >> 1);
4087
        }
4088

    
4089
        {
4090
        START_TIMER
4091
        for(level=0; level<s->spatial_decomposition_count; level++){
4092
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
4093
                SubBand *b= &p->band[level][orientation];
4094
                int start_y;
4095
                int end_y;
4096
                int our_mb_start = mb_y;
4097
                int our_mb_end = (mb_y + 1);
4098
                start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2: 0);
4099
                end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2);
4100
                if (!(s->keyframe || s->avctx->debug&512)){
4101
                    start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4102
                    end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4103
                }
4104
                start_y = FFMIN(b->height, start_y);
4105
                end_y = FFMIN(b->height, end_y);
4106

    
4107
                if (start_y != end_y){
4108
                    if (orientation == 0){
4109
                        SubBand * correlate_band = &p->band[0][0];
4110
                        int correlate_end_y = FFMIN(b->height, end_y + 1);
4111
                        int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));