Revision 78954a05

View differences:

doc/snow.txt
1
=============================================
2
SNOW Video Codec Specification Draft 20070103
3
=============================================
4

  
5

  
6
Definitions:
7
============
8

  
9
MUST    the specific part must be done to conform to this standard
10
SHOULD  it is recommended to be done that way, but not strictly required
11

  
12
ilog2(x) is the rounded down logarithm of x to base 2
13
ilog2(0) = 0
14

  
15
Type definitions:
16
=================
17

  
18
b   1-bit range coded
19
u   unsigned scalar value range coded
20
s   signed scalar value range coded
21

  
22

  
23
Bitstream syntax:
24
=================
25

  
26
frame:
27
    header
28
    prediction
29
    residual
30

  
31
header:
32
    keyframe                            b   MID_STATE
33
    if(keyframe || always_reset)
34
        reset_contexts
35
    if(keyframe){
36
        version                         u   header_state
37
        always_reset                    b   header_state
38
        temporal_decomposition_type     u   header_state
39
        temporal_decomposition_count    u   header_state
40
        spatial_decomposition_count     u   header_state
41
        colorspace_type                 u   header_state
42
        chroma_h_shift                  u   header_state
43
        chroma_v_shift                  u   header_state
44
        spatial_scalability             b   header_state
45
        max_ref_frames-1                u   header_state
46
        qlogs
47
    }
48

  
49
    spatial_decomposition_type          s   header_state
50
    qlog                                s   header_state
51
    mv_scale                            s   header_state
52
    qbias                               s   header_state
53
    block_max_depth                     s   header_state
54

  
55
qlogs:
56
    for(plane=0; plane<2; plane++){
57
        quant_table[plane][0][0]        s   header_state
58
        for(level=0; level < spatial_decomposition_count; level++){
59
            quant_table[plane][level][1]s   header_state
60
            quant_table[plane][level][3]s   header_state
61
        }
62
    }
63

  
64
reset_contexts:
65
    *_state[*]= MID_STATE
66

  
67
prediction:
68
    for(y=0; y<block_count_vertical; y++)
69
        for(x=0; x<block_count_horizontal; x++)
70
            block(0)
71

  
72
block(level):
73
    if(keyframe){
74
        intra=1
75
        y_diff=cb_diff=cr_diff=0
76
    }else{
77
        if(level!=block_max_depth){
78
            s_context= 2*left->level + 2*top->level + topleft->level + topright->level
79
            leaf                        b   block_state[4 + s_context]
80
        }
81
        if(level==block_max_depth || leaf){
82
            intra                       b   block_state[1 + left->intra + top->intra]
83
            if(intra){
84
                y_diff                  s   block_state[32]
85
                cb_diff                 s   block_state[64]
86
                cr_diff                 s   block_state[96]
87
            }else{
88
                ref_context= ilog2(2*left->ref) + ilog2(2*top->ref)
89
                if(ref_frames > 1)
90
                    ref                 u   block_state[128 + 1024 + 32*ref_context]
91
                mx_context= ilog2(2*abs(left->mx - top->mx))
92
                my_context= ilog2(2*abs(left->my - top->my))
93
                mvx_diff                s   block_state[128 + 32*(mx_context + 16*!!ref)]
94
                mvy_diff                s   block_state[128 + 32*(my_context + 16*!!ref)]
95
            }
96
        }else{
97
            block(level+1)
98
            block(level+1)
99
            block(level+1)
100
            block(level+1)
101
        }
102
    }
103

  
104

  
105
residual:
106
    FIXME
107

  
108

  
109

  
110
Tag description:
111
----------------
112

  
113
version
114
    0
115
    this MUST NOT change within a bitstream
116

  
117
always_reset
118
    if 1 then the range coder contexts will be reset after each frame
119

  
120
temporal_decomposition_type
121
    0
122

  
123
temporal_decomposition_count
124
    0
125

  
126
spatial_decomposition_count
127
    FIXME
128

  
129
colorspace_type
130
    0
131
    this MUST NOT change within a bitstream
132

  
133
chroma_h_shift
134
    log2(luma.width / chroma.width)
135
    this MUST NOT change within a bitstream
136

  
137
chroma_v_shift
138
    log2(luma.height / chroma.height)
139
    this MUST NOT change within a bitstream
140

  
141
spatial_scalability
142
    0
143

  
144
max_ref_frames
145
    maximum number of reference frames
146
    this MUST NOT change within a bitstream
147

  
148
ref_frames
149
    minimum of the number of available reference frames and max_ref_frames
150
    for example the first frame after a key frame always has ref_frames=1
151

  
152
spatial_decomposition_type
153
    wavelet type
154
    0 is a 9/7 symmetric compact integer wavelet
155
    1 is a 5/3 symmetric compact integer wavelet
156
    others are reserved
157
    stored as delta from last, last is reset to 0 if always_reset || keyframe
158

  
159
qlog
160
    quality (logarithmic quantizer scale)
161
    stored as delta from last, last is reset to 0 if always_reset || keyframe
162

  
163
mv_scale
164
    stored as delta from last, last is reset to 0 if always_reset || keyframe
165
    FIXME check that everything works fine if this changes between frames
166

  
167
qbias
168
    dequantization bias
169
    stored as delta from last, last is reset to 0 if always_reset || keyframe
170

  
171
block_max_depth
172
    maximum depth of the block tree
173
    stored as delta from last, last is reset to 0 if always_reset || keyframe
174

  
175
quant_table
176
    quantization table
177

  
178
Range Coder:
179
============
180
FIXME
181

  
182
Neighboring Blocks:
183
===================
184
left and top are set to the respective blocks unless they are outside of
185
the image in which case they are set to the Null block
186

  
187
top-left is set to the top left block unless it is outside of the image in
188
which case it is set to the left block
189

  
190
if this block has no larger parent block or it is at the left side of its
191
parent block and the top right block is not outside of the image then the
192
top right block is used for top-right else the top-left block is used
193

  
194
Null block
195
y,cb,cr are 128
196
level, ref, mx and my are 0
197

  
198

  
199
Motion Vector Prediction:
200
=========================
201
1. the motion vectors of all the neighboring blocks are scaled to
202
compensate for the difference of reference frames
203

  
204
scaled_mv= (mv * (256 * (current_reference+1) / (mv.reference+1)) + 128)>>8
205

  
206
2. the median of the scaled left, top and top-right vectors is used as
207
motion vector prediction
208

  
209
3. the used motion vector is the sum of the predictor and
210
   (mvx_diff, mvy_diff)*mv_scale
211

  
212

  
213
Intra DC Prediction:
214
====================
215
the luma and chroma values of the left block are used as predictors
216

  
217
the used luma and chroma is the sum of the predictor and y_diff, cb_diff, cr_diff
218

  
219

  
220
Motion Compensation:
221
====================
222
FIXME
223

  
224
LL band prediction:
225
===================
226
FIXME
227

  
228
Dequantization:
229
===============
230
FIXME
231

  
232
Wavelet Transform:
233
==================
234
FIXME
235

  
236
TODO:
237
=====
238
Important:
239
finetune initial contexts
240
spatial_decomposition_count per frame?
241
flip wavelet?
242
try to use the wavelet transformed predicted image (motion compensated image) as context for coding the residual coefficients
243
try the MV length as context for coding the residual coefficients
244
use extradata for stuff which is in the keyframes now?
245
the MV median predictor is patented IIRC
246

  
247
Not Important:
248
spatial_scalability b vs u (!= 0 breaks syntax anyway so we can add a u later)
249

  
250

  
251
Credits:
252
========
253
Michael Niedermayer
254
Loren Merritt
255

  
256

  
257
Copyright:
258
==========
259
GPL + GFDL + whatever is needed to make this a RFC

Also available in: Unified diff