## ffmpeg / doc / snow.txt @ dc7f38f5

History | View | Annotate | Download (20.9 KB)

1 |
============================================= |
---|---|

2 |
Snow Video Codec Specification Draft 20080110 |

3 |
============================================= |

4 | |

5 |
Introduction: |

6 |
============= |

7 |
This specification describes the Snow bitstream syntax and semantics as |

8 |
well as the formal Snow decoding process. |

9 | |

10 |
The decoding process is described precisely and any compliant decoder |

11 |
MUST produce the exact same output for a spec-conformant Snow stream. |

12 |
For encoding, though, any process which generates a stream compliant to |

13 |
the syntactical and semantic requirements and which is decodable by |

14 |
the process described in this spec shall be considered a conformant |

15 |
Snow encoder. |

16 | |

17 |
Definitions: |

18 |
============ |

19 | |

20 |
MUST the specific part must be done to conform to this standard |

21 |
SHOULD it is recommended to be done that way, but not strictly required |

22 | |

23 |
ilog2(x) is the rounded down logarithm of x with base 2 |

24 |
ilog2(0) = 0 |

25 | |

26 |
Type definitions: |

27 |
================= |

28 | |

29 |
b 1-bit range coded |

30 |
u unsigned scalar value range coded |

31 |
s signed scalar value range coded |

32 | |

33 | |

34 |
Bitstream syntax: |

35 |
================= |

36 | |

37 |
frame: |

38 |
header |

39 |
prediction |

40 |
residual |

41 | |

42 |
header: |

43 |
keyframe b MID_STATE |

44 |
if(keyframe || always_reset) |

45 |
reset_contexts |

46 |
if(keyframe){ |

47 |
version u header_state |

48 |
always_reset b header_state |

49 |
temporal_decomposition_type u header_state |

50 |
temporal_decomposition_count u header_state |

51 |
spatial_decomposition_count u header_state |

52 |
colorspace_type u header_state |

53 |
chroma_h_shift u header_state |

54 |
chroma_v_shift u header_state |

55 |
spatial_scalability b header_state |

56 |
max_ref_frames-1 u header_state |

57 |
qlogs |

58 |
} |

59 |
if(!keyframe){ |

60 |
update_mc b header_state |

61 |
if(update_mc){ |

62 |
for(plane=0; plane<2; plane++){ |

63 |
diag_mc b header_state |

64 |
htaps/2-1 u header_state |

65 |
for(i= p->htaps/2; i; i--) |

66 |
|hcoeff[i]| u header_state |

67 |
} |

68 |
} |

69 |
update_qlogs b header_state |

70 |
if(update_qlogs){ |

71 |
spatial_decomposition_count u header_state |

72 |
qlogs |

73 |
} |

74 |
} |

75 | |

76 |
spatial_decomposition_type s header_state |

77 |
qlog s header_state |

78 |
mv_scale s header_state |

79 |
qbias s header_state |

80 |
block_max_depth s header_state |

81 | |

82 |
qlogs: |

83 |
for(plane=0; plane<2; plane++){ |

84 |
quant_table[plane][0][0] s header_state |

85 |
for(level=0; level < spatial_decomposition_count; level++){ |

86 |
quant_table[plane][level][1]s header_state |

87 |
quant_table[plane][level][3]s header_state |

88 |
} |

89 |
} |

90 | |

91 |
reset_contexts |

92 |
*_state[*]= MID_STATE |

93 | |

94 |
prediction: |

95 |
for(y=0; y<block_count_vertical; y++) |

96 |
for(x=0; x<block_count_horizontal; x++) |

97 |
block(0) |

98 | |

99 |
block(level): |

100 |
mvx_diff=mvy_diff=y_diff=cb_diff=cr_diff=0 |

101 |
if(keyframe){ |

102 |
intra=1 |

103 |
}else{ |

104 |
if(level!=max_block_depth){ |

105 |
s_context= 2*left->level + 2*top->level + topleft->level + topright->level |

106 |
leaf b block_state[4 + s_context] |

107 |
} |

108 |
if(level==max_block_depth || leaf){ |

109 |
intra b block_state[1 + left->intra + top->intra] |

110 |
if(intra){ |

111 |
y_diff s block_state[32] |

112 |
cb_diff s block_state[64] |

113 |
cr_diff s block_state[96] |

114 |
}else{ |

115 |
ref_context= ilog2(2*left->ref) + ilog2(2*top->ref) |

116 |
if(ref_frames > 1) |

117 |
ref u block_state[128 + 1024 + 32*ref_context] |

118 |
mx_context= ilog2(2*abs(left->mx - top->mx)) |

119 |
my_context= ilog2(2*abs(left->my - top->my)) |

120 |
mvx_diff s block_state[128 + 32*(mx_context + 16*!!ref)] |

121 |
mvy_diff s block_state[128 + 32*(my_context + 16*!!ref)] |

122 |
} |

123 |
}else{ |

124 |
block(level+1) |

125 |
block(level+1) |

126 |
block(level+1) |

127 |
block(level+1) |

128 |
} |

129 |
} |

130 | |

131 | |

132 |
residual: |

133 |
residual2(luma) |

134 |
residual2(chroma_cr) |

135 |
residual2(chroma_cb) |

136 | |

137 |
residual2: |

138 |
for(level=0; level<spatial_decomposition_count; level++){ |

139 |
if(level==0) |

140 |
subband(LL, 0) |

141 |
subband(HL, level) |

142 |
subband(LH, level) |

143 |
subband(HH, level) |

144 |
} |

145 | |

146 |
subband: |

147 |
FIXME |

148 | |

149 | |

150 | |

151 |
Tag description: |

152 |
---------------- |

153 | |

154 |
version |

155 |
0 |

156 |
this MUST NOT change within a bitstream |

157 | |

158 |
always_reset |

159 |
if 1 then the range coder contexts will be reset after each frame |

160 | |

161 |
temporal_decomposition_type |

162 |
0 |

163 | |

164 |
temporal_decomposition_count |

165 |
0 |

166 | |

167 |
spatial_decomposition_count |

168 |
FIXME |

169 | |

170 |
colorspace_type |

171 |
0 |

172 |
this MUST NOT change within a bitstream |

173 | |

174 |
chroma_h_shift |

175 |
log2(luma.width / chroma.width) |

176 |
this MUST NOT change within a bitstream |

177 | |

178 |
chroma_v_shift |

179 |
log2(luma.height / chroma.height) |

180 |
this MUST NOT change within a bitstream |

181 | |

182 |
spatial_scalability |

183 |
0 |

184 | |

185 |
max_ref_frames |

186 |
maximum number of reference frames |

187 |
this MUST NOT change within a bitstream |

188 | |

189 |
update_mc |

190 |
indicates that motion compensation filter parameters are stored in the |

191 |
header |

192 | |

193 |
diag_mc |

194 |
flag to enable faster diagonal interpolation |

195 |
this SHOULD be 1 unless it turns out to be covered by a valid patent |

196 | |

197 |
htaps |

198 |
number of half pel interpolation filter taps, MUST be even, >0 and <10 |

199 | |

200 |
hcoeff |

201 |
half pel interpolation filter coefficients, hcoeff[0] are the 2 middle |

202 |
coefficients [1] are the next outer ones and so on, resulting in a filter |

203 |
like: ...eff[2], hcoeff[1], hcoeff[0], hcoeff[0], hcoeff[1], hcoeff[2] ... |

204 |
the sign of the coefficients is not explicitly stored but alternates |

205 |
after each coeff and coeff[0] is positive, so ...,+,-,+,-,+,+,-,+,-,+,... |

206 |
hcoeff[0] is not explicitly stored but found by subtracting the sum |

207 |
of all stored coefficients with signs from 32 |

208 |
hcoeff[0]= 32 - hcoeff[1] - hcoeff[2] - ... |

209 |
a good choice for hcoeff and htaps is |

210 |
htaps= 6 |

211 |
hcoeff={40,-10,2} |

212 |
an alternative which requires more computations at both encoder and |

213 |
decoder side and may or may not be better is |

214 |
htaps= 8 |

215 |
hcoeff={42,-14,6,-2} |

216 | |

217 | |

218 |
ref_frames |

219 |
minimum of the number of available reference frames and max_ref_frames |

220 |
for example the first frame after a key frame always has ref_frames=1 |

221 | |

222 |
spatial_decomposition_type |

223 |
wavelet type |

224 |
0 is a 9/7 symmetric compact integer wavelet |

225 |
1 is a 5/3 symmetric compact integer wavelet |

226 |
others are reserved |

227 |
stored as delta from last, last is reset to 0 if always_reset || keyframe |

228 | |

229 |
qlog |

230 |
quality (logarithmic quantizer scale) |

231 |
stored as delta from last, last is reset to 0 if always_reset || keyframe |

232 | |

233 |
mv_scale |

234 |
stored as delta from last, last is reset to 0 if always_reset || keyframe |

235 |
FIXME check that everything works fine if this changes between frames |

236 | |

237 |
qbias |

238 |
dequantization bias |

239 |
stored as delta from last, last is reset to 0 if always_reset || keyframe |

240 | |

241 |
block_max_depth |

242 |
maximum depth of the block tree |

243 |
stored as delta from last, last is reset to 0 if always_reset || keyframe |

244 | |

245 |
quant_table |

246 |
quantization table |

247 | |

248 | |

249 |
Highlevel bitstream structure: |

250 |
============================= |

251 |
-------------------------------------------- |

252 |
| Header | |

253 |
-------------------------------------------- |

254 |
| ------------------------------------ | |

255 |
| | Block0 | | |

256 |
| | split? | | |

257 |
| | yes no | | |

258 |
| | ......... intra? | | |

259 |
| | : Block01 : yes no | | |

260 |
| | : Block02 : ....... .......... | | |

261 |
| | : Block03 : : y DC : : ref index: | | |

262 |
| | : Block04 : : cb DC : : motion x : | | |

263 |
| | ......... : cr DC : : motion y : | | |

264 |
| | ....... .......... | | |

265 |
| ------------------------------------ | |

266 |
| ------------------------------------ | |

267 |
| | Block1 | | |

268 |
| ... | |

269 |
-------------------------------------------- |

270 |
| ------------ ------------ ------------ | |

271 |
|| Y subbands | | Cb subbands| | Cr subbands|| |

272 |
|| --- --- | | --- --- | | --- --- || |

273 |
|| |LL0||HL0| | | |LL0||HL0| | | |LL0||HL0| || |

274 |
|| --- --- | | --- --- | | --- --- || |

275 |
|| --- --- | | --- --- | | --- --- || |

276 |
|| |LH0||HH0| | | |LH0||HH0| | | |LH0||HH0| || |

277 |
|| --- --- | | --- --- | | --- --- || |

278 |
|| --- --- | | --- --- | | --- --- || |

279 |
|| |HL1||LH1| | | |HL1||LH1| | | |HL1||LH1| || |

280 |
|| --- --- | | --- --- | | --- --- || |

281 |
|| --- --- | | --- --- | | --- --- || |

282 |
|| |HH1||HL2| | | |HH1||HL2| | | |HH1||HL2| || |

283 |
|| ... | | ... | | ... || |

284 |
| ------------ ------------ ------------ | |

285 |
-------------------------------------------- |

286 | |

287 |
Decoding process: |

288 |
================= |

289 | |

290 |
------------ |

291 |
| | |

292 |
| Subbands | |

293 |
------------ | | |

294 |
| | ------------ |

295 |
| Intra DC | | |

296 |
| | LL0 subband prediction |

297 |
------------ | |

298 |
\ Dequantization |

299 |
------------------- \ | |

300 |
| Reference frames | \ IDWT |

301 |
| ------- ------- | Motion \ | |

302 |
||Frame 0| |Frame 1|| Compensation . OBMC v ------- |

303 |
| ------- ------- | --------------. \------> + --->|Frame n|-->output |

304 |
| ------- ------- | ------- |

305 |
||Frame 2| |Frame 3||<----------------------------------/ |

306 |
| ... | |

307 |
------------------- |

308 | |

309 | |

310 |
Range Coder: |

311 |
============ |

312 | |

313 |
Binary Range Coder: |

314 |
------------------- |

315 |
The implemented range coder is an adapted version based upon "Range encoding: |

316 |
an algorithm for removing redundancy from a digitised message." by G. N. N. |

317 |
Martin. |

318 |
The symbols encoded by the Snow range coder are bits (0|1). The |

319 |
associated probabilities are not fixed but change depending on the symbol mix |

320 |
seen so far. |

321 | |

322 | |

323 |
bit seen | new state |

324 |
---------+----------------------------------------------- |

325 |
0 | 256 - state_transition_table[256 - old_state]; |

326 |
1 | state_transition_table[ old_state]; |

327 | |

328 |
state_transition_table = { |

329 |
0, 0, 0, 0, 0, 0, 0, 0, 20, 21, 22, 23, 24, 25, 26, 27, |

330 |
28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, |

331 |
43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 56, 57, |

332 |
58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, |

333 |
74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, |

334 |
89, 90, 91, 92, 93, 94, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, |

335 |
104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 114, 115, 116, 117, 118, |

336 |
119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 133, |

337 |
134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, |

338 |
150, 151, 152, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, |

339 |
165, 166, 167, 168, 169, 170, 171, 171, 172, 173, 174, 175, 176, 177, 178, 179, |

340 |
180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 190, 191, 192, 194, 194, |

341 |
195, 196, 197, 198, 199, 200, 201, 202, 202, 204, 205, 206, 207, 208, 209, 209, |

342 |
210, 211, 212, 213, 215, 215, 216, 217, 218, 219, 220, 220, 222, 223, 224, 225, |

343 |
226, 227, 227, 229, 229, 230, 231, 232, 234, 234, 235, 236, 237, 238, 239, 240, |

344 |
241, 242, 243, 244, 245, 246, 247, 248, 248, 0, 0, 0, 0, 0, 0, 0}; |

345 | |

346 |
FIXME |

347 | |

348 | |

349 |
Range Coding of integers: |

350 |
------------------------- |

351 |
FIXME |

352 | |

353 | |

354 |
Neighboring Blocks: |

355 |
=================== |

356 |
left and top are set to the respective blocks unless they are outside of |

357 |
the image in which case they are set to the Null block |

358 | |

359 |
top-left is set to the top left block unless it is outside of the image in |

360 |
which case it is set to the left block |

361 | |

362 |
if this block has no larger parent block or it is at the left side of its |

363 |
parent block and the top right block is not outside of the image then the |

364 |
top right block is used for top-right else the top-left block is used |

365 | |

366 |
Null block |

367 |
y,cb,cr are 128 |

368 |
level, ref, mx and my are 0 |

369 | |

370 | |

371 |
Motion Vector Prediction: |

372 |
========================= |

373 |
1. the motion vectors of all the neighboring blocks are scaled to |

374 |
compensate for the difference of reference frames |

375 | |

376 |
scaled_mv= (mv * (256 * (current_reference+1) / (mv.reference+1)) + 128)>>8 |

377 | |

378 |
2. the median of the scaled left, top and top-right vectors is used as |

379 |
motion vector prediction |

380 | |

381 |
3. the used motion vector is the sum of the predictor and |

382 |
(mvx_diff, mvy_diff)*mv_scale |

383 | |

384 | |

385 |
Intra DC Prediction: |

386 |
====================== |

387 |
the luma and chroma values of the left block are used as predictors |

388 | |

389 |
the used luma and chroma is the sum of the predictor and y_diff, cb_diff, cr_diff |

390 |
to reverse this in the decoder apply the following: |

391 |
block[y][x].dc[0] = block[y][x-1].dc[0] + y_diff; |

392 |
block[y][x].dc[1] = block[y][x-1].dc[1] + cb_diff; |

393 |
block[y][x].dc[2] = block[y][x-1].dc[2] + cr_diff; |

394 |
block[*][-1].dc[*]= 128; |

395 | |

396 | |

397 |
Motion Compensation: |

398 |
==================== |

399 | |

400 |
Halfpel interpolation: |

401 |
---------------------- |

402 |
halfpel interpolation is done by convolution with the halfpel filter stored |

403 |
in the header: |

404 | |

405 |
horizontal halfpel samples are found by |

406 |
H1[y][x] = hcoeff[0]*(F[y][x ] + F[y][x+1]) |

407 |
+ hcoeff[1]*(F[y][x-1] + F[y][x+2]) |

408 |
+ hcoeff[2]*(F[y][x-2] + F[y][x+3]) |

409 |
+ ... |

410 |
h1[y][x] = (H1[y][x] + 32)>>6; |

411 | |

412 |
vertical halfpel samples are found by |

413 |
H2[y][x] = hcoeff[0]*(F[y ][x] + F[y+1][x]) |

414 |
+ hcoeff[1]*(F[y-1][x] + F[y+2][x]) |

415 |
+ ... |

416 |
h2[y][x] = (H2[y][x] + 32)>>6; |

417 | |

418 |
vertical+horizontal halfpel samples are found by |

419 |
H3[y][x] = hcoeff[0]*(H2[y][x ] + H2[y][x+1]) |

420 |
+ hcoeff[1]*(H2[y][x-1] + H2[y][x+2]) |

421 |
+ ... |

422 |
H3[y][x] = hcoeff[0]*(H1[y ][x] + H1[y+1][x]) |

423 |
+ hcoeff[1]*(H1[y-1][x] + H1[y+2][x]) |

424 |
+ ... |

425 |
h3[y][x] = (H3[y][x] + 2048)>>12; |

426 | |

427 | |

428 |
F H1 F |

429 |
| | | |

430 |
| | | |

431 |
| | | |

432 |
F H1 F |

433 |
| | | |

434 |
| | | |

435 |
| | | |

436 |
F-------F-------F-> H1<-F-------F-------F |

437 |
v v v |

438 |
H2 H3 H2 |

439 |
^ ^ ^ |

440 |
F-------F-------F-> H1<-F-------F-------F |

441 |
| | | |

442 |
| | | |

443 |
| | | |

444 |
F H1 F |

445 |
| | | |

446 |
| | | |

447 |
| | | |

448 |
F H1 F |

449 | |

450 | |

451 |
unavailable fullpel samples (outside the picture for example) shall be equal |

452 |
to the closest available fullpel sample |

453 | |

454 | |

455 |
Smaller pel interpolation: |

456 |
-------------------------- |

457 |
if diag_mc is set then points which lie on a line between 2 vertically, |

458 |
horizontally or diagonally adjacent halfpel points shall be interpolated |

459 |
linearly with rounding to nearest and halfway values rounded up. |

460 |
points which lie on 2 diagonals at the same time should only use the one |

461 |
diagonal not containing the fullpel point |

462 | |

463 | |

464 | |

465 |
F-->O---q---O<--h1->O---q---O<--F |

466 |
v \ / v \ / v |

467 |
O O O O O O O |

468 |
| / | \ | |

469 |
q q q q q |

470 |
| / | \ | |

471 |
O O O O O O O |

472 |
^ / \ ^ / \ ^ |

473 |
h2-->O---q---O<--h3->O---q---O<--h2 |

474 |
v \ / v \ / v |

475 |
O O O O O O O |

476 |
| \ | / | |

477 |
q q q q q |

478 |
| \ | / | |

479 |
O O O O O O O |

480 |
^ / \ ^ / \ ^ |

481 |
F-->O---q---O<--h1->O---q---O<--F |

482 | |

483 | |

484 | |

485 |
the remaining points shall be bilinearly interpolated from the |

486 |
up to 4 surrounding halfpel and fullpel points, again rounding should be to |

487 |
nearest and halfway values rounded up |

488 | |

489 |
compliant Snow decoders MUST support 1-1/8 pel luma and 1/2-1/16 pel chroma |

490 |
interpolation at least |

491 | |

492 | |

493 |
Overlapped block motion compensation: |

494 |
------------------------------------- |

495 |
FIXME |

496 | |

497 |
LL band prediction: |

498 |
=================== |

499 |
Each sample in the LL0 subband is predicted by the median of the left, top and |

500 |
left+top-topleft samples, samples outside the subband shall be considered to |

501 |
be 0. To reverse this prediction in the decoder apply the following. |

502 |
for(y=0; y<height; y++){ |

503 |
for(x=0; x<width; x++){ |

504 |
sample[y][x] += median(sample[y-1][x], |

505 |
sample[y][x-1], |

506 |
sample[y-1][x]+sample[y][x-1]-sample[y-1][x-1]); |

507 |
} |

508 |
} |

509 |
sample[-1][*]=sample[*][-1]= 0; |

510 |
width,height here are the width and height of the LL0 subband not of the final |

511 |
video |

512 | |

513 | |

514 |
Dequantization: |

515 |
=============== |

516 |
FIXME |

517 | |

518 |
Wavelet Transform: |

519 |
================== |

520 | |

521 |
Snow supports 2 wavelet transforms, the symmetric biorthogonal 5/3 integer |

522 |
transform and a integer approximation of the symmetric biorthogonal 9/7 |

523 |
Daubechies wavelet. |

524 | |

525 |
2D IDWT (inverse discrete wavelet transform) |

526 |
-------------------------------------------- |

527 |
The 2D IDWT applies a 2D filter recursively, each time combining the |

528 |
4 lowest frequency subbands into a single subband until only 1 subband |

529 |
remains. |

530 |
The 2D filter is done by first applying a 1D filter in the vertical direction |

531 |
and then applying it in the horizontal one. |

532 |
--------------- --------------- --------------- --------------- |

533 |
|LL0|HL0| | | | | | | | | | | | |

534 |
|---+---| HL1 | | L0|H0 | HL1 | | LL1 | HL1 | | | | |

535 |
|LH0|HH0| | | | | | | | | | | | |

536 |
|-------+-------|->|-------+-------|->|-------+-------|->| L1 | H1 |->... |

537 |
| | | | | | | | | | | | |

538 |
| LH1 | HH1 | | LH1 | HH1 | | LH1 | HH1 | | | | |

539 |
| | | | | | | | | | | | |

540 |
--------------- --------------- --------------- --------------- |

541 | |

542 | |

543 |
1D Filter: |

544 |
---------- |

545 |
1. interleave the samples of the low and high frequency subbands like |

546 |
s={L0, H0, L1, H1, L2, H2, L3, H3, ... } |

547 |
note, this can end with a L or a H, the number of elements shall be w |

548 |
s[-1] shall be considered equivalent to s[1 ] |

549 |
s[w ] shall be considered equivalent to s[w-2] |

550 | |

551 |
2. perform the lifting steps in order as described below |

552 | |

553 |
5/3 Integer filter: |

554 |
1. s[i] -= (s[i-1] + s[i+1] + 2)>>2; for all even i < w |

555 |
2. s[i] += (s[i-1] + s[i+1] )>>1; for all odd i < w |

556 | |

557 |
\ | /|\ | /|\ | /|\ | /|\ |

558 |
\|/ | \|/ | \|/ | \|/ | |

559 |
+ | + | + | + | -1/4 |

560 |
/|\ | /|\ | /|\ | /|\ | |

561 |
/ | \|/ | \|/ | \|/ | \|/ |

562 |
| + | + | + | + +1/2 |

563 | |

564 | |

565 |
Snow's 9/7 Integer filter: |

566 |
1. s[i] -= (3*(s[i-1] + s[i+1]) + 4)>>3; for all even i < w |

567 |
2. s[i] -= s[i-1] + s[i+1] ; for all odd i < w |

568 |
3. s[i] += ( s[i-1] + s[i+1] + 4*s[i] + 8)>>4; for all even i < w |

569 |
4. s[i] += (3*(s[i-1] + s[i+1]) )>>1; for all odd i < w |

570 | |

571 |
\ | /|\ | /|\ | /|\ | /|\ |

572 |
\|/ | \|/ | \|/ | \|/ | |

573 |
+ | + | + | + | -3/8 |

574 |
/|\ | /|\ | /|\ | /|\ | |

575 |
/ | \|/ | \|/ | \|/ | \|/ |

576 |
(| + (| + (| + (| + -1 |

577 |
\ + /|\ + /|\ + /|\ + /|\ +1/4 |

578 |
\|/ | \|/ | \|/ | \|/ | |

579 |
+ | + | + | + | +1/16 |

580 |
/|\ | /|\ | /|\ | /|\ | |

581 |
/ | \|/ | \|/ | \|/ | \|/ |

582 |
| + | + | + | + +3/2 |

583 | |

584 |
optimization tips: |

585 |
following are exactly identical |

586 |
(3a)>>1 == a + (a>>1) |

587 |
(a + 4b + 8)>>4 == ((a>>2) + b + 2)>>2 |

588 | |

589 |
16bit implementation note: |

590 |
The IDWT can be implemented with 16bits, but this requires some care to |

591 |
prevent overflows; the following list gives the minimum number of bits needed |

592 |
for some terms |

593 |
1. lifting step |

594 |
A= s[i-1] + s[i+1] 16bit |

595 |
3*A + 4 18bit |

596 |
A + (A>>1) + 2 17bit |

597 | |

598 |
3. lifting step |

599 |
s[i-1] + s[i+1] 17bit |

600 | |

601 |
4. lifiting step |

602 |
3*(s[i-1] + s[i+1]) 17bit |

603 | |

604 | |

605 |
TODO: |

606 |
===== |

607 |
Important: |

608 |
finetune initial contexts |

609 |
flip wavelet? |

610 |
try to use the wavelet transformed predicted image (motion compensated image) as context for coding the residual coefficients |

611 |
try the MV length as context for coding the residual coefficients |

612 |
use extradata for stuff which is in the keyframes now? |

613 |
the MV median predictor is patented IIRC |

614 |
implement per picture halfpel interpolation |

615 |
try different range coder state transition tables for different contexts |

616 | |

617 |
Not Important: |

618 |
compare the 6 tap and 8 tap hpel filters (psnr/bitrate and subjective quality) |

619 |
spatial_scalability b vs u (!= 0 breaks syntax anyway so we can add a u later) |

620 | |

621 | |

622 |
Credits: |

623 |
======== |

624 |
Michael Niedermayer |

625 |
Loren Merritt |

626 | |

627 | |

628 |
Copyright: |

629 |
========== |

630 |
GPL + GFDL + whatever is needed to make this a RFC |