Statistics
| Branch: | Revision:

ffmpeg / libavformat / matroska.c @ 20b02bc6

History | View | Annotate | Download (85.7 KB)

1 08abe0fd Michael Niedermayer
/*
2
 * Matroska file demuxer (no muxer yet)
3
 * Copyright (c) 2003-2004 The ffmpeg Project
4
 *
5
 * This library is free software; you can redistribute it and/or
6
 * modify it under the terms of the GNU Lesser General Public
7
 * License as published by the Free Software Foundation; either
8
 * version 2 of the License, or (at your option) any later version.
9
 *
10
 * This library is distributed in the hope that it will be useful,
11
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13
 * Lesser General Public License for more details.
14
 *
15
 * You should have received a copy of the GNU Lesser General Public
16
 * License along with this library; if not, write to the Free Software
17
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18
 */
19
20
/**
21
 * @file matroska.c
22
 * Matroska file demuxer
23
 * by Ronald Bultje <rbultje@ronald.bitfreak.net>
24
 * with a little help from Moritz Bunkus <moritz@bunkus.org>
25
 * Specs available on the matroska project page:
26
 * http://www.matroska.org/.
27
 */
28
29
#include "avformat.h"
30
/* For codec_get_bmp_id and codec_get_wav_id. */
31
#include "avi.h"
32
33
/* highest EBML version this demuxer can read */
#define EBML_VERSION                1

/* top-level master ID */
#define EBML_ID_HEADER              0x1A45DFA3

/* IDs of the elements inside the HEADER master */
#define EBML_ID_EBMLVERSION         0x4286
#define EBML_ID_EBMLREADVERSION     0x42F7
#define EBML_ID_EBMLMAXIDLENGTH     0x42F2
#define EBML_ID_EBMLMAXSIZELENGTH   0x42F3
#define EBML_ID_DOCTYPE             0x4282
#define EBML_ID_DOCTYPEVERSION      0x4287
#define EBML_ID_DOCTYPEREADVERSION  0x4285

/* general-purpose EBML element types */
#define EBML_ID_VOID                0xEC
50
51
/*
 * Matroska element IDs (at most 32 bits wide).
 */

/* the top-level segment */
#define MATROSKA_ID_SEGMENT               0x18538067

/* master elements directly below the segment */
#define MATROSKA_ID_INFO                  0x1549A966
#define MATROSKA_ID_TRACKS                0x1654AE6B
#define MATROSKA_ID_CUES                  0x1C53BB6B
#define MATROSKA_ID_TAGS                  0x1254C367
#define MATROSKA_ID_SEEKHEAD              0x114D9B74
#define MATROSKA_ID_CLUSTER               0x1F43B675

/* children of the info master */
#define MATROSKA_ID_TIMECODESCALE         0x2AD7B1
#define MATROSKA_ID_DURATION              0x4489
#define MATROSKA_ID_WRITINGAPP            0x5741
#define MATROSKA_ID_MUXINGAPP             0x4D80
#define MATROSKA_ID_DATEUTC               0x4461

/* child of the tracks master */
#define MATROSKA_ID_TRACKENTRY            0xAE

/* children of the trackentry master */
#define MATROSKA_ID_TRACKNUMBER           0xD7
#define MATROSKA_ID_TRACKUID              0x73C5
#define MATROSKA_ID_TRACKTYPE             0x83
#define MATROSKA_ID_TRACKAUDIO            0xE1
#define MATROSKA_ID_TRACKVIDEO            0xE0
#define MATROSKA_ID_CODECID               0x86
#define MATROSKA_ID_CODECPRIVATE          0x63A2
#define MATROSKA_ID_CODECNAME             0x258688
#define MATROSKA_ID_CODECINFOURL          0x3B4040
#define MATROSKA_ID_CODECDOWNLOADURL      0x26B240
#define MATROSKA_ID_TRACKNAME             0x536E
#define MATROSKA_ID_TRACKLANGUAGE         0x22B59C
#define MATROSKA_ID_TRACKFLAGENABLED      0xB9
#define MATROSKA_ID_TRACKFLAGDEFAULT      0x88
#define MATROSKA_ID_TRACKFLAGLACING       0x9C
#define MATROSKA_ID_TRACKMINCACHE         0x6DE7
#define MATROSKA_ID_TRACKMAXCACHE         0x6DF8
#define MATROSKA_ID_TRACKDEFAULTDURATION  0x23E383

/* children of the trackvideo master */
#define MATROSKA_ID_VIDEOFRAMERATE        0x2383E3
#define MATROSKA_ID_VIDEODISPLAYWIDTH     0x54B0
#define MATROSKA_ID_VIDEODISPLAYHEIGHT    0x54BA
#define MATROSKA_ID_VIDEOPIXELWIDTH       0xB0
#define MATROSKA_ID_VIDEOPIXELHEIGHT      0xBA
#define MATROSKA_ID_VIDEOFLAGINTERLACED   0x9A
#define MATROSKA_ID_VIDEOSTEREOMODE       0x53B9
#define MATROSKA_ID_VIDEOASPECTRATIO      0x54B3
#define MATROSKA_ID_VIDEOCOLOURSPACE      0x2EB524

/* children of the trackaudio master */
#define MATROSKA_ID_AUDIOSAMPLINGFREQ     0xB5
#define MATROSKA_ID_AUDIOBITDEPTH         0x6264
#define MATROSKA_ID_AUDIOCHANNELS         0x9F
111
112
/* child of the cues master */
#define MATROSKA_ID_POINTENTRY          0xBB

/* children of the pointentry master */
#define MATROSKA_ID_CUETIME             0xB3
#define MATROSKA_ID_CUETRACKPOSITION    0xB7

/* children of the cuetrackposition master */
#define MATROSKA_ID_CUETRACK            0xF7
#define MATROSKA_ID_CUECLUSTERPOSITION  0xF1

/* children of the tags master */
/* TODO */

/* child of the seekhead master */
#define MATROSKA_ID_SEEKENTRY           0x4DBB

/* children of the seekpoint master */
#define MATROSKA_ID_SEEKID              0x53AB
#define MATROSKA_ID_SEEKPOSITION        0x53AC

/* children of the cluster master */
#define MATROSKA_ID_CLUSTERTIMECODE     0xE7
#define MATROSKA_ID_BLOCKGROUP          0xA0

/* children of the blockgroup master */
#define MATROSKA_ID_BLOCK               0xA1
#define MATROSKA_ID_BLOCKDURATION       0x9B
#define MATROSKA_ID_BLOCKREFERENCE      0xFB
141
142
/* Track type codes used by this demuxer. */
typedef enum {
    MATROSKA_TRACK_TYPE_VIDEO    = 0x01,
    MATROSKA_TRACK_TYPE_AUDIO    = 0x02,
    MATROSKA_TRACK_TYPE_COMPLEX  = 0x03,
    MATROSKA_TRACK_TYPE_LOGO     = 0x10,
    MATROSKA_TRACK_TYPE_SUBTITLE = 0x11,
    MATROSKA_TRACK_TYPE_CONTROL  = 0x20
} MatroskaTrackType;
150
151
/* Eye-mode codes stored in MatroskaVideoTrack.eye_mode. */
typedef enum {
    MATROSKA_EYE_MODE_MONO  = 0,
    MATROSKA_EYE_MODE_RIGHT = 1,
    MATROSKA_EYE_MODE_LEFT  = 2,
    MATROSKA_EYE_MODE_BOTH  = 3
} MatroskaEyeMode;
157
158
/* Aspect-ratio mode codes stored in MatroskaVideoTrack.ar_mode. */
typedef enum {
    MATROSKA_ASPECT_RATIO_MODE_FREE  = 0,
    MATROSKA_ASPECT_RATIO_MODE_KEEP  = 1,
    MATROSKA_ASPECT_RATIO_MODE_FIXED = 2
} MatroskaAspectRatioMode;
163
164
/*
 * The flags below are not part of the Matroska format itself;
 * they are internal bookkeeping for the muxer/demuxer.
 */

/* Generic per-track flag bits; MATROSKA_TRACK_SHIFT marks the first bit
 * used by the type-specific flag sets. */
typedef enum {
    MATROSKA_TRACK_ENABLED = 1 << 0,
    MATROSKA_TRACK_DEFAULT = 1 << 1,
    MATROSKA_TRACK_LACING  = 1 << 2,
    MATROSKA_TRACK_SHIFT   = 1 << 16
} MatroskaTrackFlags;

/* Video-only flag bits, placed at MATROSKA_TRACK_SHIFT and above. */
typedef enum {
    MATROSKA_VIDEOTRACK_INTERLACED = MATROSKA_TRACK_SHIFT << 0
} MatroskaVideoTrackFlags;
179
180
/*
 * Matroska Codec IDs. Strings.
 */

#define MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC   "V_MS/VFW/FOURCC"
#define MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED "V_UNCOMPRESSED"
#define MATROSKA_CODEC_ID_VIDEO_MPEG4_SP     "V_MPEG4/ISO/SP"
#define MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP    "V_MPEG4/ISO/ASP"
#define MATROSKA_CODEC_ID_VIDEO_MPEG4_AP     "V_MPEG4/ISO/AP"
#define MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3    "V_MPEG4/MS/V3"
#define MATROSKA_CODEC_ID_VIDEO_MPEG1        "V_MPEG1"
#define MATROSKA_CODEC_ID_VIDEO_MPEG2        "V_MPEG2"
#define MATROSKA_CODEC_ID_VIDEO_MJPEG        "V_MJPEG"
/* TODO: Real/Quicktime */

/* MATROSKA_CODEC_ID_AUDIO_ACM used to be defined twice with the same
 * value; it is now defined once. */
#define MATROSKA_CODEC_ID_AUDIO_ACM          "A_MS/ACM"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L1     "A_MPEG/L1"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L2     "A_MPEG/L2"
#define MATROSKA_CODEC_ID_AUDIO_MPEG1_L3     "A_MPEG/L3"
#define MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE   "A_PCM/INT/BIG"
#define MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE   "A_PCM/INT/LIT"
#define MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT    "A_PCM/FLOAT/IEEE"
#define MATROSKA_CODEC_ID_AUDIO_AC3          "A_AC3"
#define MATROSKA_CODEC_ID_AUDIO_DTS          "A_DTS"
#define MATROSKA_CODEC_ID_AUDIO_VORBIS       "A_VORBIS"
/* The two AAC IDs end in '/': they are prefixes of the full codec ID
 * string (NOTE(review): presumably matched with a prefix compare). */
#define MATROSKA_CODEC_ID_AUDIO_MPEG2        "A_AAC/MPEG2/"
#define MATROSKA_CODEC_ID_AUDIO_MPEG4        "A_AAC/MPEG4/"
/* TODO: AC3-9/10 (?), Real, Musepack, Quicktime */

/* max. depth in the EBML tree structure */
#define EBML_MAX_DEPTH 16
212
213
typedef struct Track {
214
    MatroskaTrackType type;
215
216
    /* Unique track number and track ID. stream_index is the index that
217
     * the calling app uses for this track. */
218
    uint32_t num,
219
        uid,
220
        stream_index;
221
222
    char *name,
223
        *language;
224
225
    char *codec_id,
226
        *codec_name;
227
228
    unsigned char *codec_priv;
229
    int codec_priv_size;
230
231
    int64_t default_duration;
232
    MatroskaTrackFlags flags;
233
} MatroskaTrack;
234
235
typedef struct MatroskaVideoTrack {
236
    MatroskaTrack track;
237
238
    int pixel_width,
239
        pixel_height,
240
        display_width,
241
        display_height;
242
243
    uint32_t fourcc;
244
245
    MatroskaAspectRatioMode ar_mode;
246
    MatroskaEyeMode eye_mode;
247
248
    //..
249
} MatroskaVideoTrack;
250
251
typedef struct MatroskaAudioTrack {
252
    MatroskaTrack track;
253
254
    int channels,
255
        bitdepth,
256
        samplerate;
257
    //..
258
} MatroskaAudioTrack;
259
260
typedef struct MatroskaSubtitleTrack {
261
    MatroskaTrack track;
262
263
    //..
264
} MatroskaSubtitleTrack;
265
266
/* One open master element: the stream position where its content starts
 * and the content length, as recorded by ebml_read_master(). */
typedef struct MatroskaLevel {
    uint64_t start;
    uint64_t length;
} MatroskaLevel;
269
270
/* One entry of the seek index. */
typedef struct MatroskaDemuxIndex {
    /* position of the corresponding *cluster*! */
    uint64_t pos;
    /* reference to the track's 'num' */
    uint16_t track;
    /* in nanoseconds */
    uint64_t time;
} MatroskaDemuxIndex;
275
276
typedef struct MatroskaDemuxContext {
277
    AVFormatContext *ctx;
278
279
    /* ebml stuff */
280
    int num_levels;
281
    MatroskaLevel levels[EBML_MAX_DEPTH];
282
    int level_up;
283
284
    /* matroska stuff */
285
    char *writing_app,
286
        *muxing_app;
287
    int64_t created;
288
289
    /* timescale in the file */
290
    int64_t time_scale;
291
292
    /* length, position (time, ns) */
293
    int64_t duration,
294
        pos;
295
296
    /* num_streams is the number of streams that av_new_stream() was called
297
     * for ( = that are available to the calling program). */
298
    int num_tracks, num_streams;
299
    MatroskaTrack *tracks[MAX_STREAMS];
300
301
    /* cache for ID peeking */
302
    uint32_t peek_id;
303
304
    /* byte position of the segment inside the stream */
305
    offset_t segment_start;
306
307
    /* The packet queue. */
308
    AVPacket **packets;
309
    int num_packets;
310
311
    /* have we already parse metadata/cues/clusters? */
312
    int metadata_parsed,
313
        index_parsed,
314
        done;
315
316
    /* The index for seeking. */
317
    int num_indexes;
318
    MatroskaDemuxIndex *index;
319
} MatroskaDemuxContext;
320
321
/*
 * The first few functions handle EBML file parsing. The rest
 * is the document interpretation. Matroska really is just an
 * EBML file.
 */
326
327
/*
328
 * Return: the amount of levels in the hierarchy that the
329
 * current element lies higher than the previous one.
330
 * The opposite isn't done - that's auto-done using master
331
 * element reading.
332
 */
333
334
static int
335
ebml_read_element_level_up (MatroskaDemuxContext *matroska)
336
{
337
    ByteIOContext *pb = &matroska->ctx->pb;
338
    offset_t pos = url_ftell(pb);
339
    int num = 0;
340
341
    while (matroska->num_levels > 0) {
342
        MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
343
344
        if (pos >= level->start + level->length) {
345
            matroska->num_levels--;
346
            num++;
347
        } else {
348
            break;
349
        }
350
    }
351
352
    return num;
353
}
354
355
/*
356
 * Read: an "EBML number", which is defined as a variable-length
357
 * array of bytes. The first byte indicates the length by giving a
358
 * number of 0-bits followed by a one. The position of the first
359
 * "one" bit inside the first byte indicates the length of this
360
 * number.
361
 * Returns: num. of bytes read. < 0 on error.
362
 */
363
364
static int
365
ebml_read_num (MatroskaDemuxContext *matroska,
366
               int                   max_size,
367
               uint64_t             *number)
368
{
369
    ByteIOContext *pb = &matroska->ctx->pb;
370
    int len_mask = 0x80, read = 1, n = 1;
371
    int64_t total = 0;
372
373
    /* the first byte tells us the length in bytes - get_byte() can normally
374
     * return 0, but since that's not a valid first ebmlID byte, we can
375
     * use it safely here to catch EOS. */
376
    if (!(total = get_byte(pb))) {
377
        /* we might encounter EOS here */
378
        if (!url_feof(pb)) {
379
            offset_t pos = url_ftell(pb);
380
            av_log(matroska->ctx, AV_LOG_ERROR,
381
                   "Read error at pos. %llu (0x%llx)\n",
382
                   pos, pos);
383
        }
384
        return AVERROR_IO; /* EOS or actual I/O error */
385
    }
386
387
    /* get the length of the EBML number */
388
    while (read <= max_size && !(total & len_mask)) {
389
        read++;
390
        len_mask >>= 1;
391
    }
392
    if (read > max_size) {
393
        offset_t pos = url_ftell(pb) - 1;
394
        av_log(matroska->ctx, AV_LOG_ERROR,
395
               "Invalid EBML number size tag 0x%02x at pos %llu (0x%llx)\n",
396
               (uint8_t) total, pos, pos);
397
        return AVERROR_INVALIDDATA;
398
    }
399
400
    /* read out length */
401
    total &= ~len_mask;
402
    while (n++ < read)
403
        total = (total << 8) | get_byte(pb);
404
405
    *number = total;
406
407
    return read;
408
}
409
410
/*
411
 * Read: the element content data ID.
412
 * Return: the number of bytes read or < 0 on error.
413
 */
414
415
static int
416
ebml_read_element_id (MatroskaDemuxContext *matroska,
417
                      uint32_t             *id,
418
                      int                  *level_up)
419
{
420
    int read;
421
    uint64_t total;
422
423
    /* if we re-call this, use our cached ID */
424
    if (matroska->peek_id != 0) {
425
        if (level_up)
426
            *level_up = 0;
427
        *id = matroska->peek_id;
428
        return 0;
429
    }
430
431
    /* read out the "EBML number", include tag in ID */
432
    if ((read = ebml_read_num(matroska, 4, &total)) < 0)
433
        return read;
434
    *id = matroska->peek_id  = total | (1 << (read * 7));
435
436
    /* level tracking */
437
    if (level_up)
438
        *level_up = ebml_read_element_level_up(matroska);
439
440
    return read;
441
}
442
443
/*
444
 * Read: element content length.
445
 * Return: the number of bytes read or < 0 on error.
446
 */
447
448
static int
449
ebml_read_element_length (MatroskaDemuxContext *matroska,
450
                          uint64_t             *length)
451
{
452
    /* clear cache since we're now beyond that data point */
453
    matroska->peek_id = 0;
454
455
    /* read out the "EBML number", include tag in ID */
456
    return ebml_read_num(matroska, 8, length);
457
}
458
459
/*
460
 * Return: the ID of the next element, or 0 on error.
461
 * Level_up contains the amount of levels that this
462
 * next element lies higher than the previous one.
463
 */
464
465
static uint32_t
466
ebml_peek_id (MatroskaDemuxContext *matroska,
467
              int                  *level_up)
468
{
469
    uint32_t id;
470
471
    assert(level_up != NULL);
472
473
    if (ebml_read_element_id(matroska, &id, level_up) < 0)
474
        return 0;
475
476
    return id;
477
}
478
479
/*
480
 * Seek to a given offset.
481
 * 0 is success, -1 is failure.
482
 */
483
484
static int
485
ebml_read_seek (MatroskaDemuxContext *matroska,
486
                offset_t              offset)
487
{
488
    ByteIOContext *pb = &matroska->ctx->pb;
489
490
    /* clear ID cache, if any */
491
    matroska->peek_id = 0;
492
493
    return (url_fseek(pb, offset, SEEK_SET) == offset) ? 0 : -1;
494
}
495
496
/*
497
 * Skip the next element.
498
 * 0 is success, -1 is failure.
499
 */
500
501
static int
502
ebml_read_skip (MatroskaDemuxContext *matroska)
503
{
504
    ByteIOContext *pb = &matroska->ctx->pb;
505
    uint32_t id;
506
    uint64_t length;
507
    int res;
508
509
    if ((res = ebml_read_element_id(matroska, &id, NULL)) < 0 ||
510
        (res = ebml_read_element_length(matroska, &length)) < 0)
511
        return res;
512
513
    url_fskip(pb, length);
514
515
    return 0;
516
}
517
518
/*
519
 * Read the next element as an unsigned int.
520
 * 0 is success, < 0 is failure.
521
 */
522
523
static int
524
ebml_read_uint (MatroskaDemuxContext *matroska,
525
                uint32_t             *id,
526
                uint64_t             *num)
527
{
528
    ByteIOContext *pb = &matroska->ctx->pb;
529
    int n = 0, size, res;
530
    uint64_t rlength;
531
532
    if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
533
        (res = ebml_read_element_length(matroska, &rlength)) < 0)
534
        return res;
535
    size = rlength;
536
    if (size < 1 || size > 8) {
537
        offset_t pos = url_ftell(pb);
538
        av_log(matroska->ctx, AV_LOG_ERROR,
539
               "Invalid uint element size %d at position %lld (0x%llx)\n",
540
                size, pos, pos);
541
        return AVERROR_INVALIDDATA;
542
    }
543
544
    /* big-endian ordening; build up number */
545
    *num = 0;
546
    while (n++ < size)
547
        *num = (*num << 8) | get_byte(pb);
548
549
    return 0;
550
}
551
552
/*
553
 * Read the next element as a signed int.
554
 * 0 is success, < 0 is failure.
555
 */
556
557
static int
558
ebml_read_sint (MatroskaDemuxContext *matroska,
559
                uint32_t             *id,
560
                int64_t              *num)
561
{
562
    ByteIOContext *pb = &matroska->ctx->pb;
563
    int size, n = 1, negative = 0, res;
564
    uint64_t rlength;
565
566
    if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
567
        (res = ebml_read_element_length(matroska, &rlength)) < 0)
568
        return res;
569
    size = rlength;
570
    if (size < 1 || size > 8) {
571
        offset_t pos = url_ftell(pb);
572
        av_log(matroska->ctx, AV_LOG_ERROR,
573
               "Invalid sint element size %d at position %lld (0x%llx)\n",
574
                size, pos, pos);
575
        return AVERROR_INVALIDDATA;
576
    }
577
    if ((*num = get_byte(pb)) & 0x80) {
578
        negative = 1;
579
        *num &= ~0x80;
580
    }
581
    *num = 0;
582
    while (n++ < size)
583
        *num = (*num << 8) | get_byte(pb);
584
585
    /* make signed */
586
    if (negative)
587
        *num = *num - (1LL << ((8 * size) - 1));
588
589
    return 0;
590
}
591
592
/*
593
 * Read the next element as a float.
594
 * 0 is success, < 0 is failure.
595
 */
596
597
static int
598
ebml_read_float (MatroskaDemuxContext *matroska,
599
                 uint32_t             *id,
600
                 double               *num)
601
{
602
    ByteIOContext *pb = &matroska->ctx->pb;
603
    int size, res;
604
    uint64_t rlength;
605
606
    if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
607
        (res = ebml_read_element_length(matroska, &rlength)) < 0)
608
        return res;
609
    size = rlength;
610
611
    if (size != 4 && size != 8 && size != 10) {
612
        offset_t pos = url_ftell(pb);
613
        av_log(matroska->ctx, AV_LOG_ERROR,
614
               "Invalid float element size %d at position %llu (0x%llx)\n",
615
               size, pos, pos);
616
        return AVERROR_INVALIDDATA;
617
    }
618
    if (size == 10) {
619
        av_log(matroska->ctx, AV_LOG_ERROR,
620
               "FIXME! 10-byte floats unimplemented\n");
621
        return AVERROR_UNKNOWN;
622
    }
623
624
    if (size == 4) {
625
        float f;
626
627 6e046750 Michael Niedermayer
        while (size-- > 0)
628 08abe0fd Michael Niedermayer
#ifdef WORDS_BIGENDIAN
629 6e046750 Michael Niedermayer
            ((uint8_t *) &f)[3 - size] = get_byte(pb);
630 08abe0fd Michael Niedermayer
#else
631
            ((uint8_t *) &f)[size] = get_byte(pb);
632
#endif
633
634
        *num = f;
635
    } else {
636
        double d;
637
638 6e046750 Michael Niedermayer
        while (size-- > 0)
639 08abe0fd Michael Niedermayer
#ifdef WORDS_BIGENDIAN
640 6e046750 Michael Niedermayer
            ((uint8_t *) &d)[7 - size] = get_byte(pb);
641 08abe0fd Michael Niedermayer
#else
642
            ((uint8_t *) &d)[size] = get_byte(pb);
643
#endif
644
645
        *num = d;
646
    }
647
648
    return 0;
649
}
650
651
/*
652
 * Read the next element as an ASCII string.
653
 * 0 is success, < 0 is failure.
654
 */
655
656
static int
657
ebml_read_ascii (MatroskaDemuxContext *matroska,
658
                 uint32_t             *id,
659
                 char                **str)
660
{
661
    ByteIOContext *pb = &matroska->ctx->pb;
662
    int size, res;
663
    uint64_t rlength;
664
665
    if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
666
        (res = ebml_read_element_length(matroska, &rlength)) < 0)
667
        return res;
668
    size = rlength;
669
670
    /* ebml strings are usually not 0-terminated, so we allocate one
671
     * byte more, read the string and NULL-terminate it ourselves. */
672
    if (!(*str = av_malloc(size + 1))) {
673
        av_log(matroska->ctx, AV_LOG_ERROR, "Memory allocation failed\n");
674
        return AVERROR_NOMEM;
675
    }
676
    if (get_buffer(pb, (uint8_t *) *str, size) != size) {
677
        offset_t pos = url_ftell(pb);
678
        av_log(matroska->ctx, AV_LOG_ERROR,
679
               "Read error at pos. %llu (0x%llx)\n", pos, pos);
680
        return AVERROR_IO;
681
    }
682
    (*str)[size] = '\0';
683
684
    return 0;
685
}
686
687
/*
688
 * Read the next element as a UTF-8 string.
689
 * 0 is success, < 0 is failure.
690
 */
691
692
static int
693
ebml_read_utf8 (MatroskaDemuxContext *matroska,
694
                uint32_t             *id,
695
                char                **str)
696
{
697
  return ebml_read_ascii(matroska, id, str);
698
}
699
700
/*
701
 * Read the next element as a date (nanoseconds since 1/1/2000).
702
 * 0 is success, < 0 is failure.
703
 */
704
705
static int
706
ebml_read_date (MatroskaDemuxContext *matroska,
707
                uint32_t             *id,
708
                int64_t              *date)
709
{
710
  return ebml_read_sint(matroska, id, date);
711
}
712
713
/*
714
 * Read the next element, but only the header. The contents
715
 * are supposed to be sub-elements which can be read separately.
716
 * 0 is success, < 0 is failure.
717
 */
718
719
static int
720
ebml_read_master (MatroskaDemuxContext *matroska,
721
                  uint32_t             *id)
722
{
723
    ByteIOContext *pb = &matroska->ctx->pb;
724
    uint64_t length;
725
    MatroskaLevel *level;
726
    int res;
727
728
    if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
729
        (res = ebml_read_element_length(matroska, &length)) < 0)
730
        return res;
731
732
    /* protect... (Heaven forbids that the '>' is true) */
733
    if (matroska->num_levels >= EBML_MAX_DEPTH) {
734
        av_log(matroska->ctx, AV_LOG_ERROR,
735
               "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
736
        return AVERROR_NOTSUPP;
737
    }
738
739
    /* remember level */
740
    level = &matroska->levels[matroska->num_levels++];
741
    level->start = url_ftell(pb);
742
    level->length = length;
743
744
    return 0;
745
}
746
747
/*
748
 * Read the next element as binary data.
749
 * 0 is success, < 0 is failure.
750
 */
751
752
static int
753
ebml_read_binary (MatroskaDemuxContext *matroska,
754
                  uint32_t             *id,
755
                  uint8_t             **binary,
756
                  int                  *size)
757
{
758
    ByteIOContext *pb = &matroska->ctx->pb;
759
    uint64_t rlength;
760
    int res;
761
762
    if ((res = ebml_read_element_id(matroska, id, NULL)) < 0 ||
763
        (res = ebml_read_element_length(matroska, &rlength)) < 0)
764
        return res;
765
    *size = rlength;
766
767
    if (!(*binary = av_malloc(*size))) {
768
        av_log(matroska->ctx, AV_LOG_ERROR,
769
               "Memory allocation error\n");
770
        return AVERROR_NOMEM;
771
    }
772
773
    if (get_buffer(pb, *binary, *size) != *size) {
774
        offset_t pos = url_ftell(pb);
775
        av_log(matroska->ctx, AV_LOG_ERROR,
776
               "Read error at pos. %llu (0x%llx)\n", pos, pos);
777
        return AVERROR_IO;
778
    }
779
780
    return 0;
781
}
782
783
/*
784
 * Read signed/unsigned "EBML" numbers.
785
 * Return: number of bytes processed, < 0 on error.
786
 * XXX: use ebml_read_num().
787
 */
788
789
static int
790
matroska_ebmlnum_uint (uint8_t  *data,
791
                       uint32_t  size,
792
                       uint64_t *num)
793
{
794
    int len_mask = 0x80, read = 1, n = 1, num_ffs = 0;
795
    uint64_t total;
796
797
    if (size <= 0)
798
        return AVERROR_INVALIDDATA;
799
800
    total = data[0];
801
    while (read <= 8 && !(total & len_mask)) {
802
        read++;
803
        len_mask >>= 1;
804
    }
805
    if (read > 8)
806
        return AVERROR_INVALIDDATA;
807
808
    if ((total &= (len_mask - 1)) == len_mask - 1)
809
        num_ffs++;
810
    if (size < read)
811
        return AVERROR_INVALIDDATA;
812
    while (n < read) {
813
        if (data[n] == 0xff)
814
            num_ffs++;
815
        total = (total << 8) | data[n];
816
        n++;
817
    }
818
819
    if (!total)
820
        return AVERROR_INVALIDDATA;
821
822
    if (read == num_ffs)
823
        *num = (uint64_t)-1;
824
    else
825
        *num = total;
826
827
    return read;
828
}
829
830
/*
 * Decode a signed "EBML number" from a memory buffer: the number is
 * decoded as unsigned and then shifted down by 2^(7*len - 1) - 1,
 * where len is the encoded length in bytes. The all-ones encoding maps
 * to INT64_MAX.
 * Return: number of bytes processed, < 0 on error.
 */
static int matroska_ebmlnum_sint(uint8_t *data, uint32_t size, int64_t *num)
{
    uint64_t raw;
    int len = matroska_ebmlnum_uint(data, size, &raw);

    if (len < 0)
        return len;

    if (raw != (uint64_t)-1) {
        /* subtract the bias that centres the range around zero */
        *num = raw - ((1LL << ((7 * len) - 1)) - 1);
    } else {
        *num = INT64_MAX;
    }

    return len;
}
854
855
/*
856
 * Read an EBML header.
857
 * 0 is success, < 0 is failure.
858
 */
859
860
static int
861
ebml_read_header (MatroskaDemuxContext *matroska,
862
                  char                **doctype,
863
                  int                  *version)
864
{
865
    uint32_t id;
866
    int level_up, res = 0;
867
868
    /* default init */
869
    if (doctype)
870
        *doctype = NULL;
871
    if (version)
872
        *version = 1;
873
874
    if (!(id = ebml_peek_id(matroska, &level_up)) ||
875
        level_up != 0 || id != EBML_ID_HEADER) {
876
        av_log(matroska->ctx, AV_LOG_ERROR,
877
               "This is not an EBML file (id=0x%x/0x%x)\n", id, EBML_ID_HEADER);
878
        return AVERROR_INVALIDDATA;
879
    }
880
    if ((res = ebml_read_master(matroska, &id)) < 0)
881
        return res;
882
883
    while (res == 0) {
884
        if (!(id = ebml_peek_id(matroska, &level_up)))
885
            return AVERROR_IO;
886
887
        /* end-of-header */
888
        if (level_up)
889
            break;
890
891
        switch (id) {
892
            /* is our read version uptodate? */
893
            case EBML_ID_EBMLREADVERSION: {
894
                uint64_t num;
895
896
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
897
                    return res;
898
                if (num > EBML_VERSION) {
899
                    av_log(matroska->ctx, AV_LOG_ERROR,
900
                           "EBML version %llu (> %d) is not supported\n",
901
                           num, EBML_VERSION);
902
                    return AVERROR_INVALIDDATA;
903
                }
904
                break;
905
            }
906
907
            /* we only handle 8 byte lengths at max */
908
            case EBML_ID_EBMLMAXSIZELENGTH: {
909
                uint64_t num;
910
911
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
912
                    return res;
913
                if (num > sizeof(uint64_t)) {
914
                    av_log(matroska->ctx, AV_LOG_ERROR,
915
                           "Integers of size %llu (> %d) not supported\n",
916
                           num, sizeof(uint64_t));
917
                    return AVERROR_INVALIDDATA;
918
                }
919
                break;
920
            }
921
922
            /* we handle 4 byte IDs at max */
923
            case EBML_ID_EBMLMAXIDLENGTH: {
924
                uint64_t num;
925
926
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
927
                    return res;
928
                if (num > sizeof(uint32_t)) {
929
                    av_log(matroska->ctx, AV_LOG_ERROR,
930 e9afa2f4 Alex Beregszaszi
                           "IDs of size %llu (> %u) not supported\n",
931 08abe0fd Michael Niedermayer
                            num, sizeof(uint32_t));
932
                    return AVERROR_INVALIDDATA;
933
                }
934
                break;
935
            }
936
937
            case EBML_ID_DOCTYPE: {
938
                char *text;
939
940
                if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
941
                    return res;
942
                if (doctype) {
943
                    if (*doctype)
944
                        av_free(*doctype);
945
                    *doctype = text;
946
                } else
947
                    av_free(text);
948
                break;
949
            }
950
951
            case EBML_ID_DOCTYPEREADVERSION: {
952
                uint64_t num;
953
954
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
955
                    return res;
956
                if (version)
957
                    *version = num;
958
                break;
959
            }
960
961
            default:
962
                av_log(matroska->ctx, AV_LOG_INFO,
963
                       "Unknown data type 0x%x in EBML header", id);
964
                /* pass-through */
965
966
            case EBML_ID_VOID:
967
            /* we ignore these two, as they don't tell us anything we
968
             * care about */
969
            case EBML_ID_EBMLVERSION:
970
            case EBML_ID_DOCTYPEVERSION:
971
                res = ebml_read_skip (matroska);
972
                break;
973
        }
974
    }
975
976
    return 0;
977
}
978
979
/*
980
 * Put one packet in an application-supplied AVPacket struct.
981
 * Returns 0 on success or -1 on failure.
982
 */
983
984
static int
985
matroska_deliver_packet (MatroskaDemuxContext *matroska,
986
                         AVPacket             *pkt)
987
{
988
    if (matroska->num_packets > 0) {
989
        memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
990
        av_free(matroska->packets[0]);
991
        if (matroska->num_packets > 1) {
992
            memmove(&matroska->packets[0], &matroska->packets[1],
993
                    (matroska->num_packets - 1) * sizeof(AVPacket *));
994
            matroska->packets =
995
                av_realloc(matroska->packets, (matroska->num_packets - 1) *
996
                           sizeof(AVPacket *));
997
        } else {
998
            av_free(matroska->packets);
999
            matroska->packets = NULL;
1000
        }
1001
        matroska->num_packets--;
1002
        return 0;
1003
    }
1004
1005
    return -1;
1006
}
1007
1008
/*
1009
 * Put a packet into our internal queue. Will be delivered to the
1010
 * user/application during the next get_packet() call.
1011
 */
1012
1013
static void
1014
matroska_queue_packet (MatroskaDemuxContext *matroska,
1015
                       AVPacket             *pkt)
1016
{
1017
    matroska->packets =
1018
        av_realloc(matroska->packets, (matroska->num_packets + 1) *
1019
                   sizeof(AVPacket *));
1020
    matroska->packets[matroska->num_packets] = pkt;
1021
    matroska->num_packets++;
1022
}
1023
1024
/*
1025
 * Autodetecting...
1026
 */
1027
1028
static int
1029
matroska_probe (AVProbeData *p)
1030
{
1031
    uint64_t total = 0;
1032
    int len_mask = 0x80, size = 1, n = 1;
1033
    uint8_t probe_data[] = { 'm', 'a', 't', 'r', 'o', 's', 'k', 'a' };
1034
1035
    if (p->buf_size < 5)
1036
        return 0;
1037
1038
    /* ebml header? */
1039
    if ((p->buf[0] << 24 | p->buf[1] << 16 |
1040
         p->buf[2] << 8 | p->buf[3]) != EBML_ID_HEADER)
1041
        return 0;
1042
1043
    /* length of header */
1044
    total = p->buf[4];
1045
    while (size <= 8 && !(total & len_mask)) {
1046
        size++;
1047
        len_mask >>= 1;
1048
    }
1049
    if (size > 8)
1050
      return 0;
1051
    total &= (len_mask - 1);
1052
    while (n < size)
1053
        total = (total << 8) | p->buf[4 + n++];
1054
1055
    /* does the probe data contain the whole header? */
1056
    if (p->buf_size < 4 + size + total)
1057
      return 0;
1058
1059
    /* the header must contain the document type 'matroska'. For now,
1060
     * we don't parse the whole header but simply check for the
1061
     * availability of that array of characters inside the header.
1062
     * Not fully fool-proof, but good enough. */
1063
    for (n = 4 + size; n < 4 + size + total - sizeof(probe_data); n++)
1064
        if (!memcmp (&p->buf[n], probe_data, sizeof(probe_data)))
1065
            return AVPROBE_SCORE_MAX;
1066
1067
    return 0;
1068
}
1069
1070
/*
1071
 * From here on, it's all XML-style DTD stuff... Needs no comments.
1072
 */
1073
1074
static int
1075
matroska_parse_info (MatroskaDemuxContext *matroska)
1076
{
1077
    int res = 0;
1078
    uint32_t id;
1079
1080
    av_log(matroska->ctx, AV_LOG_DEBUG, "Parsing info...\n");
1081
1082
    while (res == 0) {
1083
        if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1084
            res = AVERROR_IO;
1085
            break;
1086
        } else if (matroska->level_up) {
1087
            matroska->level_up--;
1088
            break;
1089
        }
1090
1091
        switch (id) {
1092
            /* cluster timecode */
1093
            case MATROSKA_ID_TIMECODESCALE: {
1094
                uint64_t num;
1095
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1096
                    break;
1097
                matroska->time_scale = num;
1098
                break;
1099
            }
1100
1101
            case MATROSKA_ID_DURATION: {
1102
                double num;
1103
                if ((res = ebml_read_float(matroska, &id, &num)) < 0)
1104
                    break;
1105
                matroska->duration = num * matroska->time_scale;
1106
                break;
1107
            }
1108
1109
            case MATROSKA_ID_WRITINGAPP: {
1110
                char *text;
1111
                if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1112
                    break;
1113
                matroska->writing_app = text;
1114
                break;
1115
            }
1116
1117
            case MATROSKA_ID_MUXINGAPP: {
1118
                char *text;
1119
                if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1120
                    break;
1121
                matroska->muxing_app = text;
1122
                break;
1123
            }
1124
1125
            case MATROSKA_ID_DATEUTC: {
1126
                int64_t time;
1127
                if ((res = ebml_read_date(matroska, &id, &time)) < 0)
1128
                    break;
1129
                matroska->created = time;
1130
                break;
1131
            }
1132
1133
            default:
1134
                av_log(matroska->ctx, AV_LOG_INFO,
1135
                       "Unknown entry 0x%x in info header\n", id);
1136
                /* fall-through */
1137
1138
            case EBML_ID_VOID:
1139
                res = ebml_read_skip(matroska);
1140
                break;
1141
        }
1142
1143
        if (matroska->level_up) {
1144
            matroska->level_up--;
1145
            break;
1146
        }
1147
    }
1148
1149
    return res;
1150
}
1151
1152
static int
1153
matroska_add_stream (MatroskaDemuxContext *matroska)
1154
{
1155
    int res = 0;
1156
    uint32_t id;
1157
    MatroskaTrack *track;
1158
1159
    av_log(matroska->ctx, AV_LOG_DEBUG, "parsing track, adding stream..,\n");
1160
1161
    /* Allocate a generic track. As soon as we know its type we'll realloc. */
1162
    track = av_mallocz(sizeof(MatroskaTrack));
1163
    matroska->num_tracks++;
1164
1165
    /* start with the master */
1166
    if ((res = ebml_read_master(matroska, &id)) < 0)
1167
        return res;
1168
1169
    /* try reading the trackentry headers */
1170
    while (res == 0) {
1171
        if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1172
            res = AVERROR_IO;
1173
            break;
1174
        } else if (matroska->level_up > 0) {
1175
            matroska->level_up--;
1176
            break;
1177
        }
1178
1179
        switch (id) {
1180
            /* track number (unique stream ID) */
1181
            case MATROSKA_ID_TRACKNUMBER: {
1182
                uint64_t num;
1183
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1184
                    break;
1185
                track->num = num;
1186
                break;
1187
            }
1188
1189
            /* track UID (unique identifier) */
1190
            case MATROSKA_ID_TRACKUID: {
1191
                uint64_t num;
1192
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1193
                    break;
1194
                track->uid = num;
1195
                break;
1196
            }
1197
1198
            /* track type (video, audio, combined, subtitle, etc.) */
1199
            case MATROSKA_ID_TRACKTYPE: {
1200
                uint64_t num;
1201
                if (track->type != 0) {
1202
                    av_log(matroska->ctx, AV_LOG_INFO,
1203
                           "More than one tracktype in an entry - skip\n");
1204
                    break;
1205
                }
1206
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1207
                    break;
1208
                track->type = num;
1209
1210
                /* ok, so we're actually going to reallocate this thing */
1211
                switch (track->type) {
1212
                    case MATROSKA_TRACK_TYPE_VIDEO:
1213
                        track = (MatroskaTrack *)
1214
                            av_realloc(track, sizeof(MatroskaVideoTrack));
1215
                        break;
1216
                    case MATROSKA_TRACK_TYPE_AUDIO:
1217
                        track = (MatroskaTrack *)
1218
                            av_realloc(track, sizeof(MatroskaAudioTrack));
1219
                        ((MatroskaAudioTrack *)track)->channels = 1;
1220
                        ((MatroskaAudioTrack *)track)->samplerate = 8000;
1221
                        break;
1222
                    case MATROSKA_TRACK_TYPE_SUBTITLE:
1223
                        track = (MatroskaTrack *)
1224
                            av_realloc(track, sizeof(MatroskaSubtitleTrack));
1225
                        break;
1226
                    case MATROSKA_TRACK_TYPE_COMPLEX:
1227
                    case MATROSKA_TRACK_TYPE_LOGO:
1228
                    case MATROSKA_TRACK_TYPE_CONTROL:
1229
                    default:
1230
                        av_log(matroska->ctx, AV_LOG_INFO,
1231
                               "Unknown or unsupported track type 0x%x\n",
1232
                               track->type);
1233
                        track->type = 0;
1234
                        break;
1235
                }
1236
                matroska->tracks[matroska->num_tracks - 1] = track;
1237
                break;
1238
            }
1239
1240
            /* tracktype specific stuff for video */
1241
            case MATROSKA_ID_TRACKVIDEO: {
1242
                MatroskaVideoTrack *videotrack;
1243
                if (track->type != MATROSKA_TRACK_TYPE_VIDEO) {
1244
                    av_log(matroska->ctx, AV_LOG_INFO,
1245
                           "video data in non-video track - ignoring\n");
1246
                    res = AVERROR_INVALIDDATA;
1247
                    break;
1248
                } else if ((res = ebml_read_master(matroska, &id)) < 0)
1249
                    break;
1250
                videotrack = (MatroskaVideoTrack *)track;
1251
1252
                while (res == 0) {
1253
                    if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1254
                        res = AVERROR_IO;
1255
                        break;
1256
                    } else if (matroska->level_up > 0) {
1257
                        matroska->level_up--;
1258
                        break;
1259
                    }
1260
1261
                    switch (id) {
1262
                        /* fixme, this should be one-up, but I get it here */
1263
                        case MATROSKA_ID_TRACKDEFAULTDURATION: {
1264
                            uint64_t num;
1265
                            if ((res = ebml_read_uint (matroska, &id,
1266
                                                       &num)) < 0)
1267
                                break;
1268
                            track->default_duration = num;
1269
                            break;
1270
                        }
1271
1272
                        /* video framerate */
1273
                        case MATROSKA_ID_VIDEOFRAMERATE: {
1274
                            double num;
1275
                            if ((res = ebml_read_float(matroska, &id,
1276
                                                       &num)) < 0)
1277
                                break;
1278
                            track->default_duration = 1000000000 * (1. / num);
1279
                            break;
1280
                        }
1281
1282
                        /* width of the size to display the video at */
1283
                        case MATROSKA_ID_VIDEODISPLAYWIDTH: {
1284
                            uint64_t num;
1285
                            if ((res = ebml_read_uint(matroska, &id,
1286
                                                      &num)) < 0)
1287
                                break;
1288
                            videotrack->display_width = num;
1289
                            break;
1290
                        }
1291
1292
                        /* height of the size to display the video at */
1293
                        case MATROSKA_ID_VIDEODISPLAYHEIGHT: {
1294
                            uint64_t num;
1295
                            if ((res = ebml_read_uint(matroska, &id,
1296
                                                      &num)) < 0)
1297
                                break;
1298
                            videotrack->display_height = num;
1299
                            break;
1300
                        }
1301
1302
                        /* width of the video in the file */
1303
                        case MATROSKA_ID_VIDEOPIXELWIDTH: {
1304
                            uint64_t num;
1305
                            if ((res = ebml_read_uint(matroska, &id,
1306
                                                      &num)) < 0)
1307
                                break;
1308
                            videotrack->pixel_width = num;
1309
                            break;
1310
                        }
1311
1312
                        /* height of the video in the file */
1313
                        case MATROSKA_ID_VIDEOPIXELHEIGHT: {
1314
                            uint64_t num;
1315
                            if ((res = ebml_read_uint(matroska, &id,
1316
                                                      &num)) < 0)
1317
                                break;
1318
                            videotrack->pixel_height = num;
1319
                            break;
1320
                        }
1321
1322
                        /* whether the video is interlaced */
1323
                        case MATROSKA_ID_VIDEOFLAGINTERLACED: {
1324
                            uint64_t num;
1325
                            if ((res = ebml_read_uint(matroska, &id,
1326
                                                      &num)) < 0)
1327
                                break;
1328
                            if (num)
1329
                                track->flags |=
1330
                                    MATROSKA_VIDEOTRACK_INTERLACED;
1331
                            else
1332
                                track->flags &=
1333
                                    ~MATROSKA_VIDEOTRACK_INTERLACED;
1334
                            break;
1335
                        }
1336
1337
                        /* stereo mode (whether the video has two streams,
1338
                         * where one is for the left eye and the other for
1339
                         * the right eye, which creates a 3D-like
1340
                         * effect) */
1341
                        case MATROSKA_ID_VIDEOSTEREOMODE: {
1342
                            uint64_t num;
1343
                            if ((res = ebml_read_uint(matroska, &id,
1344
                                                      &num)) < 0)
1345
                                break;
1346
                            if (num != MATROSKA_EYE_MODE_MONO &&
1347
                                num != MATROSKA_EYE_MODE_LEFT &&
1348
                                num != MATROSKA_EYE_MODE_RIGHT &&
1349
                                num != MATROSKA_EYE_MODE_BOTH) {
1350
                                av_log(matroska->ctx, AV_LOG_INFO,
1351
                                       "Ignoring unknown eye mode 0x%x\n",
1352
                                       (uint32_t) num);
1353
                                break;
1354
                            }
1355
                            videotrack->eye_mode = num;
1356
                            break;
1357
                        }
1358
1359
                        /* aspect ratio behaviour */
1360
                        case MATROSKA_ID_VIDEOASPECTRATIO: {
1361
                            uint64_t num;
1362
                            if ((res = ebml_read_uint(matroska, &id,
1363
                                                      &num)) < 0)
1364
                                break;
1365
                            if (num != MATROSKA_ASPECT_RATIO_MODE_FREE &&
1366
                                num != MATROSKA_ASPECT_RATIO_MODE_KEEP &&
1367
                                num != MATROSKA_ASPECT_RATIO_MODE_FIXED) {
1368
                                av_log(matroska->ctx, AV_LOG_INFO,
1369
                                       "Ignoring unknown aspect ratio 0x%x\n",
1370
                                       (uint32_t) num);
1371
                                break;
1372
                            }
1373
                            videotrack->ar_mode = num;
1374
                            break;
1375
                        }
1376
1377
                        /* colourspace (only matters for raw video)
1378
                         * fourcc */
1379
                        case MATROSKA_ID_VIDEOCOLOURSPACE: {
1380
                            uint64_t num;
1381
                            if ((res = ebml_read_uint(matroska, &id,
1382
                                                      &num)) < 0)
1383
                                break;
1384
                            videotrack->fourcc = num;
1385
                            break;
1386
                        }
1387
1388
                        default:
1389
                            av_log(matroska->ctx, AV_LOG_INFO,
1390
                                   "Unknown video track header entry "
1391
                                   "0x%x - ignoring\n", id);
1392
                            /* pass-through */
1393
1394
                        case EBML_ID_VOID:
1395
                            res = ebml_read_skip(matroska);
1396
                            break;
1397
                    }
1398
1399
                    if (matroska->level_up) {
1400
                        matroska->level_up--;
1401
                        break;
1402
                    }
1403
                }
1404
                break;
1405
            }
1406
1407
            /* tracktype specific stuff for audio */
1408
            case MATROSKA_ID_TRACKAUDIO: {
1409
                MatroskaAudioTrack *audiotrack;
1410
                if (track->type != MATROSKA_TRACK_TYPE_AUDIO) {
1411
                    av_log(matroska->ctx, AV_LOG_INFO,
1412
                           "audio data in non-audio track - ignoring\n");
1413
                    res = AVERROR_INVALIDDATA;
1414
                    break;
1415
                } else if ((res = ebml_read_master(matroska, &id)) < 0)
1416
                    break;
1417
                audiotrack = (MatroskaAudioTrack *)track;
1418
1419
                while (res == 0) {
1420
                    if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1421
                        res = AVERROR_IO;
1422
                        break;
1423
                    } else if (matroska->level_up > 0) {
1424
                        matroska->level_up--;
1425
                        break;
1426
                    }
1427
1428
                    switch (id) {
1429
                        /* samplerate */
1430
                        case MATROSKA_ID_AUDIOSAMPLINGFREQ: {
1431
                            double num;
1432
                            if ((res = ebml_read_float(matroska, &id,
1433
                                                       &num)) < 0)
1434
                                break;
1435
                            audiotrack->samplerate = num;
1436
                            break;
1437
                        }
1438
1439
                            /* bitdepth */
1440
                        case MATROSKA_ID_AUDIOBITDEPTH: {
1441
                            uint64_t num;
1442
                            if ((res = ebml_read_uint(matroska, &id,
1443
                                                      &num)) < 0)
1444
                                break;
1445
                            audiotrack->bitdepth = num;
1446
                            break;
1447
                        }
1448
1449
                            /* channels */
1450
                        case MATROSKA_ID_AUDIOCHANNELS: {
1451
                            uint64_t num;
1452
                            if ((res = ebml_read_uint(matroska, &id,
1453
                                                      &num)) < 0)
1454
                                break;
1455
                            audiotrack->channels = num;
1456
                            break;
1457
                        }
1458
1459
                        default:
1460
                            av_log(matroska->ctx, AV_LOG_INFO,
1461
                                   "Unknown audio track header entry "
1462
                                   "0x%x - ignoring\n", id);
1463
                            /* pass-through */
1464
1465
                        case EBML_ID_VOID:
1466
                            res = ebml_read_skip(matroska);
1467
                            break;
1468
                    }
1469
1470
                    if (matroska->level_up) {
1471
                        matroska->level_up--;
1472
                        break;
1473
                    }
1474
                }
1475
                break;
1476
            }
1477
1478
                /* codec identifier */
1479
            case MATROSKA_ID_CODECID: {
1480
                char *text;
1481
                if ((res = ebml_read_ascii(matroska, &id, &text)) < 0)
1482
                    break;
1483
                track->codec_id = text;
1484
                break;
1485
            }
1486
1487
                /* codec private data */
1488
            case MATROSKA_ID_CODECPRIVATE: {
1489
                uint8_t *data;
1490
                int size;
1491
                if ((res = ebml_read_binary(matroska, &id, &data, &size) < 0))
1492
                    break;
1493
                track->codec_priv = data;
1494
                track->codec_priv_size = size;
1495
                break;
1496
            }
1497
1498
                /* name of the codec */
1499
            case MATROSKA_ID_CODECNAME: {
1500
                char *text;
1501
                if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1502
                    break;
1503
                track->codec_name = text;
1504
                break;
1505
            }
1506
1507
                /* name of this track */
1508
            case MATROSKA_ID_TRACKNAME: {
1509
                char *text;
1510
                if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1511
                    break;
1512
                track->name = text;
1513
                break;
1514
            }
1515
1516
                /* language (matters for audio/subtitles, mostly) */
1517
            case MATROSKA_ID_TRACKLANGUAGE: {
1518
                char *text;
1519
                if ((res = ebml_read_utf8(matroska, &id, &text)) < 0)
1520
                    break;
1521
                track->language = text;
1522
                break;
1523
            }
1524
1525
                /* whether this is actually used */
1526
            case MATROSKA_ID_TRACKFLAGENABLED: {
1527
                uint64_t num;
1528
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1529
                    break;
1530
                if (num)
1531
                    track->flags |= MATROSKA_TRACK_ENABLED;
1532
                else
1533
                    track->flags &= ~MATROSKA_TRACK_ENABLED;
1534
                break;
1535
            }
1536
1537
                /* whether it's the default for this track type */
1538
            case MATROSKA_ID_TRACKFLAGDEFAULT: {
1539
                uint64_t num;
1540
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1541
                    break;
1542
                if (num)
1543
                    track->flags |= MATROSKA_TRACK_DEFAULT;
1544
                else
1545
                    track->flags &= ~MATROSKA_TRACK_DEFAULT;
1546
                break;
1547
            }
1548
1549
                /* lacing (like MPEG, where blocks don't end/start on frame
1550
                 * boundaries) */
1551
            case MATROSKA_ID_TRACKFLAGLACING: {
1552
                uint64_t num;
1553
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1554
                    break;
1555
                if (num)
1556
                    track->flags |= MATROSKA_TRACK_LACING;
1557
                else
1558
                    track->flags &= ~MATROSKA_TRACK_LACING;
1559
                break;
1560
            }
1561
1562
                /* default length (in time) of one data block in this track */
1563
            case MATROSKA_ID_TRACKDEFAULTDURATION: {
1564
                uint64_t num;
1565
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
1566
                    break;
1567
                track->default_duration = num;
1568
                break;
1569
            }
1570
1571
            default:
1572
                av_log(matroska->ctx, AV_LOG_INFO,
1573
                       "Unknown track header entry 0x%x - ignoring\n", id);
1574
                /* pass-through */
1575
1576
            case EBML_ID_VOID:
1577
            /* we ignore these because they're nothing useful. */
1578
            case MATROSKA_ID_CODECINFOURL:
1579
            case MATROSKA_ID_CODECDOWNLOADURL:
1580
            case MATROSKA_ID_TRACKMINCACHE:
1581
            case MATROSKA_ID_TRACKMAXCACHE:
1582
                res = ebml_read_skip(matroska);
1583
                break;
1584
        }
1585
1586
        if (matroska->level_up) {
1587
            matroska->level_up--;
1588
            break;
1589
        }
1590
    }
1591
1592
    return res;
1593
}
1594
1595
static int
1596
matroska_parse_tracks (MatroskaDemuxContext *matroska)
1597
{
1598
    int res = 0;
1599
    uint32_t id;
1600
1601
    av_log(matroska->ctx, AV_LOG_DEBUG, "parsing tracks...\n");
1602
1603
    while (res == 0) {
1604
        if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1605
            res = AVERROR_IO;
1606
            break;
1607
        } else if (matroska->level_up) {
1608
            matroska->level_up--;
1609
            break;
1610
        }
1611
1612
        switch (id) {
1613
            /* one track within the "all-tracks" header */
1614
            case MATROSKA_ID_TRACKENTRY:
1615
                res = matroska_add_stream(matroska);
1616
                break;
1617
1618
            default:
1619
                av_log(matroska->ctx, AV_LOG_INFO,
1620
                       "Unknown entry 0x%x in track header\n", id);
1621
                /* fall-through */
1622
1623
            case EBML_ID_VOID:
1624
                res = ebml_read_skip(matroska);
1625
                break;
1626
        }
1627
1628
        if (matroska->level_up) {
1629
            matroska->level_up--;
1630
            break;
1631
        }
1632
    }
1633
1634
    return res;
1635
}
1636
1637
static int
1638
matroska_parse_index (MatroskaDemuxContext *matroska)
1639
{
1640
    int res = 0;
1641
    uint32_t id;
1642
    MatroskaDemuxIndex idx;
1643
1644
    av_log(matroska->ctx, AV_LOG_DEBUG, "parsing index...\n");
1645
1646
    while (res == 0) {
1647
        if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1648
            res = AVERROR_IO;
1649
            break;
1650
        } else if (matroska->level_up) {
1651
            matroska->level_up--;
1652
            break;
1653
        }
1654
1655
        switch (id) {
1656
            /* one single index entry ('point') */
1657
            case MATROSKA_ID_POINTENTRY:
1658
                if ((res = ebml_read_master(matroska, &id)) < 0)
1659
                    break;
1660
1661
                /* in the end, we hope to fill one entry with a
1662
                 * timestamp, a file position and a tracknum */
1663
                idx.pos   = (uint64_t) -1;
1664
                idx.time  = (uint64_t) -1;
1665
                idx.track = (uint16_t) -1;
1666
1667
                while (res == 0) {
1668
                    if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1669
                        res = AVERROR_IO;
1670
                        break;
1671
                    } else if (matroska->level_up) {
1672
                        matroska->level_up--;
1673
                        break;
1674
                    }
1675
1676
                    switch (id) {
1677
                        /* one single index entry ('point') */
1678
                        case MATROSKA_ID_CUETIME: {
1679
                            int64_t time;
1680
                            if ((res = ebml_read_uint(matroska, &id,
1681
                                                      &time)) < 0)
1682
                                break;
1683
                            idx.time = time * matroska->time_scale;
1684
                            break;
1685
                        }
1686
1687
                        /* position in the file + track to which it 
1688
                         * belongs */
1689
                        case MATROSKA_ID_CUETRACKPOSITION:
1690
                            if ((res = ebml_read_master(matroska, &id)) < 0)
1691
                                break;
1692
1693
                            while (res == 0) {
1694
                                if (!(id = ebml_peek_id (matroska,
1695
                                                    &matroska->level_up))) {
1696
                                    res = AVERROR_IO;
1697
                                    break;
1698
                                } else if (matroska->level_up) {
1699
                                    matroska->level_up--;
1700
                                    break;
1701
                                }
1702
1703
                                switch (id) {
1704
                                    /* track number */
1705
                                    case MATROSKA_ID_CUETRACK: {
1706
                                        uint64_t num;
1707
                                        if ((res = ebml_read_uint(matroska,
1708
                                                          &id, &num)) < 0)
1709
                                            break;
1710
                                        idx.track = num;
1711
                                        break;
1712
                                    }
1713
1714
                                        /* position in file */
1715
                                    case MATROSKA_ID_CUECLUSTERPOSITION: {
1716
                                        uint64_t num;
1717
                                        if ((res = ebml_read_uint(matroska,
1718
                                                          &id, &num)) < 0)
1719
                                            break;
1720
                                        idx.pos = num;
1721
                                        break;
1722
                                    }
1723
1724
                                    default:
1725
                                        av_log(matroska->ctx, AV_LOG_INFO,
1726
                                               "Unknown entry 0x%x in "
1727
                                               "CuesTrackPositions\n", id);
1728
                                        /* fall-through */
1729
1730
                                    case EBML_ID_VOID:
1731
                                        res = ebml_read_skip(matroska);
1732
                                        break;
1733
                                }
1734
1735
                                if (matroska->level_up) {
1736
                                    matroska->level_up--;
1737
                                    break;
1738
                                }
1739
                            }
1740
1741
                            break;
1742
1743
                        default:
1744
                            av_log(matroska->ctx, AV_LOG_INFO,
1745
                                   "Unknown entry 0x%x in cuespoint "
1746
                                   "index\n", id);
1747
                            /* fall-through */
1748
1749
                        case EBML_ID_VOID:
1750
                            res = ebml_read_skip(matroska);
1751
                            break;
1752
                    }
1753
1754
                    if (matroska->level_up) {
1755
                        matroska->level_up--;
1756
                        break;
1757
                    }
1758
                }
1759
1760
                /* so let's see if we got what we wanted */
1761
                if (idx.pos   != (uint64_t) -1 &&
1762
                    idx.time  != (uint64_t) -1 &&
1763
                    idx.track != (uint16_t) -1) {
1764
                    if (matroska->num_indexes % 32 == 0) {
1765
                        /* re-allocate bigger index */
1766
                        matroska->index =
1767
                            av_realloc(matroska->index,
1768
                                       (matroska->num_indexes + 32) *
1769
                                       sizeof(MatroskaDemuxIndex));
1770
                    }
1771
                    matroska->index[matroska->num_indexes] = idx;
1772
                    matroska->num_indexes++;
1773
                }
1774
                break;
1775
1776
            default:
1777
                av_log(matroska->ctx, AV_LOG_INFO,
1778
                       "Unknown entry 0x%x in cues header\n", id);
1779
                /* fall-through */
1780
1781
            case EBML_ID_VOID:
1782
                res = ebml_read_skip(matroska);
1783
                break;
1784
        }
1785
1786
        if (matroska->level_up) {
1787
            matroska->level_up--;
1788
            break;
1789
        }
1790
    }
1791
1792
    return res;
1793
}
1794
1795
static int
1796
matroska_parse_metadata (MatroskaDemuxContext *matroska)
1797
{
1798
    int res = 0;
1799
    uint32_t id;
1800
1801
    while (res == 0) {
1802
        if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1803
            res = AVERROR_IO;
1804
            break;
1805
        } else if (matroska->level_up) {
1806
            matroska->level_up--;
1807
            break;
1808
        }
1809
1810
        switch (id) {
1811
            /* Hm, this is unsupported... */
1812
            default:
1813
                av_log(matroska->ctx, AV_LOG_INFO,
1814
                       "Unknown entry 0x%x in metadata header\n", id);
1815
                /* fall-through */
1816
1817
            case EBML_ID_VOID:
1818
                res = ebml_read_skip(matroska);
1819
                break;
1820
        }
1821
1822
        if (matroska->level_up) {
1823
            matroska->level_up--;
1824
            break;
1825
        }
1826
    }
1827
1828
    return res;
1829
}
1830
1831
static int
1832
matroska_parse_seekhead (MatroskaDemuxContext *matroska)
1833
{
1834
    int res = 0;
1835
    uint32_t id;
1836
1837
    av_log(matroska->ctx, AV_LOG_DEBUG, "parsing seekhead...\n");
1838
1839
    while (res == 0) {
1840
        if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1841
            res = AVERROR_IO;
1842
            break;
1843
        } else if (matroska->level_up) {
1844
            matroska->level_up--;
1845
            break;
1846
        }
1847
1848
        switch (id) {
1849
            case MATROSKA_ID_SEEKENTRY: {
1850
                uint32_t seek_id = 0, peek_id_cache = 0;
1851
                uint64_t seek_pos = (uint64_t) -1, t;
1852
1853
                if ((res = ebml_read_master(matroska, &id)) < 0)
1854
                    break;
1855
1856
                while (res == 0) {
1857
                    if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
1858
                        res = AVERROR_IO;
1859
                        break;
1860
                    } else if (matroska->level_up) {
1861
                        matroska->level_up--;
1862
                        break;
1863
                    }
1864
1865
                    switch (id) {
1866
                        case MATROSKA_ID_SEEKID:
1867
                            res = ebml_read_uint(matroska, &id, &t);
1868
                            seek_id = t;
1869
                            break;
1870
1871
                        case MATROSKA_ID_SEEKPOSITION:
1872
                            res = ebml_read_uint(matroska, &id, &seek_pos);
1873
                            break;
1874
1875
                        default:
1876
                            av_log(matroska->ctx, AV_LOG_INFO,
1877
                                   "Unknown seekhead ID 0x%x\n", id);
1878
                            /* fall-through */
1879
1880
                        case EBML_ID_VOID:
1881
                            res = ebml_read_skip(matroska);
1882
                            break;
1883
                    }
1884
1885
                    if (matroska->level_up) {
1886
                        matroska->level_up--;
1887
                        break;
1888
                    }
1889
                }
1890
1891
                if (!seek_id || seek_pos == (uint64_t) -1) {
1892
                    av_log(matroska->ctx, AV_LOG_INFO,
1893
                           "Incomplete seekhead entry (0x%x/%llu)\n",
1894
                           seek_id, seek_pos);
1895
                    break;
1896
                }
1897
1898
                switch (seek_id) {
1899
                    case MATROSKA_ID_CUES:
1900
                    case MATROSKA_ID_TAGS: {
1901
                        uint32_t level_up = matroska->level_up;
1902
                        offset_t before_pos;
1903
                        uint64_t length;
1904
                        MatroskaLevel level;
1905
1906
                        /* remember the peeked ID and the current position */
1907
                        peek_id_cache = matroska->peek_id;
1908
                        before_pos = url_ftell(&matroska->ctx->pb);
1909
1910
                        /* seek */
1911
                        if ((res = ebml_read_seek(matroska, seek_pos +
1912
                                               matroska->segment_start)) < 0)
1913
                            return res;
1914
1915
                        /* we don't want to lose our seekhead level, so we add
1916
                         * a dummy. This is a crude hack. */
1917
                        if (matroska->num_levels == EBML_MAX_DEPTH) {
1918
                            av_log(matroska->ctx, AV_LOG_INFO,
1919
                                   "Max EBML element depth (%d) reached, "
1920
                                   "cannot parse further.\n", EBML_MAX_DEPTH);
1921
                            return AVERROR_UNKNOWN;
1922
                        }
1923
                            
1924
                        level.start = 0;
1925
                        level.length = (uint64_t)-1;
1926
                        matroska->levels[matroska->num_levels] = level;
1927
                        matroska->num_levels++;
1928
1929
                        /* check ID */
1930
                        if (!(id = ebml_peek_id (matroska,
1931
                                                 &matroska->level_up)))
1932
                            break;
1933
                        if (id != seek_id) {
1934
                            av_log(matroska->ctx, AV_LOG_INFO,
1935
                                   "We looked for ID=0x%x but got "
1936
                                   "ID=0x%x (pos=%llu)",
1937
                                   seek_id, id, seek_pos +
1938
                                   matroska->segment_start);
1939
                            goto finish;
1940
                        }
1941
1942
                        /* read master + parse */
1943
                        if ((res = ebml_read_master(matroska, &id)) < 0)
1944
                            break;
1945
                        switch (id) {
1946
                            case MATROSKA_ID_CUES:
1947
                                if (!(res = matroska_parse_index(matroska)) ||
1948
                                    url_feof(&matroska->ctx->pb)) {
1949
                                    matroska->index_parsed = 1;
1950
                                    res = 0;
1951
                                }
1952
                                break;
1953
                            case MATROSKA_ID_TAGS:
1954
                                if (!(res = matroska_parse_metadata(matroska)) ||
1955
                                   url_feof(&matroska->ctx->pb)) {
1956
                                    matroska->metadata_parsed = 1;
1957
                                    res = 0;
1958
                                }
1959
                                break;
1960
                        }
1961
                        if (res < 0)
1962
                            break;
1963
1964
                    finish:
1965
                        /* remove dummy level */
1966
                        while (matroska->num_levels) {
1967
                            matroska->num_levels--;
1968
                            length =
1969
                                matroska->levels[matroska->num_levels].length;
1970
                            if (length == (uint64_t)-1)
1971
                                break;
1972
                        }
1973
1974
                        /* seek back */
1975
                        if ((res = ebml_read_seek(matroska, before_pos)) < 0)
1976
                            return res;
1977
                        matroska->peek_id = peek_id_cache;
1978
                        matroska->level_up = level_up;
1979
                        break;
1980
                    }
1981
1982
                    default:
1983
                        av_log(matroska->ctx, AV_LOG_INFO,
1984
                               "Ignoring seekhead entry for ID=0x%x\n",
1985
                               seek_id);
1986
                        break;
1987
                }
1988
1989
                break;
1990
            }
1991
1992
            default:
1993
                av_log(matroska->ctx, AV_LOG_INFO,
1994
                       "Unknown seekhead ID 0x%x\n", id);
1995
                /* fall-through */
1996
1997
            case EBML_ID_VOID:
1998
                res = ebml_read_skip(matroska);
1999
                break;
2000
        }
2001
2002
        if (matroska->level_up) {
2003
            matroska->level_up--;
2004
            break;
2005
        }
2006
    }
2007
2008
    return res;
2009
}
2010
2011
static int
2012
matroska_read_header (AVFormatContext    *s,
2013
                      AVFormatParameters *ap)
2014
{
2015
    MatroskaDemuxContext *matroska = s->priv_data;
2016
    char *doctype;
2017
    int version, last_level, res = 0;
2018
    uint32_t id;
2019
2020
    matroska->ctx = s;
2021
2022
    /* First read the EBML header. */
2023
    doctype = NULL;
2024
    if ((res = ebml_read_header(matroska, &doctype, &version)) < 0)
2025
        return res;
2026
    if ((doctype == NULL) || strcmp(doctype, "matroska")) {
2027
        av_log(matroska->ctx, AV_LOG_ERROR,
2028
               "Wrong EBML doctype ('%s' != 'matroska').\n",
2029
               doctype ? doctype : "(none)");
2030
        if (doctype)
2031
            av_free(doctype);
2032
        return AVERROR_NOFMT;
2033
    }
2034
    av_free(doctype);
2035
    if (version != 1) {
2036
        av_log(matroska->ctx, AV_LOG_ERROR,
2037
               "Matroska demuxer version 1 too old for file version %d\n",
2038
               version);
2039
        return AVERROR_NOFMT;
2040
    }
2041
2042
    /* The next thing is a segment. */
2043
    while (1) {
2044
        if (!(id = ebml_peek_id(matroska, &last_level)))
2045
            return AVERROR_IO;
2046
        if (id == MATROSKA_ID_SEGMENT)
2047
            break;
2048
2049
        /* oi! */
2050
        av_log(matroska->ctx, AV_LOG_INFO,
2051
               "Expected a Segment ID (0x%x), but received 0x%x!\n",
2052
               MATROSKA_ID_SEGMENT, id);
2053
        if ((res = ebml_read_skip(matroska)) < 0)
2054
            return res;
2055
    }
2056
2057
    /* We now have a Matroska segment.
2058
     * Seeks are from the beginning of the segment,
2059
     * after the segment ID/length. */
2060
    if ((res = ebml_read_master(matroska, &id)) < 0)
2061
        return res;
2062
    matroska->segment_start = url_ftell(&s->pb);
2063
2064
    matroska->time_scale = 1000000;
2065
    /* we've found our segment, start reading the different contents in here */
2066
    while (res == 0) {
2067
        if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2068
            res = AVERROR_IO;
2069
            break;
2070
        } else if (matroska->level_up) {
2071
            matroska->level_up--;
2072
            break;
2073
        }
2074
2075
        switch (id) {
2076
            /* stream info */
2077
            case MATROSKA_ID_INFO: {
2078
                if ((res = ebml_read_master(matroska, &id)) < 0)
2079
                    break;
2080
                res = matroska_parse_info(matroska);
2081
                break;
2082
            }
2083
2084
            /* track info headers */
2085
            case MATROSKA_ID_TRACKS: {
2086
                if ((res = ebml_read_master(matroska, &id)) < 0)
2087
                    break;
2088
                res = matroska_parse_tracks(matroska);
2089
                break;
2090
            }
2091
2092
            /* stream index */
2093
            case MATROSKA_ID_CUES: {
2094
                if (!matroska->index_parsed) {
2095
                    if ((res = ebml_read_master(matroska, &id)) < 0)
2096
                        break;
2097
                    res = matroska_parse_index(matroska);
2098
                } else
2099
                    res = ebml_read_skip(matroska);
2100
                break;
2101
            }
2102
2103
            /* metadata */
2104
            case MATROSKA_ID_TAGS: {
2105
                if (!matroska->metadata_parsed) {
2106
                    if ((res = ebml_read_master(matroska, &id)) < 0)
2107
                        break;
2108
                    res = matroska_parse_metadata(matroska);
2109
                } else
2110
                    res = ebml_read_skip(matroska);
2111
                break;
2112
            }
2113
2114
            /* file index (if seekable, seek to Cues/Tags to parse it) */
2115
            case MATROSKA_ID_SEEKHEAD: {
2116
                if ((res = ebml_read_master(matroska, &id)) < 0)
2117
                    break;
2118
                res = matroska_parse_seekhead(matroska);
2119
                break;
2120
            }
2121
2122
            case MATROSKA_ID_CLUSTER: {
2123
                /* Do not read the master - this will be done in the next
2124
                 * call to matroska_read_packet. */
2125
                res = 1;
2126
                break;
2127
            }
2128
2129
            default:
2130
                av_log(matroska->ctx, AV_LOG_INFO,
2131
                       "Unknown matroska file header ID 0x%x\n", id);
2132
            /* fall-through */
2133
2134
            case EBML_ID_VOID:
2135
                res = ebml_read_skip(matroska);
2136
                break;
2137
        }
2138
2139
        if (matroska->level_up) {
2140
            matroska->level_up--;
2141
            break;
2142
        }
2143
    }
2144
2145
    if (res < 0)
2146
        return res;
2147
2148
    /* Have we found a cluster? */
2149
    if (res == 1) {
2150
        int i;
2151
        enum CodecID codec_id;
2152
        MatroskaTrack *track;
2153
        AVStream *st;
2154
2155
        for (i = 0; i < matroska->num_tracks; i++) {
2156
            track = matroska->tracks[i];
2157
2158
            /* libavformat does not really support subtitles.
2159
             * Also apply some sanity checks. */
2160
            if ((track->type == MATROSKA_TRACK_TYPE_SUBTITLE) ||
2161
                (track->codec_id == NULL))
2162
                continue;
2163
2164
            /* Set the FourCC from the CodecID. */
2165
            /* This is the MS compatibility mode which stores a
2166
             * BITMAPINFOHEADER in the CodecPrivate. */
2167
            if (!strcmp(track->codec_id,
2168
                        MATROSKA_CODEC_ID_VIDEO_VFW_FOURCC) &&
2169
                (track->codec_priv_size >= 40) &&
2170
                (track->codec_priv != NULL)) {
2171
                unsigned char *p;
2172
2173
                /* Offset of biCompression. Stored in LE. */
2174
                p = (unsigned char *)track->codec_priv + 16;
2175
                ((MatroskaVideoTrack *)track)->fourcc = (p[3] << 24) |
2176
                                 (p[2] << 16) | (p[1] << 8) | p[0];
2177
                codec_id = codec_get_bmp_id(((MatroskaVideoTrack *)track)->fourcc);
2178
2179
            } else if (!strcmp(track->codec_id,
2180
                               MATROSKA_CODEC_ID_VIDEO_MPEG4_SP) ||
2181
                       !strcmp(track->codec_id,
2182
                               MATROSKA_CODEC_ID_VIDEO_MPEG4_ASP) ||
2183
                       !strcmp(track->codec_id,
2184
                               MATROSKA_CODEC_ID_VIDEO_MPEG4_AP))
2185
                codec_id = CODEC_ID_MPEG4;
2186
/*             else if (!strcmp(track->codec_id, */
2187
/*                              MATROSKA_CODEC_ID_VIDEO_UNCOMPRESSED)) */
2188
/*                 codec_id = CODEC_ID_???; */
2189
            else if (!strcmp(track->codec_id,
2190
                             MATROSKA_CODEC_ID_VIDEO_MSMPEG4V3))
2191
                codec_id = CODEC_ID_MSMPEG4V3;
2192
            else if (!strcmp(track->codec_id,
2193
                             MATROSKA_CODEC_ID_VIDEO_MPEG1) ||
2194
                     !strcmp(track->codec_id,
2195
                             MATROSKA_CODEC_ID_VIDEO_MPEG2))
2196
                codec_id = CODEC_ID_MPEG2VIDEO;
2197
2198
            /* This is the MS compatibility mode which stores a
2199
             * WAVEFORMATEX in the CodecPrivate. */
2200
            else if (!strcmp(track->codec_id, 
2201
                             MATROSKA_CODEC_ID_AUDIO_ACM) &&
2202
                (track->codec_priv_size >= 18) &&
2203
                (track->codec_priv != NULL)) {
2204
                unsigned char *p;
2205
                uint16_t tag;
2206
2207
                /* Offset of wFormatTag. Stored in LE. */
2208
                p = (unsigned char *)track->codec_priv;
2209
                tag = (p[1] << 8) | p[0];
2210
                codec_id = codec_get_wav_id(tag);
2211
2212
            } else if (!strcmp(track->codec_id,
2213
                               MATROSKA_CODEC_ID_AUDIO_MPEG1_L1) ||
2214
                       !strcmp(track->codec_id,
2215
                               MATROSKA_CODEC_ID_AUDIO_MPEG1_L2) ||
2216
                       !strcmp(track->codec_id,
2217
                               MATROSKA_CODEC_ID_AUDIO_MPEG1_L3))
2218
                codec_id = CODEC_ID_MP3;
2219
            else if (!strcmp(track->codec_id,
2220
                             MATROSKA_CODEC_ID_AUDIO_PCM_INT_BE))
2221
                codec_id = CODEC_ID_PCM_U16BE;
2222
            else if (!strcmp(track->codec_id,
2223
                             MATROSKA_CODEC_ID_AUDIO_PCM_INT_LE))
2224
                codec_id = CODEC_ID_PCM_U16LE;
2225
/*             else if (!strcmp(track->codec_id, */
2226
/*                              MATROSKA_CODEC_ID_AUDIO_PCM_FLOAT)) */
2227
/*                 codec_id = CODEC_ID_PCM_???; */
2228
            else if (!strcmp(track->codec_id,
2229
                             MATROSKA_CODEC_ID_AUDIO_AC3))
2230
                codec_id = CODEC_ID_AC3;
2231 23c99253 Michael Niedermayer
            else if (!strcmp(track->codec_id,
2232
                             MATROSKA_CODEC_ID_AUDIO_DTS))
2233
                codec_id = CODEC_ID_DTS;
2234 08abe0fd Michael Niedermayer
            /* No such codec id so far. */
2235
/*             else if (!strcmp(track->codec_id, */
2236
/*                              MATROSKA_CODEC_ID_AUDIO_DTS)) */
2237
/*                 codec_id = CODEC_ID_DTS; */
2238
            else if (!strcmp(track->codec_id,
2239
                             MATROSKA_CODEC_ID_AUDIO_VORBIS))
2240
                codec_id = CODEC_ID_VORBIS;
2241
            else if (!strcmp(track->codec_id,
2242
                             MATROSKA_CODEC_ID_AUDIO_MPEG2) ||
2243
                     !strcmp(track->codec_id,
2244
                             MATROSKA_CODEC_ID_AUDIO_MPEG4))
2245
                codec_id = CODEC_ID_AAC;
2246
            else
2247
                codec_id = CODEC_ID_NONE;
2248
2249
            if (codec_id == CODEC_ID_NONE) {
2250
                av_log(matroska->ctx, AV_LOG_INFO,
2251
                       "Unknown/unsupported CodecID %s.\n",
2252
                       track->codec_id);
2253
            }
2254
2255
            track->stream_index = matroska->num_streams;
2256
2257
            matroska->num_streams++;
2258
            st = av_new_stream(s, track->stream_index);
2259
            if (st == NULL)
2260
                return AVERROR_NOMEM;
2261 9ee91c2f Michael Niedermayer
            av_set_pts_info(st, 24, 1, 1000); /* 24 bit pts in ms */
2262 08abe0fd Michael Niedermayer
2263
            st->codec.codec_id = codec_id;
2264
2265
            if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
2266
                MatroskaVideoTrack *videotrack = (MatroskaVideoTrack *)track;
2267
2268
                st->codec.codec_type = CODEC_TYPE_VIDEO;
2269
                st->codec.codec_tag = videotrack->fourcc;
2270
                st->codec.width = videotrack->pixel_width;
2271
                st->codec.height = videotrack->pixel_height;
2272
                if (videotrack->display_width == 0)
2273
                    st->codec.sample_aspect_ratio.num =
2274
                        videotrack->pixel_width;
2275
                else
2276
                    st->codec.sample_aspect_ratio.num =
2277
                        videotrack->display_width;
2278
                if (videotrack->display_height == 0)
2279
                    st->codec.sample_aspect_ratio.num =
2280
                        videotrack->pixel_height;
2281
                else
2282
                    st->codec.sample_aspect_ratio.num =
2283
                        videotrack->display_height;
2284
2285
            } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
2286
                MatroskaAudioTrack *audiotrack = (MatroskaAudioTrack *)track;
2287
2288
                st->codec.codec_type = CODEC_TYPE_AUDIO;
2289
                st->codec.sample_rate = audiotrack->samplerate;
2290
                st->codec.channels = audiotrack->channels;
2291
            }
2292
2293
            /* What do we do with private data? E.g. for Vorbis. */
2294
        }
2295
    }
2296
2297
    return 0;
2298
}
2299
2300
static int
2301
matroska_find_track_by_num (MatroskaDemuxContext *matroska,
2302
                            int                   num)
2303
{
2304
    int i;
2305
2306
    for (i = 0; i < matroska->num_tracks; i++)
2307
        if (matroska->tracks[i]->num == num)
2308
            return i;
2309
2310
    return -1;
2311
}
2312
2313
static int
2314
matroska_parse_blockgroup (MatroskaDemuxContext *matroska,
2315
                           uint64_t              cluster_time)
2316
{
2317
    int res = 0;
2318
    uint32_t id;
2319
    AVPacket *pkt;
2320
    int is_keyframe = PKT_FLAG_KEY, last_num_packets = matroska->num_packets;
2321
2322
    av_log(matroska->ctx, AV_LOG_DEBUG, "parsing blockgroup...\n");
2323
2324
    while (res == 0) {
2325
        if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2326
            res = AVERROR_IO;
2327
            break;
2328
        } else if (matroska->level_up) {
2329
            matroska->level_up--;
2330
            break;
2331
        }
2332
2333
        switch (id) {
2334
            /* one block inside the group. Note, block parsing is one
2335
             * of the harder things, so this code is a bit complicated.
2336
             * See http://www.matroska.org/ for documentation. */
2337
            case MATROSKA_ID_BLOCK: {
2338
                uint8_t *data, *origdata;
2339
                int size;
2340
                uint64_t time;
2341
                uint32_t *lace_size = NULL;
2342
                int n, track, flags, laces = 0;
2343
                uint64_t num;
2344
2345
                if ((res = ebml_read_binary(matroska, &id, &data, &size)) < 0)
2346
                    break;
2347
                origdata = data;
2348
2349
                /* first byte(s): blocknum */
2350
                if ((n = matroska_ebmlnum_uint(data, size, &num)) < 0) {
2351
                    av_log(matroska->ctx, AV_LOG_ERROR,
2352
                           "EBML block data error\n");
2353
                    av_free(origdata);
2354
                    break;
2355
                }
2356
                data += n;
2357
                size -= n;
2358
2359
                /* fetch track from num */
2360
                track = matroska_find_track_by_num(matroska, num);
2361
                if (size <= 3 || track < 0 || track >= matroska->num_tracks) {
2362
                    av_log(matroska->ctx, AV_LOG_INFO,
2363
                           "Invalid stream %d or size %u\n", track, size);
2364
                    av_free(origdata);
2365
                    break;
2366
                }
2367
2368
                /* time (relative to cluster time) */
2369
                time = ((data[0] << 8) | data[1]) * matroska->time_scale;
2370
                data += 2;
2371
                size -= 2;
2372
                flags = *data;
2373
                data += 1;
2374
                size -= 1;
2375
                switch ((flags & 0x06) >> 1) {
2376
                    case 0x0: /* no lacing */
2377
                        laces = 1;
2378
                        lace_size = av_mallocz(sizeof(int));
2379
                        lace_size[0] = size;
2380
                        break;
2381
2382
                    case 0x1: /* xiph lacing */
2383
                    case 0x2: /* fixed-size lacing */
2384
                    case 0x3: /* EBML lacing */
2385
                        if (size == 0) {
2386
                            res = -1;
2387
                            break;
2388
                        }
2389
                        laces = (*data) + 1;
2390
                        data += 1;
2391
                        size -= 1;
2392
                        lace_size = av_mallocz(laces * sizeof(int));
2393
2394
                        switch ((flags & 0x06) >> 1) {
2395
                            case 0x1: /* xiph lacing */ {
2396
                                uint8_t temp;
2397
                                uint32_t total = 0;
2398
                                for (n = 0; res == 0 && n < laces - 1; n++) {
2399
                                    while (1) {
2400
                                        if (size == 0) {
2401
                                            res = -1;
2402
                                            break;
2403
                                        }
2404
                                        temp = *data;
2405
                                        lace_size[n] += temp;
2406
                                        data += 1;
2407
                                        size -= 1;
2408
                                        if (temp != 0xff)
2409
                                            break;
2410
                                    }
2411
                                    total += lace_size[n];
2412
                                }
2413
                                lace_size[n] = size - total;
2414
                                break;
2415
                            }
2416
2417
                            case 0x2: /* fixed-size lacing */
2418
                                for (n = 0; n < laces; n++)
2419
                                    lace_size[n] = size / laces;
2420
                                break;
2421
2422
                            case 0x3: /* EBML lacing */ {
2423
                                uint32_t total;
2424
                                n = matroska_ebmlnum_uint(data, size, &num);
2425
                                if (n < 0) {
2426
                                    av_log(matroska->ctx, AV_LOG_INFO,
2427
                                           "EBML block data error\n");
2428
                                    break;
2429
                                }
2430
                                data += n;
2431
                                size -= n;
2432
                                total = lace_size[0] = num;
2433
                                for (n = 1; res == 0 && n < laces - 1; n++) {
2434
                                    int64_t snum;
2435
                                    int r;
2436
                                    r = matroska_ebmlnum_sint (data, size,
2437
                                                               &snum);
2438
                                    if (r < 0) {
2439
                                        av_log(matroska->ctx, AV_LOG_INFO,
2440
                                               "EBML block data error\n");
2441
                                        break;
2442
                                    }
2443
                                    data += r;
2444
                                    size -= r;
2445
                                    lace_size[n] = lace_size[n - 1] + snum;
2446
                                    total += lace_size[n];
2447
                                }
2448
                                lace_size[n] = size - total;
2449
                                break;
2450
                            }
2451
                        }
2452
                        break;
2453
                }
2454
2455
                if (res == 0) {
2456
                    for (n = 0; n < laces; n++) {
2457
                        uint64_t timecode = 0;
2458
2459
                        pkt = av_mallocz(sizeof(AVPacket));
2460
                        /* XXX: prevent data copy... */
2461
                        if (av_new_packet(pkt,lace_size[n]) < 0) {
2462
                            res = AVERROR_NOMEM;
2463
                            break;
2464
                        }
2465
                        if (cluster_time != (uint64_t)-1) {
2466
                            if (time < 0 && (-time) > cluster_time)
2467
                                timecode = cluster_time;
2468
                            else
2469
                                timecode = cluster_time + time;
2470
                        }
2471
                        /* FIXME: duration */
2472
2473
                        memcpy(pkt->data, data, lace_size[n]);
2474
                        data += lace_size[n];
2475
                        if (n == 0)
2476
                            pkt->flags = is_keyframe;
2477
                        pkt->stream_index =
2478
                            matroska->tracks[track]->stream_index;
2479
2480
                        pkt->pts = timecode / 1000000; /* ns to ms */
2481
2482
                        matroska_queue_packet(matroska, pkt);
2483
                    }
2484
                }
2485
2486
                av_free(lace_size);
2487
                av_free(origdata);
2488
                break;
2489
            }
2490
2491
            case MATROSKA_ID_BLOCKDURATION: {
2492
                uint64_t num;
2493
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
2494
                    break;
2495
                av_log(matroska->ctx, AV_LOG_INFO,
2496
                       "FIXME: implement support for BlockDuration\n");
2497
                break;
2498
            }
2499
2500
            case MATROSKA_ID_BLOCKREFERENCE:
2501
                /* We've found a reference, so not even the first frame in
2502
                 * the lace is a key frame. */
2503
                is_keyframe = 0;
2504
                if (last_num_packets != matroska->num_packets)
2505
                    matroska->packets[last_num_packets]->flags = 0;
2506
                res = ebml_read_skip(matroska);
2507
                break;
2508
2509
            default:
2510
                av_log(matroska->ctx, AV_LOG_INFO,
2511
                       "Unknown entry 0x%x in blockgroup data\n", id);
2512
                /* fall-through */
2513
2514
            case EBML_ID_VOID:
2515
                res = ebml_read_skip(matroska);
2516
                break;
2517
        }
2518
2519
        if (matroska->level_up) {
2520
            matroska->level_up--;
2521
            break;
2522
        }
2523
    }
2524
2525
    return res;
2526
}
2527
2528
static int
2529
matroska_parse_cluster (MatroskaDemuxContext *matroska)
2530
{
2531
    int res = 0;
2532
    uint32_t id;
2533
    uint64_t cluster_time = 0;
2534
2535
    av_log(matroska->ctx, AV_LOG_DEBUG,
2536
           "parsing cluster at %lld\n", url_ftell(&matroska->ctx->pb));
2537
2538
    while (res == 0) {
2539
        if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2540
            res = AVERROR_IO;
2541
            break;
2542
        } else if (matroska->level_up) {
2543
            matroska->level_up--;
2544
            break;
2545
        }
2546
2547
        switch (id) {
2548
            /* cluster timecode */
2549
            case MATROSKA_ID_CLUSTERTIMECODE: {
2550
                uint64_t num;
2551
                if ((res = ebml_read_uint(matroska, &id, &num)) < 0)
2552
                    break;
2553
                cluster_time = num * matroska->time_scale;
2554
                break;
2555
            }
2556
2557
                /* a group of blocks inside a cluster */
2558
            case MATROSKA_ID_BLOCKGROUP:
2559
                if ((res = ebml_read_master(matroska, &id)) < 0)
2560
                    break;
2561
                res = matroska_parse_blockgroup(matroska, cluster_time);
2562
                break;
2563
2564
            default:
2565
                av_log(matroska->ctx, AV_LOG_INFO,
2566
                       "Unknown entry 0x%x in cluster data\n", id);
2567
                /* fall-through */
2568
2569
            case EBML_ID_VOID:
2570
                res = ebml_read_skip(matroska);
2571
                break;
2572
        }
2573
2574
        if (matroska->level_up) {
2575
            matroska->level_up--;
2576
            break;
2577
        }
2578
    }
2579
2580
    return res;
2581
}
2582
2583
static int
2584
matroska_read_packet (AVFormatContext *s,
2585
                      AVPacket        *pkt)
2586
{
2587
    MatroskaDemuxContext *matroska = s->priv_data;
2588
    int res = 0;
2589
    uint32_t id;
2590
2591
    /* Do we still have a packet queued? */
2592
    if (matroska_deliver_packet(matroska, pkt) == 0)
2593
        return 0;
2594
2595
    /* Have we already reached the end? */
2596
    if (matroska->done)
2597
        return AVERROR_IO;
2598
2599
    while (res == 0) {
2600
        if (!(id = ebml_peek_id(matroska, &matroska->level_up))) {
2601
            res = AVERROR_IO;
2602
            break;
2603
        } else if (matroska->level_up) {
2604
            matroska->level_up--;
2605
            break;
2606
        }
2607
2608
        switch (id) {
2609
            case MATROSKA_ID_CLUSTER:
2610
                if ((res = ebml_read_master(matroska, &id)) < 0)
2611
                    break;
2612
                if ((res = matroska_parse_cluster(matroska)) == 0)
2613
                    res = 1; /* Parsed one cluster, let's get out. */
2614
                break;
2615
2616
            default:
2617
            case EBML_ID_VOID:
2618
                res = ebml_read_skip(matroska);
2619
                break;
2620
        }
2621
2622
        if (matroska->level_up) {
2623
            matroska->level_up--;
2624
            break;
2625
        }
2626
    }
2627
2628
    if (res == -1)
2629
        matroska->done = 1;
2630
2631
    return matroska_deliver_packet(matroska, pkt);
2632
}
2633
2634
static int
2635
matroska_read_close (AVFormatContext *s)
2636
{
2637
    MatroskaDemuxContext *matroska = s->priv_data;
2638
    int n = 0;
2639
2640
    if (matroska->writing_app)
2641
        av_free(matroska->writing_app);
2642
    if (matroska->muxing_app)
2643
        av_free(matroska->muxing_app);
2644
    if (matroska->index)
2645
        av_free(matroska->index);
2646
2647
    if (matroska->packets != NULL) {
2648
        for (n = 0; n < matroska->num_packets; n++) {
2649
            av_free_packet(matroska->packets[n]);
2650
            av_free(matroska->packets[n]);
2651
        }
2652
        av_free(matroska->packets);
2653
    }
2654
2655
    for (n = 0; n < matroska->num_tracks; n++) {
2656
        MatroskaTrack *track = matroska->tracks[n];
2657
        if (track->codec_id)
2658
            av_free(track->codec_id);
2659
        if (track->codec_name)
2660
            av_free(track->codec_name);
2661
        if (track->codec_priv)
2662
            av_free(track->codec_priv);
2663
        if (track->name)
2664
            av_free(track->name);
2665
        if (track->language)
2666
            av_free(track->language);
2667
2668
        av_free(track);
2669
    }
2670
2671
    memset(matroska, 0, sizeof(MatroskaDemuxContext));
2672
2673
    return 0;
2674
}
2675
2676
static AVInputFormat matroska_iformat = {
2677
    "matroska",
2678
    "Matroska file format",
2679
    sizeof(MatroskaDemuxContext),
2680
    matroska_probe,
2681
    matroska_read_header,
2682
    matroska_read_packet,
2683
    matroska_read_close,
2684
};
2685
2686
int
2687
matroska_init(void)
2688
{
2689
    av_register_input_format(&matroska_iformat);
2690
    return 0;
2691
}