/*
 * Matroska file demuxer
 * Copyright (c) 2003-2008 The Libav Project
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Matroska file demuxer
 * by Ronald Bultje <rbultje@ronald.bitfreak.net>
 * with a little help from Moritz Bunkus <moritz@bunkus.org>
 * totally reworked by Aurelien Jacobs <aurel@gnuage.org>
 * Specs available on the Matroska project page: http://www.matroska.org/.
 */

31
#include <stdio.h>
32
#include "avformat.h"
33
#include "internal.h"
34
#include "avio_internal.h"
35
/* For ff_codec_get_id(). */
36
#include "riff.h"
37
#include "isom.h"
38
#include "rm.h"
39
#include "matroska.h"
40
#include "libavcodec/mpeg4audio.h"
41
#include "libavutil/intfloat_readwrite.h"
42
#include "libavutil/intreadwrite.h"
43
#include "libavutil/avstring.h"
44
#include "libavutil/lzo.h"
45
#include "libavutil/dict.h"
46
#if CONFIG_ZLIB
47 48
#include <zlib.h>
#endif
49
#if CONFIG_BZLIB
50 51
#include <bzlib.h>
#endif
52

53 54 55 56 57 58 59 60 61 62
/*
 * Kind of payload described by an EbmlSyntax entry; selects how
 * ebml_parse_elem() consumes the element.
 */
typedef enum {
    EBML_NONE,       /* no reader: the payload is skipped */
    EBML_UINT,       /* read with ebml_read_uint() */
    EBML_FLOAT,      /* read with ebml_read_float() */
    EBML_STR,        /* read with ebml_read_ascii() */
    EBML_UTF8,       /* read with ebml_read_ascii() as well */
    EBML_BIN,        /* read with ebml_read_binary() */
    EBML_NEST,       /* master element: children parsed with a nested syntax */
    EBML_PASS,       /* re-dispatch the same ID to a nested syntax, no length read */
    EBML_STOP,       /* makes ebml_parse_elem() return 1 without consuming anything */
    EBML_TYPE_COUNT  /* number of types; sizes per-type lookup tables */
} EbmlType;

/*
 * One entry of an element syntax table. Tables are arrays terminated by an
 * entry whose id is 0.
 */
typedef const struct EbmlSyntax {
    uint32_t id;            /* element ID this entry matches */
    EbmlType type;          /* how the payload is parsed */
    int list_elem_size;     /* if non-zero, the target is an EbmlList and each
                             * occurrence appends one element of this size */
    int data_offset;        /* byte offset of the target field in the output struct */
    union {
        uint64_t    u;
        double      f;
        const char *s;
        const struct EbmlSyntax *n; /* child syntax for EBML_NEST / EBML_PASS */
    } def;                  /* default value (u/f/s) or nested syntax (n) */
} EbmlSyntax;

/* Growable array of parsed elements; grown by one entry per occurrence in ebml_parse_elem(). */
typedef struct {
    int nb_elem;  /* number of elements stored in elem */
    void *elem;   /* contiguous storage, element size given by the syntax entry */
} EbmlList;

/* Binary payload of an element, as filled in by ebml_read_binary(). */
typedef struct {
    int      size;  /* payload length in bytes */
    uint8_t *data;  /* heap buffer holding the payload */
    int64_t  pos;   /* stream position at which the payload started */
} EbmlBin;

90 91 92 93 94 95 96 97
/* Values of the EBML header, filled through the ebml_header syntax table. */
typedef struct {
    uint64_t version;          /* EBML_ID_EBMLREADVERSION */
    uint64_t max_size;         /* EBML_ID_EBMLMAXSIZELENGTH */
    uint64_t id_length;        /* EBML_ID_EBMLMAXIDLENGTH */
    char    *doctype;          /* EBML_ID_DOCTYPE, heap-allocated string */
    uint64_t doctype_version;  /* EBML_ID_DOCTYPEREADVERSION */
} Ebml;

98 99 100 101
/* Compression settings of a content encoding (see matroska_track_encoding_compression). */
typedef struct {
    uint64_t algo;      /* MATROSKA_ID_ENCODINGCOMPALGO */
    EbmlBin  settings;  /* MATROSKA_ID_ENCODINGCOMPSETTINGS */
} MatroskaTrackCompression;
102

103 104 105 106 107
/* One content encoding of a track (see matroska_track_encoding). */
typedef struct {
    uint64_t scope;  /* MATROSKA_ID_ENCODINGSCOPE */
    uint64_t type;   /* MATROSKA_ID_ENCODINGTYPE */
    MatroskaTrackCompression compression; /* MATROSKA_ID_ENCODINGCOMPRESSION */
} MatroskaTrackEncoding;
108

109 110 111 112 113 114 115 116
/* Video-specific track settings (see matroska_track_video). */
typedef struct {
    double   frame_rate;      /* MATROSKA_ID_VIDEOFRAMERATE */
    uint64_t display_width;   /* MATROSKA_ID_VIDEODISPLAYWIDTH */
    uint64_t display_height;  /* MATROSKA_ID_VIDEODISPLAYHEIGHT */
    uint64_t pixel_width;     /* MATROSKA_ID_VIDEOPIXELWIDTH */
    uint64_t pixel_height;    /* MATROSKA_ID_VIDEOPIXELHEIGHT */
    uint64_t fourcc;          /* MATROSKA_ID_VIDEOCOLORSPACE */
} MatroskaTrackVideo;
117

118 119 120 121 122 123 124 125 126 127 128 129 130
/* Audio-specific track settings (see matroska_track_audio) plus demuxer state. */
typedef struct {
    double   samplerate;      /* MATROSKA_ID_AUDIOSAMPLINGFREQ */
    double   out_samplerate;  /* MATROSKA_ID_AUDIOOUTSAMPLINGFREQ */
    uint64_t bitdepth;        /* MATROSKA_ID_AUDIOBITDEPTH */
    uint64_t channels;        /* MATROSKA_ID_AUDIOCHANNELS */

    /* real audio header (extracted from extradata) */
    int      coded_framesize;
    int      sub_packet_h;
    int      frame_size;
    int      sub_packet_size;
    int      sub_packet_cnt;
    int      pkt_cnt;
    uint64_t buf_timecode;
    uint8_t *buf;
} MatroskaTrackAudio;
134

135 136
typedef struct {
    uint64_t num;
137
    uint64_t uid;
138
    uint64_t type;
139
    char    *name;
140 141 142
    char    *codec_id;
    EbmlBin  codec_priv;
    char    *language;
143
    double time_scale;
144
    uint64_t default_duration;
145
    uint64_t flag_default;
146
    uint64_t flag_forced;
147 148 149
    MatroskaTrackVideo video;
    MatroskaTrackAudio audio;
    EbmlList encodings;
150 151

    AVStream *stream;
152
    int64_t end_timecode;
153
    int ms_compat;
154 155
} MatroskaTrack;

156
typedef struct {
157
    uint64_t uid;
158 159 160
    char *filename;
    char *mime;
    EbmlBin bin;
161 162

    AVStream *stream;
163 164
} MatroskaAttachement;

165 166 167 168 169
typedef struct {
    uint64_t start;
    uint64_t end;
    uint64_t uid;
    char    *title;
170 171

    AVChapter *chapter;
172 173
} MatroskaChapter;

174 175 176 177 178 179 180 181 182 183
/* One cue track position (see matroska_index_pos). */
typedef struct {
    uint64_t track;  /* MATROSKA_ID_CUETRACK */
    uint64_t pos;    /* MATROSKA_ID_CUECLUSTERPOSITION */
} MatroskaIndexPos;

/* One cue point (see matroska_index_entry). */
typedef struct {
    uint64_t time;  /* MATROSKA_ID_CUETIME */
    EbmlList pos;   /* list of MatroskaIndexPos */
} MatroskaIndex;

184 185 186
typedef struct {
    char *name;
    char *string;
187 188
    char *lang;
    uint64_t def;
189 190 191
    EbmlList sub;
} MatroskaTag;

192 193 194 195 196 197 198 199 200 201 202 203 204
/* Target selector of a tag (see matroska_tagtargets). */
typedef struct {
    char    *type;        /* MATROSKA_ID_TAGTARGETS_TYPE */
    uint64_t typevalue;   /* MATROSKA_ID_TAGTARGETS_TYPEVALUE */
    uint64_t trackuid;    /* MATROSKA_ID_TAGTARGETS_TRACKUID */
    uint64_t chapteruid;  /* MATROSKA_ID_TAGTARGETS_CHAPTERUID */
    uint64_t attachuid;   /* MATROSKA_ID_TAGTARGETS_ATTACHUID */
} MatroskaTagTarget;

/* One Tag element: a target plus its simple tags (see matroska_tag). */
typedef struct {
    MatroskaTagTarget target;
    EbmlList tag;  /* list of MatroskaTag */
} MatroskaTags;

205 206 207 208 209
/* One seek head entry (see matroska_seekhead_entry). */
typedef struct {
    uint64_t id;   /* MATROSKA_ID_SEEKID */
    uint64_t pos;  /* MATROSKA_ID_SEEKPOSITION */
} MatroskaSeekhead;

210
/* One open master element on the level stack (see ebml_read_master()). */
typedef struct {
    uint64_t start;   /* stream position at which the element's content begins */
    uint64_t length;  /* content length as read from the element header */
} MatroskaLevel;

215
typedef struct {
216 217
    AVFormatContext *ctx;

D
Diego Biurrun 已提交
218
    /* EBML stuff */
219 220 221
    int num_levels;
    MatroskaLevel levels[EBML_MAX_DEPTH];
    int level_up;
222
    uint32_t current_id;
223

224 225 226
    uint64_t time_scale;
    double   duration;
    char    *title;
227
    EbmlList tracks;
228
    EbmlList attachments;
229
    EbmlList chapters;
230
    EbmlList index;
231
    EbmlList tags;
232
    EbmlList seekhead;
233 234

    /* byte position of the segment inside the stream */
235
    int64_t segment_start;
236

D
Diego Biurrun 已提交
237
    /* the packet queue */
238 239
    AVPacket **packets;
    int num_packets;
240
    AVPacket *prev_pkt;
241

242
    int done;
243 244 245

    /* What to skip before effectively reading a packet. */
    int skip_to_keyframe;
246
    uint64_t skip_to_timecode;
247 248 249

    /* File has a CUES element, but we defer parsing until it is needed. */
    int cues_parsing_deferred;
250 251
} MatroskaDemuxContext;

252 253 254
typedef struct {
    uint64_t duration;
    int64_t  reference;
255
    uint64_t non_simple;
256 257 258 259 260 261 262 263
    EbmlBin  bin;
} MatroskaBlock;

/* One parsed cluster (see matroska_cluster). */
typedef struct {
    uint64_t timecode;  /* MATROSKA_ID_CLUSTERTIMECODE */
    EbmlList blocks;    /* list of MatroskaBlock */
} MatroskaCluster;

264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
/* Children of the EBML header; values land in an Ebml struct. */
static EbmlSyntax ebml_header[] = {
    { EBML_ID_EBMLREADVERSION,        EBML_UINT, 0, offsetof(Ebml,version), {.u=EBML_VERSION} },
    { EBML_ID_EBMLMAXSIZELENGTH,      EBML_UINT, 0, offsetof(Ebml,max_size), {.u=8} },
    { EBML_ID_EBMLMAXIDLENGTH,        EBML_UINT, 0, offsetof(Ebml,id_length), {.u=4} },
    { EBML_ID_DOCTYPE,                EBML_STR,  0, offsetof(Ebml,doctype), {.s="(none)"} },
    { EBML_ID_DOCTYPEREADVERSION,     EBML_UINT, 0, offsetof(Ebml,doctype_version), {.u=1} },
    { EBML_ID_EBMLVERSION,            EBML_NONE },
    { EBML_ID_DOCTYPEVERSION,         EBML_NONE },
    { 0 }
};

/* Top-level syntax that accepts only the EBML header element. */
static EbmlSyntax ebml_syntax[] = {
    { EBML_ID_HEADER,                 EBML_NEST, 0, 0, {.n=ebml_header} },
    { 0 }
};

280 281 282 283 284 285 286 287 288 289 290
/* Children of MATROSKA_ID_INFO; values land in MatroskaDemuxContext. */
static EbmlSyntax matroska_info[] = {
    { MATROSKA_ID_TIMECODESCALE,      EBML_UINT,  0, offsetof(MatroskaDemuxContext,time_scale), {.u=1000000} },
    { MATROSKA_ID_DURATION,           EBML_FLOAT, 0, offsetof(MatroskaDemuxContext,duration) },
    { MATROSKA_ID_TITLE,              EBML_UTF8,  0, offsetof(MatroskaDemuxContext,title) },
    { MATROSKA_ID_WRITINGAPP,         EBML_NONE },
    { MATROSKA_ID_MUXINGAPP,          EBML_NONE },
    { MATROSKA_ID_DATEUTC,            EBML_NONE },
    { MATROSKA_ID_SEGMENTUID,         EBML_NONE },
    { 0 }
};

291 292 293 294 295 296 297
/* Children of MATROSKA_ID_TRACKVIDEO; values land in MatroskaTrackVideo. */
static EbmlSyntax matroska_track_video[] = {
    { MATROSKA_ID_VIDEOFRAMERATE,     EBML_FLOAT,0, offsetof(MatroskaTrackVideo,frame_rate) },
    { MATROSKA_ID_VIDEODISPLAYWIDTH,  EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_width) },
    { MATROSKA_ID_VIDEODISPLAYHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_height) },
    { MATROSKA_ID_VIDEOPIXELWIDTH,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) },
    { MATROSKA_ID_VIDEOPIXELHEIGHT,   EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) },
    { MATROSKA_ID_VIDEOCOLORSPACE,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,fourcc) },
    /* recognised but skipped */
    { MATROSKA_ID_VIDEOPIXELCROPB,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPT,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPL,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPR,    EBML_NONE },
    { MATROSKA_ID_VIDEODISPLAYUNIT,   EBML_NONE },
    { MATROSKA_ID_VIDEOFLAGINTERLACED,EBML_NONE },
    { MATROSKA_ID_VIDEOSTEREOMODE,    EBML_NONE },
    { MATROSKA_ID_VIDEOASPECTRATIO,   EBML_NONE },
    { 0 }
};

/* Children of MATROSKA_ID_TRACKAUDIO; values land in MatroskaTrackAudio. */
static EbmlSyntax matroska_track_audio[] = {
    { MATROSKA_ID_AUDIOSAMPLINGFREQ,  EBML_FLOAT,0, offsetof(MatroskaTrackAudio,samplerate), {.f=8000.0} },
    { MATROSKA_ID_AUDIOOUTSAMPLINGFREQ,EBML_FLOAT,0,offsetof(MatroskaTrackAudio,out_samplerate) },
    { MATROSKA_ID_AUDIOBITDEPTH,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,bitdepth) },
    { MATROSKA_ID_AUDIOCHANNELS,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,channels), {.u=1} },
    { 0 }
};

/* Children of MATROSKA_ID_ENCODINGCOMPRESSION; values land in MatroskaTrackCompression. */
static EbmlSyntax matroska_track_encoding_compression[] = {
    { MATROSKA_ID_ENCODINGCOMPALGO,   EBML_UINT, 0, offsetof(MatroskaTrackCompression,algo), {.u=0} },
    { MATROSKA_ID_ENCODINGCOMPSETTINGS,EBML_BIN, 0, offsetof(MatroskaTrackCompression,settings) },
    { 0 }
};

/* Children of MATROSKA_ID_TRACKCONTENTENCODING; values land in MatroskaTrackEncoding. */
static EbmlSyntax matroska_track_encoding[] = {
    { MATROSKA_ID_ENCODINGSCOPE,      EBML_UINT, 0, offsetof(MatroskaTrackEncoding,scope), {.u=1} },
    { MATROSKA_ID_ENCODINGTYPE,       EBML_UINT, 0, offsetof(MatroskaTrackEncoding,type), {.u=0} },
    { MATROSKA_ID_ENCODINGCOMPRESSION,EBML_NEST, 0, offsetof(MatroskaTrackEncoding,compression), {.n=matroska_track_encoding_compression} },
    /* recognised but skipped */
    { MATROSKA_ID_ENCODINGORDER,      EBML_NONE },
    { 0 }
};

/* Children of MATROSKA_ID_TRACKCONTENTENCODINGS; each encoding is appended to MatroskaTrack.encodings. */
static EbmlSyntax matroska_track_encodings[] = {
    { MATROSKA_ID_TRACKCONTENTENCODING, EBML_NEST, sizeof(MatroskaTrackEncoding), offsetof(MatroskaTrack,encodings), {.n=matroska_track_encoding} },
    { 0 }
};

static EbmlSyntax matroska_track[] = {
    { MATROSKA_ID_TRACKNUMBER,          EBML_UINT, 0, offsetof(MatroskaTrack,num) },
338
    { MATROSKA_ID_TRACKNAME,            EBML_UTF8, 0, offsetof(MatroskaTrack,name) },
339
    { MATROSKA_ID_TRACKUID,             EBML_UINT, 0, offsetof(MatroskaTrack,uid) },
340 341 342 343 344 345 346
    { MATROSKA_ID_TRACKTYPE,            EBML_UINT, 0, offsetof(MatroskaTrack,type) },
    { MATROSKA_ID_CODECID,              EBML_STR,  0, offsetof(MatroskaTrack,codec_id) },
    { MATROSKA_ID_CODECPRIVATE,         EBML_BIN,  0, offsetof(MatroskaTrack,codec_priv) },
    { MATROSKA_ID_TRACKLANGUAGE,        EBML_UTF8, 0, offsetof(MatroskaTrack,language), {.s="eng"} },
    { MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, offsetof(MatroskaTrack,default_duration) },
    { MATROSKA_ID_TRACKTIMECODESCALE,   EBML_FLOAT,0, offsetof(MatroskaTrack,time_scale), {.f=1.0} },
    { MATROSKA_ID_TRACKFLAGDEFAULT,     EBML_UINT, 0, offsetof(MatroskaTrack,flag_default), {.u=1} },
347
    { MATROSKA_ID_TRACKFLAGFORCED,      EBML_UINT, 0, offsetof(MatroskaTrack,flag_forced), {.u=0} },
348 349 350 351 352 353 354 355 356 357 358
    { MATROSKA_ID_TRACKVIDEO,           EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} },
    { MATROSKA_ID_TRACKAUDIO,           EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} },
    { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} },
    { MATROSKA_ID_TRACKFLAGENABLED,     EBML_NONE },
    { MATROSKA_ID_TRACKFLAGLACING,      EBML_NONE },
    { MATROSKA_ID_CODECNAME,            EBML_NONE },
    { MATROSKA_ID_CODECDECODEALL,       EBML_NONE },
    { MATROSKA_ID_CODECINFOURL,         EBML_NONE },
    { MATROSKA_ID_CODECDOWNLOADURL,     EBML_NONE },
    { MATROSKA_ID_TRACKMINCACHE,        EBML_NONE },
    { MATROSKA_ID_TRACKMAXCACHE,        EBML_NONE },
359
    { MATROSKA_ID_TRACKMAXBLKADDID,     EBML_NONE },
360 361 362 363 364 365 366 367
    { 0 }
};

/* Children of MATROSKA_ID_TRACKS; each entry is appended to MatroskaDemuxContext.tracks. */
static EbmlSyntax matroska_tracks[] = {
    { MATROSKA_ID_TRACKENTRY,         EBML_NEST, sizeof(MatroskaTrack), offsetof(MatroskaDemuxContext,tracks), {.n=matroska_track} },
    { 0 }
};

368
static EbmlSyntax matroska_attachment[] = {
369
    { MATROSKA_ID_FILEUID,            EBML_UINT, 0, offsetof(MatroskaAttachement,uid) },
370 371 372
    { MATROSKA_ID_FILENAME,           EBML_UTF8, 0, offsetof(MatroskaAttachement,filename) },
    { MATROSKA_ID_FILEMIMETYPE,       EBML_STR,  0, offsetof(MatroskaAttachement,mime) },
    { MATROSKA_ID_FILEDATA,           EBML_BIN,  0, offsetof(MatroskaAttachement,bin) },
373
    { MATROSKA_ID_FILEDESC,           EBML_NONE },
374 375 376 377 378 379 380 381
    { 0 }
};

/* Children of MATROSKA_ID_ATTACHMENTS; each file is appended to MatroskaDemuxContext.attachments. */
static EbmlSyntax matroska_attachments[] = {
    { MATROSKA_ID_ATTACHEDFILE,       EBML_NEST, sizeof(MatroskaAttachement), offsetof(MatroskaDemuxContext,attachments), {.n=matroska_attachment} },
    { 0 }
};

382 383
static EbmlSyntax matroska_chapter_display[] = {
    { MATROSKA_ID_CHAPSTRING,         EBML_UTF8, 0, offsetof(MatroskaChapter,title) },
384
    { MATROSKA_ID_CHAPLANG,           EBML_NONE },
385 386 387 388 389 390 391 392 393
    { 0 }
};

/* Children of MATROSKA_ID_CHAPTERATOM; values land in MatroskaChapter. */
static EbmlSyntax matroska_chapter_entry[] = {
    { MATROSKA_ID_CHAPTERTIMESTART,   EBML_UINT, 0, offsetof(MatroskaChapter,start), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_CHAPTERTIMEEND,     EBML_UINT, 0, offsetof(MatroskaChapter,end), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_CHAPTERUID,         EBML_UINT, 0, offsetof(MatroskaChapter,uid) },
    { MATROSKA_ID_CHAPTERDISPLAY,     EBML_NEST, 0, 0, {.n=matroska_chapter_display} },
    /* recognised but skipped (including nested chapter atoms) */
    { MATROSKA_ID_CHAPTERFLAGHIDDEN,  EBML_NONE },
    { MATROSKA_ID_CHAPTERFLAGENABLED, EBML_NONE },
    { MATROSKA_ID_CHAPTERPHYSEQUIV,   EBML_NONE },
    { MATROSKA_ID_CHAPTERATOM,        EBML_NONE },
    { 0 }
};

static EbmlSyntax matroska_chapter[] = {
    { MATROSKA_ID_CHAPTERATOM,        EBML_NEST, sizeof(MatroskaChapter), offsetof(MatroskaDemuxContext,chapters), {.n=matroska_chapter_entry} },
    { MATROSKA_ID_EDITIONUID,         EBML_NONE },
    { MATROSKA_ID_EDITIONFLAGHIDDEN,  EBML_NONE },
    { MATROSKA_ID_EDITIONFLAGDEFAULT, EBML_NONE },
405
    { MATROSKA_ID_EDITIONFLAGORDERED, EBML_NONE },
406 407 408 409 410 411 412 413
    { 0 }
};

/* Children of MATROSKA_ID_CHAPTERS. */
static EbmlSyntax matroska_chapters[] = {
    { MATROSKA_ID_EDITIONENTRY,       EBML_NEST, 0, 0, {.n=matroska_chapter} },
    { 0 }
};

414 415 416
static EbmlSyntax matroska_index_pos[] = {
    { MATROSKA_ID_CUETRACK,           EBML_UINT, 0, offsetof(MatroskaIndexPos,track) },
    { MATROSKA_ID_CUECLUSTERPOSITION, EBML_UINT, 0, offsetof(MatroskaIndexPos,pos)   },
417
    { MATROSKA_ID_CUEBLOCKNUMBER,     EBML_NONE },
418 419 420 421 422 423 424 425 426 427 428 429 430 431
    { 0 }
};

/* Children of MATROSKA_ID_POINTENTRY; values land in MatroskaIndex. */
static EbmlSyntax matroska_index_entry[] = {
    { MATROSKA_ID_CUETIME,            EBML_UINT, 0, offsetof(MatroskaIndex,time) },
    { MATROSKA_ID_CUETRACKPOSITION,   EBML_NEST, sizeof(MatroskaIndexPos), offsetof(MatroskaIndex,pos), {.n=matroska_index_pos} },
    { 0 }
};

/* Children of MATROSKA_ID_CUES; each cue point is appended to MatroskaDemuxContext.index. */
static EbmlSyntax matroska_index[] = {
    { MATROSKA_ID_POINTENTRY,         EBML_NEST, sizeof(MatroskaIndex), offsetof(MatroskaDemuxContext,index), {.n=matroska_index_entry} },
    { 0 }
};

432 433 434
static EbmlSyntax matroska_simpletag[] = {
    { MATROSKA_ID_TAGNAME,            EBML_UTF8, 0, offsetof(MatroskaTag,name) },
    { MATROSKA_ID_TAGSTRING,          EBML_UTF8, 0, offsetof(MatroskaTag,string) },
435 436
    { MATROSKA_ID_TAGLANG,            EBML_STR,  0, offsetof(MatroskaTag,lang), {.s="und"} },
    { MATROSKA_ID_TAGDEFAULT,         EBML_UINT, 0, offsetof(MatroskaTag,def) },
437
    { MATROSKA_ID_TAGDEFAULT_BUG,     EBML_UINT, 0, offsetof(MatroskaTag,def) },
438 439 440 441
    { MATROSKA_ID_SIMPLETAG,          EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag,sub), {.n=matroska_simpletag} },
    { 0 }
};

442 443 444 445 446 447 448 449 450
/* Children of MATROSKA_ID_TAGTARGETS; values land in MatroskaTagTarget. */
static EbmlSyntax matroska_tagtargets[] = {
    { MATROSKA_ID_TAGTARGETS_TYPE,      EBML_STR,  0, offsetof(MatroskaTagTarget,type) },
    { MATROSKA_ID_TAGTARGETS_TYPEVALUE, EBML_UINT, 0, offsetof(MatroskaTagTarget,typevalue), {.u=50} },
    { MATROSKA_ID_TAGTARGETS_TRACKUID,  EBML_UINT, 0, offsetof(MatroskaTagTarget,trackuid) },
    { MATROSKA_ID_TAGTARGETS_CHAPTERUID,EBML_UINT, 0, offsetof(MatroskaTagTarget,chapteruid) },
    { MATROSKA_ID_TAGTARGETS_ATTACHUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,attachuid) },
    { 0 }
};

451
static EbmlSyntax matroska_tag[] = {
452 453
    { MATROSKA_ID_SIMPLETAG,          EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTags,tag), {.n=matroska_simpletag} },
    { MATROSKA_ID_TAGTARGETS,         EBML_NEST, 0, offsetof(MatroskaTags,target), {.n=matroska_tagtargets} },
454 455 456
    { 0 }
};

457
static EbmlSyntax matroska_tags[] = {
458
    { MATROSKA_ID_TAG,                EBML_NEST, sizeof(MatroskaTags), offsetof(MatroskaDemuxContext,tags), {.n=matroska_tag} },
459 460 461
    { 0 }
};

462 463 464 465 466 467 468 469 470 471 472
/* Children of MATROSKA_ID_SEEKENTRY; values land in MatroskaSeekhead. */
static EbmlSyntax matroska_seekhead_entry[] = {
    { MATROSKA_ID_SEEKID,             EBML_UINT, 0, offsetof(MatroskaSeekhead,id) },
    { MATROSKA_ID_SEEKPOSITION,       EBML_UINT, 0, offsetof(MatroskaSeekhead,pos), {.u=-1} },
    { 0 }
};

/* Children of MATROSKA_ID_SEEKHEAD; each entry is appended to MatroskaDemuxContext.seekhead. */
static EbmlSyntax matroska_seekhead[] = {
    { MATROSKA_ID_SEEKENTRY,          EBML_NEST, sizeof(MatroskaSeekhead), offsetof(MatroskaDemuxContext,seekhead), {.n=matroska_seekhead_entry} },
    { 0 }
};

473 474 475 476 477 478 479 480
/*
 * Children of MATROSKA_ID_SEGMENT. The cluster entry is EBML_STOP, so
 * parsing with this table stops (ebml_parse_elem() returns 1) at the first
 * cluster.
 */
static EbmlSyntax matroska_segment[] = {
    { MATROSKA_ID_INFO,           EBML_NEST, 0, 0, {.n=matroska_info       } },
    { MATROSKA_ID_TRACKS,         EBML_NEST, 0, 0, {.n=matroska_tracks     } },
    { MATROSKA_ID_ATTACHMENTS,    EBML_NEST, 0, 0, {.n=matroska_attachments} },
    { MATROSKA_ID_CHAPTERS,       EBML_NEST, 0, 0, {.n=matroska_chapters   } },
    { MATROSKA_ID_CUES,           EBML_NEST, 0, 0, {.n=matroska_index      } },
    { MATROSKA_ID_TAGS,           EBML_NEST, 0, 0, {.n=matroska_tags       } },
    { MATROSKA_ID_SEEKHEAD,       EBML_NEST, 0, 0, {.n=matroska_seekhead   } },
    { MATROSKA_ID_CLUSTER,        EBML_STOP },
    { 0 }
};

/* Top-level syntax that accepts only the segment element. */
static EbmlSyntax matroska_segments[] = {
    { MATROSKA_ID_SEGMENT,        EBML_NEST, 0, 0, {.n=matroska_segment    } },
    { 0 }
};

490 491 492 493 494
/*
 * Children of MATROSKA_ID_BLOCKGROUP; values land in MatroskaBlock.
 * The entry with id 1 exists for its default: ebml_parse_nest() sets
 * non_simple to 1 whenever a block group is parsed. A simple block reaches
 * this table through EBML_PASS, which skips the defaults, so non_simple
 * keeps the 0 written when the list element was cleared.
 */
static EbmlSyntax matroska_blockgroup[] = {
    { MATROSKA_ID_BLOCK,          EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
    { MATROSKA_ID_SIMPLEBLOCK,    EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
    { MATROSKA_ID_BLOCKDURATION,  EBML_UINT, 0, offsetof(MatroskaBlock,duration), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_BLOCKREFERENCE, EBML_UINT, 0, offsetof(MatroskaBlock,reference) },
    { 1,                          EBML_UINT, 0, offsetof(MatroskaBlock,non_simple), {.u=1} },
    { 0 }
};

/*
 * Children of MATROSKA_ID_CLUSTER; values land in MatroskaCluster. Block
 * groups and simple blocks both append a MatroskaBlock to the blocks list.
 */
static EbmlSyntax matroska_cluster[] = {
    { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
    { MATROSKA_ID_BLOCKGROUP,     EBML_NEST, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
    { MATROSKA_ID_SIMPLEBLOCK,    EBML_PASS, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
    /* recognised but skipped */
    { MATROSKA_ID_CLUSTERPOSITION,EBML_NONE },
    { MATROSKA_ID_CLUSTERPREVSIZE,EBML_NONE },
    { 0 }
};

static EbmlSyntax matroska_clusters[] = {
    { MATROSKA_ID_CLUSTER,        EBML_NEST, 0, 0, {.n=matroska_cluster} },
510 511 512 513
    { MATROSKA_ID_INFO,           EBML_NONE },
    { MATROSKA_ID_CUES,           EBML_NONE },
    { MATROSKA_ID_TAGS,           EBML_NONE },
    { MATROSKA_ID_SEEKHEAD,       EBML_NONE },
514 515 516
    { 0 }
};

J
James Zern 已提交
517 518
/* Document types accepted by the probe; the table itself is read-only too. */
static const char *const matroska_doctypes[] = { "matroska", "webm" };

519
/*
D
Diego Biurrun 已提交
520
 * Return: Whether we reached the end of a level in the hierarchy or not.
521
 */
522
static int ebml_level_end(MatroskaDemuxContext *matroska)
523
{
524
    AVIOContext *pb = matroska->ctx->pb;
525
    int64_t pos = avio_tell(pb);
526

527
    if (matroska->num_levels > 0) {
528
        MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
529
        if (pos - level->start >= level->length || matroska->current_id) {
530
            matroska->num_levels--;
531
            return 1;
532 533
        }
    }
534
    return 0;
535 536 537 538 539 540 541 542
}

/*
 * Read: an "EBML number", which is defined as a variable-length
 * array of bytes. The first byte indicates the length by giving a
 * number of 0-bits followed by a one. The position of the first
 * "one" bit inside the first byte indicates the length of this
 * number.
D
Diego Biurrun 已提交
543
 * Returns: number of bytes read, < 0 on error
544
 */
545
static int ebml_read_num(MatroskaDemuxContext *matroska, AVIOContext *pb,
546
                         int max_size, uint64_t *number)
547
{
548 549
    int read = 1, n = 1;
    uint64_t total = 0;
550

551
    /* The first byte tells us the length in bytes - avio_r8() can normally
552 553
     * return 0, but since that's not a valid first ebmlID byte, we can
     * use it safely here to catch EOS. */
554
    if (!(total = avio_r8(pb))) {
555
        /* we might encounter EOS here */
A
Anton Khirnov 已提交
556
        if (!pb->eof_reached) {
557
            int64_t pos = avio_tell(pb);
558 559 560 561
            av_log(matroska->ctx, AV_LOG_ERROR,
                   "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
                   pos, pos);
        }
562
        return AVERROR(EIO); /* EOS or actual I/O error */
563 564 565
    }

    /* get the length of the EBML number */
566
    read = 8 - ff_log2_tab[total];
567
    if (read > max_size) {
568
        int64_t pos = avio_tell(pb) - 1;
569 570 571 572 573 574 575
        av_log(matroska->ctx, AV_LOG_ERROR,
               "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
               (uint8_t) total, pos, pos);
        return AVERROR_INVALIDDATA;
    }

    /* read out length */
576
    total ^= 1 << ff_log2_tab[total];
577
    while (n++ < read)
578
        total = (total << 8) | avio_r8(pb);
579 580 581 582 583 584

    *number = total;

    return read;
}

585 586 587 588 589
/**
 * Read a EBML length value.
 * This needs special handling for the "unknown length" case which has multiple
 * encodings.
 */
590
static int ebml_read_length(MatroskaDemuxContext *matroska, AVIOContext *pb,
591 592 593 594 595 596 597 598
                            uint64_t *number)
{
    int res = ebml_read_num(matroska, pb, 8, number);
    if (res > 0 && *number + 1 == 1ULL << (7 * res))
        *number = 0xffffffffffffffULL;
    return res;
}

599 600 601 602
/*
 * Read the next element as an unsigned int.
 * 0 is success, < 0 is failure.
 */
603
static int ebml_read_uint(AVIOContext *pb, int size, uint64_t *num)
604
{
605
    int n = 0;
606

607
    if (size > 8)
608 609
        return AVERROR_INVALIDDATA;

D
Diego Biurrun 已提交
610
    /* big-endian ordering; build up number */
611 612
    *num = 0;
    while (n++ < size)
613
        *num = (*num << 8) | avio_r8(pb);
614 615 616 617 618 619 620 621

    return 0;
}

/*
 * Read the next element as a float.
 * 0 is success, < 0 is failure.
 */
622
static int ebml_read_float(AVIOContext *pb, int size, double *num)
623
{
624 625 626
    if (size == 0) {
        *num = 0;
    } else if (size == 4) {
627
        *num= av_int2flt(avio_rb32(pb));
628
    } else if(size==8){
629
        *num= av_int2dbl(avio_rb64(pb));
630
    } else
631 632 633 634 635 636 637 638 639
        return AVERROR_INVALIDDATA;

    return 0;
}

/*
 * Read the next element as an ASCII string.
 * 0 is success, < 0 is failure.
 */
640
static int ebml_read_ascii(AVIOContext *pb, int size, char **str)
641
{
642
    av_free(*str);
D
Diego Biurrun 已提交
643
    /* EBML strings are usually not 0-terminated, so we allocate one
644
     * byte more, read the string and NULL-terminate it ourselves. */
645
    if (!(*str = av_malloc(size + 1)))
646
        return AVERROR(ENOMEM);
647
    if (avio_read(pb, (uint8_t *) *str, size) != size) {
648
        av_freep(str);
649
        return AVERROR(EIO);
650 651 652 653 654 655
    }
    (*str)[size] = '\0';

    return 0;
}

656 657 658 659
/*
 * Read the next element as binary data.
 * 0 is success, < 0 is failure.
 */
660
static int ebml_read_binary(AVIOContext *pb, int length, EbmlBin *bin)
661 662 663 664 665 666
{
    av_free(bin->data);
    if (!(bin->data = av_malloc(length)))
        return AVERROR(ENOMEM);

    bin->size = length;
667
    bin->pos  = avio_tell(pb);
668
    if (avio_read(pb, bin->data, length) != length) {
669
        av_freep(&bin->data);
670
        return AVERROR(EIO);
671
    }
672 673 674 675

    return 0;
}

676 677 678 679 680
/*
 * Read the next element, but only the header. The contents
 * are supposed to be sub-elements which can be read separately.
 * 0 is success, < 0 is failure.
 */
681
static int ebml_read_master(MatroskaDemuxContext *matroska, uint64_t length)
682
{
683
    AVIOContext *pb = matroska->ctx->pb;
684 685 686 687 688
    MatroskaLevel *level;

    if (matroska->num_levels >= EBML_MAX_DEPTH) {
        av_log(matroska->ctx, AV_LOG_ERROR,
               "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
689
        return AVERROR(ENOSYS);
690 691 692
    }

    level = &matroska->levels[matroska->num_levels++];
693
    level->start = avio_tell(pb);
694 695 696 697 698 699 700
    level->length = length;

    return 0;
}

/*
 * Read signed/unsigned "EBML" numbers.
D
Diego Biurrun 已提交
701
 * Return: number of bytes processed, < 0 on error
702
 */
703 704
static int matroska_ebmlnum_uint(MatroskaDemuxContext *matroska,
                                 uint8_t *data, uint32_t size, uint64_t *num)
705
{
706
    AVIOContext pb;
707
    ffio_init_context(&pb, data, size, 0, NULL, NULL, NULL, NULL);
708
    return ebml_read_num(matroska, &pb, FFMIN(size, 8), num);
709 710 711 712 713
}

/*
 * Same as above, but signed.
 */
714 715
static int matroska_ebmlnum_sint(MatroskaDemuxContext *matroska,
                                 uint8_t *data, uint32_t size, int64_t *num)
716 717 718 719 720
{
    uint64_t unum;
    int res;

    /* read as unsigned number first */
721
    if ((res = matroska_ebmlnum_uint(matroska, data, size, &unum)) < 0)
722 723 724
        return res;

    /* make signed (weird way) */
725
    *num = unum - ((1LL << (7*res - 1)) - 1);
726 727 728 729

    return res;
}

730 731
static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                           EbmlSyntax *syntax, void *data);
732

733 734
/*
 * Look up id in the syntax table and parse the element with the matching
 * entry. An unknown ID is parsed with the terminating entry. A cluster ID
 * that is unknown to the table while the innermost level has the
 * unknown-length marker ends that level: 0 is returned and nothing is
 * consumed.
 */
static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                         uint32_t id, void *data)
{
    EbmlSyntax *entry = syntax;

    /* stops on the matching entry or on the terminator (id 0) */
    while (entry->id && entry->id != id)
        entry++;

    if (!entry->id) {
        if (id == MATROSKA_ID_CLUSTER &&
            matroska->num_levels > 0 &&
            matroska->levels[matroska->num_levels-1].length == 0xffffffffffffff)
            return 0;  // we reached the end of an unknown size cluster
        if (id != EBML_ID_VOID && id != EBML_ID_CRC32)
            av_log(matroska->ctx, AV_LOG_INFO, "Unknown entry 0x%X\n", id);
    }
    return ebml_parse_elem(matroska, entry, data);
}

749 750
/*
 * Parse one element with the given syntax table. If an ID is already
 * pending in matroska->current_id it is reused; otherwise the next ID
 * (at most 4 bytes) is read from the stream first.
 */
static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                      void *data)
{
    uint64_t id;
    int len;

    if (matroska->current_id)
        return ebml_parse_id(matroska, syntax, matroska->current_id, data);

    len = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);
    if (len < 0)
        return len;
    /* ebml_read_num() removed the length marker bit; element IDs keep it */
    matroska->current_id = id | 1 << 7*len;

    return ebml_parse_id(matroska, syntax, matroska->current_id, data);
}

762 763
/*
 * Parse the children of a master element.
 * Every UINT, FLOAT and string field described by syntax is first set to
 * its default value in data. Children are then parsed until the current
 * level ends or an element parser returns non-zero.
 * Returns the last parser result (0 when the level ended normally), or
 * AVERROR(ENOMEM) when a string default could not be duplicated.
 */
static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                           void *data)
{
    int i, res = 0;

    for (i = 0; syntax[i].id; i++) {
        void *field = (char *)data + syntax[i].data_offset;

        switch (syntax[i].type) {
        case EBML_UINT:
            *(uint64_t *)field = syntax[i].def.u;
            break;
        case EBML_FLOAT:
            *(double *)field = syntax[i].def.f;
            break;
        case EBML_STR:
        case EBML_UTF8: {
            char **dst = field;

            *dst = av_strdup(syntax[i].def.s);
            /* a NULL default stays NULL; only a failed copy is an error */
            if (syntax[i].def.s && !*dst)
                return AVERROR(ENOMEM);
            break;
        }
        default:
            /* other types have no default to install */
            break;
        }
    }

    while (!res && !ebml_level_end(matroska))
        res = ebml_parse(matroska, syntax, data);

    return res;
}

787 788 789
/*
 * Parse one element according to its syntax entry.
 * data is the base of the output structure; the entry's data_offset
 * selects the target field. When the entry has a list_elem_size the field
 * is an EbmlList, and a new zeroed element is appended and used as target.
 * Except for EBML_PASS and EBML_STOP, the pending ID is consumed and the
 * element length is read and checked against a per-type limit.
 * Returns 0 on success, 1 for EBML_STOP, < 0 on error.
 */
static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                           EbmlSyntax *syntax, void *data)
{
    static const uint64_t max_lengths[EBML_TYPE_COUNT] = {
        [EBML_UINT]  = 8,
        [EBML_FLOAT] = 8,
        // max. 16 MB for strings
        [EBML_STR]   = 0x1000000,
        [EBML_UTF8]  = 0x1000000,
        // max. 256 MB for binary data
        [EBML_BIN]   = 0x10000000,
        // no limits for anything else
    };
    AVIOContext *pb = matroska->ctx->pb;
    uint32_t id = syntax->id;
    uint64_t length = 0; /* stays 0 for EBML_PASS/EBML_STOP, which read no length */
    int res;
    void *newelem;

    data = (char *)data + syntax->data_offset;
    if (syntax->list_elem_size) {
        EbmlList *list = data;

        /* The size below is computed in int; check it in 64 bits first so
         * a huge list cannot overflow it. */
        if (((uint64_t)list->nb_elem + 1) * syntax->list_elem_size > 0x7fffffff)
            return AVERROR(ENOMEM);
        newelem = av_realloc(list->elem, (list->nb_elem+1)*syntax->list_elem_size);
        if (!newelem)
            return AVERROR(ENOMEM);
        list->elem = newelem;
        data = (char*)list->elem + list->nb_elem*syntax->list_elem_size;
        memset(data, 0, syntax->list_elem_size);
        list->nb_elem++;
    }

    if (syntax->type != EBML_PASS && syntax->type != EBML_STOP) {
        /* the pending ID is consumed from here on */
        matroska->current_id = 0;
        if ((res = ebml_read_length(matroska, pb, &length)) < 0)
            return res;
        if (max_lengths[syntax->type] && length > max_lengths[syntax->type]) {
            av_log(matroska->ctx, AV_LOG_ERROR,
                   "Invalid length 0x%"PRIx64" > 0x%"PRIx64" for syntax element %i\n",
                   length, max_lengths[syntax->type], syntax->type);
            return AVERROR_INVALIDDATA;
        }
    }

    switch (syntax->type) {
    case EBML_UINT:
        res = ebml_read_uint(pb, length, data);
        break;
    case EBML_FLOAT:
        res = ebml_read_float(pb, length, data);
        break;
    case EBML_STR:
    case EBML_UTF8:
        res = ebml_read_ascii(pb, length, data);
        break;
    case EBML_BIN:
        res = ebml_read_binary(pb, length, data);
        break;
    case EBML_NEST:
        if ((res = ebml_read_master(matroska, length)) < 0)
            return res;
        if (id == MATROSKA_ID_SEGMENT)
            matroska->segment_start = avio_tell(matroska->ctx->pb);
        return ebml_parse_nest(matroska, syntax->def.n, data);
    case EBML_PASS:
        /* same ID, looked up again in the nested syntax */
        return ebml_parse_id(matroska, syntax->def.n, id, data);
    case EBML_STOP:
        return 1;
    default:
        /* no reader for this type: skip the payload */
        return avio_skip(pb,length)<0 ? AVERROR(EIO) : 0;
    }
    if (res == AVERROR_INVALIDDATA)
        av_log(matroska->ctx, AV_LOG_ERROR, "Invalid element\n");
    else if (res == AVERROR(EIO))
        av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
    return res;
}

/**
 * Recursively release the heap data referenced from a parsed EBML structure.
 *
 * Walks the syntax table until the terminating entry (id == 0) and, for each
 * entry, frees what lives at data + data_offset:
 *  - EBML_STR / EBML_UTF8: the pointer stored at that offset;
 *  - EBML_BIN:             the data pointer of the EbmlBin at that offset;
 *  - EBML_NEST:            either every element of an EbmlList (when
 *                          list_elem_size is set) followed by the list array,
 *                          or the directly embedded sub-structure.
 * Freed pointers are reset to NULL and freed lists are reset to zero elements,
 * so calling this function again on the same structure is harmless.
 *
 * @param syntax syntax table describing the layout of data
 * @param data   structure that was filled according to syntax
 */
static void ebml_free(EbmlSyntax *syntax, void *data)
{
    int i, j;
    for (i=0; syntax[i].id; i++) {
        void *data_off = (char *)data + syntax[i].data_offset;
        switch (syntax[i].type) {
        case EBML_STR:
        case EBML_UTF8:  av_freep(data_off);                      break;
        case EBML_BIN:   av_freep(&((EbmlBin *)data_off)->data);  break;
        case EBML_NEST:
            if (syntax[i].list_elem_size) {
                EbmlList *list = data_off;
                char *ptr = list->elem;
                for (j=0; j<list->nb_elem; j++, ptr+=syntax[i].list_elem_size)
                    ebml_free(syntax[i].def.n, ptr);
                /* Do not leave a dangling array pointer / stale count behind. */
                av_freep(&list->elem);
                list->nb_elem = 0;
            } else
                ebml_free(syntax[i].def.n, data_off);
            break;
        default:  break;
        }
    }
}

875 876 877 878 879 880 881

/*
 * Autodetecting...
 */
static int matroska_probe(AVProbeData *p)
{
    uint64_t hdr_len;
    int mask = 0x80, len_bytes = 1, pos, d;

    /* The buffer has to start with the EBML header ID. */
    if (AV_RB32(p->buf) != EBML_ID_HEADER)
        return 0;

    /* Byte 4 starts the header length: the position of its first set bit
     * gives the number of bytes the length field occupies. */
    hdr_len = p->buf[4];
    for (; len_bytes <= 8 && !(hdr_len & mask); len_bytes++)
        mask >>= 1;
    if (len_bytes > 8)
        return 0;

    /* Strip the marker bit, then append the remaining length bytes. */
    hdr_len &= (mask - 1);
    for (pos = 1; pos < len_bytes; pos++)
        hdr_len = (hdr_len << 8) | p->buf[4 + pos];

    /* Give up if the probe buffer does not hold the complete header. */
    if (p->buf_size < 4 + len_bytes + hdr_len)
        return 0;

    /* Rather than parsing the header, scan its bytes for any of the known
     * doctype strings. Not fool-proof, but good enough. */
    for (d = 0; d < FF_ARRAY_ELEMS(matroska_doctypes); d++) {
        int doclen = strlen(matroska_doctypes[d]);

        if (hdr_len < doclen)
            continue;
        for (pos = 4 + len_bytes; pos <= 4 + len_bytes + hdr_len - doclen; pos++)
            if (!memcmp(p->buf + pos, matroska_doctypes[d], doclen))
                return AVPROBE_SCORE_MAX;
    }

    /* Plausible EBML header, but none of the known doctypes was found. */
    return AVPROBE_SCORE_MAX / 2;
}

/**
 * Look up a parsed track by its track number.
 *
 * @return pointer to the matching entry of matroska->tracks, or NULL (after
 *         logging an error) when no track carries that number
 */
static MatroskaTrack *matroska_find_track_by_num(MatroskaDemuxContext *matroska,
                                                 int num)
{
    MatroskaTrack *track_arr = matroska->tracks.elem;
    int idx = 0;

    while (idx < matroska->tracks.nb_elem) {
        MatroskaTrack *candidate = &track_arr[idx++];
        if (candidate->num == num)
            return candidate;
    }

    av_log(matroska->ctx, AV_LOG_ERROR, "Invalid track number %d\n", num);
    return NULL;
}

935 936
/**
 * Undo the first content encoding of a track on a buffer.
 *
 * For the LZO, zlib and bzlib algorithms the input is decompressed into a
 * newly allocated buffer; the output buffer is grown by a factor of 3 per
 * attempt while the decoder reports that it needs more room and the size is
 * still below 10000000 bytes. On success *buf and *buf_size are replaced by
 * the new buffer and its length; the input buffer is not freed here.
 *
 * For header stripping nothing is decoded: *buf and *buf_size are left
 * untouched and the size of the stripped header is returned so that the
 * caller can prepend it.
 *
 * @param buf      in: encoded data, out: decoded data (compression only)
 * @param buf_size in: encoded size, out: decoded size (compression only)
 * @param track    track whose encodings[0] describes the encoding
 * @return 0 after a successful decompression, the (non-negative) stripped
 *         header size for header stripping, -1 on failure
 */
static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
                                  MatroskaTrack *track)
{
    MatroskaTrackEncoding *encodings = track->encodings.elem;
    uint8_t* data = *buf;
    int isize = *buf_size;
    uint8_t* pkt_data = NULL;
    uint8_t* newpktdata;
    int pkt_size = isize;
    int result = 0;
    int olen;

    if (pkt_size >= 10000000)
        return -1;

    switch (encodings[0].compression.algo) {
    case MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP:
        return encodings[0].compression.settings.size;
    case MATROSKA_TRACK_ENCODING_COMP_LZO:
        do {
            olen = pkt_size *= 3;
            /* Go through a temporary so the previous buffer is neither
             * leaked nor replaced by NULL when the reallocation fails. */
            newpktdata = av_realloc(pkt_data, pkt_size+AV_LZO_OUTPUT_PADDING);
            if (!newpktdata)
                goto failed;
            pkt_data = newpktdata;
            result = av_lzo1x_decode(pkt_data, &olen, data, &isize);
        } while (result==AV_LZO_OUTPUT_FULL && pkt_size<10000000);
        if (result)
            goto failed;
        /* olen now holds what is left of the output buffer. */
        pkt_size -= olen;
        break;
#if CONFIG_ZLIB
    case MATROSKA_TRACK_ENCODING_COMP_ZLIB: {
        z_stream zstream = {0};
        if (inflateInit(&zstream) != Z_OK)
            return -1;
        zstream.next_in = data;
        zstream.avail_in = isize;
        do {
            pkt_size *= 3;
            newpktdata = av_realloc(pkt_data, pkt_size);
            if (!newpktdata) {
                inflateEnd(&zstream);
                goto failed;
            }
            pkt_data = newpktdata;
            zstream.avail_out = pkt_size - zstream.total_out;
            zstream.next_out = pkt_data + zstream.total_out;
            result = inflate(&zstream, Z_NO_FLUSH);
        } while (result==Z_OK && pkt_size<10000000);
        pkt_size = zstream.total_out;
        inflateEnd(&zstream);
        if (result != Z_STREAM_END)
            goto failed;
        break;
    }
#endif
#if CONFIG_BZLIB
    case MATROSKA_TRACK_ENCODING_COMP_BZLIB: {
        bz_stream bzstream = {0};
        if (BZ2_bzDecompressInit(&bzstream, 0, 0) != BZ_OK)
            return -1;
        bzstream.next_in = data;
        bzstream.avail_in = isize;
        do {
            pkt_size *= 3;
            newpktdata = av_realloc(pkt_data, pkt_size);
            if (!newpktdata) {
                BZ2_bzDecompressEnd(&bzstream);
                goto failed;
            }
            pkt_data = newpktdata;
            bzstream.avail_out = pkt_size - bzstream.total_out_lo32;
            bzstream.next_out = pkt_data + bzstream.total_out_lo32;
            result = BZ2_bzDecompress(&bzstream);
        } while (result==BZ_OK && pkt_size<10000000);
        pkt_size = bzstream.total_out_lo32;
        BZ2_bzDecompressEnd(&bzstream);
        if (result != BZ_STREAM_END)
            goto failed;
        break;
    }
#endif
    default:
        return -1;
    }

    *buf = pkt_data;
    *buf_size = pkt_size;
    return 0;
 failed:
    av_free(pkt_data);
    return -1;
}

1027
static void matroska_fix_ass_packet(MatroskaDemuxContext *matroska,
1028
                                    AVPacket *pkt, uint64_t display_duration)
1029 1030 1031 1032 1033 1034 1035
{
    char *line, *layer, *ptr = pkt->data, *end = ptr+pkt->size;
    for (; *ptr!=',' && ptr<end-1; ptr++);
    if (*ptr == ',')
        layer = ++ptr;
    for (; *ptr!=',' && ptr<end-1; ptr++);
    if (*ptr == ',') {
1036
        int64_t end_pts = pkt->pts + display_duration;
1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049
        int sc = matroska->time_scale * pkt->pts / 10000000;
        int ec = matroska->time_scale * end_pts  / 10000000;
        int sh, sm, ss, eh, em, es, len;
        sh = sc/360000;  sc -= 360000*sh;
        sm = sc/  6000;  sc -=   6000*sm;
        ss = sc/   100;  sc -=    100*ss;
        eh = ec/360000;  ec -= 360000*eh;
        em = ec/  6000;  ec -=   6000*em;
        es = ec/   100;  ec -=    100*es;
        *ptr++ = '\0';
        len = 50 + end-ptr + FF_INPUT_BUFFER_PADDING_SIZE;
        if (!(line = av_malloc(len)))
            return;
1050
        snprintf(line,len,"Dialogue: %s,%d:%02d:%02d.%02d,%d:%02d:%02d.%02d,%s\r\n",
1051 1052 1053 1054 1055 1056 1057
                 layer, sh, sm, ss, sc, eh, em, es, ec, ptr);
        av_free(pkt->data);
        pkt->data = line;
        pkt->size = strlen(line);
    }
}

1058
/**
 * Append the payload of packet in to packet out, then dispose of in.
 *
 * @return 0 on success (in has been destructed and freed),
 *         AVERROR(ENOMEM) when out could not be enlarged (both packets are
 *         left as they were)
 */
static int matroska_merge_packets(AVPacket *out, AVPacket *in)
{
    const int merged_size = out->size + in->size;
    uint8_t *grown = av_realloc(out->data, merged_size);

    if (grown == NULL)
        return AVERROR(ENOMEM);

    memcpy(grown + out->size, in->data, in->size);
    out->data = grown;
    out->size = merged_size;

    av_destruct_packet(in);
    av_free(in);
    return 0;
}

1071
static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
1072
                                 AVDictionary **metadata, char *prefix)
1073 1074
{
    MatroskaTag *tags = list->elem;
1075 1076
    char key[1024];
    int i;
1077 1078

    for (i=0; i < list->nb_elem; i++) {
1079
        const char *lang = strcmp(tags[i].lang, "und") ? tags[i].lang : NULL;
1080 1081 1082 1083 1084

        if (!tags[i].name) {
            av_log(s, AV_LOG_WARNING, "Skipping invalid tag with no TagName.\n");
            continue;
        }
1085 1086
        if (prefix)  snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name);
        else         av_strlcpy(key, tags[i].name, sizeof(key));
1087
        if (tags[i].def || !lang) {
1088
        av_dict_set(metadata, key, tags[i].string, 0);
1089
        if (tags[i].sub.nb_elem)
1090
            matroska_convert_tag(s, &tags[i].sub, metadata, key);
1091 1092 1093 1094
        }
        if (lang) {
            av_strlcat(key, "-", sizeof(key));
            av_strlcat(key, lang, sizeof(key));
1095
            av_dict_set(metadata, key, tags[i].string, 0);
1096 1097 1098
            if (tags[i].sub.nb_elem)
                matroska_convert_tag(s, &tags[i].sub, metadata, key);
        }
1099
    }
A
Anton Khirnov 已提交
1100
    ff_metadata_conv(metadata, NULL, ff_mkv_metadata_conv);
1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112
}

/**
 * Attach every parsed tag group to the metadata of its target.
 *
 * The target is chosen from the first non-zero UID in the order attachment,
 * chapter, track; the tags then go to every matching attachment stream,
 * chapter or track stream. Tag groups without any target UID are exported to
 * the file-level metadata, using the target type as key prefix.
 */
static void matroska_convert_tags(AVFormatContext *s)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    MatroskaTags *tags = matroska->tags.elem;
    int i, j;

    for (i = 0; i < matroska->tags.nb_elem; i++) {
        MatroskaTags *cur = &tags[i];

        if (cur->target.attachuid) {
            MatroskaAttachement *att = matroska->attachments.elem;
            for (j = 0; j < matroska->attachments.nb_elem; j++) {
                if (att[j].uid != cur->target.attachuid || !att[j].stream)
                    continue;
                matroska_convert_tag(s, &cur->tag,
                                     &att[j].stream->metadata, NULL);
            }
        } else if (cur->target.chapteruid) {
            MatroskaChapter *chap = matroska->chapters.elem;
            for (j = 0; j < matroska->chapters.nb_elem; j++) {
                if (chap[j].uid != cur->target.chapteruid || !chap[j].chapter)
                    continue;
                matroska_convert_tag(s, &cur->tag,
                                     &chap[j].chapter->metadata, NULL);
            }
        } else if (cur->target.trackuid) {
            MatroskaTrack *trk = matroska->tracks.elem;
            for (j = 0; j < matroska->tracks.nb_elem; j++) {
                if (trk[j].uid != cur->target.trackuid || !trk[j].stream)
                    continue;
                matroska_convert_tag(s, &cur->tag,
                                     &trk[j].stream->metadata, NULL);
            }
        } else {
            matroska_convert_tag(s, &cur->tag, &s->metadata,
                                 cur->target.type);
        }
    }
}

1137
/**
 * Seek to the element referenced by one seekhead entry, parse it, and return
 * to the position the stream had on entry.
 *
 * Entries that point to another seekhead or to a cluster, as well as
 * out-of-range indices, are ignored. level_up and current_id of the context
 * are restored before returning.
 *
 * @param idx index into matroska->seekhead
 * @return 0 when the entry was ignored or the seek failed,
 *         AVERROR_INVALIDDATA when the level stack is already full,
 *         otherwise the result of ebml_parse()
 */
static int matroska_parse_seekhead_entry(MatroskaDemuxContext *matroska, int idx)
{
    EbmlList *entries = &matroska->seekhead;
    MatroskaSeekhead *entry = entries->elem;
    uint32_t saved_level_up = matroska->level_up;
    int64_t return_pos = avio_tell(matroska->ctx->pb);
    uint32_t saved_id = matroska->current_id;
    MatroskaLevel dummy;
    int64_t target;
    int ret = 0;

    if (idx >= entries->nb_elem)
        return 0;
    if (entry[idx].id == MATROSKA_ID_SEEKHEAD ||
        entry[idx].id == MATROSKA_ID_CLUSTER)
        return 0;

    /* seek */
    target = entry[idx].pos + matroska->segment_start;
    if (avio_seek(matroska->ctx->pb, target, SEEK_SET) == target) {
        if (matroska->num_levels != EBML_MAX_DEPTH) {
            /* We don't want to lose our seekhead level, so we add
             * a dummy. This is a crude hack. */
            dummy.start  = 0;
            dummy.length = (uint64_t)-1;
            matroska->levels[matroska->num_levels] = dummy;
            matroska->num_levels++;
            matroska->current_id = 0;

            ret = ebml_parse(matroska, matroska_segment, matroska);

            /* Pop levels up to and including the dummy one. */
            while (matroska->num_levels) {
                matroska->num_levels--;
                if (matroska->levels[matroska->num_levels].length == (uint64_t)-1)
                    break;
            }
        } else {
            av_log(matroska->ctx, AV_LOG_INFO,
                   "Max EBML element depth (%d) reached, "
                   "cannot parse further.\n", EBML_MAX_DEPTH);
            ret = AVERROR_INVALIDDATA;
        }
    }

    /* seek back */
    avio_seek(matroska->ctx->pb, return_pos, SEEK_SET);
    matroska->level_up   = saved_level_up;
    matroska->current_id = saved_id;

    return ret;
}

/**
 * Parse the elements referenced by the seekhead that lie beyond the current
 * read position.
 *
 * Does nothing when the input is not seekable or AVFMT_FLAG_IGNIDX is set.
 * Cues are not parsed here; their presence is only recorded in
 * cues_parsing_deferred. Processing stops at the first entry that fails to
 * parse.
 */
static void matroska_execute_seekhead(MatroskaDemuxContext *matroska)
{
    EbmlList *entries = &matroska->seekhead;
    MatroskaSeekhead *entry = entries->elem;
    int64_t before_pos = avio_tell(matroska->ctx->pb);
    int n;

    /* No seeking in the streaming case, or when the index is to be ignored. */
    if (!matroska->ctx->pb->seekable)
        return;
    if (matroska->ctx->flags & AVFMT_FLAG_IGNIDX)
        return;

    for (n = 0; n < entries->nb_elem; n++) {
        if (entry[n].pos <= before_pos) {
            /* Not beyond the position we started from: skip. */
        } else if (entry[n].id == MATROSKA_ID_CUES) {
            /* Defer cues parsing until cue data is actually needed. */
            matroska->cues_parsing_deferred = 1;
        } else if (matroska_parse_seekhead_entry(matroska, n) < 0) {
            break;
        }
    }
}

/**
 * Parse the cues referenced by the seekhead and add them as keyframe index
 * entries to the streams of the tracks they refer to.
 *
 * If the first cue time exceeds 1E14 / time_scale, the index is treated as
 * broken and all cue times are divided by time_scale.
 */
static void matroska_parse_cues(MatroskaDemuxContext *matroska) {
    EbmlList *seekhead_list = &matroska->seekhead;
    MatroskaSeekhead *seekhead = seekhead_list->elem;
    EbmlList *index_list;
    MatroskaIndex *index;
    int index_scale = 1;
    int i, j;

    /* Locate the seekhead entry describing the cues. */
    i = 0;
    while (i < seekhead_list->nb_elem && seekhead[i].id != MATROSKA_ID_CUES)
        i++;
    assert(i <= seekhead_list->nb_elem);

    matroska_parse_seekhead_entry(matroska, i);

    index_list = &matroska->index;
    index = index_list->elem;
    if (index_list->nb_elem) {
        if (index[0].time > 1E14/matroska->time_scale) {
            av_log(matroska->ctx, AV_LOG_WARNING, "Working around broken index.\n");
            index_scale = matroska->time_scale;
        }
    }

    for (i = 0; i < index_list->nb_elem; i++) {
        MatroskaIndexPos *pos = index[i].pos.elem;
        int nb_pos = index[i].pos.nb_elem;

        for (j = 0; j < nb_pos; j++) {
            MatroskaTrack *track = matroska_find_track_by_num(matroska, pos[j].track);
            if (!track || !track->stream)
                continue;
            av_add_index_entry(track->stream,
                               pos[j].pos + matroska->segment_start,
                               index[i].time/index_scale, 0, 0,
                               AVINDEX_KEYFRAME);
        }
    }
}

1251
/**
 * Derive a profile number from a codec ID string.
 *
 * @return 1, 2 or 3 for the first of "MAIN", "LC", "SSR" that occurs in
 *         codec_id, or 4 when none of them does
 */
static int matroska_aac_profile(char *codec_id)
{
    static const char * const aac_profiles[] = { "MAIN", "LC", "SSR" };
    int profile = 0;

    while (profile < FF_ARRAY_ELEMS(aac_profiles) &&
           !strstr(codec_id, aac_profiles[profile]))
        profile++;

    return profile + 1;
}

1262
static int matroska_aac_sri(int samplerate)
1263 1264 1265
{
    int sri;

1266 1267
    for (sri=0; sri<FF_ARRAY_ELEMS(avpriv_mpeg4audio_sample_rates); sri++)
        if (avpriv_mpeg4audio_sample_rates[sri] == samplerate)
1268 1269 1270 1271
            break;
    return sri;
}

1272
/**
 * Read the EBML header and the segment, then create the AVStreams.
 *
 * Steps, in order:
 *  1. parse and validate the EBML header (version, max sizes, doctype
 *     version); an unknown doctype only triggers a warning;
 *  2. parse the segment and the elements referenced by the seekhead;
 *  3. for every audio/video/subtitle track that has a codec ID: undo the
 *     content encoding of the codec private data if required, map the
 *     codec ID, build codec-specific extradata and fill in a new stream;
 *  4. export every complete attachment as an attachment stream;
 *  5. export chapters whose start time increases strictly;
 *  6. convert the tags to metadata.
 *
 * @param s  format context; s->priv_data is the MatroskaDemuxContext
 * @param ap not used by this function
 * @return 0 on success, a negative AVERROR code on failure
 */
static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    EbmlList *attachements_list = &matroska->attachments;
    MatroskaAttachement *attachements;
    EbmlList *chapters_list = &matroska->chapters;
    MatroskaChapter *chapters;
    MatroskaTrack *tracks;
    uint64_t max_start = 0;
    Ebml ebml = { 0 };
    AVStream *st;
    int i, j, res;

    matroska->ctx = s;

    /* First read the EBML header. */
    if (ebml_parse(matroska, ebml_syntax, &ebml)
        || ebml.version > EBML_VERSION       || ebml.max_size > sizeof(uint64_t)
        || ebml.id_length > sizeof(uint32_t) || ebml.doctype_version > 2) {
        av_log(matroska->ctx, AV_LOG_ERROR,
               "EBML header using unsupported features\n"
               "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n",
               ebml.version, ebml.doctype, ebml.doctype_version);
        ebml_free(ebml_syntax, &ebml);
        return AVERROR_PATCHWELCOME;
    }
    for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++)
        if (!strcmp(ebml.doctype, matroska_doctypes[i]))
            break;
    if (i >= FF_ARRAY_ELEMS(matroska_doctypes)) {
        av_log(s, AV_LOG_WARNING, "Unknown EBML doctype '%s'\n", ebml.doctype);
    }
    ebml_free(ebml_syntax, &ebml);

    /* The next thing is a segment. */
    if ((res = ebml_parse(matroska, matroska_segments, matroska)) < 0)
        return res;
    matroska_execute_seekhead(matroska);

    if (!matroska->time_scale)
        matroska->time_scale = 1000000;
    if (matroska->duration)
        matroska->ctx->duration = matroska->duration * matroska->time_scale
                                  * 1000 / AV_TIME_BASE;
    av_dict_set(&s->metadata, "title", matroska->title, 0);

    tracks = matroska->tracks.elem;
    for (i=0; i < matroska->tracks.nb_elem; i++) {
        MatroskaTrack *track = &tracks[i];
        enum CodecID codec_id = CODEC_ID_NONE;
        /* Use the encodings of the current track, not those of the first
         * entry of the track array. */
        EbmlList *encodings_list = &track->encodings;
        MatroskaTrackEncoding *encodings = encodings_list->elem;
        uint8_t *extradata = NULL;
        int extradata_size = 0;
        int extradata_offset = 0;
        AVIOContext b;

        /* Apply some sanity checks. */
        if (track->type != MATROSKA_TRACK_TYPE_VIDEO &&
            track->type != MATROSKA_TRACK_TYPE_AUDIO &&
            track->type != MATROSKA_TRACK_TYPE_SUBTITLE) {
            av_log(matroska->ctx, AV_LOG_INFO,
                   "Unknown or unsupported track type %"PRIu64"\n",
                   track->type);
            continue;
        }
        if (track->codec_id == NULL)
            continue;

        if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
            /* Only derive a duration from a usable frame rate; dividing by
             * a zero frame rate does not yield a representable duration. */
            if (!track->default_duration && track->video.frame_rate > 0)
                track->default_duration = 1000000000/track->video.frame_rate;
            if (!track->video.display_width)
                track->video.display_width = track->video.pixel_width;
            if (!track->video.display_height)
                track->video.display_height = track->video.pixel_height;
        } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
            if (!track->audio.out_samplerate)
                track->audio.out_samplerate = track->audio.samplerate;
        }
        if (encodings_list->nb_elem > 1) {
            av_log(matroska->ctx, AV_LOG_ERROR,
                   "Multiple combined encodings not supported");
        } else if (encodings_list->nb_elem == 1) {
            if (encodings[0].type ||
                (encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP &&
#if CONFIG_ZLIB
                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_ZLIB &&
#endif
#if CONFIG_BZLIB
                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_BZLIB &&
#endif
                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_LZO)) {
                encodings[0].scope = 0;
                av_log(matroska->ctx, AV_LOG_ERROR,
                       "Unsupported encoding type");
            } else if (track->codec_priv.size && encodings[0].scope&2) {
                /* Scope bit 2 set: the codec private data is encoded too. */
                uint8_t *codec_priv = track->codec_priv.data;
                int offset = matroska_decode_buffer(&track->codec_priv.data,
                                                    &track->codec_priv.size,
                                                    track);
                if (offset < 0) {
                    track->codec_priv.data = NULL;
                    track->codec_priv.size = 0;
                    av_log(matroska->ctx, AV_LOG_ERROR,
                           "Failed to decode codec private data\n");
                } else if (offset > 0) {
                    /* Header stripping: prepend the stripped bytes. The
                     * track keeps its original buffer if this fails. */
                    uint8_t *merged = av_malloc(track->codec_priv.size + offset);
                    if (!merged)
                        return AVERROR(ENOMEM);
                    memcpy(merged,
                           encodings[0].compression.settings.data, offset);
                    memcpy(merged+offset, codec_priv,
                           track->codec_priv.size);
                    track->codec_priv.data = merged;
                    track->codec_priv.size += offset;
                }
                if (codec_priv != track->codec_priv.data)
                    av_free(codec_priv);
            }
        }

        /* Map the Matroska codec ID string (prefix match) to a CodecID. */
        for(j=0; ff_mkv_codec_tags[j].id != CODEC_ID_NONE; j++){
            if(!strncmp(ff_mkv_codec_tags[j].str, track->codec_id,
                        strlen(ff_mkv_codec_tags[j].str))){
                codec_id= ff_mkv_codec_tags[j].id;
                break;
            }
        }

        st = track->stream = avformat_new_stream(s, NULL);
        if (st == NULL)
            return AVERROR(ENOMEM);

        if (!strcmp(track->codec_id, "V_MS/VFW/FOURCC")
            && track->codec_priv.size >= 40
            && track->codec_priv.data != NULL) {
            track->ms_compat = 1;
            track->video.fourcc = AV_RL32(track->codec_priv.data + 16);
            codec_id = ff_codec_get_id(ff_codec_bmp_tags, track->video.fourcc);
            extradata_offset = 40;
        } else if (!strcmp(track->codec_id, "A_MS/ACM")
                   && track->codec_priv.size >= 14
                   && track->codec_priv.data != NULL) {
            int ret;
            /* This context is only read from, so the flag argument must be
             * 0 as for the other read-only context further down (the
             * context that is written to passes 1). */
            ffio_init_context(&b, track->codec_priv.data, track->codec_priv.size,
                          0, NULL, NULL, NULL, NULL);
            ret = ff_get_wav_header(&b, st->codec, track->codec_priv.size);
            if (ret < 0)
                return ret;
            codec_id = st->codec->codec_id;
            extradata_offset = FFMIN(track->codec_priv.size, 18);
        } else if (!strcmp(track->codec_id, "V_QUICKTIME")
                   && (track->codec_priv.size >= 86)
                   && (track->codec_priv.data != NULL)) {
            track->video.fourcc = AV_RL32(track->codec_priv.data);
            codec_id=ff_codec_get_id(codec_movvideo_tags, track->video.fourcc);
        } else if (codec_id == CODEC_ID_PCM_S16BE) {
            switch (track->audio.bitdepth) {
            case  8:  codec_id = CODEC_ID_PCM_U8;     break;
            case 24:  codec_id = CODEC_ID_PCM_S24BE;  break;
            case 32:  codec_id = CODEC_ID_PCM_S32BE;  break;
            }
        } else if (codec_id == CODEC_ID_PCM_S16LE) {
            switch (track->audio.bitdepth) {
            case  8:  codec_id = CODEC_ID_PCM_U8;     break;
            case 24:  codec_id = CODEC_ID_PCM_S24LE;  break;
            case 32:  codec_id = CODEC_ID_PCM_S32LE;  break;
            }
        } else if (codec_id==CODEC_ID_PCM_F32LE && track->audio.bitdepth==64) {
            codec_id = CODEC_ID_PCM_F64LE;
        } else if (codec_id == CODEC_ID_AAC && !track->codec_priv.size) {
            /* No codec private data: synthesize extradata from the profile
             * named in the codec ID, the sample rate index and channels. */
            int profile = matroska_aac_profile(track->codec_id);
            int sri = matroska_aac_sri(track->audio.samplerate);
            extradata = av_malloc(5);
            if (extradata == NULL)
                return AVERROR(ENOMEM);
            extradata[0] = (profile << 3) | ((sri&0x0E) >> 1);
            extradata[1] = ((sri&0x01) << 7) | (track->audio.channels<<3);
            if (strstr(track->codec_id, "SBR")) {
                sri = matroska_aac_sri(track->audio.out_samplerate);
                extradata[2] = 0x56;
                extradata[3] = 0xE5;
                extradata[4] = 0x80 | (sri<<3);
                extradata_size = 5;
            } else
                extradata_size = 2;
        } else if (codec_id == CODEC_ID_TTA) {
            /* Write a 30-byte extradata block starting with "TTA1". */
            extradata_size = 30;
            extradata = av_mallocz(extradata_size);
            if (extradata == NULL)
                return AVERROR(ENOMEM);
            ffio_init_context(&b, extradata, extradata_size, 1,
                          NULL, NULL, NULL, NULL);
            avio_write(&b, "TTA1", 4);
            avio_wl16(&b, 1);
            avio_wl16(&b, track->audio.channels);
            avio_wl16(&b, track->audio.bitdepth);
            avio_wl32(&b, track->audio.out_samplerate);
            avio_wl32(&b, matroska->ctx->duration * track->audio.out_samplerate);
        } else if (codec_id == CODEC_ID_RV10 || codec_id == CODEC_ID_RV20 ||
                   codec_id == CODEC_ID_RV30 || codec_id == CODEC_ID_RV40) {
            extradata_offset = 26;
        } else if (codec_id == CODEC_ID_RA_144) {
            track->audio.out_samplerate = 8000;
            track->audio.channels = 1;
        } else if (codec_id == CODEC_ID_RA_288 || codec_id == CODEC_ID_COOK ||
                   codec_id == CODEC_ID_ATRAC3 || codec_id == CODEC_ID_SIPR) {
            /* Read the packetization parameters from the codec private
             * data. */
            int flavor;
            ffio_init_context(&b, track->codec_priv.data,track->codec_priv.size,
                          0, NULL, NULL, NULL, NULL);
            avio_skip(&b, 22);
            flavor                       = avio_rb16(&b);
            track->audio.coded_framesize = avio_rb32(&b);
            avio_skip(&b, 12);
            track->audio.sub_packet_h    = avio_rb16(&b);
            track->audio.frame_size      = avio_rb16(&b);
            track->audio.sub_packet_size = avio_rb16(&b);
            track->audio.buf = av_malloc(track->audio.frame_size * track->audio.sub_packet_h);
            if (codec_id == CODEC_ID_RA_288) {
                st->codec->block_align = track->audio.coded_framesize;
                track->codec_priv.size = 0;
            } else {
                if (codec_id == CODEC_ID_SIPR && flavor < 4) {
                    const int sipr_bit_rate[4] = { 6504, 8496, 5000, 16000 };
                    track->audio.sub_packet_size = ff_sipr_subpk_size[flavor];
                    st->codec->bit_rate = sipr_bit_rate[flavor];
                }
                st->codec->block_align = track->audio.sub_packet_size;
                extradata_offset = 78;
            }
        }
        /* May become <= 0; the extradata copy below checks for that. */
        track->codec_priv.size -= extradata_offset;

        if (codec_id == CODEC_ID_NONE)
            av_log(matroska->ctx, AV_LOG_INFO,
                   "Unknown/unsupported CodecID %s.\n", track->codec_id);

        if (track->time_scale < 0.01)
            track->time_scale = 1.0;
        av_set_pts_info(st, 64, matroska->time_scale*track->time_scale, 1000*1000*1000); /* 64 bit pts in ns */

        st->codec->codec_id = codec_id;
        st->start_time = 0;
        if (strcmp(track->language, "und"))
            av_dict_set(&st->metadata, "language", track->language, 0);
        av_dict_set(&st->metadata, "title", track->name, 0);

        if (track->flag_default)
            st->disposition |= AV_DISPOSITION_DEFAULT;
        if (track->flag_forced)
            st->disposition |= AV_DISPOSITION_FORCED;

        if (track->default_duration)
            av_reduce(&st->codec->time_base.num, &st->codec->time_base.den,
                      track->default_duration, 1000000000, 30000);

        /* Extradata may already have been set above (ff_get_wav_header()
         * receives st->codec). */
        if (!st->codec->extradata) {
            if(extradata){
                st->codec->extradata = extradata;
                st->codec->extradata_size = extradata_size;
            } else if(track->codec_priv.data && track->codec_priv.size > 0){
                st->codec->extradata = av_mallocz(track->codec_priv.size +
                                                  FF_INPUT_BUFFER_PADDING_SIZE);
                if(st->codec->extradata == NULL)
                    return AVERROR(ENOMEM);
                st->codec->extradata_size = track->codec_priv.size;
                memcpy(st->codec->extradata,
                       track->codec_priv.data + extradata_offset,
                       track->codec_priv.size);
            }
        }

        if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
            st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
            st->codec->codec_tag  = track->video.fourcc;
            st->codec->width  = track->video.pixel_width;
            st->codec->height = track->video.pixel_height;
            av_reduce(&st->sample_aspect_ratio.num,
                      &st->sample_aspect_ratio.den,
                      st->codec->height * track->video.display_width,
                      st->codec-> width * track->video.display_height,
                      255);
            if (st->codec->codec_id != CODEC_ID_H264)
                st->need_parsing = AVSTREAM_PARSE_HEADERS;
            if (track->default_duration)
                st->avg_frame_rate = av_d2q(1000000000.0/track->default_duration, INT_MAX);
        } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
            st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
            st->codec->sample_rate = track->audio.out_samplerate;
            st->codec->channels = track->audio.channels;
            if (st->codec->codec_id != CODEC_ID_AAC)
                st->need_parsing = AVSTREAM_PARSE_HEADERS;
        } else if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE) {
            st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
        }
    }

    /* Export each complete attachment as a stream whose extradata holds the
     * attached file. */
    attachements = attachements_list->elem;
    for (j=0; j<attachements_list->nb_elem; j++) {
        if (!(attachements[j].filename && attachements[j].mime &&
              attachements[j].bin.data && attachements[j].bin.size > 0)) {
            av_log(matroska->ctx, AV_LOG_ERROR, "incomplete attachment\n");
        } else {
            AVStream *st = avformat_new_stream(s, NULL);
            if (st == NULL)
                break;
            av_dict_set(&st->metadata, "filename",attachements[j].filename, 0);
            av_dict_set(&st->metadata, "mimetype", attachements[j].mime, 0);
            st->codec->codec_id = CODEC_ID_NONE;
            st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT;
            st->codec->extradata  = av_malloc(attachements[j].bin.size);
            if(st->codec->extradata == NULL)
                break;
            st->codec->extradata_size = attachements[j].bin.size;
            memcpy(st->codec->extradata, attachements[j].bin.data, attachements[j].bin.size);

            for (i=0; ff_mkv_mime_tags[i].id != CODEC_ID_NONE; i++) {
                if (!strncmp(ff_mkv_mime_tags[i].str, attachements[j].mime,
                             strlen(ff_mkv_mime_tags[i].str))) {
                    st->codec->codec_id = ff_mkv_mime_tags[i].id;
                    break;
                }
            }
            attachements[j].stream = st;
        }
    }

    /* Export chapters; a chapter is skipped unless it starts after the
     * previously exported one. */
    chapters = chapters_list->elem;
    for (i=0; i<chapters_list->nb_elem; i++)
        if (chapters[i].start != AV_NOPTS_VALUE && chapters[i].uid
            && (max_start==0 || chapters[i].start > max_start)) {
            chapters[i].chapter =
            avpriv_new_chapter(s, chapters[i].uid, (AVRational){1, 1000000000},
                           chapters[i].start, chapters[i].end,
                           chapters[i].title);
            /* Do not touch the metadata of a chapter that was not created. */
            if (chapters[i].chapter)
                av_dict_set(&chapters[i].chapter->metadata,
                                 "title", chapters[i].title, 0);
            max_start = chapters[i].start;
        }

    matroska_convert_tags(s);

    return 0;
}

1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625
/*
 * Hand the oldest queued packet over to an application-supplied AVPacket
 * struct and remove it from the queue.
 * Returns 0 on success or -1 when the queue is empty.
 */
static int matroska_deliver_packet(MatroskaDemuxContext *matroska,
                                   AVPacket *pkt)
{
    int remaining;

    if (matroska->num_packets <= 0)
        return -1;

    /* Copy the packet out, then release its queue slot. */
    *pkt = *matroska->packets[0];
    av_free(matroska->packets[0]);

    remaining = matroska->num_packets - 1;
    if (remaining > 0) {
        void *shrunk;

        memmove(&matroska->packets[0], &matroska->packets[1],
                remaining * sizeof(AVPacket *));
        /* Shrinking is best effort: keep the old array if it fails. */
        shrunk = av_realloc(matroska->packets,
                            remaining * sizeof(AVPacket *));
        if (shrunk)
            matroska->packets = shrunk;
    } else {
        av_freep(&matroska->packets);
    }
    matroska->num_packets = remaining;

    return 0;
}

/*
 * Drop every packet still waiting in the internal queue and release
 * the queue array itself.
 */
static void matroska_clear_queue(MatroskaDemuxContext *matroska)
{
    int idx;

    if (!matroska->packets)
        return;

    for (idx = 0; idx < matroska->num_packets; idx++) {
        AVPacket *queued = matroska->packets[idx];

        av_free_packet(queued);
        av_free(queued);
    }

    av_freep(&matroska->packets);
    matroska->num_packets = 0;
}

1659 1660
/*
 * Split one block payload into its laces and queue a packet for each lace.
 *
 * data, size    block payload, starting at the EBML-coded track number
 * pos           value stored in pkt->pos of every packet produced
 * cluster_time  cluster timecode, (uint64_t)-1 when there is none
 * duration      block duration; AV_NOPTS_VALUE selects the track default
 * is_keyframe   keyframe flag, or -1 to derive it from the block flags
 * cluster_pos   position recorded in the stream index for keyframes
 *
 * Returns 0 on success or when the block is skipped, a negative value on
 * error (invalid stream/size, broken lacing data, allocation failure).
 */
static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
                                int size, int64_t pos, uint64_t cluster_time,
                                uint64_t duration, int is_keyframe,
                                int64_t cluster_pos)
{
    uint64_t timecode = AV_NOPTS_VALUE;
    MatroskaTrack *track;
    int res = 0;
    AVStream *st;
    AVPacket *pkt;
    int16_t block_time;
    uint32_t *lace_size = NULL;
    int n, flags, laces = 0;
    uint64_t num;

    if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0) {
        av_log(matroska->ctx, AV_LOG_ERROR, "EBML block data error\n");
        /* res is still 0 here: the block is dropped without reporting
         * a failure to the caller. */
        return res;
    }
    data += n;
    size -= n;

    track = matroska_find_track_by_num(matroska, num);
    if (size <= 3 || !track || !track->stream) {
        av_log(matroska->ctx, AV_LOG_INFO,
               "Invalid stream %"PRIu64" or size %u\n", num, size);
        return AVERROR_INVALIDDATA;
    }
    st = track->stream;
    if (st->discard >= AVDISCARD_ALL)
        return res;
    if (duration == AV_NOPTS_VALUE)
        duration = track->default_duration / matroska->time_scale;

    /* 16-bit signed timecode relative to the cluster, then one flags byte */
    block_time = AV_RB16(data);
    data += 2;
    flags = *data++;
    size -= 3;
    if (is_keyframe == -1)
        is_keyframe = flags & 0x80 ? AV_PKT_FLAG_KEY : 0;

    /* Only compute a timecode when cluster_time + block_time cannot
     * become negative. */
    if (cluster_time != (uint64_t)-1
        && (block_time >= 0 || cluster_time >= -block_time)) {
        timecode = cluster_time + block_time;
        if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE
            && timecode < track->end_timecode)
            is_keyframe = 0;  /* overlapping subtitles are not key frame */
        if (is_keyframe)
            av_add_index_entry(st, cluster_pos, timecode, 0,0,AVINDEX_KEYFRAME);
        track->end_timecode = FFMAX(track->end_timecode, timecode+duration);
    }

    /* After a seek, drop non-subtitle blocks until a keyframe at or past
     * the seek target shows up. */
    if (matroska->skip_to_keyframe && track->type != MATROSKA_TRACK_TYPE_SUBTITLE) {
        if (!is_keyframe || timecode < matroska->skip_to_timecode)
            return res;
        matroska->skip_to_keyframe = 0;
    }

    switch ((flags & 0x06) >> 1) {
        case 0x0: /* no lacing */
            laces = 1;
            lace_size = av_mallocz(sizeof(*lace_size));
            if (!lace_size)
                return AVERROR(ENOMEM);
            lace_size[0] = size;
            break;

        case 0x1: /* Xiph lacing */
        case 0x2: /* fixed-size lacing */
        case 0x3: /* EBML lacing */
            assert(size>0); // size <=3 is checked before size-=3 above
            laces = (*data) + 1;
            data += 1;
            size -= 1;
            lace_size = av_mallocz(laces * sizeof(*lace_size));
            if (!lace_size)
                return AVERROR(ENOMEM);

            switch ((flags & 0x06) >> 1) {
                case 0x1: /* Xiph lacing */ {
                    uint8_t temp;
                    uint32_t total = 0;
                    /* each size is a run of bytes summed up; a byte other
                     * than 0xff ends the run */
                    for (n = 0; res == 0 && n < laces - 1; n++) {
                        while (1) {
                            if (size == 0) {
                                res = -1;
                                break;
                            }
                            temp = *data;
                            lace_size[n] += temp;
                            data += 1;
                            size -= 1;
                            if (temp != 0xff)
                                break;
                        }
                        total += lace_size[n];
                    }
                    /* the last lace takes whatever is left */
                    lace_size[n] = size - total;
                    break;
                }

                case 0x2: /* fixed-size lacing */
                    for (n = 0; n < laces; n++)
                        lace_size[n] = size / laces;
                    break;

                case 0x3: /* EBML lacing */ {
                    uint32_t total;
                    n = matroska_ebmlnum_uint(matroska, data, size, &num);
                    if (n < 0) {
                        av_log(matroska->ctx, AV_LOG_INFO,
                               "EBML block data error\n");
                        /* report the failure instead of building packets
                         * from lace sizes that were never read */
                        res = n;
                        break;
                    }
                    data += n;
                    size -= n;
                    total = lace_size[0] = num;
                    /* following sizes are signed deltas to the previous one */
                    for (n = 1; res == 0 && n < laces - 1; n++) {
                        int64_t snum;
                        int r;
                        r = matroska_ebmlnum_sint(matroska, data, size, &snum);
                        if (r < 0) {
                            av_log(matroska->ctx, AV_LOG_INFO,
                                   "EBML block data error\n");
                            res = r;
                            break;
                        }
                        data += r;
                        size -= r;
                        lace_size[n] = lace_size[n - 1] + snum;
                        total += lace_size[n];
                    }
                    lace_size[laces - 1] = size - total;
                    break;
                }
            }
            break;
    }

    if (res == 0) {
        for (n = 0; n < laces; n++) {
            if ((st->codec->codec_id == CODEC_ID_RA_288 ||
                 st->codec->codec_id == CODEC_ID_COOK ||
                 st->codec->codec_id == CODEC_ID_SIPR ||
                 st->codec->codec_id == CODEC_ID_ATRAC3) &&
                 st->codec->block_align && track->audio.sub_packet_size) {
                int a = st->codec->block_align;
                int sps = track->audio.sub_packet_size;
                int cfs = track->audio.coded_framesize;
                int h = track->audio.sub_packet_h;
                int y = track->audio.sub_packet_cnt;
                int w = track->audio.frame_size;
                int x;

                /* NOTE(review): the copies below read from data without
                 * comparing against size/lace_size[n] - confirm that the
                 * header parsing guarantees these bounds. */
                if (!track->audio.pkt_cnt) {
                    if (track->audio.sub_packet_cnt == 0)
                        track->audio.buf_timecode = timecode;
                    if (st->codec->codec_id == CODEC_ID_RA_288)
                        for (x=0; x<h/2; x++)
                            memcpy(track->audio.buf+x*2*w+y*cfs,
                                   data+x*cfs, cfs);
                    else if (st->codec->codec_id == CODEC_ID_SIPR)
                        memcpy(track->audio.buf + y*w, data, w);
                    else
                        for (x=0; x<w/sps; x++)
                            memcpy(track->audio.buf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), data+x*sps, sps);

                    /* once h sub-packets are gathered, the buffer is
                     * emitted as h*w/a packets of a bytes each */
                    if (++track->audio.sub_packet_cnt >= h) {
                        if (st->codec->codec_id == CODEC_ID_SIPR)
                            ff_rm_reorder_sipr_data(track->audio.buf, h, w);
                        track->audio.sub_packet_cnt = 0;
                        track->audio.pkt_cnt = h*w / a;
                    }
                }
                while (track->audio.pkt_cnt) {
                    pkt = av_mallocz(sizeof(AVPacket));
                    if (!pkt) {
                        res = AVERROR(ENOMEM);
                        break;
                    }
                    if (av_new_packet(pkt, a) < 0) {
                        av_free(pkt);
                        res = AVERROR(ENOMEM);
                        break;
                    }
                    memcpy(pkt->data, track->audio.buf
                           + a * (h*w / a - track->audio.pkt_cnt--), a);
                    /* only the first packet of a batch carries the pts */
                    pkt->pts = track->audio.buf_timecode;
                    track->audio.buf_timecode = AV_NOPTS_VALUE;
                    pkt->pos = pos;
                    pkt->stream_index = st->index;
                    dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
                }
                if (res < 0)
                    break;
            } else {
                MatroskaTrackEncoding *encodings = track->encodings.elem;
                int offset = 0, pkt_size;
                uint8_t *pkt_data = data;

                /* Compare unsigned so that a lace size that wrapped around
                 * cannot pass as a negative int. */
                if (size < 0 || lace_size[n] > (uint32_t)size) {
                    av_log(matroska->ctx, AV_LOG_ERROR, "Invalid packet size\n");
                    break;
                }
                pkt_size = lace_size[n];

                if (encodings && encodings->scope & 1) {
                    offset = matroska_decode_buffer(&pkt_data,&pkt_size, track);
                    /* skip this lace, but still step over its bytes so the
                     * next lace is read from the right place */
                    if (offset < 0)
                        goto next_lace;
                }

                pkt = av_mallocz(sizeof(AVPacket));
                if (!pkt) {
                    if (pkt_data != data)
                        av_free(pkt_data);
                    res = AVERROR(ENOMEM);
                    break;
                }
                /* XXX: prevent data copy... */
                if (av_new_packet(pkt, pkt_size+offset) < 0) {
                    av_free(pkt);
                    if (pkt_data != data)
                        av_free(pkt_data);
                    res = AVERROR(ENOMEM);
                    break;
                }
                /* a positive offset means that many bytes of the encoding's
                 * settings data are put in front of the payload */
                if (offset)
                    memcpy (pkt->data, encodings->compression.settings.data, offset);
                memcpy (pkt->data+offset, pkt_data, pkt_size);

                /* pkt_data differs from data when the decoder handed back
                 * its own buffer */
                if (pkt_data != data)
                    av_free(pkt_data);

                if (n == 0)
                    pkt->flags = is_keyframe;
                pkt->stream_index = st->index;

                if (track->ms_compat)
                    pkt->dts = timecode;
                else
                    pkt->pts = timecode;
                pkt->pos = pos;
                if (st->codec->codec_id == CODEC_ID_TEXT)
                    pkt->convergence_duration = duration;
                else if (track->type != MATROSKA_TRACK_TYPE_SUBTITLE)
                    pkt->duration = duration;

                if (st->codec->codec_id == CODEC_ID_SSA)
                    matroska_fix_ass_packet(matroska, pkt, duration);

                /* SSA packets of the same stream sharing a timecode are
                 * merged into the previously queued packet */
                if (matroska->prev_pkt &&
                    timecode != AV_NOPTS_VALUE &&
                    matroska->prev_pkt->pts == timecode &&
                    matroska->prev_pkt->stream_index == st->index &&
                    st->codec->codec_id == CODEC_ID_SSA)
                    matroska_merge_packets(matroska->prev_pkt, pkt);
                else {
                    dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
                    matroska->prev_pkt = pkt;
                }
            }

next_lace:
            if (timecode != AV_NOPTS_VALUE)
                timecode = duration ? timecode + duration : AV_NOPTS_VALUE;
            data += lace_size[n];
            size -= lace_size[n];
        }
    }

    av_free(lace_size);
    return res;
}

1909
/*
 * Parse one cluster and pass each of its non-empty blocks to
 * matroska_parse_block().
 * Returns the last result obtained; a negative result also sets
 * matroska->done.
 */
static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
{
    MatroskaCluster cluster = { 0 };
    EbmlList *list = &cluster.blocks;
    MatroskaBlock *blocks;
    int64_t cluster_pos;
    int idx, ret;

    cluster_pos = avio_tell(matroska->ctx->pb);
    matroska->prev_pkt = NULL;
    /* the ID was already read: step back over its 4 bytes */
    if (matroska->current_id)
        cluster_pos -= 4;

    ret    = ebml_parse(matroska, matroska_clusters, &cluster);
    blocks = list->elem;

    for (idx = 0; idx < list->nb_elem && !ret; idx++) {
        MatroskaBlock *blk = &blocks[idx];
        int key;

        if (blk->bin.size <= 0 || !blk->bin.data)
            continue;

        if (blk->non_simple) {
            /* a block carrying a reference is not a keyframe */
            key = !blk->reference;
        } else {
            /* let matroska_parse_block() read the flag and pick the
             * duration itself */
            key = -1;
            blk->duration = AV_NOPTS_VALUE;
        }

        ret = matroska_parse_block(matroska,
                                   blk->bin.data, blk->bin.size,
                                   blk->bin.pos,  cluster.timecode,
                                   blk->duration, key,
                                   cluster_pos);
    }

    ebml_free(matroska_cluster, &cluster);
    if (ret < 0)
        matroska->done = 1;
    return ret;
}

1938
/*
 * Deliver the next queued packet, parsing further clusters while the
 * queue is empty.
 * Returns 0 when pkt was filled, AVERROR_EOF once the demuxer is marked
 * done, or the non-zero result of matroska_parse_cluster().
 */
static int matroska_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    int status;

    for (status = 0; status == 0; status = matroska_parse_cluster(matroska)) {
        if (matroska_deliver_packet(matroska, pkt) == 0)
            return 0;
        if (matroska->done)
            return AVERROR_EOF;
    }

    return status;
}

1952 1953
/*
 * Seek within stream stream_index to the index entry selected by
 * av_index_search_timestamp() for timestamp/flags.
 *
 * When the existing index does not cover the timestamp, clusters are
 * parsed from the last indexed position onwards until it does or until
 * parsing fails. Per-track audio reassembly state and end timecodes are
 * reset, and the read position may be moved back to an earlier entry for
 * the sake of a non-discarded subtitle track.
 *
 * Every path returns 0, including the ones where no index entry is found.
 */
static int matroska_read_seek(AVFormatContext *s, int stream_index,
                              int64_t timestamp, int flags)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    MatroskaTrack *tracks = matroska->tracks.elem;
    AVStream *st = s->streams[stream_index];
    int i, index, index_sub, index_min;

    /* Parse the CUES now since we need the index data to seek. */
    if (matroska->cues_parsing_deferred) {
        matroska_parse_cues(matroska);
        matroska->cues_parsing_deferred = 0;
    }

    if (!st->nb_index_entries)
        return 0;
    timestamp = FFMAX(timestamp, st->index_entries[0].timestamp);

    if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
        /* Not indexed yet: continue parsing from the last known entry;
         * matroska_parse_block() adds keyframe entries as it goes. */
        avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET);
        matroska->current_id = 0;
        while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
            matroska_clear_queue(matroska);
            if (matroska_parse_cluster(matroska) < 0)
                break;
        }
    }

    matroska_clear_queue(matroska);
    if (index < 0)
        return 0;

    index_min = index;
    for (i=0; i < matroska->tracks.nb_elem; i++) {
        tracks[i].audio.pkt_cnt = 0;
        tracks[i].audio.sub_packet_cnt = 0;
        tracks[i].audio.buf_timecode = AV_NOPTS_VALUE;
        tracks[i].end_timecode = 0;
        /* Tracks without a stream and discarded subtitle streams must not
         * influence the seek position. */
        if (tracks[i].type == MATROSKA_TRACK_TYPE_SUBTITLE
            && tracks[i].stream
            && tracks[i].stream->discard != AVDISCARD_ALL) {
            index_sub = av_index_search_timestamp(tracks[i].stream, st->index_entries[index].timestamp, AVSEEK_FLAG_BACKWARD);
            /* NOTE(review): index_sub is looked up in the subtitle stream's
             * index but applied to st->index_entries - confirm this is
             * intended; the range check keeps the access inside st's table. */
            if (index_sub >= 0
                && index_sub < st->nb_index_entries
                && st->index_entries[index_sub].pos < st->index_entries[index_min].pos
                && st->index_entries[index].timestamp - st->index_entries[index_sub].timestamp < 30000000000/matroska->time_scale)
                index_min = index_sub;
        }
    }

    avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET);
    matroska->current_id = 0;
    /* unless AVSEEK_FLAG_ANY is set, blocks are dropped until a keyframe
     * at or after the target timecode */
    matroska->skip_to_keyframe = !(flags & AVSEEK_FLAG_ANY);
    matroska->skip_to_timecode = st->index_entries[index].timestamp;
    matroska->done = 0;
    ff_update_cur_dts(s, st, st->index_entries[index].timestamp);
    return 0;
}

2009
/*
 * Release demuxer state: the packet queue, the buffers of audio tracks
 * and everything owned by the parsed segment. Always returns 0.
 */
static int matroska_read_close(AVFormatContext *s)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    MatroskaTrack *tracks = matroska->tracks.elem;
    int idx = 0;

    matroska_clear_queue(matroska);

    while (idx < matroska->tracks.nb_elem) {
        MatroskaTrack *trk = &tracks[idx++];

        if (trk->type == MATROSKA_TRACK_TYPE_AUDIO)
            av_free(trk->audio.buf);
    }

    ebml_free(matroska_segment, matroska);

    return 0;
}

2025
AVInputFormat ff_matroska_demuxer = {
2026 2027 2028 2029 2030 2031 2032 2033
    .name           = "matroska,webm",
    .long_name      = NULL_IF_CONFIG_SMALL("Matroska/WebM file format"),
    .priv_data_size = sizeof(MatroskaDemuxContext),
    .read_probe     = matroska_probe,
    .read_header    = matroska_read_header,
    .read_packet    = matroska_read_packet,
    .read_close     = matroska_read_close,
    .read_seek      = matroska_read_seek,
2034
};