matroskadec.c 69.7 KB
Newer Older
1
/*
2
 * Matroska file demuxer
D
Diego Biurrun 已提交
3
 * Copyright (c) 2003-2008 The FFmpeg Project
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
23
 * @file
24 25 26
 * Matroska file demuxer
 * by Ronald Bultje <rbultje@ronald.bitfreak.net>
 * with a little help from Moritz Bunkus <moritz@bunkus.org>
27
 * totally reworked by Aurelien Jacobs <aurel@gnuage.org>
D
Diego Biurrun 已提交
28
 * Specs available on the Matroska project page: http://www.matroska.org/.
29 30
 */

31
#include <stdio.h>
32
#include "avformat.h"
33
#include "internal.h"
34
#include "avio_internal.h"
35
/* For ff_codec_get_id(). */
36
#include "riff.h"
37
#include "isom.h"
38
#include "rm.h"
39
#include "matroska.h"
40
#include "libavcodec/mpeg4audio.h"
41
#include "libavutil/intfloat_readwrite.h"
42
#include "libavutil/intreadwrite.h"
43
#include "libavutil/avstring.h"
44
#include "libavutil/lzo.h"
45
#if CONFIG_ZLIB
46 47
#include <zlib.h>
#endif
48
#if CONFIG_BZLIB
49 50
#include <bzlib.h>
#endif
51

52 53 54 55 56 57 58 59 60 61
/* Kinds of syntax-table entries understood by the EBML parser. */
typedef enum {
    EBML_NONE,   /* known element whose payload is skipped */
    EBML_UINT,   /* unsigned integer, stored as uint64_t */
    EBML_FLOAT,  /* floating point value, stored as double */
    EBML_STR,    /* string, stored as a NUL-terminated char * */
    EBML_UTF8,   /* string, read the same way as EBML_STR */
    EBML_BIN,    /* binary payload, stored as EbmlBin */
    EBML_NEST,   /* master element; children are parsed with a nested table */
    EBML_PASS,   /* re-dispatch the same ID through another syntax table */
    EBML_STOP,   /* parsing stops when this ID is encountered */
    EBML_TYPE_COUNT /* number of types above; sizes per-type lookup arrays */
} EbmlType;

/*
 * One entry of an EBML syntax table. A table is an array of these entries
 * terminated by an entry whose id is 0.
 */
typedef const struct EbmlSyntax {
    uint32_t id;            /* element ID this entry matches */
    EbmlType type;          /* how the element payload is handled */
    int list_elem_size;     /* if non-zero, the target is an EbmlList and a
                             * new element of this size is appended to it */
    int data_offset;        /* byte offset of the target inside the struct
                             * handed to the parser */
    union {
        uint64_t    u;      /* default for EBML_UINT */
        double      f;      /* default for EBML_FLOAT */
        const char *s;      /* default for EBML_STR / EBML_UTF8 */
        const struct EbmlSyntax *n; /* nested table for EBML_NEST / EBML_PASS */
    } def;
} EbmlSyntax;

/* Growable array of fixed-size elements filled in by the EBML parser. */
typedef struct {
    int nb_elem;    /* number of elements stored in elem */
    void *elem;     /* heap block holding nb_elem consecutive elements */
} EbmlList;

/* Payload of a binary (EBML_BIN) element. */
typedef struct {
    int      size;  /* payload size in bytes */
    uint8_t *data;  /* heap copy of the payload */
    int64_t  pos;   /* stream position at which the payload started */
} EbmlBin;

89 90 91 92 93 94 95 96
/* Values read from the EBML header (filled through the ebml_header table). */
typedef struct {
    uint64_t version;           /* EBML_ID_EBMLREADVERSION */
    uint64_t max_size;          /* EBML_ID_EBMLMAXSIZELENGTH */
    uint64_t id_length;         /* EBML_ID_EBMLMAXIDLENGTH */
    char    *doctype;           /* EBML_ID_DOCTYPE */
    uint64_t doctype_version;   /* EBML_ID_DOCTYPEREADVERSION */
} Ebml;

97 98 99 100
/* Compression description of a track content encoding. */
typedef struct {
    uint64_t algo;      /* MATROSKA_ID_ENCODINGCOMPALGO */
    EbmlBin  settings;  /* MATROSKA_ID_ENCODINGCOMPSETTINGS */
} MatroskaTrackCompression;
101

102 103 104 105 106
/* One content encoding entry of a track. */
typedef struct {
    uint64_t scope;     /* MATROSKA_ID_ENCODINGSCOPE */
    uint64_t type;      /* MATROSKA_ID_ENCODINGTYPE */
    MatroskaTrackCompression compression; /* MATROSKA_ID_ENCODINGCOMPRESSION */
} MatroskaTrackEncoding;
107

108 109 110 111 112 113 114 115
/* Video-specific track settings (filled through matroska_track_video). */
typedef struct {
    double   frame_rate;        /* MATROSKA_ID_VIDEOFRAMERATE */
    uint64_t display_width;     /* MATROSKA_ID_VIDEODISPLAYWIDTH */
    uint64_t display_height;    /* MATROSKA_ID_VIDEODISPLAYHEIGHT */
    uint64_t pixel_width;       /* MATROSKA_ID_VIDEOPIXELWIDTH */
    uint64_t pixel_height;      /* MATROSKA_ID_VIDEOPIXELHEIGHT */
    uint64_t fourcc;            /* MATROSKA_ID_VIDEOCOLORSPACE */
} MatroskaTrackVideo;
116

117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
/* Audio-specific track settings (filled through matroska_track_audio). */
typedef struct {
    double   samplerate;        /* MATROSKA_ID_AUDIOSAMPLINGFREQ */
    double   out_samplerate;    /* MATROSKA_ID_AUDIOOUTSAMPLINGFREQ */
    uint64_t bitdepth;          /* MATROSKA_ID_AUDIOBITDEPTH */
    uint64_t channels;          /* MATROSKA_ID_AUDIOCHANNELS */

    /* real audio header (extracted from extradata) */
    int      coded_framesize;
    int      sub_packet_h;
    int      frame_size;
    int      sub_packet_size;
    int      sub_packet_cnt;
    int      pkt_cnt;
    uint8_t *buf;
} MatroskaTrackAudio;
132

133 134
typedef struct {
    uint64_t num;
135
    uint64_t uid;
136
    uint64_t type;
137
    char    *name;
138 139 140
    char    *codec_id;
    EbmlBin  codec_priv;
    char    *language;
141
    double time_scale;
142
    uint64_t default_duration;
143
    uint64_t flag_default;
144
    uint64_t flag_forced;
145 146 147
    MatroskaTrackVideo video;
    MatroskaTrackAudio audio;
    EbmlList encodings;
148 149

    AVStream *stream;
150
    int64_t end_timecode;
151
    int ms_compat;
152 153
} MatroskaTrack;

154
typedef struct {
155
    uint64_t uid;
156 157 158
    char *filename;
    char *mime;
    EbmlBin bin;
159 160

    AVStream *stream;
161 162
} MatroskaAttachement;

163 164 165 166 167
typedef struct {
    uint64_t start;
    uint64_t end;
    uint64_t uid;
    char    *title;
168 169

    AVChapter *chapter;
170 171
} MatroskaChapter;

172 173 174 175 176 177 178 179 180 181
/* One cue track position (filled through matroska_index_pos). */
typedef struct {
    uint64_t track;     /* MATROSKA_ID_CUETRACK */
    uint64_t pos;       /* MATROSKA_ID_CUECLUSTERPOSITION */
} MatroskaIndexPos;

/* One cue point (filled through matroska_index_entry). */
typedef struct {
    uint64_t time;      /* MATROSKA_ID_CUETIME */
    EbmlList pos;       /* list of MatroskaIndexPos */
} MatroskaIndex;

182 183 184
typedef struct {
    char *name;
    char *string;
185 186
    char *lang;
    uint64_t def;
187 188 189
    EbmlList sub;
} MatroskaTag;

190 191 192 193 194 195 196 197 198 199 200 201 202
/* Target of a tag group (filled through matroska_tagtargets). */
typedef struct {
    char    *type;          /* MATROSKA_ID_TAGTARGETS_TYPE */
    uint64_t typevalue;     /* MATROSKA_ID_TAGTARGETS_TYPEVALUE */
    uint64_t trackuid;      /* MATROSKA_ID_TAGTARGETS_TRACKUID */
    uint64_t chapteruid;    /* MATROSKA_ID_TAGTARGETS_CHAPTERUID */
    uint64_t attachuid;     /* MATROSKA_ID_TAGTARGETS_ATTACHUID */
} MatroskaTagTarget;

/* One tag group: a target plus its simple tags (see matroska_tag). */
typedef struct {
    MatroskaTagTarget target;   /* MATROSKA_ID_TAGTARGETS */
    EbmlList tag;               /* list of MatroskaTag */
} MatroskaTags;

203 204 205 206 207
/* One seek head entry (filled through matroska_seekhead_entry). */
typedef struct {
    uint64_t id;    /* MATROSKA_ID_SEEKID */
    uint64_t pos;   /* MATROSKA_ID_SEEKPOSITION */
} MatroskaSeekhead;

208
/* One open master element on the parser's level stack. */
typedef struct {
    uint64_t start;     /* stream position right after the element header */
    uint64_t length;    /* payload length announced by the element header */
} MatroskaLevel;

213
typedef struct {
214 215
    AVFormatContext *ctx;

D
Diego Biurrun 已提交
216
    /* EBML stuff */
217 218 219
    int num_levels;
    MatroskaLevel levels[EBML_MAX_DEPTH];
    int level_up;
220
    uint32_t current_id;
221

222 223 224
    uint64_t time_scale;
    double   duration;
    char    *title;
225
    EbmlList tracks;
226
    EbmlList attachments;
227
    EbmlList chapters;
228
    EbmlList index;
229
    EbmlList tags;
230
    EbmlList seekhead;
231 232

    /* byte position of the segment inside the stream */
233
    int64_t segment_start;
234

D
Diego Biurrun 已提交
235
    /* the packet queue */
236 237
    AVPacket **packets;
    int num_packets;
238
    AVPacket *prev_pkt;
239

240
    int done;
241 242 243

    /* What to skip before effectively reading a packet. */
    int skip_to_keyframe;
244
    uint64_t skip_to_timecode;
245 246
} MatroskaDemuxContext;

247 248 249
typedef struct {
    uint64_t duration;
    int64_t  reference;
250
    uint64_t non_simple;
251 252 253 254 255 256 257 258
    EbmlBin  bin;
} MatroskaBlock;

/* One cluster (filled through the matroska_cluster table). */
typedef struct {
    uint64_t timecode;  /* MATROSKA_ID_CLUSTERTIMECODE */
    EbmlList blocks;    /* list of MatroskaBlock */
} MatroskaCluster;

259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
/*
 * Children of the EBML header element. Values land in an Ebml struct;
 * the trailing initializer of each entry is the default applied before
 * parsing. EBML_NONE entries are recognised but skipped.
 */
static EbmlSyntax ebml_header[] = {
    { EBML_ID_EBMLREADVERSION,        EBML_UINT, 0, offsetof(Ebml,version), {.u=EBML_VERSION} },
    { EBML_ID_EBMLMAXSIZELENGTH,      EBML_UINT, 0, offsetof(Ebml,max_size), {.u=8} },
    { EBML_ID_EBMLMAXIDLENGTH,        EBML_UINT, 0, offsetof(Ebml,id_length), {.u=4} },
    { EBML_ID_DOCTYPE,                EBML_STR,  0, offsetof(Ebml,doctype), {.s="(none)"} },
    { EBML_ID_DOCTYPEREADVERSION,     EBML_UINT, 0, offsetof(Ebml,doctype_version), {.u=1} },
    { EBML_ID_EBMLVERSION,            EBML_NONE },
    { EBML_ID_DOCTYPEVERSION,         EBML_NONE },
    { 0 }
};

/* Top-level table: the EBML header master element, parsed with ebml_header. */
static EbmlSyntax ebml_syntax[] = {
    { EBML_ID_HEADER,                 EBML_NEST, 0, 0, {.n=ebml_header} },
    { 0 }
};

275 276 277 278 279 280 281 282 283 284 285
/*
 * Children of the segment info element. Values are stored directly in
 * MatroskaDemuxContext; the timecode scale defaults to 1000000.
 */
static EbmlSyntax matroska_info[] = {
    { MATROSKA_ID_TIMECODESCALE,      EBML_UINT,  0, offsetof(MatroskaDemuxContext,time_scale), {.u=1000000} },
    { MATROSKA_ID_DURATION,           EBML_FLOAT, 0, offsetof(MatroskaDemuxContext,duration) },
    { MATROSKA_ID_TITLE,              EBML_UTF8,  0, offsetof(MatroskaDemuxContext,title) },
    { MATROSKA_ID_WRITINGAPP,         EBML_NONE },
    { MATROSKA_ID_MUXINGAPP,          EBML_NONE },
    { MATROSKA_ID_DATEUTC,            EBML_NONE },
    { MATROSKA_ID_SEGMENTUID,         EBML_NONE },
    { 0 }
};

286 287 288 289 290 291 292
/*
 * Children of a track's video element. Values are stored in a
 * MatroskaTrackVideo; EBML_NONE entries are recognised but skipped.
 */
static EbmlSyntax matroska_track_video[] = {
    { MATROSKA_ID_VIDEOFRAMERATE,     EBML_FLOAT,0, offsetof(MatroskaTrackVideo,frame_rate) },
    { MATROSKA_ID_VIDEODISPLAYWIDTH,  EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_width) },
    { MATROSKA_ID_VIDEODISPLAYHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_height) },
    { MATROSKA_ID_VIDEOPIXELWIDTH,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) },
    { MATROSKA_ID_VIDEOPIXELHEIGHT,   EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) },
    { MATROSKA_ID_VIDEOCOLORSPACE,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,fourcc) },
    { MATROSKA_ID_VIDEOPIXELCROPB,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPT,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPL,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPR,    EBML_NONE },
    { MATROSKA_ID_VIDEODISPLAYUNIT,   EBML_NONE },
    { MATROSKA_ID_VIDEOFLAGINTERLACED,EBML_NONE },
    { MATROSKA_ID_VIDEOSTEREOMODE,    EBML_NONE },
    { MATROSKA_ID_VIDEOASPECTRATIO,   EBML_NONE },
    { 0 }
};

/*
 * Children of a track's audio element. Values are stored in a
 * MatroskaTrackAudio; defaults are 8000.0 for the sampling frequency and
 * 1 for the channel count.
 */
static EbmlSyntax matroska_track_audio[] = {
    { MATROSKA_ID_AUDIOSAMPLINGFREQ,  EBML_FLOAT,0, offsetof(MatroskaTrackAudio,samplerate), {.f=8000.0} },
    { MATROSKA_ID_AUDIOOUTSAMPLINGFREQ,EBML_FLOAT,0,offsetof(MatroskaTrackAudio,out_samplerate) },
    { MATROSKA_ID_AUDIOBITDEPTH,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,bitdepth) },
    { MATROSKA_ID_AUDIOCHANNELS,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,channels), {.u=1} },
    { 0 }
};

/* Children of a content compression element; stored in MatroskaTrackCompression. */
static EbmlSyntax matroska_track_encoding_compression[] = {
    { MATROSKA_ID_ENCODINGCOMPALGO,   EBML_UINT, 0, offsetof(MatroskaTrackCompression,algo), {.u=0} },
    { MATROSKA_ID_ENCODINGCOMPSETTINGS,EBML_BIN, 0, offsetof(MatroskaTrackCompression,settings) },
    { 0 }
};

/*
 * Children of one content encoding element; stored in a
 * MatroskaTrackEncoding. The encoding order element is recognised but
 * skipped.
 */
static EbmlSyntax matroska_track_encoding[] = {
    { MATROSKA_ID_ENCODINGSCOPE,      EBML_UINT, 0, offsetof(MatroskaTrackEncoding,scope), {.u=1} },
    { MATROSKA_ID_ENCODINGTYPE,       EBML_UINT, 0, offsetof(MatroskaTrackEncoding,type), {.u=0} },
    { MATROSKA_ID_ENCODINGCOMPRESSION,EBML_NEST, 0, offsetof(MatroskaTrackEncoding,compression), {.n=matroska_track_encoding_compression} },
    { MATROSKA_ID_ENCODINGORDER,      EBML_NONE },
    { 0 }
};

/*
 * Children of a track's content encodings element: each content encoding
 * appends one MatroskaTrackEncoding to MatroskaTrack.encodings.
 */
static EbmlSyntax matroska_track_encodings[] = {
    { MATROSKA_ID_TRACKCONTENTENCODING, EBML_NEST, sizeof(MatroskaTrackEncoding), offsetof(MatroskaTrack,encodings), {.n=matroska_track_encoding} },
    { 0 }
};

static EbmlSyntax matroska_track[] = {
    { MATROSKA_ID_TRACKNUMBER,          EBML_UINT, 0, offsetof(MatroskaTrack,num) },
333
    { MATROSKA_ID_TRACKNAME,            EBML_UTF8, 0, offsetof(MatroskaTrack,name) },
334
    { MATROSKA_ID_TRACKUID,             EBML_UINT, 0, offsetof(MatroskaTrack,uid) },
335 336 337 338 339 340 341
    { MATROSKA_ID_TRACKTYPE,            EBML_UINT, 0, offsetof(MatroskaTrack,type) },
    { MATROSKA_ID_CODECID,              EBML_STR,  0, offsetof(MatroskaTrack,codec_id) },
    { MATROSKA_ID_CODECPRIVATE,         EBML_BIN,  0, offsetof(MatroskaTrack,codec_priv) },
    { MATROSKA_ID_TRACKLANGUAGE,        EBML_UTF8, 0, offsetof(MatroskaTrack,language), {.s="eng"} },
    { MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, offsetof(MatroskaTrack,default_duration) },
    { MATROSKA_ID_TRACKTIMECODESCALE,   EBML_FLOAT,0, offsetof(MatroskaTrack,time_scale), {.f=1.0} },
    { MATROSKA_ID_TRACKFLAGDEFAULT,     EBML_UINT, 0, offsetof(MatroskaTrack,flag_default), {.u=1} },
342
    { MATROSKA_ID_TRACKFLAGFORCED,      EBML_UINT, 0, offsetof(MatroskaTrack,flag_forced), {.u=0} },
343 344 345 346 347 348 349 350 351 352 353
    { MATROSKA_ID_TRACKVIDEO,           EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} },
    { MATROSKA_ID_TRACKAUDIO,           EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} },
    { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} },
    { MATROSKA_ID_TRACKFLAGENABLED,     EBML_NONE },
    { MATROSKA_ID_TRACKFLAGLACING,      EBML_NONE },
    { MATROSKA_ID_CODECNAME,            EBML_NONE },
    { MATROSKA_ID_CODECDECODEALL,       EBML_NONE },
    { MATROSKA_ID_CODECINFOURL,         EBML_NONE },
    { MATROSKA_ID_CODECDOWNLOADURL,     EBML_NONE },
    { MATROSKA_ID_TRACKMINCACHE,        EBML_NONE },
    { MATROSKA_ID_TRACKMAXCACHE,        EBML_NONE },
354
    { MATROSKA_ID_TRACKMAXBLKADDID,     EBML_NONE },
355 356 357 358 359 360 361 362
    { 0 }
};

/*
 * Children of the tracks element: each track entry appends one
 * MatroskaTrack to MatroskaDemuxContext.tracks.
 */
static EbmlSyntax matroska_tracks[] = {
    { MATROSKA_ID_TRACKENTRY,         EBML_NEST, sizeof(MatroskaTrack), offsetof(MatroskaDemuxContext,tracks), {.n=matroska_track} },
    { 0 }
};

363
static EbmlSyntax matroska_attachment[] = {
364
    { MATROSKA_ID_FILEUID,            EBML_UINT, 0, offsetof(MatroskaAttachement,uid) },
365 366 367
    { MATROSKA_ID_FILENAME,           EBML_UTF8, 0, offsetof(MatroskaAttachement,filename) },
    { MATROSKA_ID_FILEMIMETYPE,       EBML_STR,  0, offsetof(MatroskaAttachement,mime) },
    { MATROSKA_ID_FILEDATA,           EBML_BIN,  0, offsetof(MatroskaAttachement,bin) },
368
    { MATROSKA_ID_FILEDESC,           EBML_NONE },
369 370 371 372 373 374 375 376
    { 0 }
};

/*
 * Children of the attachments element: each attached file appends one
 * MatroskaAttachement to MatroskaDemuxContext.attachments.
 */
static EbmlSyntax matroska_attachments[] = {
    { MATROSKA_ID_ATTACHEDFILE,       EBML_NEST, sizeof(MatroskaAttachement), offsetof(MatroskaDemuxContext,attachments), {.n=matroska_attachment} },
    { 0 }
};

377 378
static EbmlSyntax matroska_chapter_display[] = {
    { MATROSKA_ID_CHAPSTRING,         EBML_UTF8, 0, offsetof(MatroskaChapter,title) },
379
    { MATROSKA_ID_CHAPLANG,           EBML_NONE },
380 381 382 383 384 385 386 387 388
    { 0 }
};

/*
 * Children of one chapter atom; stored in a MatroskaChapter. Start and end
 * times default to AV_NOPTS_VALUE. The display element writes into the same
 * MatroskaChapter (offset 0). Nested chapter atoms and the flag/physical
 * equivalent elements are recognised but skipped.
 */
static EbmlSyntax matroska_chapter_entry[] = {
    { MATROSKA_ID_CHAPTERTIMESTART,   EBML_UINT, 0, offsetof(MatroskaChapter,start), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_CHAPTERTIMEEND,     EBML_UINT, 0, offsetof(MatroskaChapter,end), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_CHAPTERUID,         EBML_UINT, 0, offsetof(MatroskaChapter,uid) },
    { MATROSKA_ID_CHAPTERDISPLAY,     EBML_NEST, 0, 0, {.n=matroska_chapter_display} },
    { MATROSKA_ID_CHAPTERFLAGHIDDEN,  EBML_NONE },
    { MATROSKA_ID_CHAPTERFLAGENABLED, EBML_NONE },
    { MATROSKA_ID_CHAPTERPHYSEQUIV,   EBML_NONE },
    { MATROSKA_ID_CHAPTERATOM,        EBML_NONE },
    { 0 }
};

static EbmlSyntax matroska_chapter[] = {
    { MATROSKA_ID_CHAPTERATOM,        EBML_NEST, sizeof(MatroskaChapter), offsetof(MatroskaDemuxContext,chapters), {.n=matroska_chapter_entry} },
    { MATROSKA_ID_EDITIONUID,         EBML_NONE },
    { MATROSKA_ID_EDITIONFLAGHIDDEN,  EBML_NONE },
    { MATROSKA_ID_EDITIONFLAGDEFAULT, EBML_NONE },
400
    { MATROSKA_ID_EDITIONFLAGORDERED, EBML_NONE },
401 402 403 404 405 406 407 408
    { 0 }
};

/* Children of the chapters element: edition entries, parsed with matroska_chapter. */
static EbmlSyntax matroska_chapters[] = {
    { MATROSKA_ID_EDITIONENTRY,       EBML_NEST, 0, 0, {.n=matroska_chapter} },
    { 0 }
};

409 410 411
static EbmlSyntax matroska_index_pos[] = {
    { MATROSKA_ID_CUETRACK,           EBML_UINT, 0, offsetof(MatroskaIndexPos,track) },
    { MATROSKA_ID_CUECLUSTERPOSITION, EBML_UINT, 0, offsetof(MatroskaIndexPos,pos)   },
412
    { MATROSKA_ID_CUEBLOCKNUMBER,     EBML_NONE },
413 414 415 416 417 418 419 420 421 422 423 424 425 426
    { 0 }
};

/*
 * Children of one cue point; stored in a MatroskaIndex. Each track
 * position appends one MatroskaIndexPos to MatroskaIndex.pos.
 */
static EbmlSyntax matroska_index_entry[] = {
    { MATROSKA_ID_CUETIME,            EBML_UINT, 0, offsetof(MatroskaIndex,time) },
    { MATROSKA_ID_CUETRACKPOSITION,   EBML_NEST, sizeof(MatroskaIndexPos), offsetof(MatroskaIndex,pos), {.n=matroska_index_pos} },
    { 0 }
};

/*
 * Children of the cues element: each point entry appends one
 * MatroskaIndex to MatroskaDemuxContext.index.
 */
static EbmlSyntax matroska_index[] = {
    { MATROSKA_ID_POINTENTRY,         EBML_NEST, sizeof(MatroskaIndex), offsetof(MatroskaDemuxContext,index), {.n=matroska_index_entry} },
    { 0 }
};

427 428 429
static EbmlSyntax matroska_simpletag[] = {
    { MATROSKA_ID_TAGNAME,            EBML_UTF8, 0, offsetof(MatroskaTag,name) },
    { MATROSKA_ID_TAGSTRING,          EBML_UTF8, 0, offsetof(MatroskaTag,string) },
430 431
    { MATROSKA_ID_TAGLANG,            EBML_STR,  0, offsetof(MatroskaTag,lang), {.s="und"} },
    { MATROSKA_ID_TAGDEFAULT,         EBML_UINT, 0, offsetof(MatroskaTag,def) },
432
    { MATROSKA_ID_TAGDEFAULT_BUG,     EBML_UINT, 0, offsetof(MatroskaTag,def) },
433 434 435 436
    { MATROSKA_ID_SIMPLETAG,          EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag,sub), {.n=matroska_simpletag} },
    { 0 }
};

437 438 439 440 441 442 443 444 445
/*
 * Children of a tag targets element; stored in a MatroskaTagTarget.
 * The type value defaults to 50.
 */
static EbmlSyntax matroska_tagtargets[] = {
    { MATROSKA_ID_TAGTARGETS_TYPE,      EBML_STR,  0, offsetof(MatroskaTagTarget,type) },
    { MATROSKA_ID_TAGTARGETS_TYPEVALUE, EBML_UINT, 0, offsetof(MatroskaTagTarget,typevalue), {.u=50} },
    { MATROSKA_ID_TAGTARGETS_TRACKUID,  EBML_UINT, 0, offsetof(MatroskaTagTarget,trackuid) },
    { MATROSKA_ID_TAGTARGETS_CHAPTERUID,EBML_UINT, 0, offsetof(MatroskaTagTarget,chapteruid) },
    { MATROSKA_ID_TAGTARGETS_ATTACHUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,attachuid) },
    { 0 }
};

446
static EbmlSyntax matroska_tag[] = {
447 448
    { MATROSKA_ID_SIMPLETAG,          EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTags,tag), {.n=matroska_simpletag} },
    { MATROSKA_ID_TAGTARGETS,         EBML_NEST, 0, offsetof(MatroskaTags,target), {.n=matroska_tagtargets} },
449 450 451
    { 0 }
};

452
static EbmlSyntax matroska_tags[] = {
453
    { MATROSKA_ID_TAG,                EBML_NEST, sizeof(MatroskaTags), offsetof(MatroskaDemuxContext,tags), {.n=matroska_tag} },
454 455 456
    { 0 }
};

457 458 459 460 461 462 463 464 465 466 467
/*
 * Children of one seek entry; stored in a MatroskaSeekhead. The position
 * defaults to -1 converted to uint64_t (all bits set).
 */
static EbmlSyntax matroska_seekhead_entry[] = {
    { MATROSKA_ID_SEEKID,             EBML_UINT, 0, offsetof(MatroskaSeekhead,id) },
    { MATROSKA_ID_SEEKPOSITION,       EBML_UINT, 0, offsetof(MatroskaSeekhead,pos), {.u=-1} },
    { 0 }
};

/*
 * Children of the seek head element: each seek entry appends one
 * MatroskaSeekhead to MatroskaDemuxContext.seekhead.
 */
static EbmlSyntax matroska_seekhead[] = {
    { MATROSKA_ID_SEEKENTRY,          EBML_NEST, sizeof(MatroskaSeekhead), offsetof(MatroskaDemuxContext,seekhead), {.n=matroska_seekhead_entry} },
    { 0 }
};

468 469 470 471 472 473 474 475
/*
 * Children of the segment element. All metadata sections write into
 * MatroskaDemuxContext (offset 0). Parsing stops at the first cluster
 * (EBML_STOP).
 */
static EbmlSyntax matroska_segment[] = {
    { MATROSKA_ID_INFO,           EBML_NEST, 0, 0, {.n=matroska_info       } },
    { MATROSKA_ID_TRACKS,         EBML_NEST, 0, 0, {.n=matroska_tracks     } },
    { MATROSKA_ID_ATTACHMENTS,    EBML_NEST, 0, 0, {.n=matroska_attachments} },
    { MATROSKA_ID_CHAPTERS,       EBML_NEST, 0, 0, {.n=matroska_chapters   } },
    { MATROSKA_ID_CUES,           EBML_NEST, 0, 0, {.n=matroska_index      } },
    { MATROSKA_ID_TAGS,           EBML_NEST, 0, 0, {.n=matroska_tags       } },
    { MATROSKA_ID_SEEKHEAD,       EBML_NEST, 0, 0, {.n=matroska_seekhead   } },
    { MATROSKA_ID_CLUSTER,        EBML_STOP },
    { 0 }
};

/* Top-level table: the segment master element, parsed with matroska_segment. */
static EbmlSyntax matroska_segments[] = {
    { MATROSKA_ID_SEGMENT,        EBML_NEST, 0, 0, {.n=matroska_segment    } },
    { 0 }
};

485 486 487 488 489
/*
 * Children of a block group; stored in a MatroskaBlock. Block and simple
 * block payloads share the same EbmlBin. The duration defaults to
 * AV_NOPTS_VALUE.
 *
 * The entry with ID 1 exists for its default only: when this table is
 * entered through ebml_parse_nest(), non_simple is initialised to 1. A
 * simple block dispatched through EBML_PASS does not go through
 * ebml_parse_nest(), so non_simple keeps the zero set when the list element
 * was created. NOTE(review): ID 1 is presumably never matched by a real
 * element - confirm.
 */
static EbmlSyntax matroska_blockgroup[] = {
    { MATROSKA_ID_BLOCK,          EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
    { MATROSKA_ID_SIMPLEBLOCK,    EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
    { MATROSKA_ID_BLOCKDURATION,  EBML_UINT, 0, offsetof(MatroskaBlock,duration), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_BLOCKREFERENCE, EBML_UINT, 0, offsetof(MatroskaBlock,reference) },
    { 1,                          EBML_UINT, 0, offsetof(MatroskaBlock,non_simple), {.u=1} },
    { 0 }
};

/*
 * Children of one cluster; stored in a MatroskaCluster. Both block groups
 * and bare simple blocks append one MatroskaBlock to
 * MatroskaCluster.blocks; a simple block is re-dispatched (EBML_PASS)
 * through matroska_blockgroup. Cluster position and previous size are
 * recognised but skipped.
 */
static EbmlSyntax matroska_cluster[] = {
    { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
    { MATROSKA_ID_BLOCKGROUP,     EBML_NEST, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
    { MATROSKA_ID_SIMPLEBLOCK,    EBML_PASS, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
    { MATROSKA_ID_CLUSTERPOSITION,EBML_NONE },
    { MATROSKA_ID_CLUSTERPREVSIZE,EBML_NONE },
    { 0 }
};

static EbmlSyntax matroska_clusters[] = {
    { MATROSKA_ID_CLUSTER,        EBML_NEST, 0, 0, {.n=matroska_cluster} },
505 506 507 508
    { MATROSKA_ID_INFO,           EBML_NONE },
    { MATROSKA_ID_CUES,           EBML_NONE },
    { MATROSKA_ID_TAGS,           EBML_NONE },
    { MATROSKA_ID_SEEKHEAD,       EBML_NONE },
509 510 511
    { 0 }
};

J
James Zern 已提交
512 513
/* Document types accepted by the probe; the pointer array itself is read-only. */
static const char *const matroska_doctypes[] = { "matroska", "webm" };

514
/*
D
Diego Biurrun 已提交
515
 * Return: Whether we reached the end of a level in the hierarchy or not.
516
 */
517
static int ebml_level_end(MatroskaDemuxContext *matroska)
518
{
519
    AVIOContext *pb = matroska->ctx->pb;
520
    int64_t pos = avio_tell(pb);
521

522
    if (matroska->num_levels > 0) {
523
        MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
524
        if (pos - level->start >= level->length || matroska->current_id) {
525
            matroska->num_levels--;
526
            return 1;
527 528
        }
    }
529
    return 0;
530 531 532 533 534 535 536 537
}

/*
 * Read: an "EBML number", which is defined as a variable-length
 * array of bytes. The first byte indicates the length by giving a
 * number of 0-bits followed by a one. The position of the first
 * "one" bit inside the first byte indicates the length of this
 * number.
D
Diego Biurrun 已提交
538
 * Returns: number of bytes read, < 0 on error
539
 */
540
static int ebml_read_num(MatroskaDemuxContext *matroska, AVIOContext *pb,
541
                         int max_size, uint64_t *number)
542
{
543 544
    int read = 1, n = 1;
    uint64_t total = 0;
545

546
    /* The first byte tells us the length in bytes - avio_r8() can normally
547 548
     * return 0, but since that's not a valid first ebmlID byte, we can
     * use it safely here to catch EOS. */
549
    if (!(total = avio_r8(pb))) {
550
        /* we might encounter EOS here */
A
Anton Khirnov 已提交
551
        if (!pb->eof_reached) {
552
            int64_t pos = avio_tell(pb);
553 554 555 556
            av_log(matroska->ctx, AV_LOG_ERROR,
                   "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
                   pos, pos);
        }
557
        return AVERROR(EIO); /* EOS or actual I/O error */
558 559 560
    }

    /* get the length of the EBML number */
561
    read = 8 - ff_log2_tab[total];
562
    if (read > max_size) {
563
        int64_t pos = avio_tell(pb) - 1;
564 565 566 567 568 569 570
        av_log(matroska->ctx, AV_LOG_ERROR,
               "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
               (uint8_t) total, pos, pos);
        return AVERROR_INVALIDDATA;
    }

    /* read out length */
571
    total ^= 1 << ff_log2_tab[total];
572
    while (n++ < read)
573
        total = (total << 8) | avio_r8(pb);
574 575 576 577 578 579

    *number = total;

    return read;
}

580 581 582 583 584
/**
 * Read a EBML length value.
 * This needs special handling for the "unknown length" case which has multiple
 * encodings.
 */
585
static int ebml_read_length(MatroskaDemuxContext *matroska, AVIOContext *pb,
586 587 588 589 590 591 592 593
                            uint64_t *number)
{
    int res = ebml_read_num(matroska, pb, 8, number);
    if (res > 0 && *number + 1 == 1ULL << (7 * res))
        *number = 0xffffffffffffffULL;
    return res;
}

594 595 596 597
/*
 * Read the next element as an unsigned int.
 * 0 is success, < 0 is failure.
 */
598
static int ebml_read_uint(AVIOContext *pb, int size, uint64_t *num)
599
{
600
    int n = 0;
601

602
    if (size > 8)
603 604
        return AVERROR_INVALIDDATA;

D
Diego Biurrun 已提交
605
    /* big-endian ordering; build up number */
606 607
    *num = 0;
    while (n++ < size)
608
        *num = (*num << 8) | avio_r8(pb);
609 610 611 612 613 614 615 616

    return 0;
}

/*
 * Read the next element as a float.
 * 0 is success, < 0 is failure.
 */
617
static int ebml_read_float(AVIOContext *pb, int size, double *num)
618
{
619 620 621
    if (size == 0) {
        *num = 0;
    } else if (size == 4) {
622
        *num= av_int2flt(avio_rb32(pb));
623
    } else if(size==8){
624
        *num= av_int2dbl(avio_rb64(pb));
625
    } else
626 627 628 629 630 631 632 633 634
        return AVERROR_INVALIDDATA;

    return 0;
}

/*
 * Read the next element as an ASCII string.
 * 0 is success, < 0 is failure.
 */
635
static int ebml_read_ascii(AVIOContext *pb, int size, char **str)
636
{
637
    av_free(*str);
D
Diego Biurrun 已提交
638
    /* EBML strings are usually not 0-terminated, so we allocate one
639
     * byte more, read the string and NULL-terminate it ourselves. */
640
    if (!(*str = av_malloc(size + 1)))
641
        return AVERROR(ENOMEM);
642
    if (avio_read(pb, (uint8_t *) *str, size) != size) {
643
        av_freep(str);
644
        return AVERROR(EIO);
645 646 647 648 649 650
    }
    (*str)[size] = '\0';

    return 0;
}

651 652 653 654
/*
 * Read the next element as binary data.
 * 0 is success, < 0 is failure.
 */
655
static int ebml_read_binary(AVIOContext *pb, int length, EbmlBin *bin)
656 657 658 659 660 661
{
    av_free(bin->data);
    if (!(bin->data = av_malloc(length)))
        return AVERROR(ENOMEM);

    bin->size = length;
662
    bin->pos  = avio_tell(pb);
663
    if (avio_read(pb, bin->data, length) != length) {
664
        av_freep(&bin->data);
665
        return AVERROR(EIO);
666
    }
667 668 669 670

    return 0;
}

671 672 673 674 675
/*
 * Read the next element, but only the header. The contents
 * are supposed to be sub-elements which can be read separately.
 * 0 is success, < 0 is failure.
 */
676
static int ebml_read_master(MatroskaDemuxContext *matroska, uint64_t length)
677
{
678
    AVIOContext *pb = matroska->ctx->pb;
679 680 681 682 683
    MatroskaLevel *level;

    if (matroska->num_levels >= EBML_MAX_DEPTH) {
        av_log(matroska->ctx, AV_LOG_ERROR,
               "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
684
        return AVERROR(ENOSYS);
685 686 687
    }

    level = &matroska->levels[matroska->num_levels++];
688
    level->start = avio_tell(pb);
689 690 691 692 693 694 695
    level->length = length;

    return 0;
}

/*
 * Read signed/unsigned "EBML" numbers.
D
Diego Biurrun 已提交
696
 * Return: number of bytes processed, < 0 on error
697
 */
698 699
static int matroska_ebmlnum_uint(MatroskaDemuxContext *matroska,
                                 uint8_t *data, uint32_t size, uint64_t *num)
700
{
701
    AVIOContext pb;
702
    ffio_init_context(&pb, data, size, 0, NULL, NULL, NULL, NULL);
703
    return ebml_read_num(matroska, &pb, FFMIN(size, 8), num);
704 705 706 707 708
}

/*
 * Same as above, but signed.
 */
709 710
static int matroska_ebmlnum_sint(MatroskaDemuxContext *matroska,
                                 uint8_t *data, uint32_t size, int64_t *num)
711 712 713 714 715
{
    uint64_t unum;
    int res;

    /* read as unsigned number first */
716
    if ((res = matroska_ebmlnum_uint(matroska, data, size, &unum)) < 0)
717 718 719
        return res;

    /* make signed (weird way) */
720
    *num = unum - ((1LL << (7*res - 1)) - 1);
721 722 723 724

    return res;
}

725 726
static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                           EbmlSyntax *syntax, void *data);
727

728 729
/*
 * Look up id in the syntax table and parse the element with the matching
 * entry. An ID that is not in the table is handled through the table's
 * terminating entry (id 0, type EBML_NONE), i.e. its payload is skipped;
 * a message is logged unless the element is a void or CRC-32 element.
 *
 * Special case: a cluster ID that is not in the table while the innermost
 * level has the "unknown length" marker ends that level; 0 is returned and
 * the ID is left pending in matroska->current_id.
 */
static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                         uint32_t id, void *data)
{
    int i = 0;
    int unknown;

    while (syntax[i].id && syntax[i].id != id)
        i++;
    unknown = !syntax[i].id;

    if (unknown && id == MATROSKA_ID_CLUSTER &&
        matroska->num_levels > 0 &&
        matroska->levels[matroska->num_levels-1].length == 0xffffffffffffff)
        return 0;  // we reached the end of an unknown size cluster

    if (unknown && id != EBML_ID_VOID && id != EBML_ID_CRC32)
        av_log(matroska->ctx, AV_LOG_INFO, "Unknown entry 0x%X\n", id);

    return ebml_parse_elem(matroska, &syntax[i], data);
}

744 745
/*
 * Parse one element using the given syntax table.
 * If no element ID is pending in matroska->current_id, one is read from
 * the stream first (at most 4 bytes); the length marker bit stripped by
 * ebml_read_num() is put back so the stored ID matches the table IDs.
 * Returns the result of ebml_parse_id(), or < 0 if the ID cannot be read.
 */
static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                      void *data)
{
    if (!matroska->current_id) {
        uint64_t id;
        int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);

        if (res < 0)
            return res;
        matroska->current_id = id | 1 << 7*res;
    }

    return ebml_parse_id(matroska, syntax, matroska->current_id, data);
}

757 758
/*
 * Parse the children of a master element.
 * First every integer, float and string field described by the table is
 * set to its default, then elements are parsed until the current level
 * ends or a parse call returns non-zero.
 * Returns 0 on success, AVERROR(ENOMEM) if a string default cannot be
 * duplicated, otherwise the first non-zero result of ebml_parse().
 */
static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                           void *data)
{
    int i, res = 0;

    for (i=0; syntax[i].id; i++) {
        void *dst = (char *)data + syntax[i].data_offset;

        switch (syntax[i].type) {
        case EBML_UINT:
            *(uint64_t *)dst = syntax[i].def.u;
            break;
        case EBML_FLOAT:
            *(double   *)dst = syntax[i].def.f;
            break;
        case EBML_STR:
        case EBML_UTF8:
            *(char    **)dst = av_strdup(syntax[i].def.s);
            /* a NULL default is legitimate; a failed copy is not */
            if (syntax[i].def.s && !*(char **)dst)
                return AVERROR(ENOMEM);
            break;
        default:
            /* other types have no default to apply */
            break;
        }
    }

    while (!res && !ebml_level_end(matroska))
        res = ebml_parse(matroska, syntax, data);

    return res;
}

782 783 784
/*
 * Parse one element according to a single syntax entry.
 *
 * data is advanced by the entry's data_offset. If the entry describes a
 * list, a new zeroed element is appended and becomes the target. Unless
 * the entry is EBML_PASS or EBML_STOP, the pending ID is consumed and the
 * element length is read and checked against a per-type limit. The payload
 * is then read, descended into, re-dispatched or skipped depending on the
 * entry type.
 *
 * Returns 0 on success, 1 for EBML_STOP, < 0 on error (AVERROR(ENOMEM) if
 * the list cannot grow).
 */
static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                           EbmlSyntax *syntax, void *data)
{
    static const uint64_t max_lengths[EBML_TYPE_COUNT] = {
        [EBML_UINT]  = 8,
        [EBML_FLOAT] = 8,
        // max. 16 MB for strings
        [EBML_STR]   = 0x1000000,
        [EBML_UTF8]  = 0x1000000,
        // max. 256 MB for binary data
        [EBML_BIN]   = 0x10000000,
        // no limits for anything else
    };
    AVIOContext *pb = matroska->ctx->pb;
    uint32_t id = syntax->id;
    uint64_t length = 0; /* stays unused for EBML_PASS / EBML_STOP */
    int res;

    data = (char *)data + syntax->data_offset;
    if (syntax->list_elem_size) {
        EbmlList *list = data;
        /* keep the old block reachable if the reallocation fails */
        void *newelem = av_realloc(list->elem,
                                   (list->nb_elem+1)*syntax->list_elem_size);
        if (!newelem)
            return AVERROR(ENOMEM);
        list->elem = newelem;
        data = (char*)list->elem + list->nb_elem*syntax->list_elem_size;
        memset(data, 0, syntax->list_elem_size);
        list->nb_elem++;
    }

    if (syntax->type != EBML_PASS && syntax->type != EBML_STOP) {
        matroska->current_id = 0;
        if ((res = ebml_read_length(matroska, pb, &length)) < 0)
            return res;
        if (max_lengths[syntax->type] && length > max_lengths[syntax->type]) {
            av_log(matroska->ctx, AV_LOG_ERROR,
                   "Invalid length 0x%"PRIx64" > 0x%"PRIx64" for syntax element %i\n",
                   length, max_lengths[syntax->type], syntax->type);
            return AVERROR_INVALIDDATA;
        }
    }

    switch (syntax->type) {
    case EBML_UINT:  res = ebml_read_uint  (pb, length, data);  break;
    case EBML_FLOAT: res = ebml_read_float (pb, length, data);  break;
    case EBML_STR:
    case EBML_UTF8:  res = ebml_read_ascii (pb, length, data);  break;
    case EBML_BIN:   res = ebml_read_binary(pb, length, data);  break;
    case EBML_NEST:  if ((res=ebml_read_master(matroska, length)) < 0)
                         return res;
                     if (id == MATROSKA_ID_SEGMENT)
                         matroska->segment_start = avio_tell(matroska->ctx->pb);
                     return ebml_parse_nest(matroska, syntax->def.n, data);
    case EBML_PASS:  return ebml_parse_id(matroska, syntax->def.n, id, data);
    case EBML_STOP:  return 1;
    default:         return avio_skip(pb,length)<0 ? AVERROR(EIO) : 0;
    }
    if (res == AVERROR_INVALIDDATA)
        av_log(matroska->ctx, AV_LOG_ERROR, "Invalid element\n");
    else if (res == AVERROR(EIO))
        av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
    return res;
}

/*
 * Recursively release the memory owned by a parsed EBML structure.
 *
 * syntax is the table describing the structure; it is walked until an
 * entry with id 0 is found. data is the base address of the structure,
 * each entry's data_offset locating its field inside it.
 *
 * Strings and binary payloads are freed with av_freep(), so their
 * pointers are reset. Nested lists have every element freed recursively
 * before the element array itself is released.
 */
static void ebml_free(EbmlSyntax *syntax, void *data)
{
    int i, j;
    for (i=0; syntax[i].id; i++) {
        void *data_off = (char *)data + syntax[i].data_offset;
        switch (syntax[i].type) {
        case EBML_STR:
        case EBML_UTF8:  av_freep(data_off);                      break;
        case EBML_BIN:   av_freep(&((EbmlBin *)data_off)->data);  break;
        case EBML_NEST:
            if (syntax[i].list_elem_size) {
                EbmlList *list = data_off;
                char *ptr = list->elem;
                for (j=0; j<list->nb_elem; j++, ptr+=syntax[i].list_elem_size)
                    ebml_free(syntax[i].def.n, ptr);
                /* Reset the list so that a later pass over the same
                 * structure sees it as empty instead of walking (and
                 * freeing) a dangling array. */
                av_freep(&list->elem);
                list->nb_elem = 0;
            } else
                ebml_free(syntax[i].def.n, data_off);
            break;
        default:  break;
        }
    }
}

866 867 868 869 870 871 872

/*
 * Autodetecting...
 *
 * Checks that the probe buffer starts with an EBML header, decodes the
 * variable-length header size, and looks for one of the known doctype
 * strings inside the header.
 *
 * Returns AVPROBE_SCORE_MAX when a known doctype is found,
 * AVPROBE_SCORE_MAX/2 for a plausible EBML header without one, and 0
 * when the data is not (or not completely) an EBML header.
 */
static int matroska_probe(AVProbeData *p)
{
    uint64_t total = 0;
    int len_mask = 0x80, size = 1, n = 1, i;

    /* EBML header? */
    if (AV_RB32(p->buf) != EBML_ID_HEADER)
        return 0;

    /* length of header */
    total = p->buf[4];
    while (size <= 8 && !(total & len_mask)) {
        size++;
        len_mask >>= 1;
    }
    if (size > 8)
      return 0;
    total &= (len_mask - 1);
    while (n < size)
        total = (total << 8) | p->buf[4 + n++];

    /* Does the probe data contain the whole header? */
    if (p->buf_size < 4 + size + total)
      return 0;

    /* The header should contain a known document type. For now,
     * we don't parse the whole header but simply check for the
     * availability of that array of characters inside the header.
     * Not fully fool-proof, but good enough. */
    for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++) {
        int probelen = strlen(matroska_doctypes[i]);
        /* The loop bound below is computed in unsigned arithmetic; a
         * header shorter than the doctype would make it wrap around and
         * send memcmp() far beyond the probe buffer. */
        if (total < probelen)
            continue;
        for (n = 4+size; n <= 4+size+total-probelen; n++)
            if (!memcmp(p->buf+n, matroska_doctypes[i], probelen))
                return AVPROBE_SCORE_MAX;
    }

    // probably valid EBML header but no recognized doctype
    return AVPROBE_SCORE_MAX/2;
}

/*
 * Look up a track by its Matroska track number.
 *
 * Scans the demuxer's track list in order and returns the first entry
 * whose num field equals num. When nothing matches, an error is logged
 * and NULL is returned.
 */
static MatroskaTrack *matroska_find_track_by_num(MatroskaDemuxContext *matroska,
                                                 int num)
{
    MatroskaTrack *list = matroska->tracks.elem;
    int idx = 0;

    while (idx < matroska->tracks.nb_elem) {
        if (list[idx].num == num)
            return &list[idx];
        idx++;
    }

    av_log(matroska->ctx, AV_LOG_ERROR, "Invalid track number %d\n", num);
    return NULL;
}

924 925
/*
 * Undo the first content encoding of a track on a buffer.
 *
 * buf / buf_size describe the encoded input. For the real compression
 * algorithms (LZO and, when compiled in, zlib and bzip2) they are
 * replaced on success by a newly allocated buffer holding the decoded
 * data and by its size; the input buffer itself is not freed here.
 *
 * Return value:
 *   - header stripping: the size of the stripped header (the caller has
 *     to prepend it); buf / buf_size are left untouched
 *   - other algorithms: 0 on success
 *   - -1 on any failure (input too large, unknown algorithm, out of
 *     memory, corrupt data); buf / buf_size are left untouched
 */
static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
                                  MatroskaTrack *track)
{
    MatroskaTrackEncoding *encodings = track->encodings.elem;
    uint8_t* data = *buf;
    int isize = *buf_size;
    uint8_t* pkt_data = NULL;
    uint8_t* newpktdata;
    int pkt_size = isize;
    int result = 0;
    int olen;

    if (pkt_size >= 10000000)
        return -1;

    switch (encodings[0].compression.algo) {
    case MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP:
        return encodings[0].compression.settings.size;
    case MATROSKA_TRACK_ENCODING_COMP_LZO:
        /* Grow the output buffer until the decoder no longer reports it
         * as full, or until the size cap is reached. */
        do {
            olen = pkt_size *= 3;
            /* Keep the old pointer until the reallocation is known to
             * have worked, so that it can still be freed on failure. */
            newpktdata = av_realloc(pkt_data, pkt_size+AV_LZO_OUTPUT_PADDING);
            if (!newpktdata)
                goto failed;
            pkt_data = newpktdata;
            result = av_lzo1x_decode(pkt_data, &olen, data, &isize);
        } while (result==AV_LZO_OUTPUT_FULL && pkt_size<10000000);
        if (result)
            goto failed;
        /* olen is the unused output space left by the decoder. */
        pkt_size -= olen;
        break;
#if CONFIG_ZLIB
    case MATROSKA_TRACK_ENCODING_COMP_ZLIB: {
        z_stream zstream = {0};
        if (inflateInit(&zstream) != Z_OK)
            return -1;
        zstream.next_in = data;
        zstream.avail_in = isize;
        do {
            pkt_size *= 3;
            newpktdata = av_realloc(pkt_data, pkt_size);
            if (!newpktdata) {
                inflateEnd(&zstream);
                goto failed;
            }
            pkt_data = newpktdata;
            zstream.avail_out = pkt_size - zstream.total_out;
            zstream.next_out = pkt_data + zstream.total_out;
            result = inflate(&zstream, Z_NO_FLUSH);
        } while (result==Z_OK && pkt_size<10000000);
        pkt_size = zstream.total_out;
        inflateEnd(&zstream);
        if (result != Z_STREAM_END)
            goto failed;
        break;
    }
#endif
#if CONFIG_BZLIB
    case MATROSKA_TRACK_ENCODING_COMP_BZLIB: {
        bz_stream bzstream = {0};
        if (BZ2_bzDecompressInit(&bzstream, 0, 0) != BZ_OK)
            return -1;
        bzstream.next_in = data;
        bzstream.avail_in = isize;
        do {
            pkt_size *= 3;
            newpktdata = av_realloc(pkt_data, pkt_size);
            if (!newpktdata) {
                BZ2_bzDecompressEnd(&bzstream);
                goto failed;
            }
            pkt_data = newpktdata;
            bzstream.avail_out = pkt_size - bzstream.total_out_lo32;
            bzstream.next_out = pkt_data + bzstream.total_out_lo32;
            result = BZ2_bzDecompress(&bzstream);
        } while (result==BZ_OK && pkt_size<10000000);
        pkt_size = bzstream.total_out_lo32;
        BZ2_bzDecompressEnd(&bzstream);
        if (result != BZ_STREAM_END)
            goto failed;
        break;
    }
#endif
    default:
        return -1;
    }

    *buf = pkt_data;
    *buf_size = pkt_size;
    return 0;
 failed:
    av_free(pkt_data);
    return -1;
}

1005
static void matroska_fix_ass_packet(MatroskaDemuxContext *matroska,
1006
                                    AVPacket *pkt, uint64_t display_duration)
1007 1008 1009 1010 1011 1012 1013
{
    char *line, *layer, *ptr = pkt->data, *end = ptr+pkt->size;
    for (; *ptr!=',' && ptr<end-1; ptr++);
    if (*ptr == ',')
        layer = ++ptr;
    for (; *ptr!=',' && ptr<end-1; ptr++);
    if (*ptr == ',') {
1014
        int64_t end_pts = pkt->pts + display_duration;
1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027
        int sc = matroska->time_scale * pkt->pts / 10000000;
        int ec = matroska->time_scale * end_pts  / 10000000;
        int sh, sm, ss, eh, em, es, len;
        sh = sc/360000;  sc -= 360000*sh;
        sm = sc/  6000;  sc -=   6000*sm;
        ss = sc/   100;  sc -=    100*ss;
        eh = ec/360000;  ec -= 360000*eh;
        em = ec/  6000;  ec -=   6000*em;
        es = ec/   100;  ec -=    100*es;
        *ptr++ = '\0';
        len = 50 + end-ptr + FF_INPUT_BUFFER_PADDING_SIZE;
        if (!(line = av_malloc(len)))
            return;
1028
        snprintf(line,len,"Dialogue: %s,%d:%02d:%02d.%02d,%d:%02d:%02d.%02d,%s\r\n",
1029 1030 1031 1032 1033 1034 1035
                 layer, sh, sm, ss, sc, eh, em, es, ec, ptr);
        av_free(pkt->data);
        pkt->data = line;
        pkt->size = strlen(line);
    }
}

1036 1037 1038 1039 1040 1041 1042 1043 1044
/*
 * Append the payload of in to out, then destroy and free in.
 *
 * in is always consumed. If the buffer of out cannot be enlarged, out
 * is left exactly as it was and the payload of in is dropped.
 */
static void matroska_merge_packets(AVPacket *out, AVPacket *in)
{
    /* Do not overwrite out->data before the reallocation is known to
     * have succeeded: the old buffer is still valid on failure. */
    uint8_t *merged = av_realloc(out->data, out->size+in->size);

    if (merged) {
        memcpy(merged+out->size, in->data, in->size);
        out->data  = merged;
        out->size += in->size;
    }
    av_destruct_packet(in);
    av_free(in);
}

1045 1046
/*
 * Export a list of Matroska SimpleTags into a metadata dictionary.
 *
 * Each tag is stored under "<prefix>/<name>" (or just "<name>" when
 * prefix is NULL). A tag is stored under the plain key when it is
 * flagged as default or carries the language "und"; a tag with any
 * other language is (also) stored under "<key>-<lang>". Nested tags are
 * exported recursively with the parent key as their prefix. Tags
 * without a name are skipped with a warning.
 *
 * The dictionary is finally passed through ff_metadata_conv() with the
 * Matroska conversion table.
 */
static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
                                 AVMetadata **metadata, char *prefix)
{
    MatroskaTag *tags = list->elem;
    char key[1024];
    int idx;

    for (idx = 0; idx < list->nb_elem; idx++) {
        MatroskaTag *tag = &tags[idx];
        const char *lang = NULL;

        /* "und" means no specific language. */
        if (strcmp(tag->lang, "und"))
            lang = tag->lang;

        if (!tag->name) {
            av_log(s, AV_LOG_WARNING, "Skipping invalid tag with no TagName.\n");
            continue;
        }

        if (prefix)
            snprintf(key, sizeof(key), "%s/%s", prefix, tag->name);
        else
            av_strlcpy(key, tag->name, sizeof(key));

        /* Plain key: default tags and tags without a language. */
        if (tag->def || !lang) {
            av_metadata_set2(metadata, key, tag->string, 0);
            if (tag->sub.nb_elem)
                matroska_convert_tag(s, &tag->sub, metadata, key);
        }

        /* Language-qualified key. */
        if (lang) {
            av_strlcat(key, "-", sizeof(key));
            av_strlcat(key, lang, sizeof(key));
            av_metadata_set2(metadata, key, tag->string, 0);
            if (tag->sub.nb_elem)
                matroska_convert_tag(s, &tag->sub, metadata, key);
        }
    }

    ff_metadata_conv(metadata, NULL, ff_mkv_metadata_conv);
}

/*
 * Distribute the parsed Tags elements to the metadata dictionaries they
 * target.
 *
 * A Tags element is routed by the first non-zero target UID, checked in
 * the order attachment, chapter, track; its tags go to every matching
 * attachment stream, chapter or track stream. Elements without any
 * target UID go to the global metadata, using the target type string as
 * key prefix.
 */
static void matroska_convert_tags(AVFormatContext *s)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    MatroskaTags *tags = matroska->tags.elem;
    int i, j;

    for (i=0; i < matroska->tags.nb_elem; i++) {
        /* Entries that the header parser skipped (incomplete
         * attachments, unsupported tracks, rejected chapters) have no
         * stream / chapter object attached, so they must not be
         * dereferenced here. */
        if (tags[i].target.attachuid) {
            MatroskaAttachement *attachment = matroska->attachments.elem;
            for (j=0; j<matroska->attachments.nb_elem; j++)
                if (attachment[j].uid == tags[i].target.attachuid
                    && attachment[j].stream)
                    matroska_convert_tag(s, &tags[i].tag,
                                         &attachment[j].stream->metadata, NULL);
        } else if (tags[i].target.chapteruid) {
            MatroskaChapter *chapter = matroska->chapters.elem;
            for (j=0; j<matroska->chapters.nb_elem; j++)
                if (chapter[j].uid == tags[i].target.chapteruid
                    && chapter[j].chapter)
                    matroska_convert_tag(s, &tags[i].tag,
                                         &chapter[j].chapter->metadata, NULL);
        } else if (tags[i].target.trackuid) {
            MatroskaTrack *track = matroska->tracks.elem;
            for (j=0; j<matroska->tracks.nb_elem; j++)
                if (track[j].uid == tags[i].target.trackuid
                    && track[j].stream)
                    matroska_convert_tag(s, &tags[i].tag,
                                         &track[j].stream->metadata, NULL);
        } else {
            matroska_convert_tag(s, &tags[i].tag, &s->metadata,
                                 tags[i].target.type);
        }
    }
}

1109
/*
 * Follow the entries of the SeekHead and parse the elements they point
 * to.
 *
 * Nothing is done for streamed input or when AVFMT_FLAG_IGNIDX is set.
 * Entries that refer to another SeekHead or to a Cluster, and entries
 * whose position is not beyond the current read position, are skipped.
 * The read position, level_up and current_id of the demuxer are
 * restored before returning.
 */
static void matroska_execute_seekhead(MatroskaDemuxContext *matroska)
{
    EbmlList *seekhead_list = &matroska->seekhead;
    uint32_t level_up = matroska->level_up;
    int64_t before_pos = avio_tell(matroska->ctx->pb);
    uint32_t saved_id = matroska->current_id;
    MatroskaLevel level;
    int i;

    // we should not do any seeking in the streaming case
    if (url_is_streamed(matroska->ctx->pb) ||
        (matroska->ctx->flags & AVFMT_FLAG_IGNIDX))
        return;

    for (i=0; i<seekhead_list->nb_elem; i++) {
        /* Re-read the array on every iteration: the ebml_parse() call
         * below may append to EBML lists, which reallocates their
         * element arrays, so a pointer cached outside the loop could
         * dangle. */
        MatroskaSeekhead *seekhead = seekhead_list->elem;
        int64_t offset = seekhead[i].pos + matroska->segment_start;

        if (seekhead[i].pos <= before_pos
            || seekhead[i].id == MATROSKA_ID_SEEKHEAD
            || seekhead[i].id == MATROSKA_ID_CLUSTER)
            continue;

        /* seek */
        if (avio_seek(matroska->ctx->pb, offset, SEEK_SET) != offset)
            continue;

        /* We don't want to lose our seekhead level, so we add
         * a dummy. This is a crude hack. */
        if (matroska->num_levels == EBML_MAX_DEPTH) {
            av_log(matroska->ctx, AV_LOG_INFO,
                   "Max EBML element depth (%d) reached, "
                   "cannot parse further.\n", EBML_MAX_DEPTH);
            break;
        }

        level.start = 0;
        level.length = (uint64_t)-1;
        matroska->levels[matroska->num_levels] = level;
        matroska->num_levels++;
        matroska->current_id = 0;

        ebml_parse(matroska, matroska_segment, matroska);

        /* remove dummy level */
        while (matroska->num_levels) {
            uint64_t length = matroska->levels[--matroska->num_levels].length;
            if (length == (uint64_t)-1)
                break;
        }
    }

    /* seek back */
    avio_seek(matroska->ctx->pb, before_pos, SEEK_SET);
    matroska->level_up = level_up;
    matroska->current_id = saved_id;
}

1167
/*
 * Derive a profile number from a Matroska AAC codec ID string.
 *
 * The string is searched for "MAIN", "LC" and "SSR", in that order; the
 * result is the 1-based position of the first name found (1, 2 or 3),
 * or 4 when none of them occurs.
 */
static int matroska_aac_profile(char *codec_id)
{
    static const char * const aac_profiles[] = { "MAIN", "LC", "SSR" };
    int idx = 0;

    while (idx < FF_ARRAY_ELEMS(aac_profiles) &&
           !strstr(codec_id, aac_profiles[idx]))
        idx++;

    return idx + 1;
}

1178
static int matroska_aac_sri(int samplerate)
1179 1180 1181
{
    int sri;

1182
    for (sri=0; sri<FF_ARRAY_ELEMS(ff_mpeg4audio_sample_rates); sri++)
1183
        if (ff_mpeg4audio_sample_rates[sri] == samplerate)
1184 1185 1186 1187
            break;
    return sri;
}

1188
/*
 * Read the file header: parse the EBML header and the segment, then
 * create streams, attachments, chapters, index entries and metadata
 * from what was parsed.
 *
 * Steps, in order:
 *   1. parse and validate the EBML header (unsupported versions or
 *      sizes give AVERROR_PATCHWELCOME; an unknown doctype only warns)
 *   2. parse the segment and follow its SeekHead
 *   3. for every supported track: apply defaults, undo the content
 *      encoding of the codec private data where it applies, map the
 *      codec ID, build the extradata and fill in a new AVStream
 *   4. export complete attachments as attachment streams
 *   5. export chapters, skipping those that do not start after the
 *      previously exported one
 *   6. convert the cues into index entries
 *   7. convert the tags into metadata
 *
 * Returns 0 on success and a negative error code on failure. The ap
 * parameter is not used.
 */
static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    EbmlList *attachements_list = &matroska->attachments;
    MatroskaAttachement *attachements;
    EbmlList *chapters_list = &matroska->chapters;
    MatroskaChapter *chapters;
    MatroskaTrack *tracks;
    EbmlList *index_list;
    MatroskaIndex *index;
    int index_scale = 1;
    uint64_t max_start = 0;
    Ebml ebml = { 0 };
    AVStream *st;
    int i, j, res;

    matroska->ctx = s;

    /* First read the EBML header. */
    if (ebml_parse(matroska, ebml_syntax, &ebml)
        || ebml.version > EBML_VERSION       || ebml.max_size > sizeof(uint64_t)
        || ebml.id_length > sizeof(uint32_t) || ebml.doctype_version > 2) {
        av_log(matroska->ctx, AV_LOG_ERROR,
               "EBML header using unsupported features\n"
               "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n",
               ebml.version, ebml.doctype, ebml.doctype_version);
        ebml_free(ebml_syntax, &ebml);
        return AVERROR_PATCHWELCOME;
    }
    for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++)
        if (!strcmp(ebml.doctype, matroska_doctypes[i]))
            break;
    if (i >= FF_ARRAY_ELEMS(matroska_doctypes)) {
        av_log(s, AV_LOG_WARNING, "Unknown EBML doctype '%s'\n", ebml.doctype);
    }
    ebml_free(ebml_syntax, &ebml);

    /* The next thing is a segment. */
    if ((res = ebml_parse(matroska, matroska_segments, matroska)) < 0)
        return res;
    matroska_execute_seekhead(matroska);

    if (!matroska->time_scale)
        matroska->time_scale = 1000000;
    if (matroska->duration)
        matroska->ctx->duration = matroska->duration * matroska->time_scale
                                  * 1000 / AV_TIME_BASE;
    av_metadata_set2(&s->metadata, "title", matroska->title, 0);

    tracks = matroska->tracks.elem;
    for (i=0; i < matroska->tracks.nb_elem; i++) {
        MatroskaTrack *track = &tracks[i];
        enum CodecID codec_id = CODEC_ID_NONE;
        /* The encodings of THIS track, not of the first one in the
         * list: matroska_decode_buffer() below works on
         * track->encodings as well. */
        EbmlList *encodings_list = &track->encodings;
        MatroskaTrackEncoding *encodings = encodings_list->elem;
        uint8_t *extradata = NULL;
        int extradata_size = 0;
        int extradata_offset = 0;
        AVIOContext b;

        /* Apply some sanity checks. */
        if (track->type != MATROSKA_TRACK_TYPE_VIDEO &&
            track->type != MATROSKA_TRACK_TYPE_AUDIO &&
            track->type != MATROSKA_TRACK_TYPE_SUBTITLE) {
            av_log(matroska->ctx, AV_LOG_INFO,
                   "Unknown or unsupported track type %"PRIu64"\n",
                   track->type);
            continue;
        }
        if (track->codec_id == NULL)
            continue;

        if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
            /* Only derive a duration from a usable frame rate; dividing
             * by zero would yield a meaningless value. */
            if (!track->default_duration && track->video.frame_rate > 0)
                track->default_duration = 1000000000/track->video.frame_rate;
            if (!track->video.display_width)
                track->video.display_width = track->video.pixel_width;
            if (!track->video.display_height)
                track->video.display_height = track->video.pixel_height;
        } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
            if (!track->audio.out_samplerate)
                track->audio.out_samplerate = track->audio.samplerate;
        }
        if (encodings_list->nb_elem > 1) {
            av_log(matroska->ctx, AV_LOG_ERROR,
                   "Multiple combined encodings not supported\n");
        } else if (encodings_list->nb_elem == 1) {
            if (encodings[0].type ||
                (encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP &&
#if CONFIG_ZLIB
                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_ZLIB &&
#endif
#if CONFIG_BZLIB
                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_BZLIB &&
#endif
                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_LZO)) {
                encodings[0].scope = 0;
                av_log(matroska->ctx, AV_LOG_ERROR,
                       "Unsupported encoding type\n");
            } else if (track->codec_priv.size && encodings[0].scope&2) {
                /* Scope bit 2 is set: the encoding also applies to the
                 * codec private data, so decode it in place. */
                uint8_t *codec_priv = track->codec_priv.data;
                int offset = matroska_decode_buffer(&track->codec_priv.data,
                                                    &track->codec_priv.size,
                                                    track);
                if (offset < 0) {
                    track->codec_priv.data = NULL;
                    track->codec_priv.size = 0;
                    av_log(matroska->ctx, AV_LOG_ERROR,
                           "Failed to decode codec private data\n");
                } else if (offset > 0) {
                    /* Header stripping: prepend the stripped bytes. */
                    track->codec_priv.data = av_malloc(track->codec_priv.size + offset);
                    if (!track->codec_priv.data) {
                        /* Hand the original buffer back to the track so
                         * that it is neither leaked nor lost. */
                        track->codec_priv.data = codec_priv;
                        return AVERROR(ENOMEM);
                    }
                    memcpy(track->codec_priv.data,
                           encodings[0].compression.settings.data, offset);
                    memcpy(track->codec_priv.data+offset, codec_priv,
                           track->codec_priv.size);
                    track->codec_priv.size += offset;
                }
                if (codec_priv != track->codec_priv.data)
                    av_free(codec_priv);
            }
        }

        /* Map the Matroska codec ID string by prefix. */
        for(j=0; ff_mkv_codec_tags[j].id != CODEC_ID_NONE; j++){
            if(!strncmp(ff_mkv_codec_tags[j].str, track->codec_id,
                        strlen(ff_mkv_codec_tags[j].str))){
                codec_id= ff_mkv_codec_tags[j].id;
                break;
            }
        }

        st = track->stream = av_new_stream(s, 0);
        if (st == NULL)
            return AVERROR(ENOMEM);

        if (!strcmp(track->codec_id, "V_MS/VFW/FOURCC")
            && track->codec_priv.size >= 40
            && track->codec_priv.data != NULL) {
            track->ms_compat = 1;
            track->video.fourcc = AV_RL32(track->codec_priv.data + 16);
            codec_id = ff_codec_get_id(ff_codec_bmp_tags, track->video.fourcc);
            extradata_offset = 40;
        } else if (!strcmp(track->codec_id, "A_MS/ACM")
                   && track->codec_priv.size >= 14
                   && track->codec_priv.data != NULL) {
            ffio_init_context(&b, track->codec_priv.data, track->codec_priv.size,
                          URL_RDONLY, NULL, NULL, NULL, NULL);
            ff_get_wav_header(&b, st->codec, track->codec_priv.size);
            codec_id = st->codec->codec_id;
            extradata_offset = FFMIN(track->codec_priv.size, 18);
        } else if (!strcmp(track->codec_id, "V_QUICKTIME")
                   && (track->codec_priv.size >= 86)
                   && (track->codec_priv.data != NULL)) {
            track->video.fourcc = AV_RL32(track->codec_priv.data);
            codec_id=ff_codec_get_id(codec_movvideo_tags, track->video.fourcc);
        } else if (codec_id == CODEC_ID_PCM_S16BE) {
            switch (track->audio.bitdepth) {
            case  8:  codec_id = CODEC_ID_PCM_U8;     break;
            case 24:  codec_id = CODEC_ID_PCM_S24BE;  break;
            case 32:  codec_id = CODEC_ID_PCM_S32BE;  break;
            }
        } else if (codec_id == CODEC_ID_PCM_S16LE) {
            switch (track->audio.bitdepth) {
            case  8:  codec_id = CODEC_ID_PCM_U8;     break;
            case 24:  codec_id = CODEC_ID_PCM_S24LE;  break;
            case 32:  codec_id = CODEC_ID_PCM_S32LE;  break;
            }
        } else if (codec_id==CODEC_ID_PCM_F32LE && track->audio.bitdepth==64) {
            codec_id = CODEC_ID_PCM_F64LE;
        } else if (codec_id == CODEC_ID_AAC && !track->codec_priv.size) {
            /* No codec private data: synthesize the AAC extradata from
             * the profile named in the codec ID and the sample rates. */
            int profile = matroska_aac_profile(track->codec_id);
            int sri = matroska_aac_sri(track->audio.samplerate);
            extradata = av_malloc(5);
            if (extradata == NULL)
                return AVERROR(ENOMEM);
            extradata[0] = (profile << 3) | ((sri&0x0E) >> 1);
            extradata[1] = ((sri&0x01) << 7) | (track->audio.channels<<3);
            if (strstr(track->codec_id, "SBR")) {
                sri = matroska_aac_sri(track->audio.out_samplerate);
                extradata[2] = 0x56;
                extradata[3] = 0xE5;
                extradata[4] = 0x80 | (sri<<3);
                extradata_size = 5;
            } else
                extradata_size = 2;
        } else if (codec_id == CODEC_ID_TTA) {
            /* Build a 30 byte "TTA1" header as extradata. */
            extradata_size = 30;
            extradata = av_mallocz(extradata_size);
            if (extradata == NULL)
                return AVERROR(ENOMEM);
            ffio_init_context(&b, extradata, extradata_size, 1,
                          NULL, NULL, NULL, NULL);
            avio_write(&b, "TTA1", 4);
            avio_wl16(&b, 1);
            avio_wl16(&b, track->audio.channels);
            avio_wl16(&b, track->audio.bitdepth);
            avio_wl32(&b, track->audio.out_samplerate);
            avio_wl32(&b, matroska->ctx->duration * track->audio.out_samplerate);
        } else if (codec_id == CODEC_ID_RV10 || codec_id == CODEC_ID_RV20 ||
                   codec_id == CODEC_ID_RV30 || codec_id == CODEC_ID_RV40) {
            extradata_offset = 26;
        } else if (codec_id == CODEC_ID_RA_144) {
            track->audio.out_samplerate = 8000;
            track->audio.channels = 1;
        } else if (codec_id == CODEC_ID_RA_288 || codec_id == CODEC_ID_COOK ||
                   codec_id == CODEC_ID_ATRAC3 || codec_id == CODEC_ID_SIPR) {
            /* Read the framing parameters from the codec private data. */
            int flavor;
            ffio_init_context(&b, track->codec_priv.data,track->codec_priv.size,
                          0, NULL, NULL, NULL, NULL);
            avio_skip(&b, 22);
            flavor                       = avio_rb16(&b);
            track->audio.coded_framesize = avio_rb32(&b);
            avio_skip(&b, 12);
            track->audio.sub_packet_h    = avio_rb16(&b);
            track->audio.frame_size      = avio_rb16(&b);
            track->audio.sub_packet_size = avio_rb16(&b);
            track->audio.buf = av_malloc(track->audio.frame_size * track->audio.sub_packet_h);
            if (codec_id == CODEC_ID_RA_288) {
                st->codec->block_align = track->audio.coded_framesize;
                track->codec_priv.size = 0;
            } else {
                if (codec_id == CODEC_ID_SIPR && flavor < 4) {
                    const int sipr_bit_rate[4] = { 6504, 8496, 5000, 16000 };
                    track->audio.sub_packet_size = ff_sipr_subpk_size[flavor];
                    st->codec->bit_rate = sipr_bit_rate[flavor];
                }
                st->codec->block_align = track->audio.sub_packet_size;
                extradata_offset = 78;
            }
        }
        /* May become <= 0 for short private data; the extradata copy
         * below is only done for a positive size. */
        track->codec_priv.size -= extradata_offset;

        if (codec_id == CODEC_ID_NONE)
            av_log(matroska->ctx, AV_LOG_INFO,
                   "Unknown/unsupported CodecID %s.\n", track->codec_id);

        if (track->time_scale < 0.01)
            track->time_scale = 1.0;
        av_set_pts_info(st, 64, matroska->time_scale*track->time_scale, 1000*1000*1000); /* 64 bit pts in ns */

        st->codec->codec_id = codec_id;
        st->start_time = 0;
        if (strcmp(track->language, "und"))
            av_metadata_set2(&st->metadata, "language", track->language, 0);
        av_metadata_set2(&st->metadata, "title", track->name, 0);

        if (track->flag_default)
            st->disposition |= AV_DISPOSITION_DEFAULT;
        if (track->flag_forced)
            st->disposition |= AV_DISPOSITION_FORCED;

        if (track->default_duration)
            av_reduce(&st->codec->time_base.num, &st->codec->time_base.den,
                      track->default_duration, 1000000000, 30000);

        if (!st->codec->extradata) {
            if(extradata){
                st->codec->extradata = extradata;
                st->codec->extradata_size = extradata_size;
            } else if(track->codec_priv.data && track->codec_priv.size > 0){
                st->codec->extradata = av_mallocz(track->codec_priv.size +
                                                  FF_INPUT_BUFFER_PADDING_SIZE);
                if(st->codec->extradata == NULL)
                    return AVERROR(ENOMEM);
                st->codec->extradata_size = track->codec_priv.size;
                memcpy(st->codec->extradata,
                       track->codec_priv.data + extradata_offset,
                       track->codec_priv.size);
            }
        }

        if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
            st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
            st->codec->codec_tag  = track->video.fourcc;
            st->codec->width  = track->video.pixel_width;
            st->codec->height = track->video.pixel_height;
            av_reduce(&st->sample_aspect_ratio.num,
                      &st->sample_aspect_ratio.den,
                      st->codec->height * track->video.display_width,
                      st->codec-> width * track->video.display_height,
                      255);
            if (st->codec->codec_id != CODEC_ID_H264)
                st->need_parsing = AVSTREAM_PARSE_HEADERS;
            if (track->default_duration)
                st->avg_frame_rate = av_d2q(1000000000.0/track->default_duration, INT_MAX);
        } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
            st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
            st->codec->sample_rate = track->audio.out_samplerate;
            st->codec->channels = track->audio.channels;
            if (st->codec->codec_id != CODEC_ID_AAC)
                st->need_parsing = AVSTREAM_PARSE_HEADERS;
        } else if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE) {
            st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
        }
    }

    attachements = attachements_list->elem;
    for (j=0; j<attachements_list->nb_elem; j++) {
        if (!(attachements[j].filename && attachements[j].mime &&
              attachements[j].bin.data && attachements[j].bin.size > 0)) {
            av_log(matroska->ctx, AV_LOG_ERROR, "incomplete attachment\n");
        } else {
            AVStream *st = av_new_stream(s, 0);
            if (st == NULL)
                break;
            av_metadata_set2(&st->metadata, "filename",attachements[j].filename, 0);
            st->codec->codec_id = CODEC_ID_NONE;
            st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT;
            st->codec->extradata  = av_malloc(attachements[j].bin.size);
            if(st->codec->extradata == NULL)
                break;
            st->codec->extradata_size = attachements[j].bin.size;
            memcpy(st->codec->extradata, attachements[j].bin.data, attachements[j].bin.size);

            /* Map the MIME type by prefix. */
            for (i=0; ff_mkv_mime_tags[i].id != CODEC_ID_NONE; i++) {
                if (!strncmp(ff_mkv_mime_tags[i].str, attachements[j].mime,
                             strlen(ff_mkv_mime_tags[i].str))) {
                    st->codec->codec_id = ff_mkv_mime_tags[i].id;
                    break;
                }
            }
            attachements[j].stream = st;
        }
    }

    chapters = chapters_list->elem;
    for (i=0; i<chapters_list->nb_elem; i++)
        if (chapters[i].start != AV_NOPTS_VALUE && chapters[i].uid
            && (max_start==0 || chapters[i].start > max_start)) {
            chapters[i].chapter =
            ff_new_chapter(s, chapters[i].uid, (AVRational){1, 1000000000},
                           chapters[i].start, chapters[i].end,
                           chapters[i].title);
            av_metadata_set2(&chapters[i].chapter->metadata,
                             "title", chapters[i].title, 0);
            max_start = chapters[i].start;
        }

    index_list = &matroska->index;
    index = index_list->elem;
    /* An implausibly large first cue time is taken as a sign that the
     * cue times were stored already multiplied by the time scale. */
    if (index_list->nb_elem
        && index[0].time > 100000000000000/matroska->time_scale) {
        av_log(matroska->ctx, AV_LOG_WARNING, "Working around broken index.\n");
        index_scale = matroska->time_scale;
    }
    for (i=0; i<index_list->nb_elem; i++) {
        EbmlList *pos_list = &index[i].pos;
        MatroskaIndexPos *pos = pos_list->elem;
        for (j=0; j<pos_list->nb_elem; j++) {
            MatroskaTrack *track = matroska_find_track_by_num(matroska,
                                                              pos[j].track);
            if (track && track->stream)
                av_add_index_entry(track->stream,
                                   pos[j].pos + matroska->segment_start,
                                   index[i].time/index_scale, 0, 0,
                                   AVINDEX_KEYFRAME);
        }
    }

    matroska_convert_tags(s);

    return 0;
}

1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587
/*
 * Put one packet in an application-supplied AVPacket struct.
 * Returns 0 on success or -1 on failure.
 *
 * The oldest queued packet is copied into pkt and its queue entry is
 * freed; the remaining entries are moved up by one. -1 is returned only
 * when the queue is empty.
 */
static int matroska_deliver_packet(MatroskaDemuxContext *matroska,
                                   AVPacket *pkt)
{
    if (matroska->num_packets > 0) {
        memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
        av_free(matroska->packets[0]);
        if (matroska->num_packets > 1) {
            void *newpackets;
            memmove(&matroska->packets[0], &matroska->packets[1],
                    (matroska->num_packets - 1) * sizeof(AVPacket *));
            /* Shrinking is only an optimisation: if it fails, keep using
             * the old (larger) array rather than losing the queue. */
            newpackets =
                av_realloc(matroska->packets, (matroska->num_packets - 1) *
                           sizeof(AVPacket *));
            if (newpackets)
                matroska->packets = newpackets;
        } else {
            av_freep(&matroska->packets);
        }
        matroska->num_packets--;
        return 0;
    }

    return -1;
}

/*
 * Drop every packet still waiting in the internal queue and release the
 * queue array itself. Does nothing when no queue is allocated.
 */
static void matroska_clear_queue(MatroskaDemuxContext *matroska)
{
    int idx;

    if (!matroska->packets)
        return;

    for (idx = 0; idx < matroska->num_packets; idx++) {
        AVPacket *queued = matroska->packets[idx];

        av_free_packet(queued);   /* release what the packet holds */
        av_free(queued);          /* release the AVPacket container */
    }

    av_freep(&matroska->packets);
    matroska->num_packets = 0;
}

1593 1594
/*
 * Parse the payload of one block and append the resulting packets to
 * matroska->packets.
 *
 * data/size     block payload, starting with the EBML-coded track number
 * pos           value stored in pkt->pos of every packet produced
 * cluster_time  timecode of the enclosing cluster, (uint64_t)-1 if unknown
 * duration      block duration; when AV_NOPTS_VALUE the track's
 *               default_duration / time_scale is used instead
 * is_keyframe   key frame flag for the block, or -1 to take it from bit
 *               0x80 of the block flags byte
 * cluster_pos   position recorded in the index entry added for key frames
 *
 * Returns 0 when the block was queued or deliberately skipped (unparsable
 * header, unknown or discarded stream, waiting for a key frame after a
 * seek) and a negative value on lacing or allocation errors.
 */
static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
                                int size, int64_t pos, uint64_t cluster_time,
                                uint64_t duration, int is_keyframe,
                                int64_t cluster_pos)
{
    uint64_t timecode = AV_NOPTS_VALUE;
    MatroskaTrack *track;
    int res = 0;
    AVStream *st;
    AVPacket *pkt;
    int16_t block_time;
    uint32_t *lace_size = NULL;
    int n, flags, laces = 0;
    uint64_t num;

    if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0) {
        av_log(matroska->ctx, AV_LOG_ERROR, "EBML block data error\n");
        return res;
    }
    data += n;
    size -= n;

    track = matroska_find_track_by_num(matroska, num);
    /* 3 more header bytes follow: 16-bit relative timecode + flags. */
    if (size <= 3 || !track || !track->stream) {
        av_log(matroska->ctx, AV_LOG_INFO,
               "Invalid stream %"PRIu64" or size %u\n", num, size);
        return res;
    }
    st = track->stream;
    if (st->discard >= AVDISCARD_ALL)
        return res;
    if (duration == AV_NOPTS_VALUE)
        duration = track->default_duration / matroska->time_scale;

    block_time = AV_RB16(data);
    data += 2;
    flags = *data++;
    size -= 3;
    if (is_keyframe == -1)
        is_keyframe = flags & 0x80 ? AV_PKT_FLAG_KEY : 0;

    if (cluster_time != (uint64_t)-1
        && (block_time >= 0 || cluster_time >= -block_time)) {
        timecode = cluster_time + block_time;
        if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE
            && timecode < track->end_timecode)
            is_keyframe = 0;  /* overlapping subtitles are not key frame */
        if (is_keyframe)
            av_add_index_entry(st, cluster_pos, timecode, 0,0,AVINDEX_KEYFRAME);
        track->end_timecode = FFMAX(track->end_timecode, timecode+duration);
    }

    /* After a seek, drop everything until the requested key frame. */
    if (matroska->skip_to_keyframe && track->type != MATROSKA_TRACK_TYPE_SUBTITLE) {
        if (!is_keyframe || timecode < matroska->skip_to_timecode)
            return res;
        matroska->skip_to_keyframe = 0;
    }

    /* Bits 0x06 of the flags byte select the lacing scheme. */
    switch ((flags & 0x06) >> 1) {
        case 0x0: /* no lacing */
            laces = 1;
            lace_size = av_mallocz(sizeof(*lace_size));
            if (!lace_size)
                return AVERROR(ENOMEM);
            lace_size[0] = size;
            break;

        case 0x1: /* Xiph lacing */
        case 0x2: /* fixed-size lacing */
        case 0x3: /* EBML lacing */
            assert(size>0); // size <=3 is checked before size-=3 above
            laces = (*data) + 1;
            data += 1;
            size -= 1;
            lace_size = av_mallocz(laces * sizeof(*lace_size));
            if (!lace_size)
                return AVERROR(ENOMEM);

            switch ((flags & 0x06) >> 1) {
                case 0x1: /* Xiph lacing */ {
                    /* Every size but the last is the sum of the bytes up to
                     * and including the first one that is not 0xff. */
                    uint8_t temp;
                    uint32_t total = 0;
                    for (n = 0; res == 0 && n < laces - 1; n++) {
                        while (1) {
                            if (size == 0) {
                                res = -1;
                                break;
                            }
                            temp = *data;
                            lace_size[n] += temp;
                            data += 1;
                            size -= 1;
                            if (temp != 0xff)
                                break;
                        }
                        total += lace_size[n];
                    }
                    /* The last lace takes whatever is left. */
                    lace_size[n] = size - total;
                    break;
                }

                case 0x2: /* fixed-size lacing */
                    for (n = 0; n < laces; n++)
                        lace_size[n] = size / laces;
                    break;

                case 0x3: /* EBML lacing */ {
                    /* First size is absolute, the following ones are signed
                     * deltas against the previous size. */
                    uint32_t total;
                    n = matroska_ebmlnum_uint(matroska, data, size, &num);
                    if (n < 0) {
                        av_log(matroska->ctx, AV_LOG_INFO,
                               "EBML block data error\n");
                        /* Do not build packets from unparsed lace sizes. */
                        res = n;
                        break;
                    }
                    data += n;
                    size -= n;
                    total = lace_size[0] = num;
                    for (n = 1; res == 0 && n < laces - 1; n++) {
                        int64_t snum;
                        int r;
                        r = matroska_ebmlnum_sint(matroska, data, size, &snum);
                        if (r < 0) {
                            av_log(matroska->ctx, AV_LOG_INFO,
                                   "EBML block data error\n");
                            res = r;
                            break;
                        }
                        data += r;
                        size -= r;
                        lace_size[n] = lace_size[n - 1] + snum;
                        total += lace_size[n];
                    }
                    /* Index the last element explicitly: with a single lace
                     * the loop counter ends at 1, one past the array. */
                    if (res == 0)
                        lace_size[laces - 1] = size - total;
                    break;
                }
            }
            break;
    }

    if (res == 0) {
        for (n = 0; n < laces; n++) {
            if ((st->codec->codec_id == CODEC_ID_RA_288 ||
                 st->codec->codec_id == CODEC_ID_COOK ||
                 st->codec->codec_id == CODEC_ID_SIPR ||
                 st->codec->codec_id == CODEC_ID_ATRAC3) &&
                 st->codec->block_align && track->audio.sub_packet_size) {
                /* These codecs are gathered into track->audio.buf over h
                 * blocks and then re-emitted as packets of block_align
                 * bytes. */
                /* NOTE(review): the copies below are sized from the track
                 * header (cfs, w, sps) and are not checked against the
                 * remaining block size - confirm the buffer/input bounds. */
                int a = st->codec->block_align;
                int sps = track->audio.sub_packet_size;
                int cfs = track->audio.coded_framesize;
                int h = track->audio.sub_packet_h;
                int y = track->audio.sub_packet_cnt;
                int w = track->audio.frame_size;
                int x;

                if (!track->audio.pkt_cnt) {
                    if (st->codec->codec_id == CODEC_ID_RA_288)
                        for (x=0; x<h/2; x++)
                            memcpy(track->audio.buf+x*2*w+y*cfs,
                                   data+x*cfs, cfs);
                    else if (st->codec->codec_id == CODEC_ID_SIPR)
                        memcpy(track->audio.buf + y*w, data, w);
                    else
                        for (x=0; x<w/sps; x++)
                            memcpy(track->audio.buf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), data+x*sps, sps);

                    if (++track->audio.sub_packet_cnt >= h) {
                        if (st->codec->codec_id == CODEC_ID_SIPR)
                            ff_rm_reorder_sipr_data(track->audio.buf, h, w);
                        track->audio.sub_packet_cnt = 0;
                        track->audio.pkt_cnt = h*w / a;
                    }
                }
                while (track->audio.pkt_cnt) {
                    pkt = av_mallocz(sizeof(AVPacket));
                    if (!pkt) {
                        res = AVERROR(ENOMEM);
                        break;
                    }
                    if (av_new_packet(pkt, a) < 0) {
                        av_free(pkt);
                        res = AVERROR(ENOMEM);
                        break;
                    }
                    memcpy(pkt->data, track->audio.buf
                           + a * (h*w / a - track->audio.pkt_cnt--), a);
                    pkt->pos = pos;
                    pkt->stream_index = st->index;
                    dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
                }
                if (res < 0)
                    break;
            } else {
                MatroskaTrackEncoding *encodings = track->encodings.elem;
                int offset = 0, pkt_size = lace_size[n];
                uint8_t *pkt_data = data;

                /* pkt_size < 0 catches lace sizes that do not fit an int. */
                if (pkt_size < 0 || pkt_size > size) {
                    av_log(matroska->ctx, AV_LOG_ERROR, "Invalid packet size\n");
                    break;
                }

                if (encodings && encodings->scope & 1) {
                    offset = matroska_decode_buffer(&pkt_data,&pkt_size, track);
                    /* Skip only this lace, but still step over its bytes so
                     * the following laces are read from the right offset. */
                    if (offset < 0)
                        goto next_lace;
                }

                pkt = av_mallocz(sizeof(AVPacket));
                if (!pkt) {
                    if (pkt_data != data)
                        av_free(pkt_data);
                    res = AVERROR(ENOMEM);
                    break;
                }
                /* XXX: prevent data copy... */
                if (av_new_packet(pkt, pkt_size+offset) < 0) {
                    av_free(pkt);
                    /* pkt_data differs from data when the decoder handed
                     * back its own buffer; do not leak it on this path. */
                    if (pkt_data != data)
                        av_free(pkt_data);
                    res = AVERROR(ENOMEM);
                    break;
                }
                if (offset)
                    memcpy (pkt->data, encodings->compression.settings.data, offset);
                memcpy (pkt->data+offset, pkt_data, pkt_size);

                if (pkt_data != data)
                    av_free(pkt_data);

                if (n == 0)
                    pkt->flags = is_keyframe;
                pkt->stream_index = st->index;

                if (track->ms_compat)
                    pkt->dts = timecode;
                else
                    pkt->pts = timecode;
                pkt->pos = pos;
                if (st->codec->codec_id == CODEC_ID_TEXT)
                    pkt->convergence_duration = duration;
                else if (track->type != MATROSKA_TRACK_TYPE_SUBTITLE)
                    pkt->duration = duration;

                if (st->codec->codec_id == CODEC_ID_SSA)
                    matroska_fix_ass_packet(matroska, pkt, duration);

                /* SSA packets of one stream sharing a timecode are merged
                 * into the previously queued packet. */
                if (matroska->prev_pkt &&
                    timecode != AV_NOPTS_VALUE &&
                    matroska->prev_pkt->pts == timecode &&
                    matroska->prev_pkt->stream_index == st->index &&
                    st->codec->codec_id == CODEC_ID_SSA)
                    matroska_merge_packets(matroska->prev_pkt, pkt);
                else {
                    dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
                    matroska->prev_pkt = pkt;
                }
            }

next_lace:
            if (timecode != AV_NOPTS_VALUE)
                timecode = duration ? timecode + duration : AV_NOPTS_VALUE;
            data += lace_size[n];
            size -= lace_size[n];
        }
    }

    av_free(lace_size);
    return res;
}

1839
/*
 * Parse the next cluster from the input and hand each of its non-empty
 * blocks to matroska_parse_block().
 *
 * Returns the result of the last block parsed, or the result of
 * ebml_parse() when no block was parsed. A negative result also marks the
 * demuxer as done.
 */
static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
{
    MatroskaCluster cluster = { 0 };
    MatroskaBlock *block_arr;
    EbmlList *block_list;
    int64_t cluster_pos = avio_tell(matroska->ctx->pb);
    int idx, ret;

    matroska->prev_pkt = NULL;
    if (matroska->current_id)
        cluster_pos -= 4;  /* sizeof the ID which was already read */

    ret = ebml_parse(matroska, matroska_clusters, &cluster);

    block_list = &cluster.blocks;
    block_arr  = block_list->elem;
    for (idx = 0; idx < block_list->nb_elem; idx++) {
        MatroskaBlock *blk = &block_arr[idx];
        int is_keyframe;

        if (blk->bin.size <= 0 || !blk->bin.data)
            continue;

        /* -1 lets matroska_parse_block() read the flag from the block. */
        if (blk->non_simple)
            is_keyframe = !blk->reference;
        else
            is_keyframe = -1;

        ret = matroska_parse_block(matroska,
                                   blk->bin.data, blk->bin.size,
                                   blk->bin.pos,  cluster.timecode,
                                   blk->duration, is_keyframe,
                                   cluster_pos);
    }

    ebml_free(matroska_cluster, &cluster);
    if (ret < 0)
        matroska->done = 1;
    return ret;
}

1866
/*
 * Deliver the next queued packet, parsing further clusters while the
 * queue is empty. Returns 0 on success and AVERROR_EOF once the queue is
 * empty and the demuxer is marked as done.
 */
static int matroska_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    MatroskaDemuxContext *matroska = s->priv_data;

    for (;;) {
        if (!matroska_deliver_packet(matroska, pkt))
            return 0;
        if (matroska->done)
            return AVERROR_EOF;
        /* Result ignored: a failure sets matroska->done, checked above. */
        matroska_parse_cluster(matroska);
    }
}

1879 1880
/*
 * Seek to the index entry of stream stream_index selected by
 * av_index_search_timestamp() for timestamp/flags.
 *
 * When the index does not reach the requested timestamp yet, clusters are
 * parsed from the last indexed position onwards until it does or parsing
 * fails. The packet queue is emptied in every case.
 *
 * Always returns 0, including when no suitable index entry exists (the
 * input position is then left wherever the search stopped).
 */
static int matroska_read_seek(AVFormatContext *s, int stream_index,
                              int64_t timestamp, int flags)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    MatroskaTrack *tracks = matroska->tracks.elem;
    AVStream *st = s->streams[stream_index];
    int i, index, index_sub, index_min;

    if (!st->nb_index_entries)
        return 0;
    timestamp = FFMAX(timestamp, st->index_entries[0].timestamp);

    if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
        /* Extend the index by parsing from its last known entry. */
        avio_seek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET);
        while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
            matroska_clear_queue(matroska);
            if (matroska_parse_cluster(matroska) < 0)
                break;
        }
    }

    matroska_clear_queue(matroska);
    if (index < 0)
        return 0;

    /* Possibly start reading a bit earlier so that a subtitle which begins
     * shortly before the seek target is still picked up. */
    index_min = index;
    for (i=0; i < matroska->tracks.nb_elem; i++) {
        tracks[i].end_timecode = 0;
        /* The former "!discard != AVDISCARD_ALL" compared a 0/1 value with
         * the constant and so never filtered out discarded streams. */
        if (tracks[i].type == MATROSKA_TRACK_TYPE_SUBTITLE
            && tracks[i].stream
            && tracks[i].stream->discard != AVDISCARD_ALL) {
            index_sub = av_index_search_timestamp(tracks[i].stream, st->index_entries[index].timestamp, AVSEEK_FLAG_BACKWARD);
            /* NOTE(review): index_sub comes from the subtitle stream's
             * index but is applied to st->index_entries; the bound check
             * only prevents an out-of-range read - confirm which table
             * was intended. */
            if (index_sub >= 0
                && index_sub < st->nb_index_entries
                && st->index_entries[index_sub].pos < st->index_entries[index_min].pos
                && st->index_entries[index].timestamp - st->index_entries[index_sub].timestamp < 30000000000/matroska->time_scale)
                index_min = index_sub;
        }
    }

    avio_seek(s->pb, st->index_entries[index_min].pos, SEEK_SET);
    matroska->skip_to_keyframe = !(flags & AVSEEK_FLAG_ANY);
    matroska->skip_to_timecode = st->index_entries[index].timestamp;
    matroska->done = 0;
    av_update_cur_dts(s, st, st->index_entries[index].timestamp);
    return 0;
}

1925
/*
 * Release demuxer state: the packet queue, the per-track audio buffers
 * and everything owned by the parsed segment. Always returns 0.
 */
static int matroska_read_close(AVFormatContext *s)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    MatroskaTrack *track_arr = matroska->tracks.elem;
    int idx;

    matroska_clear_queue(matroska);

    for (idx = 0; idx < matroska->tracks.nb_elem; idx++) {
        MatroskaTrack *trk = &track_arr[idx];

        if (trk->type != MATROSKA_TRACK_TYPE_AUDIO)
            continue;
        av_free(trk->audio.buf);
    }

    ebml_free(matroska_segment, matroska);

    return 0;
}

1941
AVInputFormat ff_matroska_demuxer = {
1942 1943
    "matroska,webm",
    NULL_IF_CONFIG_SMALL("Matroska/WebM file format"),
1944 1945 1946 1947 1948 1949 1950
    sizeof(MatroskaDemuxContext),
    matroska_probe,
    matroska_read_header,
    matroska_read_packet,
    matroska_read_close,
    matroska_read_seek,
};