matroskadec.c 68.5 KB
Newer Older
1
/*
 * Matroska file demuxer
 * Copyright (c) 2003-2008 The FFmpeg Project
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * Matroska file demuxer
 * by Ronald Bultje <rbultje@ronald.bitfreak.net>
 * with a little help from Moritz Bunkus <moritz@bunkus.org>
 * totally reworked by Aurelien Jacobs <aurel@gnuage.org>
 * Specs available on the Matroska project page: http://www.matroska.org/.
 */

31
#include <stdio.h>
32
#include "avformat.h"
33
#include "internal.h"
34
/* For ff_codec_get_id(). */
35
#include "riff.h"
36
#include "isom.h"
37
#include "rm.h"
38
#include "matroska.h"
39
#include "libavcodec/mpeg4audio.h"
40
#include "libavutil/intfloat_readwrite.h"
41
#include "libavutil/intreadwrite.h"
42
#include "libavutil/avstring.h"
43
#include "libavutil/lzo.h"
44
#if CONFIG_ZLIB
45 46
#include <zlib.h>
#endif
47
#if CONFIG_BZLIB
48 49
#include <bzlib.h>
#endif
50

51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
/*
 * How ebml_parse_elem() handles an element matched by an EbmlSyntax entry.
 */
typedef enum {
    EBML_NONE,   /* payload is skipped (seek past `length` bytes) */
    EBML_UINT,   /* read with ebml_read_uint() into a uint64_t */
    EBML_FLOAT,  /* read with ebml_read_float() into a double */
    EBML_STR,    /* read with ebml_read_ascii() into a char * */
    EBML_UTF8,   /* read exactly like EBML_STR */
    EBML_BIN,    /* read with ebml_read_binary() into an EbmlBin */
    EBML_NEST,   /* master element: children parsed with the table in def.n */
    EBML_PASS,   /* no length is read; the same id is re-dispatched through def.n */
    EBML_STOP,   /* nothing is consumed; parsing returns 1 */
} EbmlType;

/*
 * One row of a parsing table. Tables are arrays terminated by an entry
 * whose id is 0. Note the typedef itself is const-qualified.
 */
typedef const struct EbmlSyntax {
    uint32_t id;            /* element id this row matches */
    EbmlType type;          /* how the payload is handled */
    int list_elem_size;     /* if non-zero, data_offset designates an EbmlList and
                             * a zeroed element of this size is appended per match */
    int data_offset;        /* byte offset of the destination inside the parent struct */
    union {
        uint64_t    u;      /* default for EBML_UINT */
        double      f;      /* default for EBML_FLOAT */
        const char *s;      /* default for EBML_STR / EBML_UTF8 (duplicated with av_strdup) */
        const struct EbmlSyntax *n; /* child table for EBML_NEST / EBML_PASS */
    } def;
} EbmlSyntax;

/* Growable array of parsed elements; grown one element at a time by ebml_parse_elem(). */
typedef struct {
    int nb_elem;  /* number of elements currently stored */
    void *elem;   /* contiguous storage; element size comes from the syntax row */
} EbmlList;

/* Binary payload as filled in by ebml_read_binary(). */
typedef struct {
    int      size;  /* payload length in bytes */
    uint8_t *data;  /* heap buffer holding the payload */
    int64_t  pos;   /* stream position (url_ftell) where the payload started */
} EbmlBin;

87 88 89 90 91 92 93 94
/* Destination for the EBML header fields listed in ebml_header[]. */
typedef struct {
    uint64_t version;          /* EBMLReadVersion */
    uint64_t max_size;         /* EBMLMaxSizeLength */
    uint64_t id_length;        /* EBMLMaxIDLength */
    char    *doctype;          /* DocType string (heap allocated) */
    uint64_t doctype_version;  /* DocTypeReadVersion */
} Ebml;

95 96 97 98
/* Destination for matroska_track_encoding_compression[]. */
typedef struct {
    uint64_t algo;      /* ContentCompAlgo value */
    EbmlBin  settings;  /* ContentCompSettings payload */
} MatroskaTrackCompression;
99

100 101 102 103 104
/* One ContentEncoding entry of a track; destination for matroska_track_encoding[]. */
typedef struct {
    uint64_t scope;                        /* ContentEncodingScope */
    uint64_t type;                         /* ContentEncodingType */
    MatroskaTrackCompression compression;  /* nested ContentCompression */
} MatroskaTrackEncoding;
105

106 107 108 109 110 111 112 113
/* Video-specific track settings; destination for matroska_track_video[]. */
typedef struct {
    double   frame_rate;
    uint64_t display_width;
    uint64_t display_height;
    uint64_t pixel_width;
    uint64_t pixel_height;
    uint64_t fourcc;          /* filled from the VideoColorSpace element */
} MatroskaTrackVideo;
114

115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
/*
 * Audio-specific track settings. The first four fields are the destination
 * for matroska_track_audio[]; the rest have no syntax-table entry.
 */
typedef struct {
    double   samplerate;
    double   out_samplerate;
    uint64_t bitdepth;
    uint64_t channels;

    /* real audio header (extracted from extradata) */
    int      coded_framesize;
    int      sub_packet_h;
    int      frame_size;
    int      sub_packet_size;
    int      sub_packet_cnt;
    int      pkt_cnt;
    uint8_t *buf;
} MatroskaTrackAudio;
130

131 132
typedef struct {
    uint64_t num;
133
    uint64_t uid;
134
    uint64_t type;
135
    char    *name;
136 137 138
    char    *codec_id;
    EbmlBin  codec_priv;
    char    *language;
139
    double time_scale;
140
    uint64_t default_duration;
141
    uint64_t flag_default;
142
    uint64_t flag_forced;
143 144 145
    MatroskaTrackVideo video;
    MatroskaTrackAudio audio;
    EbmlList encodings;
146 147

    AVStream *stream;
148
    int64_t end_timecode;
149
    int ms_compat;
150 151
} MatroskaTrack;

152
typedef struct {
153
    uint64_t uid;
154 155 156
    char *filename;
    char *mime;
    EbmlBin bin;
157 158

    AVStream *stream;
159 160
} MatroskaAttachement;

161 162 163 164 165
typedef struct {
    uint64_t start;
    uint64_t end;
    uint64_t uid;
    char    *title;
166 167

    AVChapter *chapter;
168 169
} MatroskaChapter;

170 171 172 173 174 175 176 177 178 179
/* One CueTrackPositions entry; destination for matroska_index_pos[]. */
typedef struct {
    uint64_t track;  /* CueTrack */
    uint64_t pos;    /* CueClusterPosition */
} MatroskaIndexPos;

/* One cue point; destination for matroska_index_entry[]. */
typedef struct {
    uint64_t time;  /* CueTime */
    EbmlList pos;   /* list of MatroskaIndexPos */
} MatroskaIndex;

180 181 182
typedef struct {
    char *name;
    char *string;
183 184
    char *lang;
    uint64_t def;
185 186 187
    EbmlList sub;
} MatroskaTag;

188 189 190 191 192 193 194 195 196 197 198 199 200
/* Targets of a Tag element; destination for matroska_tagtargets[]. */
typedef struct {
    char    *type;        /* TargetType string (heap allocated) */
    uint64_t typevalue;   /* TargetTypeValue */
    uint64_t trackuid;    /* TagTrackUID */
    uint64_t chapteruid;  /* TagChapterUID */
    uint64_t attachuid;   /* TagAttachmentUID */
} MatroskaTagTarget;

/* One Tag element; destination for matroska_tag[]. */
typedef struct {
    MatroskaTagTarget target;
    EbmlList tag;   /* list of MatroskaTag */
} MatroskaTags;

201 202 203 204 205
/* One SeekHead entry; destination for matroska_seekhead_entry[]. */
typedef struct {
    uint64_t id;   /* SeekID */
    uint64_t pos;  /* SeekPosition */
} MatroskaSeekhead;

206
/* One open master element, as recorded by ebml_read_master(). */
typedef struct {
    uint64_t start;   /* stream position where the element's payload begins */
    uint64_t length;  /* declared payload length in bytes */
} MatroskaLevel;

211
typedef struct {
212 213
    AVFormatContext *ctx;

D
Diego Biurrun 已提交
214
    /* EBML stuff */
215 216 217
    int num_levels;
    MatroskaLevel levels[EBML_MAX_DEPTH];
    int level_up;
218
    uint32_t current_id;
219

220 221 222
    uint64_t time_scale;
    double   duration;
    char    *title;
223
    EbmlList tracks;
224
    EbmlList attachments;
225
    EbmlList chapters;
226
    EbmlList index;
227
    EbmlList tags;
228
    EbmlList seekhead;
229 230

    /* byte position of the segment inside the stream */
231
    int64_t segment_start;
232

D
Diego Biurrun 已提交
233
    /* the packet queue */
234 235
    AVPacket **packets;
    int num_packets;
236
    AVPacket *prev_pkt;
237

238
    int done;
239 240 241

    /* What to skip before effectively reading a packet. */
    int skip_to_keyframe;
242
    uint64_t skip_to_timecode;
243 244
} MatroskaDemuxContext;

245 246 247
typedef struct {
    uint64_t duration;
    int64_t  reference;
248
    uint64_t non_simple;
249 250 251 252 253 254 255 256
    EbmlBin  bin;
} MatroskaBlock;

/* One cluster; destination for matroska_cluster[]. */
typedef struct {
    uint64_t timecode;  /* ClusterTimecode */
    EbmlList blocks;    /* list of MatroskaBlock */
} MatroskaCluster;

257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
/* Children of the EBML header element; values are stored into an Ebml struct. */
static EbmlSyntax ebml_header[] = {
    { EBML_ID_EBMLREADVERSION,        EBML_UINT, 0, offsetof(Ebml,version), {.u=EBML_VERSION} },
    { EBML_ID_EBMLMAXSIZELENGTH,      EBML_UINT, 0, offsetof(Ebml,max_size), {.u=8} },
    { EBML_ID_EBMLMAXIDLENGTH,        EBML_UINT, 0, offsetof(Ebml,id_length), {.u=4} },
    { EBML_ID_DOCTYPE,                EBML_STR,  0, offsetof(Ebml,doctype), {.s="(none)"} },
    { EBML_ID_DOCTYPEREADVERSION,     EBML_UINT, 0, offsetof(Ebml,doctype_version), {.u=1} },
    { EBML_ID_EBMLVERSION,            EBML_NONE },
    { EBML_ID_DOCTYPEVERSION,         EBML_NONE },
    { 0 }
};

/* Top-level table that recognizes the EBML header master element. */
static EbmlSyntax ebml_syntax[] = {
    { EBML_ID_HEADER,                 EBML_NEST, 0, 0, {.n=ebml_header} },
    { 0 }
};

273 274 275 276 277 278 279 280 281 282 283
/* Children of the Info element; values are stored into MatroskaDemuxContext. */
static EbmlSyntax matroska_info[] = {
    { MATROSKA_ID_TIMECODESCALE,      EBML_UINT,  0, offsetof(MatroskaDemuxContext,time_scale), {.u=1000000} },
    { MATROSKA_ID_DURATION,           EBML_FLOAT, 0, offsetof(MatroskaDemuxContext,duration) },
    { MATROSKA_ID_TITLE,              EBML_UTF8,  0, offsetof(MatroskaDemuxContext,title) },
    { MATROSKA_ID_WRITINGAPP,         EBML_NONE },
    { MATROSKA_ID_MUXINGAPP,          EBML_NONE },
    { MATROSKA_ID_DATEUTC,            EBML_NONE },
    { MATROSKA_ID_SEGMENTUID,         EBML_NONE },
    { 0 }
};

284 285 286 287 288 289 290
/*
 * Children of a track's Video element; values are stored into
 * MatroskaTrackVideo. EBML_NONE rows are recognized but skipped.
 */
static EbmlSyntax matroska_track_video[] = {
    { MATROSKA_ID_VIDEOFRAMERATE,     EBML_FLOAT,0, offsetof(MatroskaTrackVideo,frame_rate) },
    { MATROSKA_ID_VIDEODISPLAYWIDTH,  EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_width) },
    { MATROSKA_ID_VIDEODISPLAYHEIGHT, EBML_UINT, 0, offsetof(MatroskaTrackVideo,display_height) },
    { MATROSKA_ID_VIDEOPIXELWIDTH,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_width) },
    { MATROSKA_ID_VIDEOPIXELHEIGHT,   EBML_UINT, 0, offsetof(MatroskaTrackVideo,pixel_height) },
    { MATROSKA_ID_VIDEOCOLORSPACE,    EBML_UINT, 0, offsetof(MatroskaTrackVideo,fourcc) },
    { MATROSKA_ID_VIDEOPIXELCROPB,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPT,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPL,    EBML_NONE },
    { MATROSKA_ID_VIDEOPIXELCROPR,    EBML_NONE },
    { MATROSKA_ID_VIDEODISPLAYUNIT,   EBML_NONE },
    { MATROSKA_ID_VIDEOFLAGINTERLACED,EBML_NONE },
    { MATROSKA_ID_VIDEOSTEREOMODE,    EBML_NONE },
    { MATROSKA_ID_VIDEOASPECTRATIO,   EBML_NONE },
    { 0 }
};

/* Children of a track's Audio element; values are stored into MatroskaTrackAudio. */
static EbmlSyntax matroska_track_audio[] = {
    { MATROSKA_ID_AUDIOSAMPLINGFREQ,  EBML_FLOAT,0, offsetof(MatroskaTrackAudio,samplerate), {.f=8000.0} },
    { MATROSKA_ID_AUDIOOUTSAMPLINGFREQ,EBML_FLOAT,0,offsetof(MatroskaTrackAudio,out_samplerate) },
    { MATROSKA_ID_AUDIOBITDEPTH,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,bitdepth) },
    { MATROSKA_ID_AUDIOCHANNELS,      EBML_UINT, 0, offsetof(MatroskaTrackAudio,channels), {.u=1} },
    { 0 }
};

/* Children of ContentCompression; values are stored into MatroskaTrackCompression. */
static EbmlSyntax matroska_track_encoding_compression[] = {
    { MATROSKA_ID_ENCODINGCOMPALGO,   EBML_UINT, 0, offsetof(MatroskaTrackCompression,algo), {.u=0} },
    { MATROSKA_ID_ENCODINGCOMPSETTINGS,EBML_BIN, 0, offsetof(MatroskaTrackCompression,settings) },
    { 0 }
};

/*
 * Children of ContentEncoding; values are stored into MatroskaTrackEncoding.
 * ContentEncodingOrder is recognized but skipped.
 */
static EbmlSyntax matroska_track_encoding[] = {
    { MATROSKA_ID_ENCODINGSCOPE,      EBML_UINT, 0, offsetof(MatroskaTrackEncoding,scope), {.u=1} },
    { MATROSKA_ID_ENCODINGTYPE,       EBML_UINT, 0, offsetof(MatroskaTrackEncoding,type), {.u=0} },
    { MATROSKA_ID_ENCODINGCOMPRESSION,EBML_NEST, 0, offsetof(MatroskaTrackEncoding,compression), {.n=matroska_track_encoding_compression} },
    { MATROSKA_ID_ENCODINGORDER,      EBML_NONE },
    { 0 }
};

/* Children of ContentEncodings; each ContentEncoding is appended to MatroskaTrack.encodings. */
static EbmlSyntax matroska_track_encodings[] = {
    { MATROSKA_ID_TRACKCONTENTENCODING, EBML_NEST, sizeof(MatroskaTrackEncoding), offsetof(MatroskaTrack,encodings), {.n=matroska_track_encoding} },
    { 0 }
};

static EbmlSyntax matroska_track[] = {
    { MATROSKA_ID_TRACKNUMBER,          EBML_UINT, 0, offsetof(MatroskaTrack,num) },
331
    { MATROSKA_ID_TRACKNAME,            EBML_UTF8, 0, offsetof(MatroskaTrack,name) },
332
    { MATROSKA_ID_TRACKUID,             EBML_UINT, 0, offsetof(MatroskaTrack,uid) },
333 334 335 336 337 338 339
    { MATROSKA_ID_TRACKTYPE,            EBML_UINT, 0, offsetof(MatroskaTrack,type) },
    { MATROSKA_ID_CODECID,              EBML_STR,  0, offsetof(MatroskaTrack,codec_id) },
    { MATROSKA_ID_CODECPRIVATE,         EBML_BIN,  0, offsetof(MatroskaTrack,codec_priv) },
    { MATROSKA_ID_TRACKLANGUAGE,        EBML_UTF8, 0, offsetof(MatroskaTrack,language), {.s="eng"} },
    { MATROSKA_ID_TRACKDEFAULTDURATION, EBML_UINT, 0, offsetof(MatroskaTrack,default_duration) },
    { MATROSKA_ID_TRACKTIMECODESCALE,   EBML_FLOAT,0, offsetof(MatroskaTrack,time_scale), {.f=1.0} },
    { MATROSKA_ID_TRACKFLAGDEFAULT,     EBML_UINT, 0, offsetof(MatroskaTrack,flag_default), {.u=1} },
340
    { MATROSKA_ID_TRACKFLAGFORCED,      EBML_UINT, 0, offsetof(MatroskaTrack,flag_forced), {.u=0} },
341 342 343 344 345 346 347 348 349 350 351
    { MATROSKA_ID_TRACKVIDEO,           EBML_NEST, 0, offsetof(MatroskaTrack,video), {.n=matroska_track_video} },
    { MATROSKA_ID_TRACKAUDIO,           EBML_NEST, 0, offsetof(MatroskaTrack,audio), {.n=matroska_track_audio} },
    { MATROSKA_ID_TRACKCONTENTENCODINGS,EBML_NEST, 0, 0, {.n=matroska_track_encodings} },
    { MATROSKA_ID_TRACKFLAGENABLED,     EBML_NONE },
    { MATROSKA_ID_TRACKFLAGLACING,      EBML_NONE },
    { MATROSKA_ID_CODECNAME,            EBML_NONE },
    { MATROSKA_ID_CODECDECODEALL,       EBML_NONE },
    { MATROSKA_ID_CODECINFOURL,         EBML_NONE },
    { MATROSKA_ID_CODECDOWNLOADURL,     EBML_NONE },
    { MATROSKA_ID_TRACKMINCACHE,        EBML_NONE },
    { MATROSKA_ID_TRACKMAXCACHE,        EBML_NONE },
352
    { MATROSKA_ID_TRACKMAXBLKADDID,     EBML_NONE },
353 354 355 356 357 358 359 360
    { 0 }
};

/* Children of Tracks; each TrackEntry is appended to MatroskaDemuxContext.tracks. */
static EbmlSyntax matroska_tracks[] = {
    { MATROSKA_ID_TRACKENTRY,         EBML_NEST, sizeof(MatroskaTrack), offsetof(MatroskaDemuxContext,tracks), {.n=matroska_track} },
    { 0 }
};

361
static EbmlSyntax matroska_attachment[] = {
362
    { MATROSKA_ID_FILEUID,            EBML_UINT, 0, offsetof(MatroskaAttachement,uid) },
363 364 365
    { MATROSKA_ID_FILENAME,           EBML_UTF8, 0, offsetof(MatroskaAttachement,filename) },
    { MATROSKA_ID_FILEMIMETYPE,       EBML_STR,  0, offsetof(MatroskaAttachement,mime) },
    { MATROSKA_ID_FILEDATA,           EBML_BIN,  0, offsetof(MatroskaAttachement,bin) },
366
    { MATROSKA_ID_FILEDESC,           EBML_NONE },
367 368 369 370 371 372 373 374
    { 0 }
};

/* Children of Attachments; each AttachedFile is appended to MatroskaDemuxContext.attachments. */
static EbmlSyntax matroska_attachments[] = {
    { MATROSKA_ID_ATTACHEDFILE,       EBML_NEST, sizeof(MatroskaAttachement), offsetof(MatroskaDemuxContext,attachments), {.n=matroska_attachment} },
    { 0 }
};

375 376
static EbmlSyntax matroska_chapter_display[] = {
    { MATROSKA_ID_CHAPSTRING,         EBML_UTF8, 0, offsetof(MatroskaChapter,title) },
377
    { MATROSKA_ID_CHAPLANG,           EBML_NONE },
378 379 380 381 382 383 384 385 386
    { 0 }
};

/*
 * Children of a ChapterAtom; values are stored into MatroskaChapter.
 * ChapterDisplay uses offset 0 so its child table addresses the same
 * MatroskaChapter. A ChapterAtom nested inside a ChapterAtom is EBML_NONE
 * here, i.e. nested chapters are skipped.
 */
static EbmlSyntax matroska_chapter_entry[] = {
    { MATROSKA_ID_CHAPTERTIMESTART,   EBML_UINT, 0, offsetof(MatroskaChapter,start), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_CHAPTERTIMEEND,     EBML_UINT, 0, offsetof(MatroskaChapter,end), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_CHAPTERUID,         EBML_UINT, 0, offsetof(MatroskaChapter,uid) },
    { MATROSKA_ID_CHAPTERDISPLAY,     EBML_NEST, 0, 0, {.n=matroska_chapter_display} },
    { MATROSKA_ID_CHAPTERFLAGHIDDEN,  EBML_NONE },
    { MATROSKA_ID_CHAPTERFLAGENABLED, EBML_NONE },
    { MATROSKA_ID_CHAPTERPHYSEQUIV,   EBML_NONE },
    { MATROSKA_ID_CHAPTERATOM,        EBML_NONE },
    { 0 }
};

static EbmlSyntax matroska_chapter[] = {
    { MATROSKA_ID_CHAPTERATOM,        EBML_NEST, sizeof(MatroskaChapter), offsetof(MatroskaDemuxContext,chapters), {.n=matroska_chapter_entry} },
    { MATROSKA_ID_EDITIONUID,         EBML_NONE },
    { MATROSKA_ID_EDITIONFLAGHIDDEN,  EBML_NONE },
    { MATROSKA_ID_EDITIONFLAGDEFAULT, EBML_NONE },
398
    { MATROSKA_ID_EDITIONFLAGORDERED, EBML_NONE },
399 400 401 402 403 404 405 406
    { 0 }
};

/* Children of Chapters; EditionEntry is parsed in place (offset 0, no list). */
static EbmlSyntax matroska_chapters[] = {
    { MATROSKA_ID_EDITIONENTRY,       EBML_NEST, 0, 0, {.n=matroska_chapter} },
    { 0 }
};

407 408 409
static EbmlSyntax matroska_index_pos[] = {
    { MATROSKA_ID_CUETRACK,           EBML_UINT, 0, offsetof(MatroskaIndexPos,track) },
    { MATROSKA_ID_CUECLUSTERPOSITION, EBML_UINT, 0, offsetof(MatroskaIndexPos,pos)   },
410
    { MATROSKA_ID_CUEBLOCKNUMBER,     EBML_NONE },
411 412 413 414 415 416 417 418 419 420 421 422 423 424
    { 0 }
};

/* Children of a cue point; values are stored into MatroskaIndex. */
static EbmlSyntax matroska_index_entry[] = {
    { MATROSKA_ID_CUETIME,            EBML_UINT, 0, offsetof(MatroskaIndex,time) },
    { MATROSKA_ID_CUETRACKPOSITION,   EBML_NEST, sizeof(MatroskaIndexPos), offsetof(MatroskaIndex,pos), {.n=matroska_index_pos} },
    { 0 }
};

/* Children of Cues; each cue point is appended to MatroskaDemuxContext.index. */
static EbmlSyntax matroska_index[] = {
    { MATROSKA_ID_POINTENTRY,         EBML_NEST, sizeof(MatroskaIndex), offsetof(MatroskaDemuxContext,index), {.n=matroska_index_entry} },
    { 0 }
};

425 426 427
static EbmlSyntax matroska_simpletag[] = {
    { MATROSKA_ID_TAGNAME,            EBML_UTF8, 0, offsetof(MatroskaTag,name) },
    { MATROSKA_ID_TAGSTRING,          EBML_UTF8, 0, offsetof(MatroskaTag,string) },
428 429
    { MATROSKA_ID_TAGLANG,            EBML_STR,  0, offsetof(MatroskaTag,lang), {.s="und"} },
    { MATROSKA_ID_TAGDEFAULT,         EBML_UINT, 0, offsetof(MatroskaTag,def) },
430
    { MATROSKA_ID_TAGDEFAULT_BUG,     EBML_UINT, 0, offsetof(MatroskaTag,def) },
431 432 433 434
    { MATROSKA_ID_SIMPLETAG,          EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTag,sub), {.n=matroska_simpletag} },
    { 0 }
};

435 436 437 438 439 440 441 442 443
/* Children of Targets; values are stored into MatroskaTagTarget. */
static EbmlSyntax matroska_tagtargets[] = {
    { MATROSKA_ID_TAGTARGETS_TYPE,      EBML_STR,  0, offsetof(MatroskaTagTarget,type) },
    { MATROSKA_ID_TAGTARGETS_TYPEVALUE, EBML_UINT, 0, offsetof(MatroskaTagTarget,typevalue), {.u=50} },
    { MATROSKA_ID_TAGTARGETS_TRACKUID,  EBML_UINT, 0, offsetof(MatroskaTagTarget,trackuid) },
    { MATROSKA_ID_TAGTARGETS_CHAPTERUID,EBML_UINT, 0, offsetof(MatroskaTagTarget,chapteruid) },
    { MATROSKA_ID_TAGTARGETS_ATTACHUID, EBML_UINT, 0, offsetof(MatroskaTagTarget,attachuid) },
    { 0 }
};

444
static EbmlSyntax matroska_tag[] = {
445 446
    { MATROSKA_ID_SIMPLETAG,          EBML_NEST, sizeof(MatroskaTag), offsetof(MatroskaTags,tag), {.n=matroska_simpletag} },
    { MATROSKA_ID_TAGTARGETS,         EBML_NEST, 0, offsetof(MatroskaTags,target), {.n=matroska_tagtargets} },
447 448 449
    { 0 }
};

450
static EbmlSyntax matroska_tags[] = {
451
    { MATROSKA_ID_TAG,                EBML_NEST, sizeof(MatroskaTags), offsetof(MatroskaDemuxContext,tags), {.n=matroska_tag} },
452 453 454
    { 0 }
};

455 456 457 458 459 460 461 462 463 464 465
/*
 * Children of a Seek entry; values are stored into MatroskaSeekhead.
 * The SeekPosition default is -1 converted to uint64_t (all bits set).
 */
static EbmlSyntax matroska_seekhead_entry[] = {
    { MATROSKA_ID_SEEKID,             EBML_UINT, 0, offsetof(MatroskaSeekhead,id) },
    { MATROSKA_ID_SEEKPOSITION,       EBML_UINT, 0, offsetof(MatroskaSeekhead,pos), {.u=-1} },
    { 0 }
};

/* Children of SeekHead; each Seek entry is appended to MatroskaDemuxContext.seekhead. */
static EbmlSyntax matroska_seekhead[] = {
    { MATROSKA_ID_SEEKENTRY,          EBML_NEST, sizeof(MatroskaSeekhead), offsetof(MatroskaDemuxContext,seekhead), {.n=matroska_seekhead_entry} },
    { 0 }
};

466 467 468 469 470 471 472 473
/*
 * Children of the Segment. All metadata sections are parsed in place into
 * MatroskaDemuxContext (offset 0). Cluster is EBML_STOP: parsing returns 1
 * without consuming it, leaving its id pending in current_id.
 */
static EbmlSyntax matroska_segment[] = {
    { MATROSKA_ID_INFO,           EBML_NEST, 0, 0, {.n=matroska_info       } },
    { MATROSKA_ID_TRACKS,         EBML_NEST, 0, 0, {.n=matroska_tracks     } },
    { MATROSKA_ID_ATTACHMENTS,    EBML_NEST, 0, 0, {.n=matroska_attachments} },
    { MATROSKA_ID_CHAPTERS,       EBML_NEST, 0, 0, {.n=matroska_chapters   } },
    { MATROSKA_ID_CUES,           EBML_NEST, 0, 0, {.n=matroska_index      } },
    { MATROSKA_ID_TAGS,           EBML_NEST, 0, 0, {.n=matroska_tags       } },
    { MATROSKA_ID_SEEKHEAD,       EBML_NEST, 0, 0, {.n=matroska_seekhead   } },
    { MATROSKA_ID_CLUSTER,        EBML_STOP },
    { 0 }
};

/* Top-level table that recognizes the Segment master element. */
static EbmlSyntax matroska_segments[] = {
    { MATROSKA_ID_SEGMENT,        EBML_NEST, 0, 0, {.n=matroska_segment    } },
    { 0 }
};

483 484 485 486 487
/*
 * Children of a BlockGroup; values are stored into MatroskaBlock.
 * Block and SimpleBlock both land in `bin`. The row with id 1 exists only
 * for its default: ebml_parse_nest() applies defaults for every row, so a
 * block parsed through a BlockGroup gets non_simple = 1, whereas a
 * SimpleBlock dispatched via EBML_PASS skips ebml_parse_nest() and keeps
 * the zero from the freshly cleared list element.
 */
static EbmlSyntax matroska_blockgroup[] = {
    { MATROSKA_ID_BLOCK,          EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
    { MATROSKA_ID_SIMPLEBLOCK,    EBML_BIN,  0, offsetof(MatroskaBlock,bin) },
    { MATROSKA_ID_BLOCKDURATION,  EBML_UINT, 0, offsetof(MatroskaBlock,duration), {.u=AV_NOPTS_VALUE} },
    { MATROSKA_ID_BLOCKREFERENCE, EBML_UINT, 0, offsetof(MatroskaBlock,reference) },
    { 1,                          EBML_UINT, 0, offsetof(MatroskaBlock,non_simple), {.u=1} },
    { 0 }
};

/*
 * Children of a Cluster; values are stored into MatroskaCluster.
 * BlockGroup and cluster-level SimpleBlock both append a MatroskaBlock to
 * the same `blocks` list; the SimpleBlock is re-dispatched (EBML_PASS)
 * through matroska_blockgroup so its payload is read as a binary element.
 */
static EbmlSyntax matroska_cluster[] = {
    { MATROSKA_ID_CLUSTERTIMECODE,EBML_UINT,0, offsetof(MatroskaCluster,timecode) },
    { MATROSKA_ID_BLOCKGROUP,     EBML_NEST, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
    { MATROSKA_ID_SIMPLEBLOCK,    EBML_PASS, sizeof(MatroskaBlock), offsetof(MatroskaCluster,blocks), {.n=matroska_blockgroup} },
    { MATROSKA_ID_CLUSTERPOSITION,EBML_NONE },
    { MATROSKA_ID_CLUSTERPREVSIZE,EBML_NONE },
    { 0 }
};

static EbmlSyntax matroska_clusters[] = {
    { MATROSKA_ID_CLUSTER,        EBML_NEST, 0, 0, {.n=matroska_cluster} },
503 504 505 506
    { MATROSKA_ID_INFO,           EBML_NONE },
    { MATROSKA_ID_CUES,           EBML_NONE },
    { MATROSKA_ID_TAGS,           EBML_NONE },
    { MATROSKA_ID_SEEKHEAD,       EBML_NONE },
507 508 509
    { 0 }
};

J
James Zern 已提交
510 511
/* Doctype strings that matroska_probe() searches for inside the EBML header. */
static const char *matroska_doctypes[] = { "matroska", "webm" };

512
/*
D
Diego Biurrun 已提交
513
 * Return: Whether we reached the end of a level in the hierarchy or not.
514
 */
515
static int ebml_level_end(MatroskaDemuxContext *matroska)
516
{
517
    ByteIOContext *pb = matroska->ctx->pb;
518
    int64_t pos = url_ftell(pb);
519

520
    if (matroska->num_levels > 0) {
521
        MatroskaLevel *level = &matroska->levels[matroska->num_levels - 1];
522
        if (pos - level->start >= level->length || matroska->current_id) {
523
            matroska->num_levels--;
524
            return 1;
525 526
        }
    }
527
    return 0;
528 529 530 531 532 533 534 535
}

/*
 * Read: an "EBML number", which is defined as a variable-length
 * array of bytes. The first byte indicates the length by giving a
 * number of 0-bits followed by a one. The position of the first
 * "one" bit inside the first byte indicates the length of this
 * number.
D
Diego Biurrun 已提交
536
 * Returns: number of bytes read, < 0 on error
537
 */
538
static int ebml_read_num(MatroskaDemuxContext *matroska, ByteIOContext *pb,
539
                         int max_size, uint64_t *number)
540 541 542 543
{
    int len_mask = 0x80, read = 1, n = 1;
    int64_t total = 0;

D
Diego Biurrun 已提交
544
    /* The first byte tells us the length in bytes - get_byte() can normally
545 546 547 548 549
     * return 0, but since that's not a valid first ebmlID byte, we can
     * use it safely here to catch EOS. */
    if (!(total = get_byte(pb))) {
        /* we might encounter EOS here */
        if (!url_feof(pb)) {
550
            int64_t pos = url_ftell(pb);
551 552 553 554
            av_log(matroska->ctx, AV_LOG_ERROR,
                   "Read error at pos. %"PRIu64" (0x%"PRIx64")\n",
                   pos, pos);
        }
555
        return AVERROR(EIO); /* EOS or actual I/O error */
556 557 558 559 560 561 562 563
    }

    /* get the length of the EBML number */
    while (read <= max_size && !(total & len_mask)) {
        read++;
        len_mask >>= 1;
    }
    if (read > max_size) {
564
        int64_t pos = url_ftell(pb) - 1;
565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584
        av_log(matroska->ctx, AV_LOG_ERROR,
               "Invalid EBML number size tag 0x%02x at pos %"PRIu64" (0x%"PRIx64")\n",
               (uint8_t) total, pos, pos);
        return AVERROR_INVALIDDATA;
    }

    /* read out length */
    total &= ~len_mask;
    while (n++ < read)
        total = (total << 8) | get_byte(pb);

    *number = total;

    return read;
}

/*
 * Read the next element as an unsigned int.
 * 0 is success, < 0 is failure.
 */
585
static int ebml_read_uint(ByteIOContext *pb, int size, uint64_t *num)
586
{
587
    int n = 0;
588

589
    if (size < 1 || size > 8)
590 591
        return AVERROR_INVALIDDATA;

D
Diego Biurrun 已提交
592
    /* big-endian ordering; build up number */
593 594 595 596 597 598 599 600 601 602 603
    *num = 0;
    while (n++ < size)
        *num = (*num << 8) | get_byte(pb);

    return 0;
}

/*
 * Read the next element as a float.
 * 0 is success, < 0 is failure.
 */
604
static int ebml_read_float(ByteIOContext *pb, int size, double *num)
605 606 607 608 609
{
    if (size == 4) {
        *num= av_int2flt(get_be32(pb));
    } else if(size==8){
        *num= av_int2dbl(get_be64(pb));
610
    } else
611 612 613 614 615 616 617 618 619
        return AVERROR_INVALIDDATA;

    return 0;
}

/*
 * Read the next element as an ASCII string.
 * 0 is success, < 0 is failure.
 */
620
static int ebml_read_ascii(ByteIOContext *pb, int size, char **str)
621
{
622
    av_free(*str);
D
Diego Biurrun 已提交
623
    /* EBML strings are usually not 0-terminated, so we allocate one
624
     * byte more, read the string and NULL-terminate it ourselves. */
625
    if (!(*str = av_malloc(size + 1)))
626
        return AVERROR(ENOMEM);
627
    if (get_buffer(pb, (uint8_t *) *str, size) != size) {
628
        av_freep(str);
629
        return AVERROR(EIO);
630 631 632 633 634 635
    }
    (*str)[size] = '\0';

    return 0;
}

636 637 638 639 640 641 642 643 644 645 646 647
/*
 * Read the next element as binary data.
 *
 * Any buffer previously held by bin is freed first. On success bin->data
 * holds the payload, bin->size its length and bin->pos the stream position
 * at which the payload started. On failure bin->data is NULL and bin->size
 * is 0, so the two fields never disagree.
 *
 * 0 is success, < 0 is failure (AVERROR_INVALIDDATA for a negative length,
 * AVERROR(ENOMEM) on allocation failure, AVERROR(EIO) on a short read).
 */
static int ebml_read_binary(ByteIOContext *pb, int length, EbmlBin *bin)
{
    av_freep(&bin->data);
    bin->size = 0;

    /* The caller passes a 64-bit element length through an int parameter. */
    if (length < 0)
        return AVERROR_INVALIDDATA;

    if (!(bin->data = av_malloc(length)))
        return AVERROR(ENOMEM);

    bin->size = length;
    bin->pos  = url_ftell(pb);
    if (get_buffer(pb, bin->data, length) != length) {
        av_freep(&bin->data);
        bin->size = 0;
        return AVERROR(EIO);
    }

    return 0;
}

656 657 658 659 660
/*
 * Read the next element, but only the header. The contents
 * are supposed to be sub-elements which can be read separately.
 * 0 is success, < 0 is failure.
 */
661
static int ebml_read_master(MatroskaDemuxContext *matroska, uint64_t length)
662
{
663
    ByteIOContext *pb = matroska->ctx->pb;
664 665 666 667 668
    MatroskaLevel *level;

    if (matroska->num_levels >= EBML_MAX_DEPTH) {
        av_log(matroska->ctx, AV_LOG_ERROR,
               "File moves beyond max. allowed depth (%d)\n", EBML_MAX_DEPTH);
669
        return AVERROR(ENOSYS);
670 671 672 673 674 675 676 677 678 679 680
    }

    level = &matroska->levels[matroska->num_levels++];
    level->start = url_ftell(pb);
    level->length = length;

    return 0;
}

/*
 * Read signed/unsigned "EBML" numbers.
D
Diego Biurrun 已提交
681
 * Return: number of bytes processed, < 0 on error
682
 */
683 684
static int matroska_ebmlnum_uint(MatroskaDemuxContext *matroska,
                                 uint8_t *data, uint32_t size, uint64_t *num)
685
{
686 687
    ByteIOContext pb;
    init_put_byte(&pb, data, size, 0, NULL, NULL, NULL, NULL);
688
    return ebml_read_num(matroska, &pb, FFMIN(size, 8), num);
689 690 691 692 693
}

/*
 * Same as above, but signed.
 */
694 695
static int matroska_ebmlnum_sint(MatroskaDemuxContext *matroska,
                                 uint8_t *data, uint32_t size, int64_t *num)
696 697 698 699 700
{
    uint64_t unum;
    int res;

    /* read as unsigned number first */
701
    if ((res = matroska_ebmlnum_uint(matroska, data, size, &unum)) < 0)
702 703 704
        return res;

    /* make signed (weird way) */
705
    *num = unum - ((1LL << (7*res - 1)) - 1);
706 707 708 709

    return res;
}

710 711
/* Forward declaration: ebml_parse_id() calls ebml_parse_elem(), which in turn
 * recurses back through ebml_parse_nest() / ebml_parse_id(). */
static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                           EbmlSyntax *syntax, void *data);
712

713 714
/*
 * Look up `id` in a syntax table and parse the element with the matching row.
 *
 * If no row matches, the table's terminator row (id 0, type EBML_NONE) is
 * used, which makes ebml_parse_elem() skip the element's payload. Unknown
 * ids other than Void and CRC32 are logged.
 *
 * Special case: an unmatched Cluster id while the innermost open level has
 * the "unknown size" length marks the end of that level; 0 is returned
 * without consuming anything.
 *
 * Returns whatever ebml_parse_elem() returns for the selected row.
 */
static int ebml_parse_id(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                         uint32_t id, void *data)
{
    int i;
    for (i=0; syntax[i].id; i++)
        if (id == syntax[i].id)
            break;
    if (!syntax[i].id && id == MATROSKA_ID_CLUSTER &&
        matroska->num_levels > 0 &&
        matroska->levels[matroska->num_levels-1].length == 0xffffffffffffff)
        return 0;  // we reached the end of an unknown size cluster
    if (!syntax[i].id && id != EBML_ID_VOID && id != EBML_ID_CRC32)
        av_log(matroska->ctx, AV_LOG_INFO, "Unknown entry 0x%X\n", id);
    return ebml_parse_elem(matroska, &syntax[i], data);
}

729 730
/*
 * Parse one element from the stream using the given syntax table.
 *
 * If no element id is pending in matroska->current_id, one is read from
 * the stream (at most 4 bytes) and the length-marker bit that
 * ebml_read_num() stripped is put back, so the stored id matches the
 * on-disk form of the id. The element is then dispatched via
 * ebml_parse_id().
 *
 * Returns < 0 on error, otherwise the result of ebml_parse_id().
 */
static int ebml_parse(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                      void *data)
{
    if (!matroska->current_id) {
        uint64_t id;
        int res = ebml_read_num(matroska, matroska->ctx->pb, 4, &id);
        if (res < 0)
            return res;
        matroska->current_id = id | 1 << 7*res;
    }
    return ebml_parse_id(matroska, syntax, matroska->current_id, data);
}

742 743
/*
 * Parse the children of a master element into `data`.
 *
 * First every UINT / FLOAT / STR / UTF8 row of the table has its default
 * value written to its destination field (strings are duplicated with
 * av_strdup(), so the destination owns them). Then elements are parsed one
 * after another until ebml_level_end() reports that the level is finished
 * or a parse call returns non-zero.
 *
 * Returns 0 on normal completion, otherwise the first non-zero result of
 * ebml_parse().
 */
static int ebml_parse_nest(MatroskaDemuxContext *matroska, EbmlSyntax *syntax,
                           void *data)
{
    int i, res = 0;

    for (i=0; syntax[i].id; i++)
        switch (syntax[i].type) {
        case EBML_UINT:
            *(uint64_t *)((char *)data+syntax[i].data_offset) = syntax[i].def.u;
            break;
        case EBML_FLOAT:
            *(double   *)((char *)data+syntax[i].data_offset) = syntax[i].def.f;
            break;
        case EBML_STR:
        case EBML_UTF8:
            *(char    **)((char *)data+syntax[i].data_offset) = av_strdup(syntax[i].def.s);
            break;
        default:
            /* other element types carry no scalar default */
            break;
        }

    while (!res && !ebml_level_end(matroska))
        res = ebml_parse(matroska, syntax, data);

    return res;
}

767 768 769
/*
 * Parse one element according to a single syntax row.
 *
 * `data` is the parent destination struct; the row's data_offset is added
 * to it. If the row has a list_elem_size, that location is an EbmlList:
 * the list is grown by one zeroed element and the element becomes the
 * destination.
 *
 * For every type except EBML_PASS and EBML_STOP the pending id is cleared
 * and the element length is read from the stream. The payload is then
 * handled according to the row's type; any type without an explicit case
 * (EBML_NONE, which is also what a table's terminator row has) skips the
 * payload.
 *
 * NOTE(review): `length` is 64-bit but the read helpers take an int, so an
 * oversized length is truncated on the way in — worth bounding here.
 *
 * Returns 0 on success, 1 for EBML_STOP, < 0 on error.
 */
static int ebml_parse_elem(MatroskaDemuxContext *matroska,
                           EbmlSyntax *syntax, void *data)
{
    ByteIOContext *pb = matroska->ctx->pb;
    uint32_t id = syntax->id;
    uint64_t length = 0;
    int res;

    data = (char *)data + syntax->data_offset;
    if (syntax->list_elem_size) {
        EbmlList *list = data;
        /* Keep the old array reachable if the reallocation fails, instead of
         * overwriting list->elem with NULL and then writing through it. */
        void *newelem = av_realloc(list->elem,
                                   (list->nb_elem+1)*syntax->list_elem_size);
        if (!newelem)
            return AVERROR(ENOMEM);
        list->elem = newelem;
        data = (char*)list->elem + list->nb_elem*syntax->list_elem_size;
        memset(data, 0, syntax->list_elem_size);
        list->nb_elem++;
    }

    if (syntax->type != EBML_PASS && syntax->type != EBML_STOP) {
        matroska->current_id = 0;
        if ((res = ebml_read_num(matroska, pb, 8, &length)) < 0)
            return res;
    }

    switch (syntax->type) {
    case EBML_UINT:  res = ebml_read_uint  (pb, length, data);  break;
    case EBML_FLOAT: res = ebml_read_float (pb, length, data);  break;
    case EBML_STR:
    case EBML_UTF8:  res = ebml_read_ascii (pb, length, data);  break;
    case EBML_BIN:   res = ebml_read_binary(pb, length, data);  break;
    case EBML_NEST:  if ((res=ebml_read_master(matroska, length)) < 0)
                         return res;
                     if (id == MATROSKA_ID_SEGMENT)
                         matroska->segment_start = url_ftell(matroska->ctx->pb);
                     return ebml_parse_nest(matroska, syntax->def.n, data);
    case EBML_PASS:  return ebml_parse_id(matroska, syntax->def.n, id, data);
    case EBML_STOP:  return 1;
    default:         return url_fseek(pb,length,SEEK_CUR)<0 ? AVERROR(EIO) : 0;
    }
    if (res == AVERROR_INVALIDDATA)
        av_log(matroska->ctx, AV_LOG_ERROR, "Invalid element\n");
    else if (res == AVERROR(EIO))
        av_log(matroska->ctx, AV_LOG_ERROR, "Read error\n");
    return res;
}

/*
 * Release everything the parser allocated inside `data`, as described by
 * the syntax table: strings, binary buffers, and (recursively) nested
 * structures and lists.
 *
 * List storage is released with av_freep() and the element count reset,
 * so the structure is left empty rather than holding a dangling pointer
 * with a stale count.
 */
static void ebml_free(EbmlSyntax *syntax, void *data)
{
    int i, j;
    for (i=0; syntax[i].id; i++) {
        void *data_off = (char *)data + syntax[i].data_offset;
        switch (syntax[i].type) {
        case EBML_STR:
        case EBML_UTF8:  av_freep(data_off);                      break;
        case EBML_BIN:   av_freep(&((EbmlBin *)data_off)->data);  break;
        case EBML_NEST:
            if (syntax[i].list_elem_size) {
                EbmlList *list = data_off;
                char *ptr = list->elem;
                /* free each element's own allocations before the array */
                for (j=0; j<list->nb_elem; j++, ptr+=syntax[i].list_elem_size)
                    ebml_free(syntax[i].def.n, ptr);
                av_freep(&list->elem);
                list->nb_elem = 0;
            } else
                ebml_free(syntax[i].def.n, data_off);
            break;
        default:  break;
        }
    }
}

835 836 837 838 839 840 841

/*
 * Autodetecting...
 *
 * Checks that the probe buffer starts with the EBML header id, decodes the
 * header length, and searches the header bytes for one of the strings in
 * matroska_doctypes[].
 *
 * Returns AVPROBE_SCORE_MAX when a known doctype is found,
 * AVPROBE_SCORE_MAX/2 for a complete EBML header without a recognized
 * doctype, and 0 otherwise.
 *
 * NOTE(review): the first bytes of p->buf are read before buf_size is
 * consulted — presumably the caller pads the probe buffer; confirm.
 */
static int matroska_probe(AVProbeData *p)
{
    uint64_t total = 0;
    int len_mask = 0x80, size = 1, n = 1, i;

    /* EBML header? */
    if (AV_RB32(p->buf) != EBML_ID_HEADER)
        return 0;

    /* length of header */
    total = p->buf[4];
    while (size <= 8 && !(total & len_mask)) {
        size++;
        len_mask >>= 1;
    }
    if (size > 8)
      return 0;
    total &= (len_mask - 1);
    while (n < size)
        total = (total << 8) | p->buf[4 + n++];

    /* Does the probe data contain the whole header? */
    if (p->buf_size < 4 + size + total)
      return 0;

    /* The header should contain a known document type. For now,
     * we don't parse the whole header but simply check for the
     * availability of that array of characters inside the header.
     * Not fully fool-proof, but good enough. */
    for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++) {
        size_t probelen = strlen(matroska_doctypes[i]);
        /* A header shorter than the doctype cannot contain it. Without
         * this check the unsigned loop bound below wraps around and the
         * scan runs past the end of the probe buffer. */
        if (total < probelen)
            continue;
        for (n = 4+size; n <= 4+size+total-probelen; n++)
            if (!memcmp(p->buf+n, matroska_doctypes[i], probelen))
                return AVPROBE_SCORE_MAX;
    }

    // probably valid EBML header but no recognized doctype
    return AVPROBE_SCORE_MAX/2;
}

/**
 * Look up a track by its Matroska track number.
 *
 * Performs a linear search over the demuxer's track list.
 *
 * @param matroska demuxer context holding the track list
 * @param num      track number to search for
 * @return pointer to the matching track, or NULL (after logging an error)
 *         if no track carries that number
 */
static MatroskaTrack *matroska_find_track_by_num(MatroskaDemuxContext *matroska,
                                                 int num)
{
    MatroskaTrack *track_table = matroska->tracks.elem;
    int idx = 0;

    while (idx < matroska->tracks.nb_elem) {
        MatroskaTrack *candidate = &track_table[idx++];

        if (candidate->num == num)
            return candidate;
    }

    av_log(matroska->ctx, AV_LOG_ERROR, "Invalid track number %d\n", num);
    return NULL;
}

893 894
/**
 * Undo the first content encoding of a track on one buffer.
 *
 * Only encodings[0] of the track is considered.
 *  - Header stripping: nothing is decoded; the number of stripped header
 *    bytes is returned and *buf / *buf_size are left untouched. The caller
 *    is expected to prepend the stripped bytes itself.
 *  - LZO / zlib / bzlib: the input is decompressed into a newly allocated
 *    buffer. The output buffer starts at three times the input size and is
 *    tripled again while the decoder reports that it ran out of room and the
 *    buffer is still below 10000000 bytes.
 *
 * On successful decompression *buf and *buf_size are replaced by the new
 * buffer and its length; the input buffer is NOT freed here.
 *
 * @param buf      in: encoded data, out: decoded data (decompression only)
 * @param buf_size in: encoded size, out: decoded size (decompression only)
 * @param track    track whose first encoding describes the compression
 * @return 0 after successful decompression, the header size (> 0 when a
 *         header is configured) for header stripping, -1 on any failure
 *         (input too large, unsupported algorithm, out of memory, corrupt
 *         data)
 */
static int matroska_decode_buffer(uint8_t** buf, int* buf_size,
                                  MatroskaTrack *track)
{
    MatroskaTrackEncoding *encodings = track->encodings.elem;
    uint8_t* data = *buf;
    int isize = *buf_size;
    uint8_t* pkt_data = NULL;
    uint8_t* newpktdata;
    int pkt_size = isize;
    int result = 0;
    int olen;

    if (pkt_size >= 10000000)
        return -1;

    switch (encodings[0].compression.algo) {
    case MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP:
        return encodings[0].compression.settings.size;
    case MATROSKA_TRACK_ENCODING_COMP_LZO:
        do {
            /* av_lzo1x_decode() overwrites its input-length argument with
             * the number of bytes left, so it must be restored before every
             * retry over the same input. */
            isize = *buf_size;
            olen = pkt_size *= 3;
            newpktdata = av_realloc(pkt_data, pkt_size+AV_LZO_OUTPUT_PADDING);
            if (!newpktdata)
                goto failed;   /* pkt_data is still valid and freed below */
            pkt_data = newpktdata;
            result = av_lzo1x_decode(pkt_data, &olen, data, &isize);
        } while (result==AV_LZO_OUTPUT_FULL && pkt_size<10000000);
        if (result)
            goto failed;
        /* olen now holds the unused output space */
        pkt_size -= olen;
        break;
#if CONFIG_ZLIB
    case MATROSKA_TRACK_ENCODING_COMP_ZLIB: {
        z_stream zstream = {0};
        if (inflateInit(&zstream) != Z_OK)
            return -1;
        zstream.next_in = data;
        zstream.avail_in = isize;
        do {
            pkt_size *= 3;
            newpktdata = av_realloc(pkt_data, pkt_size);
            if (!newpktdata) {
                inflateEnd(&zstream);
                goto failed;
            }
            pkt_data = newpktdata;
            zstream.avail_out = pkt_size - zstream.total_out;
            zstream.next_out = pkt_data + zstream.total_out;
            result = inflate(&zstream, Z_NO_FLUSH);
        } while (result==Z_OK && pkt_size<10000000);
        pkt_size = zstream.total_out;
        inflateEnd(&zstream);
        if (result != Z_STREAM_END)
            goto failed;
        break;
    }
#endif
#if CONFIG_BZLIB
    case MATROSKA_TRACK_ENCODING_COMP_BZLIB: {
        bz_stream bzstream = {0};
        if (BZ2_bzDecompressInit(&bzstream, 0, 0) != BZ_OK)
            return -1;
        bzstream.next_in = data;
        bzstream.avail_in = isize;
        do {
            pkt_size *= 3;
            newpktdata = av_realloc(pkt_data, pkt_size);
            if (!newpktdata) {
                BZ2_bzDecompressEnd(&bzstream);
                goto failed;
            }
            pkt_data = newpktdata;
            bzstream.avail_out = pkt_size - bzstream.total_out_lo32;
            bzstream.next_out = pkt_data + bzstream.total_out_lo32;
            result = BZ2_bzDecompress(&bzstream);
        } while (result==BZ_OK && pkt_size<10000000);
        pkt_size = bzstream.total_out_lo32;
        BZ2_bzDecompressEnd(&bzstream);
        if (result != BZ_STREAM_END)
            goto failed;
        break;
    }
#endif
    default:
        return -1;
    }

    *buf = pkt_data;
    *buf_size = pkt_size;
    return 0;
 failed:
    av_free(pkt_data);
    return -1;
}

974
static void matroska_fix_ass_packet(MatroskaDemuxContext *matroska,
975
                                    AVPacket *pkt, uint64_t display_duration)
976 977 978 979 980 981 982
{
    char *line, *layer, *ptr = pkt->data, *end = ptr+pkt->size;
    for (; *ptr!=',' && ptr<end-1; ptr++);
    if (*ptr == ',')
        layer = ++ptr;
    for (; *ptr!=',' && ptr<end-1; ptr++);
    if (*ptr == ',') {
983
        int64_t end_pts = pkt->pts + display_duration;
984 985 986 987 988 989 990 991 992 993 994 995 996
        int sc = matroska->time_scale * pkt->pts / 10000000;
        int ec = matroska->time_scale * end_pts  / 10000000;
        int sh, sm, ss, eh, em, es, len;
        sh = sc/360000;  sc -= 360000*sh;
        sm = sc/  6000;  sc -=   6000*sm;
        ss = sc/   100;  sc -=    100*ss;
        eh = ec/360000;  ec -= 360000*eh;
        em = ec/  6000;  ec -=   6000*em;
        es = ec/   100;  ec -=    100*es;
        *ptr++ = '\0';
        len = 50 + end-ptr + FF_INPUT_BUFFER_PADDING_SIZE;
        if (!(line = av_malloc(len)))
            return;
997
        snprintf(line,len,"Dialogue: %s,%d:%02d:%02d.%02d,%d:%02d:%02d.%02d,%s\r\n",
998 999 1000 1001 1002 1003 1004
                 layer, sh, sm, ss, sc, eh, em, es, ec, ptr);
        av_free(pkt->data);
        pkt->data = line;
        pkt->size = strlen(line);
    }
}

1005 1006 1007 1008 1009 1010 1011 1012 1013
/**
 * Append the payload of packet @p in to packet @p out and dispose of @p in.
 *
 * @p in is always consumed: its payload is released with
 * av_destruct_packet() and the AVPacket structure itself is freed.
 * If growing @p out fails, @p out is left unchanged (the data of @p in is
 * dropped) instead of writing through a NULL pointer.
 *
 * @param out packet that receives the additional data
 * @param in  heap-allocated packet whose data is appended; freed on return
 */
static void matroska_merge_packets(AVPacket *out, AVPacket *in)
{
    /* Nothing to append for an empty packet; this also avoids a zero-sized
     * reallocation request. */
    if (in->size > 0) {
        uint8_t *merged = av_realloc(out->data, out->size+in->size);
        if (merged) {
            out->data = merged;
            memcpy(out->data+out->size, in->data, in->size);
            out->size += in->size;
        }
    }
    av_destruct_packet(in);
    av_free(in);
}

1014 1015
/**
 * Export a list of Matroska simple tags as metadata entries.
 *
 * The key of each tag is its name, prefixed with "<prefix>/" when a prefix is
 * given. A tag is stored under the plain key when it is flagged as default or
 * has no specific language, and additionally under "<key>-<lang>" when its
 * language is set and is not "und". Nested tags are exported recursively with
 * the parent's key as their prefix.
 *
 * Tags without a name cannot form a key and are skipped with a warning.
 *
 * @param s        format context, used for logging
 * @param list     list of MatroskaTag elements to convert
 * @param metadata destination metadata set
 * @param prefix   key prefix, or NULL for none
 */
static void matroska_convert_tag(AVFormatContext *s, EbmlList *list,
                                 AVMetadata **metadata, char *prefix)
{
    MatroskaTag *tags = list->elem;
    char key[1024];
    int i;

    for (i=0; i < list->nb_elem; i++) {
        /* a missing language is handled like the undetermined one */
        const char *lang = tags[i].lang && strcmp(tags[i].lang, "und") ?
                           tags[i].lang : NULL;

        if (!tags[i].name) {
            av_log(s, AV_LOG_WARNING, "Skipping invalid tag with no TagName.\n");
            continue;
        }
        if (prefix)  snprintf(key, sizeof(key), "%s/%s", prefix, tags[i].name);
        else         av_strlcpy(key, tags[i].name, sizeof(key));
        if (tags[i].def || !lang) {
            av_metadata_set2(metadata, key, tags[i].string, 0);
            if (tags[i].sub.nb_elem)
                matroska_convert_tag(s, &tags[i].sub, metadata, key);
        }
        if (lang) {
            av_strlcat(key, "-", sizeof(key));
            av_strlcat(key, lang, sizeof(key));
            av_metadata_set2(metadata, key, tags[i].string, 0);
            if (tags[i].sub.nb_elem)
                matroska_convert_tag(s, &tags[i].sub, metadata, key);
        }
    }
}

/**
 * Distribute all parsed Matroska tags to the objects they target.
 *
 * Each tag group is attached, in this order of precedence, to the attachment,
 * chapter or track whose UID matches the group's target; groups without any
 * target UID become global metadata, using the target type as key prefix.
 *
 * Attachments, chapters and tracks for which no stream/chapter object was
 * created have a NULL pointer there; such entries are skipped rather than
 * dereferenced.
 *
 * @param s format context whose private data is the Matroska demuxer context
 */
static void matroska_convert_tags(AVFormatContext *s)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    MatroskaTags *tags = matroska->tags.elem;
    int i, j;

    for (i=0; i < matroska->tags.nb_elem; i++) {
        if (tags[i].target.attachuid) {
            MatroskaAttachement *attachment = matroska->attachments.elem;
            for (j=0; j<matroska->attachments.nb_elem; j++)
                if (attachment[j].uid == tags[i].target.attachuid
                    && attachment[j].stream)
                    matroska_convert_tag(s, &tags[i].tag,
                                         &attachment[j].stream->metadata, NULL);
        } else if (tags[i].target.chapteruid) {
            MatroskaChapter *chapter = matroska->chapters.elem;
            for (j=0; j<matroska->chapters.nb_elem; j++)
                if (chapter[j].uid == tags[i].target.chapteruid
                    && chapter[j].chapter)
                    matroska_convert_tag(s, &tags[i].tag,
                                         &chapter[j].chapter->metadata, NULL);
        } else if (tags[i].target.trackuid) {
            MatroskaTrack *track = matroska->tracks.elem;
            for (j=0; j<matroska->tracks.nb_elem; j++)
                if (track[j].uid == tags[i].target.trackuid
                    && track[j].stream)
                    matroska_convert_tag(s, &tags[i].tag,
                                         &track[j].stream->metadata, NULL);
        } else {
            matroska_convert_tag(s, &tags[i].tag, &s->metadata,
                                 tags[i].target.type);
        }
    }
}

1072
/**
 * Follow the seek head entries and parse the elements they point to.
 *
 * Does nothing for streamed input or when AVFMT_FLAG_IGNIDX is set. Entries
 * that are not positioned after the current file position, or that point to
 * another seek head or to a cluster, are ignored. For every remaining entry
 * the file is positioned at the element, a placeholder level of unknown
 * length is pushed so the current nesting is not lost, and the element is
 * parsed with the segment syntax. Afterwards the file position, level_up and
 * current_id are restored to their values on entry.
 *
 * @param matroska demuxer context
 */
static void matroska_execute_seekhead(MatroskaDemuxContext *matroska)
{
    EbmlList *entries = &matroska->seekhead;
    MatroskaSeekhead *entry_table = entries->elem;
    uint32_t saved_level_up = matroska->level_up;
    int64_t resume_pos = url_ftell(matroska->ctx->pb);
    uint32_t saved_current_id = matroska->current_id;
    int idx = 0;

    // we should not do any seeking in the streaming case
    if (url_is_streamed(matroska->ctx->pb))
        return;
    if (matroska->ctx->flags & AVFMT_FLAG_IGNIDX)
        return;

    while (idx < entries->nb_elem) {
        MatroskaSeekhead *cur = &entry_table[idx++];
        int64_t target = cur->pos + matroska->segment_start;
        MatroskaLevel placeholder;

        if (cur->pos <= resume_pos)
            continue;
        if (cur->id == MATROSKA_ID_SEEKHEAD || cur->id == MATROSKA_ID_CLUSTER)
            continue;

        /* position the input on the referenced element */
        if (url_fseek(matroska->ctx->pb, target, SEEK_SET) != target)
            continue;

        /* Push a placeholder level so that the level we are currently in is
         * preserved while the referenced element is parsed. */
        if (matroska->num_levels == EBML_MAX_DEPTH) {
            av_log(matroska->ctx, AV_LOG_INFO,
                   "Max EBML element depth (%d) reached, "
                   "cannot parse further.\n", EBML_MAX_DEPTH);
            break;
        }

        placeholder.start  = 0;
        placeholder.length = (uint64_t)-1;
        matroska->levels[matroska->num_levels] = placeholder;
        matroska->num_levels++;
        matroska->current_id = 0;

        ebml_parse(matroska, matroska_segment, matroska);

        /* pop levels up to and including the placeholder */
        while (matroska->num_levels &&
               matroska->levels[--matroska->num_levels].length != (uint64_t)-1)
            ;
    }

    /* restore the state found on entry */
    url_fseek(matroska->ctx->pb, resume_pos, SEEK_SET);
    matroska->level_up = saved_level_up;
    matroska->current_id = saved_current_id;
}

1130
/**
 * Derive a profile number from a Matroska AAC codec ID string.
 *
 * The names "MAIN", "LC" and "SSR" are searched for, in that order, anywhere
 * inside @p codec_id.
 *
 * @param codec_id codec ID string of the track
 * @return 1, 2 or 3 for the first of those names found, 4 if none is present
 */
static int matroska_aac_profile(char *codec_id)
{
    static const char * const aac_profiles[] = { "MAIN", "LC", "SSR" };
    int profile = 0;

    while (profile < FF_ARRAY_ELEMS(aac_profiles) &&
           !strstr(codec_id, aac_profiles[profile]))
        profile++;

    return profile + 1;
}

1141
static int matroska_aac_sri(int samplerate)
1142 1143 1144
{
    int sri;

1145
    for (sri=0; sri<FF_ARRAY_ELEMS(ff_mpeg4audio_sample_rates); sri++)
1146
        if (ff_mpeg4audio_sample_rates[sri] == samplerate)
1147 1148 1149 1150
            break;
    return sri;
}

1151
/**
 * Read the file header: parse the EBML header and the segment, then create
 * streams, attachments, chapters, index entries and metadata from it.
 *
 * Steps, in order:
 *  1. Parse and validate the EBML header (version, max sizes, doctype
 *     version); an unknown doctype only produces a warning.
 *  2. Parse the segment and follow its seek head.
 *  3. For every video/audio/subtitle track with a codec ID: apply defaults,
 *     decode the codec private data if the track's own first content encoding
 *     covers it, map the codec ID, build extradata and fill in the new
 *     AVStream. Tracks of other types or without codec ID get no stream.
 *  4. Export complete attachments as attachment streams.
 *  5. Export chapters whose start time is set and increasing.
 *  6. Add the cue points as index entries.
 *  7. Convert tags to metadata.
 *
 * @param s  format context
 * @param ap unused
 * @return 0 on success, AVERROR_PATCHWELCOME for unsupported EBML features,
 *         AVERROR(ENOMEM) on allocation failure, or the negative error
 *         returned by the segment parser
 */
static int matroska_read_header(AVFormatContext *s, AVFormatParameters *ap)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    EbmlList *attachements_list = &matroska->attachments;
    MatroskaAttachement *attachements;
    EbmlList *chapters_list = &matroska->chapters;
    MatroskaChapter *chapters;
    MatroskaTrack *tracks;
    EbmlList *index_list;
    MatroskaIndex *index;
    int index_scale = 1;
    uint64_t max_start = 0;
    Ebml ebml = { 0 };
    AVStream *st;
    int i, j, res;

    matroska->ctx = s;

    /* First read the EBML header. */
    if (ebml_parse(matroska, ebml_syntax, &ebml)
        || ebml.version > EBML_VERSION       || ebml.max_size > sizeof(uint64_t)
        || ebml.id_length > sizeof(uint32_t) || ebml.doctype_version > 2) {
        av_log(matroska->ctx, AV_LOG_ERROR,
               "EBML header using unsupported features\n"
               "(EBML version %"PRIu64", doctype %s, doc version %"PRIu64")\n",
               ebml.version, ebml.doctype, ebml.doctype_version);
        ebml_free(ebml_syntax, &ebml);
        return AVERROR_PATCHWELCOME;
    }
    for (i = 0; i < FF_ARRAY_ELEMS(matroska_doctypes); i++)
        if (!strcmp(ebml.doctype, matroska_doctypes[i]))
            break;
    if (i >= FF_ARRAY_ELEMS(matroska_doctypes)) {
        av_log(s, AV_LOG_WARNING, "Unknown EBML doctype '%s'\n", ebml.doctype);
    }
    ebml_free(ebml_syntax, &ebml);

    /* The next thing is a segment. */
    if ((res = ebml_parse(matroska, matroska_segments, matroska)) < 0)
        return res;
    matroska_execute_seekhead(matroska);

    if (!matroska->time_scale)
        matroska->time_scale = 1000000;
    if (matroska->duration)
        matroska->ctx->duration = matroska->duration * matroska->time_scale
                                  * 1000 / AV_TIME_BASE;
    av_metadata_set2(&s->metadata, "title", matroska->title, 0);

    tracks = matroska->tracks.elem;
    for (i=0; i < matroska->tracks.nb_elem; i++) {
        MatroskaTrack *track = &tracks[i];
        enum CodecID codec_id = CODEC_ID_NONE;
        /* Use this track's own encodings (not those of the first track):
         * matroska_decode_buffer() below reads track->encodings as well. */
        EbmlList *encodings_list = &track->encodings;
        MatroskaTrackEncoding *encodings = encodings_list->elem;
        uint8_t *extradata = NULL;
        int extradata_size = 0;
        int extradata_offset = 0;
        ByteIOContext b;

        /* Apply some sanity checks. */
        if (track->type != MATROSKA_TRACK_TYPE_VIDEO &&
            track->type != MATROSKA_TRACK_TYPE_AUDIO &&
            track->type != MATROSKA_TRACK_TYPE_SUBTITLE) {
            av_log(matroska->ctx, AV_LOG_INFO,
                   "Unknown or unsupported track type %"PRIu64"\n",
                   track->type);
            continue;
        }
        if (track->codec_id == NULL)
            continue;

        if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
            if (!track->default_duration)
                track->default_duration = 1000000000/track->video.frame_rate;
            if (!track->video.display_width)
                track->video.display_width = track->video.pixel_width;
            if (!track->video.display_height)
                track->video.display_height = track->video.pixel_height;
        } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
            if (!track->audio.out_samplerate)
                track->audio.out_samplerate = track->audio.samplerate;
        }
        if (encodings_list->nb_elem > 1) {
            av_log(matroska->ctx, AV_LOG_ERROR,
                   "Multiple combined encodings no supported");
        } else if (encodings_list->nb_elem == 1) {
            if (encodings[0].type ||
                (encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_HEADERSTRIP &&
#if CONFIG_ZLIB
                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_ZLIB &&
#endif
#if CONFIG_BZLIB
                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_BZLIB &&
#endif
                 encodings[0].compression.algo != MATROSKA_TRACK_ENCODING_COMP_LZO)) {
                encodings[0].scope = 0;
                av_log(matroska->ctx, AV_LOG_ERROR,
                       "Unsupported encoding type");
            } else if (track->codec_priv.size && encodings[0].scope&2) {
                /* scope bit 1 set: the codec private data is encoded too */
                uint8_t *codec_priv = track->codec_priv.data;
                int offset = matroska_decode_buffer(&track->codec_priv.data,
                                                    &track->codec_priv.size,
                                                    track);
                if (offset < 0) {
                    track->codec_priv.data = NULL;
                    track->codec_priv.size = 0;
                    av_log(matroska->ctx, AV_LOG_ERROR,
                           "Failed to decode codec private data\n");
                } else if (offset > 0) {
                    /* header stripping: prepend the stripped bytes */
                    uint8_t *merged = av_malloc(track->codec_priv.size + offset);
                    if (!merged)
                        return AVERROR(ENOMEM);
                    memcpy(merged,
                           encodings[0].compression.settings.data, offset);
                    memcpy(merged+offset, codec_priv,
                           track->codec_priv.size);
                    track->codec_priv.data = merged;
                    track->codec_priv.size += offset;
                }
                if (codec_priv != track->codec_priv.data)
                    av_free(codec_priv);
            }
        }

        for(j=0; ff_mkv_codec_tags[j].id != CODEC_ID_NONE; j++){
            if(!strncmp(ff_mkv_codec_tags[j].str, track->codec_id,
                        strlen(ff_mkv_codec_tags[j].str))){
                codec_id= ff_mkv_codec_tags[j].id;
                break;
            }
        }

        st = track->stream = av_new_stream(s, 0);
        if (st == NULL)
            return AVERROR(ENOMEM);

        if (!strcmp(track->codec_id, "V_MS/VFW/FOURCC")
            && track->codec_priv.size >= 40
            && track->codec_priv.data != NULL) {
            track->ms_compat = 1;
            track->video.fourcc = AV_RL32(track->codec_priv.data + 16);
            codec_id = ff_codec_get_id(ff_codec_bmp_tags, track->video.fourcc);
            extradata_offset = 40;
        } else if (!strcmp(track->codec_id, "A_MS/ACM")
                   && track->codec_priv.size >= 14
                   && track->codec_priv.data != NULL) {
            init_put_byte(&b, track->codec_priv.data, track->codec_priv.size,
                          URL_RDONLY, NULL, NULL, NULL, NULL);
            ff_get_wav_header(&b, st->codec, track->codec_priv.size);
            codec_id = st->codec->codec_id;
            extradata_offset = FFMIN(track->codec_priv.size, 18);
        } else if (!strcmp(track->codec_id, "V_QUICKTIME")
                   && (track->codec_priv.size >= 86)
                   && (track->codec_priv.data != NULL)) {
            track->video.fourcc = AV_RL32(track->codec_priv.data);
            codec_id=ff_codec_get_id(codec_movvideo_tags, track->video.fourcc);
        } else if (codec_id == CODEC_ID_PCM_S16BE) {
            switch (track->audio.bitdepth) {
            case  8:  codec_id = CODEC_ID_PCM_U8;     break;
            case 24:  codec_id = CODEC_ID_PCM_S24BE;  break;
            case 32:  codec_id = CODEC_ID_PCM_S32BE;  break;
            }
        } else if (codec_id == CODEC_ID_PCM_S16LE) {
            switch (track->audio.bitdepth) {
            case  8:  codec_id = CODEC_ID_PCM_U8;     break;
            case 24:  codec_id = CODEC_ID_PCM_S24LE;  break;
            case 32:  codec_id = CODEC_ID_PCM_S32LE;  break;
            }
        } else if (codec_id==CODEC_ID_PCM_F32LE && track->audio.bitdepth==64) {
            codec_id = CODEC_ID_PCM_F64LE;
        } else if (codec_id == CODEC_ID_AAC && !track->codec_priv.size) {
            /* no codec private data: build the extradata bytes from the
             * profile, sample rate index and channel count */
            int profile = matroska_aac_profile(track->codec_id);
            int sri = matroska_aac_sri(track->audio.samplerate);
            extradata = av_malloc(5);
            if (extradata == NULL)
                return AVERROR(ENOMEM);
            extradata[0] = (profile << 3) | ((sri&0x0E) >> 1);
            extradata[1] = ((sri&0x01) << 7) | (track->audio.channels<<3);
            if (strstr(track->codec_id, "SBR")) {
                sri = matroska_aac_sri(track->audio.out_samplerate);
                extradata[2] = 0x56;
                extradata[3] = 0xE5;
                extradata[4] = 0x80 | (sri<<3);
                extradata_size = 5;
            } else
                extradata_size = 2;
        } else if (codec_id == CODEC_ID_TTA) {
            /* build a 30-byte TTA1 header as extradata */
            extradata_size = 30;
            extradata = av_mallocz(extradata_size);
            if (extradata == NULL)
                return AVERROR(ENOMEM);
            init_put_byte(&b, extradata, extradata_size, 1,
                          NULL, NULL, NULL, NULL);
            put_buffer(&b, "TTA1", 4);
            put_le16(&b, 1);
            put_le16(&b, track->audio.channels);
            put_le16(&b, track->audio.bitdepth);
            put_le32(&b, track->audio.out_samplerate);
            put_le32(&b, matroska->ctx->duration * track->audio.out_samplerate);
        } else if (codec_id == CODEC_ID_RV10 || codec_id == CODEC_ID_RV20 ||
                   codec_id == CODEC_ID_RV30 || codec_id == CODEC_ID_RV40) {
            extradata_offset = 26;
        } else if (codec_id == CODEC_ID_RA_144) {
            track->audio.out_samplerate = 8000;
            track->audio.channels = 1;
        } else if (codec_id == CODEC_ID_RA_288 || codec_id == CODEC_ID_COOK ||
                   codec_id == CODEC_ID_ATRAC3 || codec_id == CODEC_ID_SIPR) {
            int flavor;
            init_put_byte(&b, track->codec_priv.data,track->codec_priv.size,
                          0, NULL, NULL, NULL, NULL);
            url_fskip(&b, 22);
            flavor                       = get_be16(&b);
            track->audio.coded_framesize = get_be32(&b);
            url_fskip(&b, 12);
            track->audio.sub_packet_h    = get_be16(&b);
            track->audio.frame_size      = get_be16(&b);
            track->audio.sub_packet_size = get_be16(&b);
            track->audio.buf = av_malloc(track->audio.frame_size * track->audio.sub_packet_h);
            if (codec_id == CODEC_ID_RA_288) {
                st->codec->block_align = track->audio.coded_framesize;
                track->codec_priv.size = 0;
            } else {
                if (codec_id == CODEC_ID_SIPR && flavor < 4) {
                    const int sipr_bit_rate[4] = { 6504, 8496, 5000, 16000 };
                    track->audio.sub_packet_size = ff_sipr_subpk_size[flavor];
                    st->codec->bit_rate = sipr_bit_rate[flavor];
                }
                st->codec->block_align = track->audio.sub_packet_size;
                extradata_offset = 78;
            }
        }
        /* may become <= 0; the extradata copy below is guarded by size > 0 */
        track->codec_priv.size -= extradata_offset;

        if (codec_id == CODEC_ID_NONE)
            av_log(matroska->ctx, AV_LOG_INFO,
                   "Unknown/unsupported CodecID %s.\n", track->codec_id);

        if (track->time_scale < 0.01)
            track->time_scale = 1.0;
        av_set_pts_info(st, 64, matroska->time_scale*track->time_scale, 1000*1000*1000); /* 64 bit pts in ns */

        st->codec->codec_id = codec_id;
        st->start_time = 0;
        if (strcmp(track->language, "und"))
            av_metadata_set2(&st->metadata, "language", track->language, 0);
        av_metadata_set2(&st->metadata, "title", track->name, 0);

        if (track->flag_default)
            st->disposition |= AV_DISPOSITION_DEFAULT;
        if (track->flag_forced)
            st->disposition |= AV_DISPOSITION_FORCED;

        if (track->default_duration)
            av_reduce(&st->codec->time_base.num, &st->codec->time_base.den,
                      track->default_duration, 1000000000, 30000);

        if (!st->codec->extradata) {
            if(extradata){
                st->codec->extradata = extradata;
                st->codec->extradata_size = extradata_size;
            } else if(track->codec_priv.data && track->codec_priv.size > 0){
                st->codec->extradata = av_mallocz(track->codec_priv.size +
                                                  FF_INPUT_BUFFER_PADDING_SIZE);
                if(st->codec->extradata == NULL)
                    return AVERROR(ENOMEM);
                st->codec->extradata_size = track->codec_priv.size;
                memcpy(st->codec->extradata,
                       track->codec_priv.data + extradata_offset,
                       track->codec_priv.size);
            }
        }

        if (track->type == MATROSKA_TRACK_TYPE_VIDEO) {
            st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
            st->codec->codec_tag  = track->video.fourcc;
            st->codec->width  = track->video.pixel_width;
            st->codec->height = track->video.pixel_height;
            av_reduce(&st->sample_aspect_ratio.num,
                      &st->sample_aspect_ratio.den,
                      st->codec->height * track->video.display_width,
                      st->codec-> width * track->video.display_height,
                      255);
            if (st->codec->codec_id != CODEC_ID_H264)
                st->need_parsing = AVSTREAM_PARSE_HEADERS;
            if (track->default_duration)
                st->avg_frame_rate = av_d2q(1000000000.0/track->default_duration, INT_MAX);
        } else if (track->type == MATROSKA_TRACK_TYPE_AUDIO) {
            st->codec->codec_type = AVMEDIA_TYPE_AUDIO;
            st->codec->sample_rate = track->audio.out_samplerate;
            st->codec->channels = track->audio.channels;
            if (st->codec->codec_id != CODEC_ID_AAC)
                st->need_parsing = AVSTREAM_PARSE_HEADERS;
        } else if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE) {
            st->codec->codec_type = AVMEDIA_TYPE_SUBTITLE;
        }
    }

    attachements = attachements_list->elem;
    for (j=0; j<attachements_list->nb_elem; j++) {
        if (!(attachements[j].filename && attachements[j].mime &&
              attachements[j].bin.data && attachements[j].bin.size > 0)) {
            av_log(matroska->ctx, AV_LOG_ERROR, "incomplete attachment\n");
        } else {
            AVStream *st = av_new_stream(s, 0);
            if (st == NULL)
                break;
            av_metadata_set2(&st->metadata, "filename",attachements[j].filename, 0);
            st->codec->codec_id = CODEC_ID_NONE;
            st->codec->codec_type = AVMEDIA_TYPE_ATTACHMENT;
            st->codec->extradata  = av_malloc(attachements[j].bin.size);
            if(st->codec->extradata == NULL)
                break;
            st->codec->extradata_size = attachements[j].bin.size;
            memcpy(st->codec->extradata, attachements[j].bin.data, attachements[j].bin.size);

            for (i=0; ff_mkv_mime_tags[i].id != CODEC_ID_NONE; i++) {
                if (!strncmp(ff_mkv_mime_tags[i].str, attachements[j].mime,
                             strlen(ff_mkv_mime_tags[i].str))) {
                    st->codec->codec_id = ff_mkv_mime_tags[i].id;
                    break;
                }
            }
            attachements[j].stream = st;
        }
    }

    chapters = chapters_list->elem;
    for (i=0; i<chapters_list->nb_elem; i++)
        if (chapters[i].start != AV_NOPTS_VALUE && chapters[i].uid
            && (max_start==0 || chapters[i].start > max_start)) {
            chapters[i].chapter =
            ff_new_chapter(s, chapters[i].uid, (AVRational){1, 1000000000},
                           chapters[i].start, chapters[i].end,
                           chapters[i].title);
            /* do not touch the metadata of a chapter that was not created */
            if (chapters[i].chapter)
                av_metadata_set2(&chapters[i].chapter->metadata,
                                 "title", chapters[i].title, 0);
            max_start = chapters[i].start;
        }

    index_list = &matroska->index;
    index = index_list->elem;
    if (index_list->nb_elem
        && index[0].time > 100000000000000/matroska->time_scale) {
        av_log(matroska->ctx, AV_LOG_WARNING, "Working around broken index.\n");
        index_scale = matroska->time_scale;
    }
    for (i=0; i<index_list->nb_elem; i++) {
        EbmlList *pos_list = &index[i].pos;
        MatroskaIndexPos *pos = pos_list->elem;
        for (j=0; j<pos_list->nb_elem; j++) {
            MatroskaTrack *track = matroska_find_track_by_num(matroska,
                                                              pos[j].track);
            if (track && track->stream)
                av_add_index_entry(track->stream,
                                   pos[j].pos + matroska->segment_start,
                                   index[i].time/index_scale, 0, 0,
                                   AVINDEX_KEYFRAME);
        }
    }

    matroska_convert_tags(s);

    return 0;
}

1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544 1545 1546 1547 1548 1549 1550
/*
 * Put one packet in an application-supplied AVPacket struct.
 *
 * The oldest queued packet is copied into pkt (its payload is handed over,
 * only the queue's AVPacket wrapper is freed) and removed from the queue.
 * The queue array is shrunk afterwards; should shrinking fail, the existing
 * (larger) array is kept instead of being lost.
 *
 * Returns 0 on success or -1 on failure (the queue is empty).
 */
static int matroska_deliver_packet(MatroskaDemuxContext *matroska,
                                   AVPacket *pkt)
{
    if (matroska->num_packets > 0) {
        memcpy(pkt, matroska->packets[0], sizeof(AVPacket));
        av_free(matroska->packets[0]);
        if (matroska->num_packets > 1) {
            void *newpackets;
            memmove(&matroska->packets[0], &matroska->packets[1],
                    (matroska->num_packets - 1) * sizeof(AVPacket *));
            newpackets = av_realloc(matroska->packets,
                                    (matroska->num_packets - 1) *
                                    sizeof(AVPacket *));
            /* On failure the old block is still valid and merely larger
             * than needed, so keep using it. */
            if (newpackets)
                matroska->packets = newpackets;
        } else {
            av_freep(&matroska->packets);
        }
        matroska->num_packets--;
        return 0;
    }

    return -1;
}

/*
 * Drop every packet waiting in the internal queue: release each packet's
 * payload and wrapper, then the queue array itself, and reset the count.
 * Does nothing when no queue array is allocated.
 */
static void matroska_clear_queue(MatroskaDemuxContext *matroska)
{
    int idx;

    if (!matroska->packets)
        return;

    for (idx = 0; idx < matroska->num_packets; idx++) {
        AVPacket *queued = matroska->packets[idx];

        av_free_packet(queued);
        av_free(queued);
    }
    av_freep(&matroska->packets);
    matroska->num_packets = 0;
}

1556 1557
/*
 * Parse the payload of one block and append the packet(s) it carries to
 * the demuxer's packet queue.
 *
 * data, size    block payload, starting at the EBML-coded track number
 * pos           value stored in pkt->pos of every packet produced
 * cluster_time  timecode of the enclosing cluster, (uint64_t)-1 if unknown
 * duration      block duration; AV_NOPTS_VALUE selects the track's default
 *               duration divided by the segment time scale
 * is_keyframe   -1 to derive the key flag from the block flags byte,
 *               otherwise the value is used as the packet flags
 * cluster_pos   position recorded in the stream index for key frames
 *
 * Returns 0 on success and also when the block is skipped (unparsable
 * header or lacing, unknown/discarded track, block before the seek target,
 * invalid lace size). Returns -1 when Xiph lacing data is truncated and
 * AVERROR(ENOMEM) when an allocation fails.
 */
static int matroska_parse_block(MatroskaDemuxContext *matroska, uint8_t *data,
                                int size, int64_t pos, uint64_t cluster_time,
                                uint64_t duration, int is_keyframe,
                                int64_t cluster_pos)
{
    uint64_t timecode = AV_NOPTS_VALUE;
    MatroskaTrack *track;
    int res = 0;
    AVStream *st;
    AVPacket *pkt;
    int16_t block_time;
    uint32_t *lace_size = NULL;
    int n, flags, laces = 0;
    uint64_t num;

    /* The block starts with the track number as an EBML variable-size int. */
    if ((n = matroska_ebmlnum_uint(matroska, data, size, &num)) < 0) {
        av_log(matroska->ctx, AV_LOG_ERROR, "EBML block data error\n");
        return res;
    }
    data += n;
    size -= n;

    track = matroska_find_track_by_num(matroska, num);
    if (size <= 3 || !track || !track->stream) {
        av_log(matroska->ctx, AV_LOG_INFO,
               "Invalid stream %"PRIu64" or size %u\n", num, size);
        return res;
    }
    st = track->stream;
    if (st->discard >= AVDISCARD_ALL)
        return res;
    if (duration == AV_NOPTS_VALUE)
        duration = track->default_duration / matroska->time_scale;

    /* 16-bit big-endian timecode relative to the cluster, then one byte of
     * flags (0x80: key frame, 0x06: lacing mode). */
    block_time = AV_RB16(data);
    data += 2;
    flags = *data++;
    size -= 3;
    if (is_keyframe == -1)
        is_keyframe = flags & 0x80 ? AV_PKT_FLAG_KEY : 0;

    if (cluster_time != (uint64_t)-1
        && (block_time >= 0 || cluster_time >= -block_time)) {
        timecode = cluster_time + block_time;
        if (track->type == MATROSKA_TRACK_TYPE_SUBTITLE
            && timecode < track->end_timecode)
            is_keyframe = 0;  /* overlapping subtitles are not key frame */
        if (is_keyframe)
            av_add_index_entry(st, cluster_pos, timecode, 0,0,AVINDEX_KEYFRAME);
        track->end_timecode = FFMAX(track->end_timecode, timecode+duration);
    }

    /* After a seek, drop non-subtitle blocks until the first key frame at or
     * after the requested timecode. */
    if (matroska->skip_to_keyframe && track->type != MATROSKA_TRACK_TYPE_SUBTITLE) {
        if (!is_keyframe || timecode < matroska->skip_to_timecode)
            return res;
        matroska->skip_to_keyframe = 0;
    }

    switch ((flags & 0x06) >> 1) {
        case 0x0: /* no lacing */
            laces = 1;
            lace_size = av_mallocz(sizeof(*lace_size));
            if (!lace_size)
                return AVERROR(ENOMEM);
            lace_size[0] = size;
            break;

        case 0x1: /* Xiph lacing */
        case 0x2: /* fixed-size lacing */
        case 0x3: /* EBML lacing */
            assert(size>0); // size <=3 is checked before size-=3 above
            laces = (*data) + 1;
            data += 1;
            size -= 1;
            lace_size = av_mallocz(laces * sizeof(*lace_size));
            if (!lace_size)
                return AVERROR(ENOMEM);

            switch ((flags & 0x06) >> 1) {
                case 0x1: /* Xiph lacing */ {
                    uint8_t temp;
                    uint32_t total = 0;
                    for (n = 0; res == 0 && n < laces - 1; n++) {
                        while (1) {
                            if (size == 0) {
                                res = -1;
                                break;
                            }
                            temp = *data;
                            lace_size[n] += temp;
                            data += 1;
                            size -= 1;
                            if (temp != 0xff)
                                break;
                        }
                        total += lace_size[n];
                    }
                    /* the last lace takes whatever is left */
                    lace_size[n] = size - total;
                    break;
                }

                case 0x2: /* fixed-size lacing */
                    for (n = 0; n < laces; n++)
                        lace_size[n] = size / laces;
                    break;

                case 0x3: /* EBML lacing */ {
                    uint32_t total;
                    n = matroska_ebmlnum_uint(matroska, data, size, &num);
                    if (n < 0) {
                        av_log(matroska->ctx, AV_LOG_INFO,
                               "EBML block data error\n");
                        /* The lace table is unusable: drop the block rather
                         * than emitting packets built from bogus sizes. */
                        goto end;
                    }
                    data += n;
                    size -= n;
                    total = lace_size[0] = num;
                    /* following sizes are stored as signed deltas */
                    for (n = 1; res == 0 && n < laces - 1; n++) {
                        int64_t snum;
                        int r;
                        r = matroska_ebmlnum_sint(matroska, data, size, &snum);
                        if (r < 0) {
                            av_log(matroska->ctx, AV_LOG_INFO,
                                   "EBML block data error\n");
                            goto end;
                        }
                        data += r;
                        size -= r;
                        lace_size[n] = lace_size[n - 1] + snum;
                        total += lace_size[n];
                    }
                    lace_size[n] = size - total;
                    break;
                }
            }
            break;
    }

    if (res == 0) {
        for (n = 0; n < laces; n++) {
            if ((st->codec->codec_id == CODEC_ID_RA_288 ||
                 st->codec->codec_id == CODEC_ID_COOK ||
                 st->codec->codec_id == CODEC_ID_SIPR ||
                 st->codec->codec_id == CODEC_ID_ATRAC3) &&
                 st->codec->block_align && track->audio.sub_packet_size) {
                /* Sub-packet interleaved audio: frames are collected in
                 * track->audio.buf until sub_packet_h of them have arrived,
                 * then handed out in block_align sized packets. */
                int a = st->codec->block_align;
                int sps = track->audio.sub_packet_size;
                int cfs = track->audio.coded_framesize;
                int h = track->audio.sub_packet_h;
                int y = track->audio.sub_packet_cnt;
                int w = track->audio.frame_size;
                int x;

                if (!track->audio.pkt_cnt) {
                    /* Number of input bytes the copy below reads from data;
                     * refuse laces that are shorter than that. */
                    int64_t needed;
                    if (st->codec->codec_id == CODEC_ID_RA_288)
                        needed = (int64_t)cfs * (h/2);
                    else if (st->codec->codec_id == CODEC_ID_SIPR)
                        needed = w;
                    else
                        needed = (int64_t)sps * (w/sps);
                    if (size < needed) {
                        av_log(matroska->ctx, AV_LOG_ERROR,
                               "Corrupt RM-style audio packet size\n");
                        break;
                    }

                    if (st->codec->codec_id == CODEC_ID_RA_288)
                        for (x=0; x<h/2; x++)
                            memcpy(track->audio.buf+x*2*w+y*cfs,
                                   data+x*cfs, cfs);
                    else if (st->codec->codec_id == CODEC_ID_SIPR)
                        memcpy(track->audio.buf + y*w, data, w);
                    else
                        for (x=0; x<w/sps; x++)
                            memcpy(track->audio.buf+sps*(h*x+((h+1)/2)*(y&1)+(y>>1)), data+x*sps, sps);

                    if (++track->audio.sub_packet_cnt >= h) {
                        if (st->codec->codec_id == CODEC_ID_SIPR)
                            ff_rm_reorder_sipr_data(track->audio.buf, h, w);
                        track->audio.sub_packet_cnt = 0;
                        track->audio.pkt_cnt = h*w / a;
                    }
                }
                while (track->audio.pkt_cnt) {
                    pkt = av_mallocz(sizeof(AVPacket));
                    if (!pkt || av_new_packet(pkt, a) < 0) {
                        av_free(pkt);
                        res = AVERROR(ENOMEM);
                        break;
                    }
                    memcpy(pkt->data, track->audio.buf
                           + a * (h*w / a - track->audio.pkt_cnt--), a);
                    pkt->pos = pos;
                    pkt->stream_index = st->index;
                    dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
                }
                if (res < 0)
                    break;
            } else {
                MatroskaTrackEncoding *encodings = track->encodings.elem;
                int offset = 0, pkt_size = lace_size[n];
                uint8_t *pkt_data = data;

                /* pkt_size < 0 catches lace sizes that wrapped around when
                 * the lacing header claimed more data than the block holds. */
                if (pkt_size < 0 || pkt_size > size) {
                    av_log(matroska->ctx, AV_LOG_ERROR, "Invalid packet size\n");
                    break;
                }

                if (encodings && encodings->scope & 1) {
                    offset = matroska_decode_buffer(&pkt_data,&pkt_size, track);
                    /* Undecodable lace: skip it, but still step over its
                     * bytes so the following laces are read from the right
                     * place. */
                    if (offset < 0)
                        goto next_lace;
                }

                pkt = av_mallocz(sizeof(AVPacket));
                /* XXX: prevent data copy... */
                if (!pkt || av_new_packet(pkt, pkt_size+offset) < 0) {
                    av_free(pkt);
                    if (pkt_data != data)
                        av_free(pkt_data);
                    res = AVERROR(ENOMEM);
                    break;
                }
                if (offset)
                    memcpy (pkt->data, encodings->compression.settings.data, offset);
                memcpy (pkt->data+offset, pkt_data, pkt_size);

                /* a decoded buffer is a temporary copy owned by us */
                if (pkt_data != data)
                    av_free(pkt_data);

                if (n == 0)
                    pkt->flags = is_keyframe;
                pkt->stream_index = st->index;

                if (track->ms_compat)
                    pkt->dts = timecode;
                else
                    pkt->pts = timecode;
                pkt->pos = pos;
                if (st->codec->codec_id == CODEC_ID_TEXT)
                    pkt->convergence_duration = duration;
                else if (track->type != MATROSKA_TRACK_TYPE_SUBTITLE)
                    pkt->duration = duration;

                if (st->codec->codec_id == CODEC_ID_SSA)
                    matroska_fix_ass_packet(matroska, pkt, duration);

                /* SSA packets with the same timecode on the same stream are
                 * merged into the previously queued packet. */
                if (matroska->prev_pkt &&
                    timecode != AV_NOPTS_VALUE &&
                    matroska->prev_pkt->pts == timecode &&
                    matroska->prev_pkt->stream_index == st->index &&
                    st->codec->codec_id == CODEC_ID_SSA)
                    matroska_merge_packets(matroska->prev_pkt, pkt);
                else {
                    dynarray_add(&matroska->packets,&matroska->num_packets,pkt);
                    matroska->prev_pkt = pkt;
                }
            }

next_lace:
            if (timecode != AV_NOPTS_VALUE)
                timecode = duration ? timecode + duration : AV_NOPTS_VALUE;
            data += lace_size[n];
            size -= lace_size[n];
        }
    }

end:
    av_free(lace_size);
    return res;
}

1802
/*
 * Read the next cluster with ebml_parse() and feed every non-empty block
 * it contains to matroska_parse_block().
 *
 * The value returned is the result of the last block parsed, or the
 * ebml_parse() result when no block was parsed. A negative value also
 * marks the demuxer as done.
 */
static int matroska_parse_cluster(MatroskaDemuxContext *matroska)
{
    MatroskaCluster cluster = { 0 };
    MatroskaBlock *block_array;
    int64_t cluster_pos;
    int idx, ret;

    cluster_pos = url_ftell(matroska->ctx->pb);
    if (matroska->current_id)
        cluster_pos -= 4;  /* sizeof the ID which was already read */

    /* packets are only merged with packets from the same cluster */
    matroska->prev_pkt = NULL;

    ret = ebml_parse(matroska, matroska_clusters, &cluster);

    block_array = cluster.blocks.elem;
    for (idx = 0; idx < cluster.blocks.nb_elem; idx++) {
        MatroskaBlock *blk = &block_array[idx];
        int keyframe;

        if (!(blk->bin.size > 0 && blk->bin.data))
            continue;

        /* -1 lets matroska_parse_block() take the key flag from the block
         * flags byte */
        if (blk->non_simple)
            keyframe = !blk->reference;
        else
            keyframe = -1;

        ret = matroska_parse_block(matroska, blk->bin.data, blk->bin.size,
                                   blk->bin.pos, cluster.timecode,
                                   blk->duration, keyframe, cluster_pos);
    }

    ebml_free(matroska_cluster, &cluster);

    if (ret < 0)
        matroska->done = 1;
    return ret;
}

1829
/*
 * Hand the next queued packet to the caller, parsing further clusters
 * whenever the queue is empty.
 *
 * Returns 0 once a packet has been delivered, or AVERROR_EOF when the
 * queue is empty and the demuxer has been marked as done.
 */
static int matroska_read_packet(AVFormatContext *s, AVPacket *pkt)
{
    MatroskaDemuxContext *matroska = s->priv_data;

    for (;;) {
        if (!matroska_deliver_packet(matroska, pkt))
            return 0;
        if (matroska->done)
            return AVERROR_EOF;
        /* the result is not needed: a failure sets matroska->done */
        matroska_parse_cluster(matroska);
    }
}

1842 1843
/*
 * Seek to the index entry of stream stream_index selected by
 * av_index_search_timestamp() for timestamp/flags.
 *
 * If the index has no matching entry yet, clusters are parsed from the
 * last indexed position onwards until one appears or parsing fails.
 * The file position is moved back further when an active (non-discarded)
 * subtitle track has an index entry that lies before the target position
 * and started within 30000000000/time_scale timestamp units of the target,
 * so that this subtitle is demuxed again.
 *
 * Always returns 0; when no usable index entry exists the function
 * returns without setting the seek state.
 */
static int matroska_read_seek(AVFormatContext *s, int stream_index,
                              int64_t timestamp, int flags)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    MatroskaTrack *tracks = matroska->tracks.elem;
    AVStream *st = s->streams[stream_index];
    int i, index, index_sub;
    int64_t seek_pos, seek_ts;

    if (!st->nb_index_entries)
        return 0;
    timestamp = FFMAX(timestamp, st->index_entries[0].timestamp);

    if ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
        /* not indexed yet: extend the index by parsing from its last entry */
        url_fseek(s->pb, st->index_entries[st->nb_index_entries-1].pos, SEEK_SET);
        while ((index = av_index_search_timestamp(st, timestamp, flags)) < 0) {
            matroska_clear_queue(matroska);
            if (matroska_parse_cluster(matroska) < 0)
                break;
        }
    }

    matroska_clear_queue(matroska);
    if (index < 0)
        return 0;

    /* Read the entry only now: parsing above may have grown the index. */
    seek_ts  = st->index_entries[index].timestamp;
    seek_pos = st->index_entries[index].pos;

    for (i=0; i < matroska->tracks.nb_elem; i++) {
        AVStream *sub = tracks[i].stream;

        tracks[i].end_timecode = 0;
        if (tracks[i].type != MATROSKA_TRACK_TYPE_SUBTITLE
            || !sub || sub->discard == AVDISCARD_ALL)
            continue;

        /* index_sub indexes the subtitle stream's own index table, so the
         * entry must be looked up in sub->index_entries, not in st's. */
        index_sub = av_index_search_timestamp(sub, seek_ts, AVSEEK_FLAG_BACKWARD);
        if (index_sub >= 0
            && sub->index_entries[index_sub].pos < seek_pos
            && seek_ts - sub->index_entries[index_sub].timestamp < 30000000000/matroska->time_scale)
            seek_pos = sub->index_entries[index_sub].pos;
    }

    url_fseek(s->pb, seek_pos, SEEK_SET);
    matroska->skip_to_keyframe = !(flags & AVSEEK_FLAG_ANY);
    matroska->skip_to_timecode = seek_ts;
    matroska->done = 0;
    av_update_cur_dts(s, st, seek_ts);
    return 0;
}

1888
/*
 * Release everything the demuxer still holds: the queued packets, the
 * audio buffer of every audio track, and the parsed segment data.
 * Always returns 0.
 */
static int matroska_read_close(AVFormatContext *s)
{
    MatroskaDemuxContext *matroska = s->priv_data;
    MatroskaTrack *track_array = matroska->tracks.elem;
    int idx;

    matroska_clear_queue(matroska);

    for (idx = 0; idx < matroska->tracks.nb_elem; idx++) {
        if (track_array[idx].type != MATROSKA_TRACK_TYPE_AUDIO)
            continue;
        av_free(track_array[idx].audio.buf);
    }

    /* must come after the loop above, which still reads the track list */
    ebml_free(matroska_segment, matroska);

    return 0;
}

AVInputFormat matroska_demuxer = {
1905 1906
    "matroska,webm",
    NULL_IF_CONFIG_SMALL("Matroska/WebM file format"),
1907 1908 1909 1910 1911 1912
    sizeof(MatroskaDemuxContext),
    matroska_probe,
    matroska_read_header,
    matroska_read_packet,
    matroska_read_close,
    matroska_read_seek,
1913
    .metadata_conv = ff_mkv_metadata_conv,
1914
};