vp8.c 77.8 KB
Newer Older
1
/*
D
David Conrad 已提交
2 3 4 5
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
6
 * Copyright (C) 2010 Jason Garrett-Glaser
D
Daniel Kang 已提交
7
 * Copyright (C) 2012 Daniel Kang
D
David Conrad 已提交
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

26
#include "libavutil/imgutils.h"
D
David Conrad 已提交
27
#include "avcodec.h"
28
#include "internal.h"
29
#include "vp8.h"
D
David Conrad 已提交
30 31
#include "vp8data.h"
#include "rectangle.h"
R
Ronald S. Bultje 已提交
32
#include "thread.h"
D
David Conrad 已提交
33

34 35 36 37
#if ARCH_ARM
#   include "arm/vp8.h"
#endif

38 39
/**
 * Release every per-dimension buffer owned by the decoder context.
 *
 * Frees the per-thread scratch buffers (and, when threads are compiled in,
 * destroys the mutex/condition pair that update_dimensions() initialises for
 * each entry), then the thread_data array itself and the macroblock, intra
 * prediction, nnz and border arrays. All pointers are reset to NULL by
 * av_freep(), so calling this twice is harmless.
 */
static void free_buffers(VP8Context *s)
{
    int i;

    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            /* counterpart of the pthread_*_init() calls in update_dimensions() */
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
            av_freep(&s->thread_data[i].edge_emu_buffer);
        }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    /* macroblocks points into macroblocks_base, which is gone now */
    s->macroblocks = NULL;
}

55
/**
 * Allocate the picture buffer and the segmentation map of a frame.
 *
 * @param s   decoder context (provides avctx and the macroblock dimensions)
 * @param f   frame to populate
 * @param ref non-zero to request the buffer with AV_GET_BUFFER_FLAG_REF
 * @return 0 on success, the error from ff_thread_get_buffer(), or
 *         AVERROR(ENOMEM) if the segmentation map cannot be allocated
 */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int ret;

    if ((ret = ff_thread_get_buffer(s->avctx, &f->tf,
                                    ref ? AV_GET_BUFFER_FLAG_REF : 0)) < 0)
        return ret;

    /* one zero-initialised byte per macroblock */
    if (!(f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height))) {
        /* do not leave a half-initialised frame behind */
        ff_thread_release_buffer(s->avctx, &f->tf);
        return AVERROR(ENOMEM);
    }
    return 0;
}

68
/**
 * Drop the references a frame holds: its segmentation map and its picture
 * buffer.
 */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}

/**
 * Make dst a new reference to the same picture and segmentation map as src.
 *
 * Whatever dst held before is released first.
 *
 * @return 0 on success, the error from ff_thread_ref_frame(), or
 *         AVERROR(ENOMEM) if the segmentation map cannot be referenced
 *         (dst is released again in that case)
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int ret;

    vp8_release_frame(s, dst);

    if ((ret = ff_thread_ref_frame(&dst->tf, &src->tf)) < 0)
        return ret;
    if (src->seg_map &&
        !(dst->seg_map = av_buffer_ref(src->seg_map))) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}

91 92

/**
 * Release all frames held by the decoder and clear the frame pointer table.
 *
 * @param avctx    codec context whose priv_data is the VP8Context
 * @param free_mem when non-zero, the per-dimension buffers are freed as well
 *                 (see free_buffers())
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int i;

    for (i = 0; i < FF_ARRAY_ELEMS(s->frames); i++)
        vp8_release_frame(s, &s->frames[i]);
    memset(s->framep, 0, sizeof(s->framep));

    if (free_mem)
        free_buffers(s);
}

/**
 * Flush the decoder: release all frames but keep the per-dimension buffers.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0);
}

/**
 * (Re)allocate every buffer whose size depends on the frame dimensions.
 *
 * If the requested size differs from the codec context, or the macroblock
 * grid differs while buffers already exist, the new size is validated, the
 * decoder is flushed (freeing the old buffers) and the codec context
 * dimensions are updated. The macroblock grid is then derived from the coded
 * dimensions and all buffers are allocated for the selected layout.
 *
 * @param s      decoder context
 * @param width  new frame width in pixels
 * @param height new frame height in pixels
 * @return 0 on success, AVERROR_INVALIDDATA if the size is rejected by
 *         av_image_check_size(), AVERROR(ENOMEM) if any allocation fails
 *         (all buffers are released again in that case)
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i, alloc_failed;

    if (width  != s->avctx->width ||
        (((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base) ||
        height != s->avctx->height) {
        if (av_image_check_size(width, height, 0, s->avctx))
            return AVERROR_INVALIDDATA;

        vp8_decode_flush_impl(s->avctx, 1);

        avcodec_set_dimensions(s->avctx, width, height);
    }

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
    }
    else // Sliced threading
        s->macroblocks_base       = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
    s->top_nnz                    = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border                 = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->thread_data                = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));

    alloc_failed = !s->macroblocks_base || !s->top_nnz || !s->top_border ||
                   !s->thread_data ||
                   (!s->intra4x4_pred_mode_top && !s->mb_layout);

    /* Only touch thread_data once we know the array exists. Every entry gets
     * its synchronisation objects initialised, even if an allocation fails,
     * so that the array is always in a uniform state when it is freed. */
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
            s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
            if (!s->thread_data[i].filter_strength)
                alloc_failed = 1;
#if HAVE_THREADS
            pthread_mutex_init(&s->thread_data[i].lock, NULL);
            pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
        }

    if (alloc_failed) {
        /* release the partial set so a later call starts from scratch */
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    /* entry 0 of macroblocks_base is skipped; macroblocks starts one element in */
    s->macroblocks        = s->macroblocks_base + 1;

    return 0;
}

/**
 * Read the segmentation fields of the frame header from s->c.
 *
 * Reads the update_map flag, then a flag telling whether per-segment feature
 * data follows. If it does, absolute_vals and four signed base quantizers
 * (7 bits) and four signed filter levels (6 bits) are read. When update_map
 * is set, three segment-id probabilities are read; each one is either coded
 * as 8 bits or defaults to 255.
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int update_features;
    int seg, n;

    s->segmentation.update_map = vp8_rac_get(rc);
    update_features            = vp8_rac_get(rc);

    if (update_features) {
        s->segmentation.absolute_vals = vp8_rac_get(rc);

        for (seg = 0; seg < 4; seg++)
            s->segmentation.base_quant[seg] = vp8_rac_get_sint(rc, 7);

        for (seg = 0; seg < 4; seg++)
            s->segmentation.filter_level[seg] = vp8_rac_get_sint(rc, 6);
    }

    if (!s->segmentation.update_map)
        return;

    for (n = 0; n < 3; n++) {
        if (vp8_rac_get(rc))
            s->prob->segmentid[n] = vp8_rac_get_uint(rc, 8);
        else
            s->prob->segmentid[n] = 255;
    }
}

/**
 * Read loop-filter delta updates from the frame header.
 *
 * For each of the four reference-frame deltas, and then for each mode delta
 * from MODE_I4x4 to VP8_MVMODE_SPLIT, a flag says whether the entry is
 * updated; if so a 6-bit magnitude followed by a sign flag is read. Entries
 * whose flag is clear keep their previous value.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

/**
 * Set up one range decoder per coefficient partition.
 *
 * The partition count (1, 2, 4 or 8) is read as a 2-bit log2 value from the
 * header range coder. buf starts with a table of 24-bit little-endian sizes,
 * one for every partition except the last; the last partition takes whatever
 * data remains.
 *
 * @param s        decoder context
 * @param buf      data following the first (header) partition
 * @param buf_size number of bytes available at buf
 * @return 0 on success, -1 if the size table or any partition does not fit
 *         in buf_size
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i, table_size;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    /* validate before advancing so buf never points past the input */
    table_size = 3*(s->num_coeff_partitions-1);
    if (buf_size < table_size)
        return -1;
    buf      += table_size;
    buf_size -= table_size;

    for (i = 0; i < s->num_coeff_partitions-1; i++) {
        int size = AV_RL24(sizes + 3*i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    /* last partition: no explicit size, it gets the remainder */
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

/**
 * Read the quantizer indices from the frame header and fill the dequant
 * multiplier tables for all four segments.
 *
 * The base index for a segment is the frame-level index, the segment's own
 * value, or their sum, depending on whether segmentation is enabled and
 * whether segment values are absolute. Each table index is clipped to 7 bits
 * before the lookup.
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    =           vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
        s->qmat[i].luma_qmul[1]    =           vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
        s->qmat[i].luma_dc_qmul[0] =       2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
        s->qmat[i].chroma_qmul[0]  =           vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  =           vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        /* enforce a floor on the Y2 AC factor and a ceiling on the chroma DC factor */
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffer golden or altref should be updated from after this
 * frame. The spec isn't clear here, so this follows the original author's
 * understanding of what libvpx does.
 *
 * If the update flag is set, the reference is updated from the current frame
 * and nothing is read from the bitstream. Otherwise a 2-bit value is read:
 *      0: no update
 *      1: VP56_FRAME_PREVIOUS
 *      2: update golden with altref, or update altref with golden
 * Any other value is treated as "no update".
 *
 * @param s      decoder context (the value is read from s->c)
 * @param update the already-parsed update flag for this reference
 * @param ref    the reference being decided: VP56_FRAME_GOLDEN, or anything
 *               else for altref
 * @return the frame to copy from, or VP56_FRAME_NONE
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    int code;

    if (update)
        return VP56_FRAME_CURRENT;

    code = vp8_rac_get_uint(&s->c, 2);

    if (code == 1)
        return VP56_FRAME_PREVIOUS;
    if (code == 2) {
        /* swap: golden is refreshed from altref and vice versa */
        if (ref == VP56_FRAME_GOLDEN)
            return VP56_FRAME_GOLDEN2;
        return VP56_FRAME_GOLDEN;
    }
    return VP56_FRAME_NONE;
}

/**
 * Parse the golden/altref update information of an inter frame header and
 * store the resulting source frames in s->update_golden / s->update_altref.
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;

    /* both flags are read before ref_to_update() reads anything further */
    const int golden_flag = vp8_rac_get(rc);
    const int altref_flag = vp8_rac_get(rc);

    s->update_golden = ref_to_update(s, golden_flag, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, altref_flag, VP56_FRAME_GOLDEN2);
}

/**
 * Parse the VP8 frame header and prepare the decoder state for the frame.
 *
 * Reads the 3-byte frame tag (keyframe flag, profile, visibility, size of
 * the first partition), the keyframe start code and dimensions if present,
 * and then the range-coded header: segmentation, loop filter, partitions,
 * quantizers, reference updates and probability updates. Buffers are
 * (re)allocated through update_dimensions() on the first frame or when the
 * dimensions change.
 *
 * @param s        decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes available at buf
 * @return 0 on success, AVERROR_INVALIDDATA on truncated or malformed input,
 *         or the error returned by update_dimensions()
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    /* the 3-byte frame tag is read unconditionally below */
    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    /* keyframes carry 7 extra bytes (start code + dimensions) before the header partition */
    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* keyframes reset the references and all adaptive state to defaults */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    /* token probability updates: one value per band is fanned out to every
     * coefficient position belonging to that band */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}

J
Jason Garrett-Glaser 已提交
438
/**
 * Clip both components of src to the [mv_min, mv_max] range stored in the
 * context and write the result to dst.
 */
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
}

/**
 * Motion vector coding, 17.1.
 *
 * Reads one signed motion vector component.
 *
 * @param c range coder to read from
 * @param p probability table for this component: p[0] selects the long or
 *          short form, p[1] is the sign, p[2..] drive the short-form tree
 *          and p[9..] the long-form magnitude bits
 * @return the decoded component; a zero magnitude has no sign bit
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        /* long form: bits 0..2 first, then bits 9 down to 4, bit 3 last */
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* bit 3 is implied when no higher bit is set, otherwise it is coded */
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

475 476
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
D
David Conrad 已提交
477
{
478 479 480
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
D
David Conrad 已提交
481
        return vp8_submv_prob[2];
482
    return vp8_submv_prob[1-!!left];
D
David Conrad 已提交
483 484 485 486
}

/**
 * Split motion vector prediction, 16.4.
487
 * @returns the number of motion vectors parsed (2, 4 or 16)
D
David Conrad 已提交
488
 */
489
static av_always_inline
D
Daniel Kang 已提交
490
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
D
David Conrad 已提交
491
{
492 493
    int part_idx;
    int n, num;
D
Daniel Kang 已提交
494
    VP8Macroblock *top_mb;
495 496
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
D
Daniel Kang 已提交
497
                  *mbsplits_top,
498
                  *mbsplits_cur, *firstidx;
D
Daniel Kang 已提交
499
    VP56mv *top_mv;
500 501
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;
D
David Conrad 已提交
502

D
Daniel Kang 已提交
503 504 505 506 507 508 509
    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

510 511 512 513 514 515 516 517 518 519 520 521 522 523 524
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

D
David Conrad 已提交
525
    for (n = 0; n < num; n++) {
526
        int k = firstidx[n];
527
        uint32_t left, above;
528 529
        const uint8_t *submv_prob;

530 531 532 533 534 535 536 537
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
538 539

        submv_prob = get_submv_prob(left, above);
D
David Conrad 已提交
540

541 542 543 544 545 546 547 548 549 550 551 552
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
553
            AV_WN32A(&mb->bmv[n], left);
D
David Conrad 已提交
554 555
        }
    }
556 557

    return num;
D
David Conrad 已提交
558 559
}

560
static av_always_inline
D
Daniel Kang 已提交
561
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
562
{
D
Daniel Kang 已提交
563
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
564
                                  mb - 1 /* left */,
D
Daniel Kang 已提交
565
                                  0 /* top-left */ };
566
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
567
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
568 569
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
570
    int8_t *sign_bias = s->sign_bias;
571 572 573 574
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

D
Daniel Kang 已提交
575 576 577 578 579 580 581 582 583
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

584 585
    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
586
    AV_ZERO32(&near_mv[2]);
587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617

    /* Process MB on top, left and top-left */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
618
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
619 620 621 622 623 624 625 626 627 628 629 630
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
J
Jason Garrett-Glaser 已提交
631
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
632 633 634
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
635 636 637

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
D
Daniel Kang 已提交
638
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
639 640 641 642 643 644
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
J
Jason Garrett-Glaser 已提交
645
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
646 647 648
                mb->bmv[0] = mb->mv;
            }
        } else {
J
Jason Garrett-Glaser 已提交
649
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
650 651 652 653 654 655 656 657 658
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

659
static av_always_inline
660
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
D
Daniel Kang 已提交
661
                           int mb_x, int keyframe, int layout)
D
David Conrad 已提交
662
{
663 664
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

D
Daniel Kang 已提交
665 666 667 668
    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
669
    if (keyframe) {
670
        int x, y;
D
Daniel Kang 已提交
671
        uint8_t* top;
672
        uint8_t* const left = s->intra4x4_pred_mode_left;
D
Daniel Kang 已提交
673 674 675 676
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
677 678
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
679 680 681 682 683
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
D
David Conrad 已提交
684 685
            }
        }
686
    } else {
687
        int i;
688 689
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
D
David Conrad 已提交
690 691 692
    }
}

693
static av_always_inline
D
Daniel Kang 已提交
694 695
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
D
David Conrad 已提交
696 697 698
{
    VP56RangeCoder *c = &s->c;

D
David Conrad 已提交
699 700 701
    if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
702
    } else if (s->segmentation.enabled)
R
Ronald S. Bultje 已提交
703
        *segment = ref ? *ref : *segment;
704
    mb->segment = *segment;
D
David Conrad 已提交
705

706
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
D
David Conrad 已提交
707 708 709 710 711

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
D
Daniel Kang 已提交
712
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
713 714
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
D
Daniel Kang 已提交
715 716 717 718 719
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
720
        }
D
David Conrad 已提交
721

722
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
D
David Conrad 已提交
723
        mb->ref_frame = VP56_FRAME_CURRENT;
724
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
D
David Conrad 已提交
725
        // inter MB, 16.2
726 727
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
D
David Conrad 已提交
728 729 730
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
J
Jason Garrett-Glaser 已提交
731
        s->ref_count[mb->ref_frame-1]++;
D
David Conrad 已提交
732 733

        // motion vectors, 16.3
D
Daniel Kang 已提交
734
        decode_mvs(s, mb, mb_x, mb_y, layout);
D
David Conrad 已提交
735 736 737 738
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

739
        if (mb->mode == MODE_I4x4)
D
Daniel Kang 已提交
740
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
D
David Conrad 已提交
741

742
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
D
David Conrad 已提交
743
        mb->ref_frame = VP56_FRAME_CURRENT;
744
        mb->partitioning = VP8_SPLITMVMODE_NONE;
745
        AV_ZERO32(&mb->bmv[0]);
D
David Conrad 已提交
746 747 748
    }
}

749
#ifndef decode_block_coeffs_internal
D
David Conrad 已提交
750
/**
751
 * @param r arithmetic bitstream reader context
752 753
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
D
David Conrad 已提交
754
 * @param i initial coeff index, 0 unless a separate DC block is coded
755
 * @param qmul array holding the dc/ac dequant factor at position 0/1
D
David Conrad 已提交
756 757 758
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
D
Diego Biurrun 已提交
759
static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
760
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
761
                                        int i, uint8_t *token_prob, int16_t qmul[2])
D
David Conrad 已提交
762
{
763
    VP56RangeCoder c = *r;
764
    goto skip_eob;
765
    do {
766
        int coeff;
767 768
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;
D
David Conrad 已提交
769

770
skip_eob:
771
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
772
            if (++i == 16)
773
                break; // invalid input; blocks should end with EOB
774
            token_prob = probs[i][0];
775
            goto skip_eob;
776 777
        }

778
        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
779
            coeff = 1;
780
            token_prob = probs[i+1][1];
781
        } else {
782 783
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
784
                if (coeff)
785
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
786 787 788
                coeff += 2;
            } else {
                // DCT_CAT*
789 790 791
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
792 793
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
794 795
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
796 797
                    }
                } else {    // DCT_CAT3 and up
798 799
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
800 801
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
802
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
803 804
                }
            }
805
            token_prob = probs[i+1][2];
806
        }
807
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
808
    } while (++i < 16);
809

810
    *r = c;
811
    return i;
D
David Conrad 已提交
812
}
813
#endif
D
David Conrad 已提交
814

815 816 817 818 819 820 821 822 823 824 825
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
826
static av_always_inline
D
Diego Biurrun 已提交
827
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
828
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
829 830 831 832 833 834 835 836
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}

837
static av_always_inline
D
Daniel Kang 已提交
838
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
839
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
D
David Conrad 已提交
840 841 842
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
843
    int segment = mb->segment;
844
    int block_dc = 0;
D
David Conrad 已提交
845 846 847 848 849

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
D
Daniel Kang 已提交
850
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
D
David Conrad 已提交
851 852
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
853 854 855 856
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
D
Daniel Kang 已提交
857
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
858
            else
D
Daniel Kang 已提交
859
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
860
        }
D
David Conrad 已提交
861 862 863 864 865 866 867
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
J
Jason Garrett-Glaser 已提交
868
            nnz_pred = l_nnz[y] + t_nnz[x];
D
Daniel Kang 已提交
869
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
J
Jason Garrett-Glaser 已提交
870
                                      nnz_pred, s->qmat[segment].luma_qmul);
871
            // nnz+block_dc may be one more than the actual last index, but we don't care
D
Daniel Kang 已提交
872
            td->non_zero_count_cache[y][x] = nnz + block_dc;
D
David Conrad 已提交
873 874 875 876 877 878 879 880 881 882 883
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
D
Daniel Kang 已提交
884
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
D
David Conrad 已提交
885
                                          nnz_pred, s->qmat[segment].chroma_qmul);
D
Daniel Kang 已提交
886
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
D
David Conrad 已提交
887 888 889 890 891 892 893 894 895 896 897
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918
/**
 * Save the bottom pixel row of a macroblock into a top-border buffer.
 *
 * Luma row 15 goes to top_border[0..]; unless the simple loop filter is in
 * use, chroma row 7 of Cb and Cr go to top_border+16 and top_border+24.
 */
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    AV_COPY128(top_border, src_y + 15*linesize);

    // chroma is not saved in simple mode
    if (simple)
        return;

    AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
    AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
}

/**
 * Exchange (or copy) the pixel row directly above a macroblock with the
 * saved top-border buffer.
 *
 * The 8 pixels to the left of the macroblock and its first 8 pixels honour
 * the xchg flag (swap when non-zero, otherwise copy from the border buffer
 * into the frame); the remaining spans are always swapped.
 *
 * @param top_border border storage for this macroblock; the entry 32 bytes
 *                   earlier is used for the top-left neighbour
 * @param src_y,src_cb,src_cr top-left pixel of the macroblock in each plane
 * @param mb_x,mb_y,mb_width  macroblock position and frame width in macroblocks
 * @param simple non-zero to skip chroma, except on the first macroblock row
 * @param xchg   see above
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *left_border = top_border - 32;     // for TL prediction
    uint8_t *y_above     = src_y  -   linesize;
    uint8_t *cb_above    = src_cb - uvlinesize;
    uint8_t *cr_above    = src_cr - uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(left_border + 8, y_above - 8, xchg);
    XCHG(top_border,      y_above,     xchg);
    XCHG(top_border + 8,  y_above + 8, 1);
    // the top-right span only exists if there is a macroblock to the right
    if (mb_x < mb_width-1)
        XCHG(top_border + 32, y_above + 16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (simple && mb_y)
        return;

    XCHG(left_border + 16, cb_above - 8, xchg);
    XCHG(left_border + 24, cr_above - 8, xchg);
    XCHG(top_border  + 16, cb_above,     1);
    XCHG(top_border  + 24, cr_above,     1);
}

940
static av_always_inline
941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    } else {
        return mb_y ? mode : LEFT_DC_PRED8x8;
    }
}

/**
 * Replace TM prediction at the frame edge: both neighbours missing ->
 * DC_129, left missing -> VERT, top missing -> HOR, otherwise the requested
 * mode unchanged.
 */
static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x && mb_y)
        return mode;            // interior macroblock
    if (mb_x)
        return HOR_PRED8x8;     // first row, not first column
    return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
D
David Conrad 已提交
962 963
{
    if (mode == DC_PRED8x8) {
964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    } else {
        return mode;
    }
}

/**
 * Edge fix-up used with CODEC_FLAG_EMU_EDGE: every mode that would read a
 * missing neighbour is replaced by one that does not.
 */
static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    if (mode == DC_PRED8x8)
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    if (mode == VERT_PRED8x8)
        return mb_y ? mode : DC_127_PRED8x8;    // no row above on the first row
    if (mode == HOR_PRED8x8)
        return mb_x ? mode : DC_129_PRED8x8;    // no column left of the first column
    if (mode == PLANE_PRED8x8 /*TM*/)
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);

    return mode;
}

/**
 * 4x4 counterpart of check_tm_pred8x8_mode(): both neighbours missing ->
 * DC_129, left missing -> VERT, top missing -> HOR, otherwise unchanged.
 */
static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x && mb_y)
        return mode;
    if (mb_x)
        return HOR_VP8_PRED;
    return mb_y ? VERT_VP8_PRED : DC_129_PRED;
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
1013
        }
1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
D
David Conrad 已提交
1026 1027 1028 1029
    }
    return mode;
}

1030
static av_always_inline
D
Daniel Kang 已提交
1031 1032
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
D
David Conrad 已提交
1033
{
1034
    AVCodecContext *avctx = s->avctx;
M
Mans Rullgard 已提交
1035 1036
    int x, y, mode, nnz;
    uint32_t tr;
D
David Conrad 已提交
1037

1038 1039
    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
D
Daniel Kang 已提交
1040
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1041 1042 1043 1044
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

D
David Conrad 已提交
1045
    if (mb->mode < MODE_I4x4) {
1046 1047 1048 1049 1050
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
D
David Conrad 已提交
1051 1052 1053
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
1054
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1055
        uint8_t tr_top[4] = { 127, 127, 127, 127 };
D
David Conrad 已提交
1056 1057 1058 1059 1060 1061 1062

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
1063 1064
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
M
Mans Rullgard 已提交
1065
            tr = tr_right[-1]*0x01010101u;
D
David Conrad 已提交
1066 1067 1068
            tr_right = (uint8_t *)&tr;
        }

1069
        if (mb->skip)
D
Daniel Kang 已提交
1070
            AV_ZERO128(td->non_zero_count_cache);
1071

D
David Conrad 已提交
1072 1073 1074
        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
1075 1076 1077 1078 1079 1080 1081
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
D
David Conrad 已提交
1082 1083
                    topright = tr_right;

1084 1085 1086 1087 1088 1089 1090
                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
R
Ronald S. Bultje 已提交
1091
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1092
                        } else {
R
Ronald S. Bultje 已提交
1093
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
R
Ronald S. Bultje 已提交
1117 1118 1119 1120
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1121
                }
D
David Conrad 已提交
1122

D
Daniel Kang 已提交
1123
                nnz = td->non_zero_count_cache[y][x];
D
David Conrad 已提交
1124 1125
                if (nnz) {
                    if (nnz == 1)
D
Daniel Kang 已提交
1126
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
D
David Conrad 已提交
1127
                    else
D
Daniel Kang 已提交
1128
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
D
David Conrad 已提交
1129 1130 1131 1132 1133
                }
                topright += 4;
            }

            ptr   += 4*s->linesize;
1134
            intra4x4 += 4;
D
David Conrad 已提交
1135 1136 1137
        }
    }

1138
    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1139
        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1140
    } else {
1141
        mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1142
    }
D
David Conrad 已提交
1143 1144
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1145

D
Daniel Kang 已提交
1146
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1147 1148 1149
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
D
David Conrad 已提交
1150 1151
}

1152 1153 1154 1155 1156 1157 1158
/*
 * Per-axis lookup used by the motion compensation functions, indexed by the
 * fractional part of a motion vector component (a value in 0..7).
 *   row 0: pixels needed before the block; also selects the entry of the
 *          mc_func[3][3] table
 *   row 1: total extra pixels needed along the axis (before + after)
 *   row 2: pixels needed after the block
 */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};

D
David Conrad 已提交
1159
/**
1160
 * luma MC function
D
David Conrad 已提交
1161 1162 1163
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
1164
 * @param ref reference picture buffer at origin (0, 0)
D
David Conrad 已提交
1165 1166 1167 1168 1169 1170 1171 1172
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
1173
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
D
David Conrad 已提交
1174
 */
1175
static av_always_inline
D
Daniel Kang 已提交
1176
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1177
                 ThreadFrame *ref, const VP56mv *mv,
1178 1179 1180
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
D
David Conrad 已提交
1181
{
1182
    uint8_t *src = ref->f->data[0];
R
Ronald S. Bultje 已提交
1183

1184
    if (AV_RN32A(mv)) {
1185 1186 1187 1188 1189 1190

        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;
1191 1192

        // edge emulation
R
Ronald S. Bultje 已提交
1193
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1194
        src += y_off * linesize + x_off;
1195 1196
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
R
Ronald S. Bultje 已提交
1197 1198 1199
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1200
            src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1201 1202
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
R
Ronald S. Bultje 已提交
1203 1204
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1205
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
R
Ronald S. Bultje 已提交
1206
    }
D
David Conrad 已提交
1207 1208
}

1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225
/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
1226
static av_always_inline
D
Daniel Kang 已提交
1227
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1228
                   ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1229 1230 1231
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
1232
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
R
Ronald S. Bultje 已提交
1233

1234 1235 1236 1237 1238 1239 1240 1241 1242 1243
    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
R
Ronald S. Bultje 已提交
1244
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1245 1246
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
R
Ronald S. Bultje 已提交
1247 1248 1249
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1250
            src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1251 1252
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

R
Ronald S. Bultje 已提交
1253 1254 1255
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1256
            src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1257 1258 1259 1260 1261 1262
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
R
Ronald S. Bultje 已提交
1263
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1264 1265 1266 1267 1268
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

1269
static av_always_inline
D
Daniel Kang 已提交
1270
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1271
                 ThreadFrame *ref_frame, int x_off, int y_off,
1272 1273 1274
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
1275 1276 1277 1278
{
    VP56mv uvmv = *mv;

    /* Y */
D
Daniel Kang 已提交
1279
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
R
Ronald S. Bultje 已提交
1280
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1281 1282
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);
1283 1284 1285 1286 1287 1288 1289 1290 1291 1292

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
D
Daniel Kang 已提交
1293
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
R
Ronald S. Bultje 已提交
1294 1295
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
1296 1297
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
1298 1299
}

1300 1301
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
1302
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1303
{
J
Jason Garrett-Glaser 已提交
1304 1305
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
J
Jason Garrett-Glaser 已提交
1306
        int x_off = mb_x << 4, y_off = mb_y << 4;
J
Jason Garrett-Glaser 已提交
1307 1308
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
1309
        uint8_t **src= s->framep[ref]->tf.f->data;
J
Jason Garrett-Glaser 已提交
1310
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
R
Ronald S. Bultje 已提交
1311 1312 1313
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
R
Ronald S. Bultje 已提交
1314
        s->vdsp.prefetch(src[0]+off, s->linesize, 4);
J
Jason Garrett-Glaser 已提交
1315
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
R
Ronald S. Bultje 已提交
1316
        s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
J
Jason Garrett-Glaser 已提交
1317
    }
1318 1319
}

D
David Conrad 已提交
1320 1321 1322
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
1323
static av_always_inline
D
Daniel Kang 已提交
1324 1325
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
D
David Conrad 已提交
1326 1327 1328
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
1329
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1330
    VP56mv *bmv = mb->bmv;
D
David Conrad 已提交
1331

1332 1333
    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
D
Daniel Kang 已提交
1334
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1335
                    0, 0, 16, 16, width, height, &mb->mv);
1336
        break;
1337
    case VP8_SPLITMVMODE_4x4: {
D
David Conrad 已提交
1338
        int x, y;
1339
        VP56mv uvmv;
D
David Conrad 已提交
1340 1341 1342 1343

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
D
Daniel Kang 已提交
1344
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
R
Ronald S. Bultje 已提交
1345
                            ref, &bmv[4*y + x],
1346 1347 1348
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
D
David Conrad 已提交
1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1364 1365
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
D
David Conrad 已提交
1366 1367 1368 1369
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
D
Daniel Kang 已提交
1370
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
R
Ronald S. Bultje 已提交
1371
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1372 1373 1374
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
D
David Conrad 已提交
1375 1376
            }
        }
1377 1378 1379
        break;
    }
    case VP8_SPLITMVMODE_16x8:
D
Daniel Kang 已提交
1380
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1381
                    0, 0, 16, 8, width, height, &bmv[0]);
D
Daniel Kang 已提交
1382
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1383
                    0, 8, 16, 8, width, height, &bmv[1]);
1384 1385
        break;
    case VP8_SPLITMVMODE_8x16:
D
Daniel Kang 已提交
1386
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1387
                    0, 0, 8, 16, width, height, &bmv[0]);
D
Daniel Kang 已提交
1388
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1389
                    8, 0, 8, 16, width, height, &bmv[1]);
1390 1391
        break;
    case VP8_SPLITMVMODE_8x8:
D
Daniel Kang 已提交
1392
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1393
                    0, 0, 8, 8, width, height, &bmv[0]);
D
Daniel Kang 已提交
1394
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1395
                    8, 0, 8, 8, width, height, &bmv[1]);
D
Daniel Kang 已提交
1396
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1397
                    0, 8, 8, 8, width, height, &bmv[2]);
D
Daniel Kang 已提交
1398
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1399
                    8, 8, 8, 8, width, height, &bmv[3]);
1400
        break;
D
David Conrad 已提交
1401 1402 1403
    }
}

D
Daniel Kang 已提交
1404 1405
/**
 * Add the inverse-transformed residual of a macroblock onto dst.
 *
 * The per-block counts in td->non_zero_count_cache are read four at a time
 * as one 32-bit word. A word of zero means nothing to do, a word in which
 * every byte is 0 or 1 is handled by a single DC-only call covering the four
 * blocks, and anything else is processed block by block.
 *
 * Luma is skipped for MODE_I4x4 macroblocks.
 *
 * @param s   VP8 decoding context
 * @param td  per-thread data holding coefficients and non-zero counts
 * @param dst top-left pixel of the macroblock in the Y, Cb and Cr planes
 * @param mb  macroblock whose residual is added
 */
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int i, row, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *row_dst = dst[0];

        for (row = 0; row < 4; row++, row_dst += 4*s->linesize) {
            uint32_t packed = AV_RL32(td->non_zero_count_cache[row]);

            if (!packed)
                continue;
            if (!(packed&~0x01010101)) {
                // every block of the row has at most a DC coefficient
                s->vp8dsp.vp8_idct_dc_add4y(row_dst, td->block[row], s->linesize);
                continue;
            }
            // stop as soon as the remaining counts are all zero
            for (i = 0; i < 4 && packed; i++, packed >>= 8) {
                const uint8_t cnt = (uint8_t)packed;

                if (cnt > 1)
                    s->vp8dsp.vp8_idct_add(row_dst+4*i, td->block[row][i], s->linesize);
                else if (cnt == 1)
                    s->vp8dsp.vp8_idct_dc_add(row_dst+4*i, td->block[row][i], s->linesize);
            }
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t packed = AV_RL32(td->non_zero_count_cache[4+ch]);
        uint8_t *ch_dst = dst[1+ch];

        if (!packed)
            continue;
        if (!(packed&~0x01010101)) {
            s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
            continue;
        }
        // the four blocks form a 2x2 grid: i&1 is the column, i>>1 the row
        for (i = 0; i < 4 && packed; i++, packed >>= 8) {
            const uint8_t cnt = (uint8_t)packed;
            uint8_t *blk_dst  = ch_dst + (i >> 1)*4*s->uvlinesize + 4*(i & 1);

            if (cnt > 1)
                s->vp8dsp.vp8_idct_add(blk_dst, td->block[4+ch][i], s->uvlinesize);
            else if (cnt == 1)
                s->vp8dsp.vp8_idct_dc_add(blk_dst, td->block[4+ch][i], s->uvlinesize);
        }
    }
}

1457
/**
 * Compute the loop filter strength of one macroblock and store it in f.
 *
 * The level starts from the frame level or the segment level (absolute, or
 * added to the frame level), gets the per-reference and per-mode deltas when
 * they are enabled, and is then clipped with av_clip_uintp2(level, 6). The
 * interior limit is derived from the level and the sharpness setting and is
 * at least 1.
 *
 * @param s  VP8 decoding context
 * @param mb macroblock to compute the strength for
 * @param f  output: filter_level, inner_limit and inner_filter
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    int level, limit;

    if (!s->segmentation.enabled)
        level = s->filter.level;
    else if (s->segmentation.absolute_vals)
        level = s->segmentation.filter_level[mb->segment];
    else
        level = s->segmentation.filter_level[mb->segment] + s->filter.level;

    if (s->lf_delta.enabled)
        level += s->lf_delta.ref[mb->ref_frame] + s->lf_delta.mode[mb->mode];

    level = av_clip_uintp2(level, 6);

    limit = level;
    if (s->filter.sharpness) {
        limit >>= (s->filter.sharpness + 3) >> 2;
        limit = FFMIN(limit, 9 - s->filter.sharpness);
    }
    if (limit < 1)
        limit = 1;

    f->filter_level = level;
    f->inner_limit  = limit;
    // inner edges are left alone only for skipped macroblocks that are not
    // split into sub-blocks
    f->inner_filter = !(mb->skip && mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT);
}

1487
/**
 * Apply the normal (non-simple) loop filter to one macroblock.
 *
 * Order of operations: left macroblock edge, inner vertical edges,
 * top macroblock edge, inner horizontal edges. Nothing is done when the
 * stored filter level is zero.
 *
 * @param s     decoder context (line sizes, keyframe flag, DSP function table)
 * @param dst   pointers to the macroblock's first sample in the Y, U and V planes
 * @param f     filter strength stored for this macroblock
 * @param mb_x  macroblock column; the left edge is filtered only when non-zero
 * @param mb_y  macroblock row; the top edge is filtered only when non-zero
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    // High-edge-variance threshold, indexed by [keyframe][filter_level].
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };
    int level      = f->filter_level;
    int inner_lim  = f->inner_limit;
    int do_inner   = f->inner_filter;
    int linesize   = s->linesize;
    int uvlinesize = s->uvlinesize;
    int mb_edge, blk_edge, hev, off;

    if (!level)
        return;

    blk_edge = 2*level + inner_lim;
    mb_edge  = blk_edge + 4;
    hev      = hev_thresh_lut[s->keyframe][level];

    // Left macroblock edge.
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mb_edge, inner_lim, hev);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mb_edge, inner_lim, hev);
    }

    // Inner vertical edges: luma columns 4, 8, 12, then chroma column 4.
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + off, linesize,
                                                 blk_edge, inner_lim, hev);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, blk_edge,
                                             inner_lim, hev);
    }

    // Top macroblock edge.
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mb_edge, inner_lim, hev);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mb_edge, inner_lim, hev);
    }

    // Inner horizontal edges: luma rows 4, 8, 12, then chroma row 4.
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + off*linesize,
                                                 linesize, blk_edge,
                                                 inner_lim, hev);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, blk_edge,
                                             inner_lim, hev);
    }
}

1557
/**
 * Apply the simple loop filter to the luma plane of one macroblock.
 *
 * Order of operations: left macroblock edge, inner vertical edges,
 * top macroblock edge, inner horizontal edges. Nothing is done when the
 * stored filter level is zero.
 *
 * @param s     decoder context (luma line size, DSP function table)
 * @param dst   pointer to the macroblock's first luma sample
 * @param f     filter strength stored for this macroblock
 * @param mb_x  macroblock column; the left edge is filtered only when non-zero
 * @param mb_y  macroblock row; the top edge is filtered only when non-zero
 */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int level     = f->filter_level;
    int inner_lim = f->inner_limit;
    int do_inner  = f->inner_filter;
    int linesize  = s->linesize;
    int mb_edge, blk_edge, off;

    if (!level)
        return;

    blk_edge = 2*level + inner_lim;
    mb_edge  = blk_edge + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mb_edge);
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_h_loop_filter_simple(dst + off, linesize, blk_edge);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mb_edge);
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_v_loop_filter_simple(dst + off*linesize, linesize, blk_edge);
    }
}

1588
#define MARGIN (16 << 2)
1589 1590
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
1591 1592
{
    VP8Context *s = avctx->priv_data;
D
Daniel Kang 已提交
1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1608 1609 1610
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1);
D
Daniel Kang 已提交
1611 1612 1613 1614 1615 1616 1617 1618
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

1619
#if HAVE_THREADS
/*
 * Row-to-row synchronisation helpers for sliced decoding.
 *
 * A position is encoded as (mb_y << 16) | (mb_x & 0xFFFF) and published in
 * VP8ThreadData.thread_mb_pos.
 *
 * check_thread_pos(td, otd, x, y): block the caller (td) until otd has
 * published a position >= (y, x). While waiting, td->wait_mb_pos holds the
 * awaited position; it is reset to INT_MAX afterwards.
 *
 * update_pos(td, y, x): publish position (y, x) for td and, when slice
 * threading with more than one job is active, wake waiters if a neighbouring
 * thread is waiting for a position <= the new one (or if a neighbour pointer
 * is NULL). This macro reads `avctx`, `num_jobs`, `next_td` and `prev_td`
 * from the calling function's scope.
 *
 * Both macros expand to a single statement without a trailing semicolon, so
 * callers must write `check_thread_pos(...);`.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            do {\
                if ((otd)->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            } while (1);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while (0)

#define update_pos(td, mb_y, mb_x)\
    do {\
    int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
    int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
    int is_null          = (next_td == NULL) || (prev_td == NULL);\
    int pos_check        = (is_null) ? 1 :\
                            (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                            (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
    (td)->thread_mb_pos = pos;\
    if (sliced_threading && pos_check) {\
        pthread_mutex_lock(&(td)->lock);\
        pthread_cond_broadcast(&(td)->cond);\
        pthread_mutex_unlock(&(td)->lock);\
    }\
    } while (0)
#else
/* Without thread support both helpers expand to nothing. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
D
Daniel Kang 已提交
1655 1656 1657 1658 1659 1660 1661

/**
 * Decode one macroblock row (modes if not pre-decoded, coefficients,
 * prediction, IDCT) without running the loop filter.
 *
 * The row to decode is taken from the upper 16 bits of td->thread_mb_pos.
 * When deblocking is enabled, the filter strength of every macroblock is
 * stored in td->filter_strength[] for a later vp8_filter_mb_row() call.
 *
 * NOTE(review): the row is read from thread_data[threadnr], whereas
 * vp8_decode_mb_row_sliced() stores it in thread_data[jobnr]; this appears to
 * rely on jobnr == threadnr - confirm against the execute2 implementation.
 *
 * @param avctx     codec context whose priv_data is the VP8Context
 * @param tdata     unused
 * @param jobnr     job index, used to locate the neighbouring rows' thread data
 * @param threadnr  index of the thread data used for this row
 */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int i, y, mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16*mb_y*s->linesize,
        curframe->tf.f->data[1] +  8*mb_y*s->uvlinesize,
        curframe->tf.f->data[2] +  8*mb_y*s->uvlinesize
    };

    // prev_td/next_td are the thread data of the rows above/below; they are
    // set to td itself on the first/last row, meaning "nothing to wait for".
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));
    // left edge of 129 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16>>!!i; y++)
                dst[i][y*curframe->tf.f->linesize[i]-1] = 129;
        if (mb_y == 1) {
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait until the thread handling row mb_y-1 has published a position
        // of at least column mb_x+1. For thread 0 the awaited column is offset
        // by mb_width+3, the same offset vp8_filter_mb_row() adds to the
        // positions it publishes.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        // With mb_layout == 1 the modes were already decoded by
        // vp8_decode_mv_mb_modes().
        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        // With several jobs, the last thread saves the unfiltered borders here;
        // the single-job case does it in vp8_filter_mb_row().
        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        // Publish progress. mb_x is always < s->mb_width here, so the former
        // "mb_x == s->mb_width+1" special case could never trigger.
        update_pos(td, mb_y, mb_x);
    }
}

D
Daniel Kang 已提交
1767 1768
/**
 * Run the loop filter over one macroblock row.
 *
 * The row is taken from the upper 16 bits of td->thread_mb_pos and the
 * per-macroblock strengths from td->filter_strength[]. Positions published
 * from here are offset by s->mb_width+3 relative to those published while
 * decoding.
 *
 * NOTE(review): the row is read from thread_data[threadnr], whereas
 * vp8_decode_mb_row_sliced() stores it in thread_data[jobnr]; this appears to
 * rely on jobnr == threadnr - confirm against the execute2 implementation.
 *
 * @param avctx     codec context whose priv_data is the VP8Context
 * @param tdata     unused
 * @param jobnr     job index, used to locate the neighbouring rows' thread data
 * @param threadnr  index of the thread data used for this row
 */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe->tf.f;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };

    // prev_td/next_td are the thread data of the rows above/below; they are
    // set to td itself on the first/last row, meaning "nothing to wait for".
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];

        // The row above must have published a filter position of at least column mb_x+1.
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        // The row below must have published a position of at least column
        // mb_x+1; this wait is skipped when that row belongs to thread 0.
        if (next_td != td && next_td != &s->thread_data[0]) {
            check_thread_pos(td, next_td, mb_x+1, mb_y+1);
        }

        // With a single job the unfiltered border is saved here; otherwise
        // vp8_decode_mb_row_no_filter() has already done it.
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}

D
Daniel Kang 已提交
1821 1822
/**
 * execute2() worker: decode (and optionally loop-filter) every num_jobs-th
 * macroblock row, starting at row jobnr.
 *
 * After each row the thread publishes an end-of-row position and, under frame
 * threading, reports the finished row through ff_thread_report_progress().
 *
 * @param avctx     codec context whose priv_data is the VP8Context
 * @param tdata     passed through to the row functions
 * @param jobnr     job index; also the first row handled and the thread data slot used
 * @param threadnr  executing thread's index, stored in td->thread_nr
 * @return 0
 */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    // update_pos() reads these two names; NULL makes it always signal.
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int num_jobs = s->num_jobs;
    int mb_y = jobnr;

    td->thread_nr = threadnr;

    while (mb_y < s->mb_height) {
        td->thread_mb_pos = mb_y<<16;

        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);

        // Mark the whole row as done.
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);

        mb_y += num_jobs;
    }

    return 0;
}

1848
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
D
David Conrad 已提交
1849 1850 1851
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
D
Daniel Kang 已提交
1852
    int ret, i, referenced, num_jobs;
D
David Conrad 已提交
1853
    enum AVDiscard skip_thresh;
1854
    VP8Frame *av_uninit(curframe), *prev_frame;
1855

D
David Conrad 已提交
1856
    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1857
        goto err;
D
David Conrad 已提交
1858

1859 1860
    prev_frame = s->framep[VP56_FRAME_CURRENT];

D
David Conrad 已提交
1861 1862 1863 1864 1865 1866 1867 1868
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
1869
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
D
David Conrad 已提交
1870 1871
        goto skip_decode;
    }
1872
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
D
David Conrad 已提交
1873

R
Ronald S. Bultje 已提交
1874 1875
    // release no longer referenced frames
    for (i = 0; i < 5; i++)
1876
        if (s->frames[i].tf.f->data[0] &&
R
Ronald S. Bultje 已提交
1877 1878 1879 1880
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1881
            vp8_release_frame(s, &s->frames[i]);
R
Ronald S. Bultje 已提交
1882 1883 1884 1885 1886

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
D
David Conrad 已提交
1887 1888 1889 1890 1891
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
R
Ronald S. Bultje 已提交
1892 1893 1894 1895
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
1896 1897
    if (curframe->tf.f->data[0])
        vp8_release_frame(s, curframe);
D
David Conrad 已提交
1898

1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909
    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

1910 1911
    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1912
    if ((ret = vp8_alloc_frame(s, curframe, referenced)) < 0)
1913
        goto err;
D
David Conrad 已提交
1914

R
Ronald S. Bultje 已提交
1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932
    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT]      = curframe;

1933 1934
    ff_thread_finish_setup(avctx);

1935 1936
    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];
D
David Conrad 已提交
1937

D
Daniel Kang 已提交
1938 1939 1940
    if (!s->thread_data[0].edge_emu_buffer)
        for (i = 0; i < MAX_THREADS; i++)
            s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
D
David Conrad 已提交
1941 1942

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
P
Pascal Massimino 已提交
1943
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
D
Daniel Kang 已提交
1944 1945 1946 1947
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1948

D
David Conrad 已提交
1949
    // top edge of 127 for intra prediction
1950 1951
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
1952 1953
        s->top_border[0][31] = 127;
        memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1954
    }
J
Jason Garrett-Glaser 已提交
1955
    memset(s->ref_count, 0, sizeof(s->ref_count));
D
David Conrad 已提交
1956 1957


D
Daniel Kang 已提交
1958 1959 1960
    // Make sure the previous frame has read its segmentation map,
    // if we re-use the same map.
    if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1961
        ff_thread_await_progress(&prev_frame->tf, 1, 0);
J
Jason Garrett-Glaser 已提交
1962

D
Daniel Kang 已提交
1963 1964
    if (s->mb_layout == 1)
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
D
David Conrad 已提交
1965

D
Daniel Kang 已提交
1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979
    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
D
David Conrad 已提交
1980

1981
    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
1982 1983
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

D
David Conrad 已提交
1984 1985 1986 1987 1988 1989 1990
skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
1991 1992
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
1993
        *got_frame      = 1;
D
David Conrad 已提交
1994 1995 1996
    }

    return avpkt->size;
1997 1998
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
1999
    return ret;
D
David Conrad 已提交
2000 2001
}

2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024
/**
 * Codec close callback: flush the decoder state through
 * vp8_decode_flush_impl(avctx, 1) and free the AVFrame of every frame slot.
 *
 * @return 0
 */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    VP8Frame *frame;

    vp8_decode_flush_impl(avctx, 1);

    for (frame = s->frames; frame < s->frames + FF_ARRAY_ELEMS(s->frames); frame++)
        av_frame_free(&frame->tf.f);

    return 0;
}

/**
 * Allocate an AVFrame for every frame slot of the context.
 *
 * Stops at the first failed allocation; slots filled before that are left
 * allocated (the callers in this file invoke vp8_decode_free() on failure).
 *
 * @return 0 on success, AVERROR(ENOMEM) if an allocation failed
 */
static av_cold int vp8_init_frames(VP8Context *s)
{
    VP8Frame *frame = s->frames;
    VP8Frame *const end = s->frames + FF_ARRAY_ELEMS(s->frames);

    for (; frame != end; frame++) {
        frame->tf.f = av_frame_alloc();
        if (!frame->tf.f)
            return AVERROR(ENOMEM);
    }

    return 0;
}

D
David Conrad 已提交
2025 2026 2027
/**
 * Codec init callback: set the output pixel format, request progress
 * allocation, initialise the DSP function tables and allocate the frame slots.
 *
 * @return 0 on success, a negative error code if frame allocation failed
 *         (the context is cleaned up with vp8_decode_free() in that case)
 */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    ret = vp8_init_frames(s);
    if (ret >= 0)
        return 0;

    vp8_decode_free(avctx);
    return ret;
}

R
Ronald S. Bultje 已提交
2046 2047 2048
/**
 * init_thread_copy callback: bind the copied context to its own
 * AVCodecContext and allocate its own frame slots.
 *
 * @return 0 on success, a negative error code if frame allocation failed
 *         (the context is cleaned up with vp8_decode_free() in that case)
 */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    ret = vp8_init_frames(s);
    if (ret >= 0)
        return 0;

    vp8_decode_free(avctx);
    return ret;
}

// Translate a frame pointer into s_src->frames[] to the slot with the same
// index in s->frames[]; NULL stays NULL. Uses `s` and `s_src` from the
// calling scope.
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

/**
 * update_thread_context callback: copy the state the next frame thread needs
 * from the source context.
 *
 * Copies the probabilities, segmentation, loop-filter deltas and sign biases,
 * takes references to all allocated source frames, and rebases the source's
 * next_framep[] pointers into this context's frame array. If buffers are
 * already allocated and the macroblock dimensions differ, they are freed and
 * the new dimensions adopted.
 *
 * @param dst  context to update
 * @param src  context to copy from
 * @return 0 on success, a negative error code if referencing a frame failed
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    // Pick the saved probabilities when the source frame did not make its
    // probability updates persistent.
    s->prob[0] = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        if (s_src->frames[i].tf.f->data[0]) {
            int ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
            if (ret < 0)
                return ret;
        }
    }

    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}

M
Michael Niedermayer 已提交
2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 2119 2120 2121 2122 2123 2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134 2135 2136 2137 2138 2139 2140 2141 2142
/**
 * Round size up to the next even value (wrapping to 0 for UINT_MAX).
 * webp_decode_frame() applies this to the chunk sizes it reads.
 */
static unsigned apply_padding(unsigned size)
{
    return (size + 1u) & ~1u;
}

/**
 * Decode a WebP image by stripping its RIFF container and handing the
 * remaining data to vp8_decode_frame().
 *
 * If the packet starts with a "RIFF"...."WEBP" header, the header is skipped,
 * followed by an optional "VP8X" chunk, an optional "ALPH" chunk (ignored with
 * a warning) and the 8-byte "VP8 " chunk header. Packets without a RIFF
 * header are passed through unchanged.
 *
 * @param avctx      codec context
 * @param data       output AVFrame
 * @param data_size  set by vp8_decode_frame() when a picture was produced
 * @param avpkt      input packet
 * @return value of vp8_decode_frame(), AVERROR_PATCHWELCOME for lossless
 *         ("VP8L") files, or AVERROR_INVALIDDATA for a truncated header/chunk
 */
static int webp_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                             AVPacket *avpkt)
{
    const uint8_t *buf = avpkt->data;
    int buf_size       = avpkt->size;
    AVPacket pkt       = *avpkt;

    if (buf_size >= 16
        && AV_RL32(buf   ) == AV_RL32("RIFF")
        && AV_RL32(buf+ 8) == AV_RL32("WEBP")) {
        unsigned riff_size = apply_padding(AV_RL32(buf+4)) + 8;
        buf += 12;   // Skip over main header
        buf_size -= 12;
        if (buf_size < 8 || riff_size < 8) {
            av_log(avctx, AV_LOG_ERROR, "Incomplete header.\n");
            return AVERROR_INVALIDDATA;
        }
        if (AV_RL32(buf) == AV_RL32("VP8L")) {
            av_log(avctx, AV_LOG_ERROR, "Unsupported WebP lossless format.\n");
            return AVERROR_PATCHWELCOME;
        }
        if (AV_RL32(buf) == AV_RL32("VP8X") && AV_RL32(buf+4) < (unsigned)buf_size) {
            // The payload size is below buf_size, so adding the 8-byte chunk
            // header cannot overflow; the padded total may still exceed what
            // is left in the packet and must be checked before skipping.
            unsigned size = apply_padding(AV_RL32(buf+4) + 8);
            if (size > (unsigned)buf_size) {
                av_log(avctx, AV_LOG_ERROR, "Truncated VP8X chunk.\n");
                return AVERROR_INVALIDDATA;
            }
            buf      += size;
            buf_size -= size;
        }
        if (buf_size >= 8
            && AV_RL32(buf) == AV_RL32("ALPH") && AV_RL32(buf+4) < (unsigned)buf_size) {
            unsigned size = apply_padding(AV_RL32(buf+4) + 8);
            if (size > (unsigned)buf_size) {
                av_log(avctx, AV_LOG_ERROR, "Truncated ALPH chunk.\n");
                return AVERROR_INVALIDDATA;
            }
            buf      += size;
            buf_size -= size;
            av_log(avctx, AV_LOG_WARNING, "Skipping alpha plane\n");
        }
        if (buf_size >= 8 && AV_RL32(buf) == AV_RL32("VP8 ")) {
            buf      += 8;
            buf_size -= 8;
        }
    }
    // buf points into avpkt->data, which is not const, so dropping the
    // qualifier here is safe.
    pkt.data = (uint8_t *)buf;
    pkt.size = buf_size;

    return vp8_decode_frame(avctx, data, data_size, &pkt);
}

2143
AVCodec ff_vp8_decoder = {
2144 2145
    .name                  = "vp8",
    .type                  = AVMEDIA_TYPE_VIDEO,
2146
    .id                    = AV_CODEC_ID_VP8,
2147 2148 2149 2150
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp8_decode_init,
    .close                 = vp8_decode_free,
    .decode                = vp8_decode_frame,
D
Daniel Kang 已提交
2151
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2152 2153
    .flush                 = vp8_decode_flush,
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
R
Ronald S. Bultje 已提交
2154 2155
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
D
David Conrad 已提交
2156
};
M
Michael Niedermayer 已提交
2157 2158 2159 2160 2161 2162 2163 2164 2165 2166 2167 2168 2169 2170 2171

/*
 * Codec descriptor for the WebP decoder. It shares the VP8 context, init,
 * close, flush and threading callbacks; only the decode callback differs
 * (webp_decode_frame strips the RIFF container before calling
 * vp8_decode_frame).
 */
AVCodec ff_webp_decoder = {
    .name                  = "webp",
    .type                  = AVMEDIA_TYPE_VIDEO,
    .id                    = AV_CODEC_ID_WEBP,
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp8_decode_init,
    .close                 = vp8_decode_free,
    .decode                = webp_decode_frame,
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
    .flush                 = vp8_decode_flush,
    .long_name             = NULL_IF_CONFIG_SMALL("WebP"),
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
};