/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

26
#include "libavutil/imgutils.h"
D
David Conrad 已提交
27
#include "avcodec.h"
28
#include "internal.h"
29
#include "vp8.h"
D
David Conrad 已提交
30 31
#include "vp8data.h"
#include "rectangle.h"
R
Ronald S. Bultje 已提交
32
#include "thread.h"
D
David Conrad 已提交
33

34 35 36 37
#if ARCH_ARM
#   include "arm/vp8.h"
#endif

38 39
/**
 * Release the per-stream work buffers held by the decoder context.
 *
 * Every buffer is released through av_freep() and s->macroblocks is
 * cleared, so the context can be re-populated by update_dimensions().
 *
 * @param s decoder context whose buffers are released
 */
static void free_buffers(VP8Context *s)
{
    int i;

    if (s->thread_data) {
        for (i = 0; i < MAX_THREADS; i++) {
#if HAVE_THREADS
            /* The lock and condition variable are stored inside the
             * thread_data array itself, so they have to be destroyed
             * before that array is released below. */
            pthread_cond_destroy(&s->thread_data[i].cond);
            pthread_mutex_destroy(&s->thread_data[i].lock);
#endif
            av_freep(&s->thread_data[i].filter_strength);
            av_freep(&s->thread_data[i].edge_emu_buffer);
        }
    }
    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    /* macroblocks pointed into macroblocks_base, which is gone now */
    s->macroblocks = NULL;
}

55
/**
 * Obtain a picture buffer and a zeroed segmentation map for a frame.
 *
 * @param s   decoder context (supplies avctx and the macroblock dimensions)
 * @param f   frame to populate
 * @param ref non-zero to request the buffer with AV_GET_BUFFER_FLAG_REF
 * @return 0 on success, a negative error code otherwise; on failure the
 *         picture buffer is not left acquired
 */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    const int flags = ref ? AV_GET_BUFFER_FLAG_REF : 0;
    int err         = ff_thread_get_buffer(s->avctx, &f->tf, flags);

    if (err < 0)
        return err;

    /* one byte per macroblock */
    f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height);
    if (f->seg_map)
        return 0;

    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}

68
/**
 * Drop the segmentation map reference and the picture buffer of a frame.
 *
 * @param s decoder context (supplies avctx)
 * @param f frame to release
 */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    AVCodecContext *avctx = s->avctx;

    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(avctx, &f->tf);
}

/**
 * Make dst a new reference to the picture and segmentation map of src.
 *
 * Whatever dst held before is released first.
 *
 * @param s   decoder context
 * @param dst frame that receives the references
 * @param src frame being referenced
 * @return 0 on success, a negative error code otherwise (dst is released
 *         again if the segmentation map could not be referenced)
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int err;

    vp8_release_frame(s, dst);

    err = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (err < 0)
        return err;

    /* a source without a segmentation map needs nothing more */
    if (!src->seg_map)
        return 0;

    dst->seg_map = av_buffer_ref(src->seg_map);
    if (!dst->seg_map) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}

91 92

/**
 * Release all frames held by the decoder and forget the frame pointers.
 *
 * @param avctx    codec context whose priv_data is the VP8Context
 * @param free_mem when non-zero, the work buffers are released as well
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int n;

    for (n = 0; n < FF_ARRAY_ELEMS(s->frames); n++) {
        vp8_release_frame(s, &s->frames[n]);
    }
    memset(s->framep, 0, sizeof(s->framep));

    if (!free_mem)
        return;

    free_buffers(s);
}

/**
 * Flush entry point: releases the frames but keeps the work buffers.
 *
 * @param avctx codec context to flush
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    const int keep_buffers = 0; /* do not free the work buffers */

    vp8_decode_flush_impl(avctx, keep_buffers);
}

/**
 * Apply a (possibly new) frame size and allocate the work buffers.
 *
 * If the size differs from the one stored in the codec context, the size is
 * validated, all frames and buffers are flushed and the new dimensions are
 * set. The macroblock-sized buffers are then allocated according to the
 * macroblock layout chosen from the active threading mode.
 *
 * @param s      decoder context
 * @param width  frame width
 * @param height frame height
 * @return 0 on success, AVERROR_INVALIDDATA if the size is rejected,
 *         AVERROR(ENOMEM) if any allocation fails (all work buffers are
 *         released again in that case)
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i;
    int alloc_failed = 0;

    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        if (av_image_check_size(width, height, 0, s->avctx))
            return AVERROR_INVALIDDATA;

        vp8_decode_flush_impl(s->avctx, 1);

        avcodec_set_dimensions(s->avctx, width, height);
    }

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
    }
    else // Sliced threading
        s->macroblocks_base       = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
    s->top_nnz                    = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border                 = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->thread_data                = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));

    if (s->thread_data) {
        /* Walk every slot even after a failed allocation so that each
         * lock/cond is initialised whenever thread_data itself exists. */
        for (i = 0; i < MAX_THREADS; i++) {
            s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
            if (!s->thread_data[i].filter_strength)
                alloc_failed = 1;
#if HAVE_THREADS
            pthread_mutex_init(&s->thread_data[i].lock, NULL);
            pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
        }
    } else {
        alloc_failed = 1;
    }

    if (alloc_failed ||
        !s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        /* Do not leave a partially allocated buffer set behind. */
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    /* skip the first element of the allocation */
    s->macroblocks        = s->macroblocks_base + 1;

    return 0;
}

/**
 * Read the segmentation part of the frame header from s->c.
 *
 * Reads the update_map flag, then optionally the per-segment quantizer and
 * loop filter values, then (if the map is updated) the three segment id
 * probabilities.
 *
 * @param s decoder context
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int seg, p;

    s->segmentation.update_map = vp8_rac_get(c);

    /* update segment feature data */
    if (vp8_rac_get(c)) {
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (seg = 0; seg < 4; seg++) {
            s->segmentation.base_quant[seg] = vp8_rac_get_sint(c, 7);
        }
        for (seg = 0; seg < 4; seg++) {
            s->segmentation.filter_level[seg] = vp8_rac_get_sint(c, 6);
        }
    }

    if (!s->segmentation.update_map)
        return;

    for (p = 0; p < 3; p++) {
        /* a probability that is not transmitted is set to 255 */
        if (vp8_rac_get(c))
            s->prob->segmentid[p] = vp8_rac_get_uint(c, 8);
        else
            s->prob->segmentid[p] = 255;
    }
}

/**
 * Read the loop filter delta updates from s->c.
 *
 * Each entry is preceded by an update flag; an updated entry is coded as a
 * 6-bit magnitude followed by a sign bit. Entries without the flag keep
 * their previous value.
 *
 * @param s decoder context
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int n;

    for (n = 0; n < 4; n++) {
        int delta;

        if (!vp8_rac_get(c))
            continue;

        delta = vp8_rac_get_uint(c, 6);
        if (vp8_rac_get(c))
            delta = -delta;
        s->lf_delta.ref[n] = delta;
    }

    for (n = MODE_I4x4; n <= VP8_MVMODE_SPLIT; n++) {
        int delta;

        if (!vp8_rac_get(c))
            continue;

        delta = vp8_rac_get_uint(c, 6);
        if (vp8_rac_get(c))
            delta = -delta;
        s->lf_delta.mode[n] = delta;
    }
}

/**
 * Read the partition count and set up one range decoder per coefficient
 * partition.
 *
 * The data starts with a 3-byte little-endian size for every partition
 * except the last one; the last partition takes whatever remains.
 *
 * @param s        decoder context
 * @param buf      start of the partition size table
 * @param buf_size number of bytes available at buf
 * @return 0 on success, -1 if the sizes do not fit into buf_size
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *size_table = buf;
    int last, n;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
    last = s->num_coeff_partitions - 1;

    /* skip past the size table */
    buf      += 3 * last;
    buf_size -= 3 * last;
    if (buf_size < 0)
        return -1;

    for (n = 0; n < last; n++) {
        int part_size = AV_RL24(size_table + 3 * n);

        if (part_size > buf_size)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[n], buf, part_size);
        buf      += part_size;
        buf_size -= part_size;
    }

    /* the final partition has no explicit size */
    ff_vp56_init_range_decoder(&s->coeff_partition[last], buf, buf_size);

    return 0;
}

/**
 * Read the quantizer indices from s->c and fill the dequantization factors
 * for all four segments.
 *
 * @param s decoder context
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int seg;

    /* the order of these reads follows the bitstream */
    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (seg = 0; seg < 4; seg++) {
        int base_qi = yac_qi;
        int y2ac, uvdc;

        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[seg];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        }

        /* 101581>>16 is equivalent to 155/100 */
        y2ac = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
        uvdc = vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];

        s->qmat[seg].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta, 7)];
        s->qmat[seg].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(base_qi, 7)];
        s->qmat[seg].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        s->qmat[seg].luma_dc_qmul[1] = FFMAX(y2ac, 8);    /* floor of 8 */
        s->qmat[seg].chroma_qmul[0]  = FFMIN(uvdc, 132);  /* ceiling of 132 */
        s->qmat[seg].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];
    }
}

/**
 * Determine which buffer golden or altref should be updated with after this
 * frame. The spec isn't clear here, so this follows the original author's
 * understanding of what libvpx does.
 *
 * Intra frames update all 3 references.
 * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set.
 * If the update (golden|altref) flag is set, the reference is updated with
 * the current frame.
 * If the flag is not set, a 2-bit number is read, meaning:
 *      0: no update
 *      1: VP56_FRAME_PREVIOUS
 *      2: update golden with altref, or update altref with golden
 *
 * @param s      decoder context (the 2-bit number is read from s->c)
 * @param update the update flag already read for this reference
 * @param ref    the reference being decided on
 * @return the frame to update ref with, or VP56_FRAME_NONE
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    int selector;

    if (update)
        return VP56_FRAME_CURRENT;

    selector = vp8_rac_get_uint(&s->c, 2);

    if (selector == 1)
        return VP56_FRAME_PREVIOUS;

    if (selector == 2) {
        /* copy from the other of the golden/altref pair */
        if (ref == VP56_FRAME_GOLDEN)
            return VP56_FRAME_GOLDEN2;
        return VP56_FRAME_GOLDEN;
    }

    return VP56_FRAME_NONE;
}

/**
 * Read the golden/altref update flags from s->c and store the resulting
 * update sources in s->update_golden and s->update_altref.
 *
 * @param s decoder context
 */
static void update_refs(VP8Context *s)
{
    /* both flags precede any of the optional 2-bit selectors */
    int golden_flag = vp8_rac_get(&s->c);
    int altref_flag = vp8_rac_get(&s->c);

    s->update_golden = ref_to_update(s, golden_flag, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, altref_flag, VP56_FRAME_GOLDEN2);
}

/**
 * Parse the frame header and set up the decoder state for this frame.
 *
 * The first three bytes carry the keyframe flag, profile, visibility flag
 * and the size of the header partition. Keyframes follow with a start
 * code and the frame dimensions, and reset the probability tables,
 * segmentation and loop filter deltas. The remaining fields are read from
 * the header partition through the range decoder s->c.
 *
 * @param s        decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes available at buf
 * @return 0 on success, a negative error code otherwise
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    /* The fixed 3-byte frame tag is read unconditionally below. */
    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    /* keyframes carry 7 extra bytes (start code + dimensions) before the
     * header partition; this check also guards the reads of those bytes */
    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    /* token probability updates: one value per coefficient band is
     * transmitted and copied to every coefficient index in that band */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}

J
Jason Garrett-Glaser 已提交
438
/**
 * Clip a motion vector to the range [s->mv_min, s->mv_max] per component.
 *
 * @param s   decoder context holding the limits
 * @param dst receives the clipped vector
 * @param src vector to clip
 */
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    const int clipped_x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    const int clipped_y = av_clip(src->y, s->mv_min.y, s->mv_max.y);

    dst->x = clipped_x;
    dst->y = clipped_y;
}

/**
 * Motion vector coding, 17.1.
 *
 * Reads one signed motion vector component from the range coder.
 *
 * @param c range coder to read from
 * @param p probabilities for this component
 * @return the decoded component value
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int magnitude = 0;

    if (!vp56_rac_get_prob_branchy(c, p[0])) {
        // small_mvtree
        const uint8_t *node = p + 2;
        int bit;

        bit        = vp56_rac_get_prob(c, *node);
        node      += 1 + 3 * bit;
        magnitude += 4 * bit;

        bit        = vp56_rac_get_prob(c, *node);
        node      += 1 + bit;
        magnitude += 2 * bit;

        magnitude += vp56_rac_get_prob(c, *node);
    } else {
        int pos;

        /* bits 0..2 first, then bits 9 down to 4 */
        for (pos = 0; pos < 3; pos++) {
            magnitude += vp56_rac_get_prob(c, p[9 + pos]) << pos;
        }
        for (pos = 9; pos > 3; pos--) {
            magnitude += vp56_rac_get_prob(c, p[9 + pos]) << pos;
        }
        /* bit 3 is only read when a higher bit is set; otherwise it is
         * taken to be set */
        if (!(magnitude & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            magnitude += 8;
    }

    /* a sign bit is only read for non-zero values */
    if (magnitude && vp56_rac_get_prob(c, p[1]))
        return -magnitude;
    return magnitude;
}

475 476
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
D
David Conrad 已提交
477
{
478 479 480
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
D
David Conrad 已提交
481
        return vp8_submv_prob[2];
482
    return vp8_submv_prob[1-!!left];
D
David Conrad 已提交
483 484 485 486
}

/**
 * Split motion vector prediction, 16.4.
487
 * @returns the number of motion vectors parsed (2, 4 or 16)
D
David Conrad 已提交
488
 */
489
static av_always_inline
D
Daniel Kang 已提交
490
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
D
David Conrad 已提交
491
{
492 493
    int part_idx;
    int n, num;
D
Daniel Kang 已提交
494
    VP8Macroblock *top_mb;
495 496
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
D
Daniel Kang 已提交
497
                  *mbsplits_top,
498
                  *mbsplits_cur, *firstidx;
D
Daniel Kang 已提交
499
    VP56mv *top_mv;
500 501
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;
D
David Conrad 已提交
502

D
Daniel Kang 已提交
503 504 505 506 507 508 509
    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

510 511 512 513 514 515 516 517 518 519 520 521 522 523 524
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

D
David Conrad 已提交
525
    for (n = 0; n < num; n++) {
526
        int k = firstidx[n];
527
        uint32_t left, above;
528 529
        const uint8_t *submv_prob;

530 531 532 533 534 535 536 537
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
538 539

        submv_prob = get_submv_prob(left, above);
D
David Conrad 已提交
540

541 542 543 544 545 546 547 548 549 550 551 552
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
553
            AV_WN32A(&mb->bmv[n], left);
D
David Conrad 已提交
554 555
        }
    }
556 557

    return num;
D
David Conrad 已提交
558 559
}

560
static av_always_inline
D
Daniel Kang 已提交
561
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
562
{
D
Daniel Kang 已提交
563
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
564
                                  mb - 1 /* left */,
D
Daniel Kang 已提交
565
                                  0 /* top-left */ };
566
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
567
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
568 569
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
570
    int8_t *sign_bias = s->sign_bias;
571 572 573 574
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

D
Daniel Kang 已提交
575 576 577 578 579 580 581 582 583
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

584 585
    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
586
    AV_ZERO32(&near_mv[2]);
587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617

    /* Process MB on top, left and top-left */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
618
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
619 620 621 622 623 624 625 626 627 628 629 630
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
J
Jason Garrett-Glaser 已提交
631
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
632 633 634
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
635 636 637

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
D
Daniel Kang 已提交
638
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
639 640 641 642 643 644
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
J
Jason Garrett-Glaser 已提交
645
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
646 647 648
                mb->bmv[0] = mb->mv;
            }
        } else {
J
Jason Garrett-Glaser 已提交
649
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
650 651 652 653 654 655 656 657 658
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

659
static av_always_inline
660
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
D
Daniel Kang 已提交
661
                           int mb_x, int keyframe, int layout)
D
David Conrad 已提交
662
{
663 664
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

D
Daniel Kang 已提交
665 666 667 668
    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
669
    if (keyframe) {
670
        int x, y;
D
Daniel Kang 已提交
671
        uint8_t* top;
672
        uint8_t* const left = s->intra4x4_pred_mode_left;
D
Daniel Kang 已提交
673 674 675 676
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
677 678
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
679 680 681 682 683
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
D
David Conrad 已提交
684 685
            }
        }
686
    } else {
687
        int i;
688 689
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
D
David Conrad 已提交
690 691 692
    }
}

693
static av_always_inline
D
Daniel Kang 已提交
694 695
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
D
David Conrad 已提交
696 697 698 699
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
700
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
701
    else if (s->segmentation.enabled)
R
Ronald S. Bultje 已提交
702
        *segment = ref ? *ref : *segment;
703
    mb->segment = *segment;
D
David Conrad 已提交
704

705
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
D
David Conrad 已提交
706 707 708 709 710

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
D
Daniel Kang 已提交
711
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
712 713
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
D
Daniel Kang 已提交
714 715 716 717 718
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
719
        }
D
David Conrad 已提交
720

721
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
D
David Conrad 已提交
722
        mb->ref_frame = VP56_FRAME_CURRENT;
723
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
D
David Conrad 已提交
724
        // inter MB, 16.2
725 726
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
D
David Conrad 已提交
727 728 729
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
J
Jason Garrett-Glaser 已提交
730
        s->ref_count[mb->ref_frame-1]++;
D
David Conrad 已提交
731 732

        // motion vectors, 16.3
D
Daniel Kang 已提交
733
        decode_mvs(s, mb, mb_x, mb_y, layout);
D
David Conrad 已提交
734 735 736 737
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

738
        if (mb->mode == MODE_I4x4)
D
Daniel Kang 已提交
739
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
D
David Conrad 已提交
740

741
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
D
David Conrad 已提交
742
        mb->ref_frame = VP56_FRAME_CURRENT;
743
        mb->partitioning = VP8_SPLITMVMODE_NONE;
744
        AV_ZERO32(&mb->bmv[0]);
D
David Conrad 已提交
745 746 747
    }
}

748
#ifndef decode_block_coeffs_internal
D
David Conrad 已提交
749
/**
750
 * @param r arithmetic bitstream reader context
751 752
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
D
David Conrad 已提交
753
 * @param i initial coeff index, 0 unless a separate DC block is coded
754
 * @param qmul array holding the dc/ac dequant factor at position 0/1
D
David Conrad 已提交
755 756 757
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
D
Diego Biurrun 已提交
758
static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
759
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
760
                                        int i, uint8_t *token_prob, int16_t qmul[2])
D
David Conrad 已提交
761
{
762
    VP56RangeCoder c = *r;
763
    goto skip_eob;
764
    do {
765
        int coeff;
766 767
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;
D
David Conrad 已提交
768

769
skip_eob:
770
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
771
            if (++i == 16)
772
                break; // invalid input; blocks should end with EOB
773
            token_prob = probs[i][0];
774
            goto skip_eob;
775 776
        }

777
        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
778
            coeff = 1;
779
            token_prob = probs[i+1][1];
780
        } else {
781 782
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
783
                if (coeff)
784
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
785 786 787
                coeff += 2;
            } else {
                // DCT_CAT*
788 789 790
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
791 792
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
793 794
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
795 796
                    }
                } else {    // DCT_CAT3 and up
797 798
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
799 800
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
801
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
802 803
                }
            }
804
            token_prob = probs[i+1][2];
805
        }
806
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
807
    } while (++i < 16);
808

809
    *r = c;
810
    return i;
D
David Conrad 已提交
811
}
812
#endif
D
David Conrad 已提交
813

814 815 816 817 818 819 820 821 822 823 824
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
825
static av_always_inline
D
Diego Biurrun 已提交
826
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
827
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
828 829 830 831 832 833 834 835
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}

836
static av_always_inline
D
Daniel Kang 已提交
837
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
838
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
D
David Conrad 已提交
839 840 841
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
842
    int segment = mb->segment;
843
    int block_dc = 0;
D
David Conrad 已提交
844 845 846 847 848

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
D
Daniel Kang 已提交
849
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
D
David Conrad 已提交
850 851
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
852 853 854 855
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
D
Daniel Kang 已提交
856
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
857
            else
D
Daniel Kang 已提交
858
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
859
        }
D
David Conrad 已提交
860 861 862 863 864 865 866
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
J
Jason Garrett-Glaser 已提交
867
            nnz_pred = l_nnz[y] + t_nnz[x];
D
Daniel Kang 已提交
868
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
J
Jason Garrett-Glaser 已提交
869
                                      nnz_pred, s->qmat[segment].luma_qmul);
870
            // nnz+block_dc may be one more than the actual last index, but we don't care
D
Daniel Kang 已提交
871
            td->non_zero_count_cache[y][x] = nnz + block_dc;
D
David Conrad 已提交
872 873 874 875 876 877 878 879 880 881 882
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
D
Daniel Kang 已提交
883
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
D
David Conrad 已提交
884
                                          nnz_pred, s->qmat[segment].chroma_qmul);
D
Daniel Kang 已提交
885
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
D
David Conrad 已提交
886 887 888 889 890 891 892 893 894 895 896
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917
/**
 * Save the bottom pixel row of a macroblock into the top-border buffer.
 *
 * Luma row 15 goes to top_border[0..15]; unless the simple filter is in use,
 * chroma row 7 of Cb and Cr go to top_border[16..23] and top_border[24..31].
 */
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    uint8_t *luma_bottom = src_y + 15*linesize;

    AV_COPY128(top_border, luma_bottom);

    // chroma is not backed up when the simple filter is selected
    if (simple)
        return;

    AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
    AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
}

/**
 * Exchange (or copy) the pixel row directly above a macroblock with the
 * saved top-border buffer.
 *
 * @param top_border saved border of this macroblock; the 32 bytes before it
 *                   are read for top-left prediction and, unless this is the
 *                   last macroblock of the row, the 8 bytes at +32 are used
 *                   for the top-right neighbour
 * @param src_y      luma plane pointer at the macroblock origin
 * @param src_cb     Cb plane pointer at the macroblock origin
 * @param src_cr     Cr plane pointer at the macroblock origin
 * @param linesize   luma stride
 * @param uvlinesize chroma stride
 * @param mb_x       macroblock column
 * @param mb_y       macroblock row
 * @param mb_width   number of macroblocks per row
 * @param simple     non-zero when the simple loop filter is used
 * @param xchg       non-zero to swap the top-left and top spans, zero to only
 *                   copy them from the border buffer into the frame; the
 *                   remaining spans are always swapped
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    // step back to the row above the macroblock
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, src_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16,    src_cb, 1);
        XCHG(top_border+24,    src_cr, 1);
    }
}

939
static av_always_inline
940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    } else {
        return mb_y ? mode : LEFT_DC_PRED8x8;
    }
}

/**
 * Replace TM prediction for macroblocks in the first column (mb_x == 0)
 * and/or first row (mb_y == 0); elsewhere mode is returned unchanged.
 */
static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x)
        return mb_y ? mode : HOR_PRED8x8;

    // first column
    return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
D
David Conrad 已提交
961 962
{
    if (mode == DC_PRED8x8) {
963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    } else {
        return mode;
    }
}

/**
 * Adjust a 16x16/8x8 intra prediction mode for frame-edge macroblocks when
 * CODEC_FLAG_EMU_EDGE is set: DC, vertical, horizontal and TM prediction are
 * each replaced when the row above (mb_y == 0) or the column to the left
 * (mb_x == 0) they depend on is missing.
 */
static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    if (mode == DC_PRED8x8)
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);

    if (mode == VERT_PRED8x8)
        return mb_y ? mode : DC_127_PRED8x8;

    if (mode == HOR_PRED8x8)
        return mb_x ? mode : DC_129_PRED8x8;

    if (mode == PLANE_PRED8x8) /* TM */
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);

    return mode;
}

/**
 * 4x4 counterpart of the TM edge check: substitutes TM prediction when the
 * block is in the first column (mb_x == 0) and/or first row (mb_y == 0).
 */
static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x)
        return mb_y ? mode : HOR_VP8_PRED;

    // first column
    return mb_y ? VERT_VP8_PRED : DC_129_PRED;
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
1012
        }
1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
D
David Conrad 已提交
1025 1026 1027 1028
    }
    return mode;
}

1029
static av_always_inline
D
Daniel Kang 已提交
1030 1031
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
D
David Conrad 已提交
1032
{
1033
    AVCodecContext *avctx = s->avctx;
M
Mans Rullgard 已提交
1034 1035
    int x, y, mode, nnz;
    uint32_t tr;
D
David Conrad 已提交
1036

1037 1038
    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
D
Daniel Kang 已提交
1039
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1040 1041 1042 1043
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

D
David Conrad 已提交
1044
    if (mb->mode < MODE_I4x4) {
1045 1046 1047 1048 1049
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
D
David Conrad 已提交
1050 1051 1052
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
1053
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1054
        uint8_t tr_top[4] = { 127, 127, 127, 127 };
D
David Conrad 已提交
1055 1056 1057 1058 1059 1060 1061

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
1062 1063
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
M
Mans Rullgard 已提交
1064
            tr = tr_right[-1]*0x01010101u;
D
David Conrad 已提交
1065 1066 1067
            tr_right = (uint8_t *)&tr;
        }

1068
        if (mb->skip)
D
Daniel Kang 已提交
1069
            AV_ZERO128(td->non_zero_count_cache);
1070

D
David Conrad 已提交
1071 1072 1073
        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
1074 1075 1076 1077 1078 1079 1080
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
D
David Conrad 已提交
1081 1082
                    topright = tr_right;

1083 1084 1085 1086 1087 1088 1089
                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
R
Ronald S. Bultje 已提交
1090
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1091
                        } else {
R
Ronald S. Bultje 已提交
1092
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
R
Ronald S. Bultje 已提交
1116 1117 1118 1119
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1120
                }
D
David Conrad 已提交
1121

D
Daniel Kang 已提交
1122
                nnz = td->non_zero_count_cache[y][x];
D
David Conrad 已提交
1123 1124
                if (nnz) {
                    if (nnz == 1)
D
Daniel Kang 已提交
1125
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
D
David Conrad 已提交
1126
                    else
D
Daniel Kang 已提交
1127
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
D
David Conrad 已提交
1128 1129 1130 1131 1132
                }
                topright += 4;
            }

            ptr   += 4*s->linesize;
1133
            intra4x4 += 4;
D
David Conrad 已提交
1134 1135 1136
        }
    }

1137
    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1138
        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1139
    } else {
1140
        mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1141
    }
D
David Conrad 已提交
1142 1143
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1144

D
Daniel Kang 已提交
1145
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1146 1147 1148
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
D
David Conrad 已提交
1149 1150
}

1151 1152 1153 1154 1155 1156 1157
/* Lookup table used by the motion compensation helpers. The second index is
 * the 3-bit fractional motion vector component (mx or my, 0..7); the first
 * index selects which quantity is looked up (see the row comments). */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};

D
David Conrad 已提交
1158
/**
1159
 * luma MC function
D
David Conrad 已提交
1160 1161 1162
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
1163
 * @param ref reference picture buffer at origin (0, 0)
D
David Conrad 已提交
1164 1165 1166 1167 1168 1169 1170 1171
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
1172
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
D
David Conrad 已提交
1173
 */
1174
static av_always_inline
D
Daniel Kang 已提交
1175
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1176
                 ThreadFrame *ref, const VP56mv *mv,
1177 1178 1179
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
D
David Conrad 已提交
1180
{
1181
    uint8_t *src = ref->f->data[0];
R
Ronald S. Bultje 已提交
1182

1183
    if (AV_RN32A(mv)) {
1184 1185 1186 1187 1188 1189

        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;
1190 1191

        // edge emulation
R
Ronald S. Bultje 已提交
1192
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1193
        src += y_off * linesize + x_off;
1194 1195
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
R
Ronald S. Bultje 已提交
1196 1197 1198
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1199
            src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1200 1201
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
R
Ronald S. Bultje 已提交
1202 1203
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1204
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
R
Ronald S. Bultje 已提交
1205
    }
D
David Conrad 已提交
1206 1207
}

1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224
/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
1225
static av_always_inline
D
Daniel Kang 已提交
1226
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1227
                   ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1228 1229 1230
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
1231
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
R
Ronald S. Bultje 已提交
1232

1233 1234 1235 1236 1237 1238 1239 1240 1241 1242
    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
R
Ronald S. Bultje 已提交
1243
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1244 1245
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
R
Ronald S. Bultje 已提交
1246 1247 1248
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1249
            src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1250 1251
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

R
Ronald S. Bultje 已提交
1252 1253 1254
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
                                     block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                     x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1255
            src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1256 1257 1258 1259 1260 1261
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
R
Ronald S. Bultje 已提交
1262
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1263 1264 1265 1266 1267
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

1268
static av_always_inline
D
Daniel Kang 已提交
1269
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1270
                 ThreadFrame *ref_frame, int x_off, int y_off,
1271 1272 1273
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
1274 1275 1276 1277
{
    VP56mv uvmv = *mv;

    /* Y */
D
Daniel Kang 已提交
1278
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
R
Ronald S. Bultje 已提交
1279
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1280 1281
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);
1282 1283 1284 1285 1286 1287 1288 1289 1290 1291

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
D
Daniel Kang 已提交
1292
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
R
Ronald S. Bultje 已提交
1293 1294
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
1295 1296
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
1297 1298
}

1299 1300
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
1301
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1302
{
J
Jason Garrett-Glaser 已提交
1303 1304
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
J
Jason Garrett-Glaser 已提交
1305
        int x_off = mb_x << 4, y_off = mb_y << 4;
J
Jason Garrett-Glaser 已提交
1306 1307
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
1308
        uint8_t **src= s->framep[ref]->tf.f->data;
J
Jason Garrett-Glaser 已提交
1309
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
R
Ronald S. Bultje 已提交
1310 1311 1312
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
R
Ronald S. Bultje 已提交
1313
        s->vdsp.prefetch(src[0]+off, s->linesize, 4);
J
Jason Garrett-Glaser 已提交
1314
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
R
Ronald S. Bultje 已提交
1315
        s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
J
Jason Garrett-Glaser 已提交
1316
    }
1317 1318
}

D
David Conrad 已提交
1319 1320 1321
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
1322
static av_always_inline
D
Daniel Kang 已提交
1323 1324
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
D
David Conrad 已提交
1325 1326 1327
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
1328
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1329
    VP56mv *bmv = mb->bmv;
D
David Conrad 已提交
1330

1331 1332
    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
D
Daniel Kang 已提交
1333
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1334
                    0, 0, 16, 16, width, height, &mb->mv);
1335
        break;
1336
    case VP8_SPLITMVMODE_4x4: {
D
David Conrad 已提交
1337
        int x, y;
1338
        VP56mv uvmv;
D
David Conrad 已提交
1339 1340 1341 1342

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
D
Daniel Kang 已提交
1343
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
R
Ronald S. Bultje 已提交
1344
                            ref, &bmv[4*y + x],
1345 1346 1347
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
D
David Conrad 已提交
1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1363 1364
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
D
David Conrad 已提交
1365 1366 1367 1368
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
D
Daniel Kang 已提交
1369
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
R
Ronald S. Bultje 已提交
1370
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1371 1372 1373
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
D
David Conrad 已提交
1374 1375
            }
        }
1376 1377 1378
        break;
    }
    case VP8_SPLITMVMODE_16x8:
D
Daniel Kang 已提交
1379
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1380
                    0, 0, 16, 8, width, height, &bmv[0]);
D
Daniel Kang 已提交
1381
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1382
                    0, 8, 16, 8, width, height, &bmv[1]);
1383 1384
        break;
    case VP8_SPLITMVMODE_8x16:
D
Daniel Kang 已提交
1385
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1386
                    0, 0, 8, 16, width, height, &bmv[0]);
D
Daniel Kang 已提交
1387
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1388
                    8, 0, 8, 16, width, height, &bmv[1]);
1389 1390
        break;
    case VP8_SPLITMVMODE_8x8:
D
Daniel Kang 已提交
1391
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1392
                    0, 0, 8, 8, width, height, &bmv[0]);
D
Daniel Kang 已提交
1393
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1394
                    8, 0, 8, 8, width, height, &bmv[1]);
D
Daniel Kang 已提交
1395
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1396
                    0, 8, 8, 8, width, height, &bmv[2]);
D
Daniel Kang 已提交
1397
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1398
                    8, 8, 8, 8, width, height, &bmv[3]);
1399
        break;
D
David Conrad 已提交
1400 1401 1402
    }
}

D
Daniel Kang 已提交
1403 1404
/**
 * Add the inverse-transformed residual of a macroblock to the prediction.
 *
 * Luma is skipped for MODE_I4x4 macroblocks. The non-zero counts of four
 * blocks are read at once as a 32-bit word (one byte per block): if every
 * count is 0 or 1 a combined DC-only routine is used, otherwise each block
 * is handled individually and the loop stops once no counts remain.
 *
 * @param s   VP8 decoding context
 * @param td  per-thread data holding the coefficients and nnz cache
 * @param dst destination pointers (Y, Cb, Cr) at the macroblock origin
 * @param mb  macroblock being reconstructed
 */
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                if (nnz4&~0x01010101) {
                    // at least one block in this row has more than a DC coefficient
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}

1456
/**
 * Compute the loop filter parameters of one macroblock.
 *
 * @param s  VP8 decoding context (segmentation, lf_delta and filter settings)
 * @param mb macroblock whose segment, reference frame, mode and skip flag
 *           determine the result
 * @param f  output: filter_level (clipped to 0..63), inner_limit (at least 1)
 *           and inner_filter (whether inner edges are to be filtered)
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        // segment values are deltas on the frame level unless flagged absolute
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}

1486
/**
 * Apply the normal (non-simple) loop filter to one macroblock.
 *
 * Order of operations: left macroblock edge, vertical inner edges,
 * top macroblock edge, horizontal inner edges. The left/top macroblock edges
 * are only filtered when mb_x / mb_y are non-zero.
 *
 * @param s     decoder context (DSP function table, line sizes, keyframe flag)
 * @param dst   pointers to the macroblock's luma and two chroma planes
 * @param f     filter parameters for this macroblock
 * @param mb_x  macroblock column
 * @param mb_y  macroblock row
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    // High-edge-variance threshold, indexed by [keyframe][filter_level].
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };
    const int level = f->filter_level;
    int ilimit, do_inner, stride_y, stride_uv;
    int blk_lim, mb_lim, hev, off;

    if (!level)
        return;

    ilimit    = f->inner_limit;
    do_inner  = f->inner_filter;
    stride_y  = s->linesize;
    stride_uv = s->uvlinesize;

    blk_lim = 2*level + ilimit;   // limit for edges inside the macroblock
    mb_lim  = blk_lim + 4;        // limit for edges between macroblocks
    hev     = hev_thresh_lut[s->keyframe][level];

    // Left macroblock edge.
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], stride_y, mb_lim, ilimit, hev);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], stride_uv,
                                       mb_lim, ilimit, hev);
    }

    // Inner vertical edges at luma columns 4, 8, 12 and chroma column 4.
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + off, stride_y,
                                                 blk_lim, ilimit, hev);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4, stride_uv,
                                             blk_lim, ilimit, hev);
    }

    // Top macroblock edge.
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], stride_y, mb_lim, ilimit, hev);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], stride_uv,
                                       mb_lim, ilimit, hev);
    }

    // Inner horizontal edges at luma rows 4, 8, 12 and chroma row 4.
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + off*stride_y, stride_y,
                                                 blk_lim, ilimit, hev);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4*stride_uv,
                                             dst[2] + 4*stride_uv, stride_uv,
                                             blk_lim, ilimit, hev);
    }
}

1556
/**
 * Apply the simple loop filter to the luma plane of one macroblock.
 *
 * Same edge order as filter_mb(): left macroblock edge, vertical inner edges,
 * top macroblock edge, horizontal inner edges.
 *
 * @param s     decoder context (DSP function table, luma line size)
 * @param dst   pointer to the macroblock's luma samples
 * @param f     filter parameters for this macroblock
 * @param mb_x  macroblock column; the left edge is filtered only if non-zero
 * @param mb_y  macroblock row; the top edge is filtered only if non-zero
 */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    const int level = f->filter_level;
    int do_inner, stride, blk_lim, mb_lim, off;

    if (!level)
        return;

    do_inner = f->inner_filter;
    stride   = s->linesize;
    blk_lim  = 2*level + f->inner_limit;   // limit for inner edges
    mb_lim   = blk_lim + 4;                // limit for macroblock edges

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, stride, mb_lim);
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_h_loop_filter_simple(dst + off, stride, blk_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, stride, mb_lim);
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_v_loop_filter_simple(dst + off*stride, stride, blk_lim);
    }
}

1587
#define MARGIN (16 << 2)
1588 1589
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
1590 1591
{
    VP8Context *s = avctx->priv_data;
D
Daniel Kang 已提交
1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1607 1608 1609
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1);
D
Daniel Kang 已提交
1610 1611 1612 1613 1614 1615 1616 1617
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

1618
#if HAVE_THREADS
/*
 * Row positions are packed as (mb_y << 16) | (mb_x & 0xFFFF).
 *
 * check_thread_pos(td, otd, mb_x_check, mb_y_check):
 *   If thread otd has not yet reached the given position, publish the
 *   position in td->wait_mb_pos and wait on otd->cond (under otd->lock)
 *   until otd->thread_mb_pos reaches it, then reset td->wait_mb_pos to
 *   INT_MAX.
 *
 * update_pos(td, mb_y, mb_x):
 *   Store the new position in td->thread_mb_pos and, when slice threading
 *   with more than one job is active, broadcast td->cond if a neighbouring
 *   thread is waiting for a position that has now been reached (or if either
 *   neighbour pointer is NULL). This macro reads avctx, num_jobs, next_td and
 *   prev_td from the scope of the caller.
 *
 * Both macros expand to a single statement without a trailing semicolon, so
 * they can be used safely in unbraced if/else bodies; all macro arguments are
 * parenthesized so that arbitrary expressions can be passed.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            do {\
                if ((otd)->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            } while (1);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while (0)

#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                                (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                                (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
        (td)->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&(td)->lock);\
            pthread_cond_broadcast(&(td)->cond);\
            pthread_mutex_unlock(&(td)->lock);\
        }\
    } while (0)
#else
/* Without thread support both macros are no-op statements. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) do { } while (0)
#define update_pos(td, mb_y, mb_x) do { } while (0)
#endif
D
Daniel Kang 已提交
1654 1655 1656 1657 1658 1659 1660

/**
 * Decode one macroblock row (modes if needed, coefficients, prediction and
 * IDCT) without running the loop filter.
 *
 * The row to decode is taken from the upper 16 bits of td->thread_mb_pos.
 * When several jobs run, progress is synchronised with the thread handling
 * the previous row through check_thread_pos() / update_pos().
 *
 * @param avctx     codec context
 * @param tdata     unused
 * @param jobnr     job index, used to locate the neighbouring threads' data
 * @param threadnr  index of this thread's VP8ThreadData
 */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s        = avctx->priv_data;
    VP8ThreadData *td    = &s->thread_data[threadnr];
    VP8ThreadData *prev_td, *next_td;
    int mb_y             = td->thread_mb_pos>>16;
    int mb_xy            = mb_y*s->mb_width;
    int num_jobs         = s->num_jobs;
    VP8Frame *curframe   = s->curframe;
    VP8Frame *prev_frame = s->prev_frame;
    VP56RangeCoder *c    = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3];
    int plane, row, mb_x;

    dst[0] = curframe->tf.f->data[0] + 16*mb_y*s->linesize;
    dst[1] = curframe->tf.f->data[1] +  8*mb_y*s->uvlinesize;
    dst[2] = curframe->tf.f->data[2] +  8*mb_y*s->uvlinesize;

    // The first and last rows have no neighbour on one side; point at ourselves.
    prev_td = mb_y == 0 ? td
                        : &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    next_td = mb_y == s->mb_height-1 ? td
                                     : &s->thread_data[(jobnr + 1)%num_jobs];

    if (s->mb_layout == 1) {
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    } else {
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));

    // left edge of 129 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (plane = 0; plane < 3; plane++) {
            const int rows = plane ? 8 : 16;   // luma has 16 rows, chroma 8
            for (row = 0; row < rows; row++)
                dst[plane][row*curframe->tf.f->linesize[plane]-1] = 129;
        }
        if (mb_y == 1) {
            s->top_border[0][15] = 129;
            s->top_border[0][23] = 129;
            s->top_border[0][31] = 129;
        }
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        // With layout 1 the modes were already decoded for the whole frame.
        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y,
                           curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map
                               ? prev_frame->seg_map->data + mb_xy : NULL,
                           0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode > MODE_I4x4)
            inter_predict(s, td, dst, mb, mb_x, mb_y);
        else
            intra_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        // mb->skip is deliberately re-read here rather than cached from above.
        if (mb->skip) {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        } else {
            idct_mb(s, td, dst, mb);
        }

        if (s->deblock_filter) {
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

            // With several jobs, the last thread saves the unfiltered border.
            if (num_jobs != 1 && threadnr == num_jobs-1) {
                if (s->filter.simple)
                    backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL,
                                     s->linesize, 0, 1);
                else
                    backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                                     s->linesize, s->uvlinesize, 0);
            }
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        // NOTE(review): mb_x < s->mb_width inside this loop, so the first
        // branch looks unreachable; kept as in the original.
        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}

/**
 * Run the loop filter over one macroblock row.
 *
 * The row is taken from the upper 16 bits of td->thread_mb_pos. Filter
 * parameters are read from td->filter_strength[], and progress is published
 * with positions offset by s->mb_width+3.
 *
 * @param avctx     codec context
 * @param tdata     unused
 * @param jobnr     job index, used to locate the neighbouring threads' data
 * @param threadnr  index of this thread's VP8ThreadData
 */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s     = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    VP8ThreadData *prev_td, *next_td;
    int num_jobs      = s->num_jobs;
    int mb_y          = td->thread_mb_pos>>16;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    uint8_t *dst[3];
    int mb_x;

    dst[0] = curframe->data[0] + 16*mb_y*s->linesize;
    dst[1] = curframe->data[1] +  8*mb_y*s->uvlinesize;
    dst[2] = curframe->data[2] +  8*mb_y*s->uvlinesize;

    mb = s->mb_layout == 1
             ? s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1)
             : s->macroblocks + (s->mb_height - mb_y - 1)*2;

    // The first and last rows have no neighbour on one side; point at ourselves.
    prev_td = mb_y == 0 ? td
                        : &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    next_td = mb_y == s->mb_height-1 ? td
                                     : &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *strength = &td->filter_strength[mb_x];

        // Wait until the row above has been filtered past this column ...
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        // ... and until the row below has been decoded past this column.
        if (next_td != td && next_td != &s->thread_data[0]) {
            check_thread_pos(td, next_td, mb_x+1, mb_y+1);
        }

        // With a single job the border is saved here, just before filtering.
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL,
                                 s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                                 s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], strength, mb_x, mb_y);
        else
            filter_mb(s, dst, strength, mb_x, mb_y);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}

/**
 * Job entry point: decode (and optionally loop-filter) every num_jobs-th
 * macroblock row, starting at row jobnr.
 *
 * @param avctx     codec context
 * @param tdata     passed through to the row functions
 * @param jobnr     job index; selects the VP8ThreadData and the first row
 * @param threadnr  thread index, stored in td->thread_nr
 * @return always 0
 */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s      = avctx->priv_data;
    VP8ThreadData *td  = &s->thread_data[jobnr];
    // Both NULL on purpose: update_pos() then always signals waiters.
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int num_jobs       = s->num_jobs;
    int mb_y           = jobnr;

    td->thread_nr = threadnr;

    while (mb_y < s->mb_height) {
        td->thread_mb_pos = mb_y<<16;

        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);

        // Mark the whole row as finished.
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);

        mb_y += num_jobs;
    }

    return 0;
}

1847
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
D
David Conrad 已提交
1848 1849 1850
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
D
Daniel Kang 已提交
1851
    int ret, i, referenced, num_jobs;
D
David Conrad 已提交
1852
    enum AVDiscard skip_thresh;
1853
    VP8Frame *av_uninit(curframe), *prev_frame;
1854

D
David Conrad 已提交
1855
    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1856
        goto err;
D
David Conrad 已提交
1857

1858 1859
    prev_frame = s->framep[VP56_FRAME_CURRENT];

D
David Conrad 已提交
1860 1861 1862 1863 1864 1865 1866 1867
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
1868
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
D
David Conrad 已提交
1869 1870
        goto skip_decode;
    }
1871
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
D
David Conrad 已提交
1872

R
Ronald S. Bultje 已提交
1873 1874
    // release no longer referenced frames
    for (i = 0; i < 5; i++)
1875
        if (s->frames[i].tf.f->data[0] &&
R
Ronald S. Bultje 已提交
1876 1877 1878 1879
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1880
            vp8_release_frame(s, &s->frames[i]);
R
Ronald S. Bultje 已提交
1881 1882 1883 1884 1885

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
D
David Conrad 已提交
1886 1887 1888 1889 1890
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
R
Ronald S. Bultje 已提交
1891 1892 1893 1894
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
1895 1896
    if (curframe->tf.f->data[0])
        vp8_release_frame(s, curframe);
D
David Conrad 已提交
1897

1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908
    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

1909 1910 1911
    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    if ((ret = vp8_alloc_frame(s, curframe, referenced))) {
D
David Conrad 已提交
1912
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1913
        goto err;
D
David Conrad 已提交
1914 1915
    }

R
Ronald S. Bultje 已提交
1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935
    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT]      = curframe;

    ff_thread_finish_setup(avctx);

1936 1937
    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];
D
David Conrad 已提交
1938

D
Daniel Kang 已提交
1939 1940 1941
    if (!s->thread_data[0].edge_emu_buffer)
        for (i = 0; i < MAX_THREADS; i++)
            s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
D
David Conrad 已提交
1942 1943

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
P
Pascal Massimino 已提交
1944
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
D
Daniel Kang 已提交
1945 1946 1947 1948
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1949

D
David Conrad 已提交
1950
    // top edge of 127 for intra prediction
1951 1952
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
1953 1954
        s->top_border[0][31] = 127;
        memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1955
    }
J
Jason Garrett-Glaser 已提交
1956
    memset(s->ref_count, 0, sizeof(s->ref_count));
D
David Conrad 已提交
1957

J
Jason Garrett-Glaser 已提交
1958

D
Daniel Kang 已提交
1959 1960 1961
    // Make sure the previous frame has read its segmentation map,
    // if we re-use the same map.
    if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
1962
        ff_thread_await_progress(&prev_frame->tf, 1, 0);
J
Jason Garrett-Glaser 已提交
1963

D
Daniel Kang 已提交
1964 1965
    if (s->mb_layout == 1)
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
R
Ronald S. Bultje 已提交
1966

D
Daniel Kang 已提交
1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980
    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
D
David Conrad 已提交
1981

1982
    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
1983 1984
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

D
David Conrad 已提交
1985 1986 1987 1988 1989 1990 1991
skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
1992 1993
        if ((ret = av_frame_ref(data, curframe->tf.f)) < 0)
            return ret;
1994
        *got_frame      = 1;
D
David Conrad 已提交
1995 1996 1997
    }

    return avpkt->size;
1998 1999 2000
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
D
David Conrad 已提交
2001 2002
}

2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025
/**
 * Codec close callback: flush the decoder state and free every AVFrame
 * held in the frame pool.
 *
 * @param avctx codec context
 * @return always 0
 */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    VP8Frame *frame, *end = s->frames + FF_ARRAY_ELEMS(s->frames);

    vp8_decode_flush_impl(avctx, 1);

    for (frame = s->frames; frame < end; frame++)
        av_frame_free(&frame->tf.f);

    return 0;
}

/**
 * Allocate the AVFrame of every entry of the frame pool.
 *
 * Stops at the first failed allocation; frames allocated before that point
 * are left for the caller to free.
 *
 * @param s decoder context
 * @return 0 on success, AVERROR(ENOMEM) if an allocation failed
 */
static av_cold int vp8_init_frames(VP8Context *s)
{
    VP8Frame *frame, *end = s->frames + FF_ARRAY_ELEMS(s->frames);

    for (frame = s->frames; frame < end; frame++) {
        frame->tf.f = av_frame_alloc();
        if (frame->tf.f == NULL)
            return AVERROR(ENOMEM);
    }

    return 0;
}

D
David Conrad 已提交
2026 2027 2028
/**
 * Codec init callback: set the output pixel format, initialise the DSP
 * function tables and allocate the frame pool.
 *
 * @param avctx codec context
 * @return 0 on success, a negative error code if the frame pool could not
 *         be allocated (the context is cleaned up in that case)
 */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&s->vdsp, 8);
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    ret = vp8_init_frames(s);
    if (ret >= 0)
        return 0;

    // Partial initialisation: release whatever was allocated.
    vp8_decode_free(avctx);
    return ret;
}

R
Ronald S. Bultje 已提交
2047 2048 2049
/**
 * Initialise the per-thread copy of the decoder context used for frame
 * threading: link it to its codec context and allocate its frame pool.
 *
 * @param avctx codec context of the thread copy
 * @return 0 on success, a negative error code if the frame pool could not
 *         be allocated (the context is cleaned up in that case)
 */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;
    int ret;

    s->avctx = avctx;

    ret = vp8_init_frames(s);
    if (ret >= 0)
        return 0;

    vp8_decode_free(avctx);
    return ret;
}

/* Translate a frame pointer from s_src->frames[] to the same slot of s->frames[]. */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

/**
 * Copy the state that the next frame thread needs from the source context:
 * dimensions (freeing buffers on a size change), probabilities, segmentation,
 * loop-filter deltas, sign bias, frame references and reference pointers.
 *
 * @param dst destination codec context
 * @param src source codec context
 * @return 0 on success, a negative error code if referencing a frame failed
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    // Buffers sized for other dimensions are dropped.
    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    // Take the saved probabilities if the source frame does not keep its updates.
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        int ret;

        if (!s_src->frames[i].tf.f->data[0])
            continue;

        ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
        if (ret < 0)
            return ret;
    }

    // The source's "next" references become our current references.
    for (i = 0; i < 4; i++)
        s->framep[i] = REBASE(s_src->next_framep[i]);

    return 0;
}

2098
AVCodec ff_vp8_decoder = {
2099 2100
    .name                  = "vp8",
    .type                  = AVMEDIA_TYPE_VIDEO,
2101
    .id                    = AV_CODEC_ID_VP8,
2102 2103 2104 2105
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp8_decode_init,
    .close                 = vp8_decode_free,
    .decode                = vp8_decode_frame,
D
Daniel Kang 已提交
2106
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2107 2108
    .flush                 = vp8_decode_flush,
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
R
Ronald S. Bultje 已提交
2109 2110
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
D
David Conrad 已提交
2111
};