/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "internal.h"
#include "vp8.h"
#include "vp8data.h"
#include "rectangle.h"
#include "thread.h"

#if ARCH_ARM
#   include "arm/vp8.h"
#endif

38 39
static void free_buffers(VP8Context *s)
{
D
Daniel Kang 已提交
40 41 42 43 44 45 46
    int i;
    if (s->thread_data)
        for (i = 0; i < MAX_THREADS; i++) {
            av_freep(&s->thread_data[i].filter_strength);
            av_freep(&s->thread_data[i].edge_emu_buffer);
        }
    av_freep(&s->thread_data);
47 48 49 50 51 52 53 54
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

55 56 57 58 59
static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
{
    int ret;
    if ((ret = ff_thread_get_buffer(s->avctx, f)) < 0)
        return ret;
60
    if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
61 62 63 64 65 66 67 68
        f->ref_index[0] = s->segmentation_maps[--s->num_maps_to_be_freed];
    } else if (!(f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height))) {
        ff_thread_release_buffer(s->avctx, f);
        return AVERROR(ENOMEM);
    }
    return 0;
}

69
static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
70
{
71 72 73 74 75 76 77 78 79 80 81 82
    if (f->ref_index[0]) {
        if (prefer_delayed_free) {
            /* Upon a size change, we want to free the maps but other threads may still
             * be using them, so queue them. Upon a seek, all threads are inactive so
             * we want to cache one to prevent re-allocation in the next decoding
             * iteration, but the rest we can free directly. */
            int max_queued_maps = can_direct_free ? 1 : FF_ARRAY_ELEMS(s->segmentation_maps);
            if (s->num_maps_to_be_freed < max_queued_maps) {
                s->segmentation_maps[s->num_maps_to_be_freed++] = f->ref_index[0];
            } else if (can_direct_free) /* vp8_decode_flush(), but our queue is full */ {
                av_free(f->ref_index[0]);
            } /* else: MEMLEAK (should never happen, but better that than crash) */
83
            f->ref_index[0] = NULL;
84 85
        } else /* vp8_decode_free() */ {
            av_free(f->ref_index[0]);
86 87 88 89 90
        }
    }
    ff_thread_release_buffer(s->avctx, f);
}

91 92
static void vp8_decode_flush_impl(AVCodecContext *avctx,
                                  int prefer_delayed_free, int can_direct_free, int free_mem)
D
David Conrad 已提交
93 94 95 96
{
    VP8Context *s = avctx->priv_data;
    int i;

97
    if (!avctx->internal->is_copy) {
R
Ronald S. Bultje 已提交
98 99
        for (i = 0; i < 5; i++)
            if (s->frames[i].data[0])
100
                vp8_release_frame(s, &s->frames[i], prefer_delayed_free, can_direct_free);
R
Ronald S. Bultje 已提交
101
    }
D
David Conrad 已提交
102 103
    memset(s->framep, 0, sizeof(s->framep));

104 105 106 107
    if (free_mem) {
        free_buffers(s);
        s->maps_are_invalid = 1;
    }
108 109 110 111
}

/**
 * Flush the decoder: release all frames, keeping the allocated buffers.
 *
 * Calls vp8_decode_flush_impl() with delayed and direct freeing both
 * enabled and without freeing the per-stream buffers.
 *
 * @param avctx codec context
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 1, 1, 0);
}

/**
 * Apply a (possibly new) frame size and allocate the per-stream buffers.
 *
 * When the size differs from the codec context's current size, the new size
 * is validated, all frames and buffers are flushed and the new dimensions
 * are stored in the codec context. The macroblock dimensions and layout are
 * then derived and every buffer is allocated.
 *
 * On allocation failure everything allocated here is released again through
 * free_buffers(), so s->macroblocks_base is NULL afterwards.
 *
 * @param s      decoder context
 * @param width  frame width
 * @param height frame height
 * @return 0 on success, AVERROR_INVALIDDATA if the size is rejected by
 *         av_image_check_size(), AVERROR(ENOMEM) on allocation failure
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i, alloc_failed = 0;

    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        if (av_image_check_size(width, height, 0, s->avctx))
            return AVERROR_INVALIDDATA;

        vp8_decode_flush_impl(s->avctx, 1, 0, 1);

        avcodec_set_dimensions(s->avctx, width, height);
    }

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
    }
    else // Sliced threading
        s->macroblocks_base       = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
    s->top_nnz                    = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border                 = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->thread_data                = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));

    /* thread_data is indexed right below, so it must be checked first. */
    if (!s->thread_data) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength)
            alloc_failed = 1;
#if HAVE_THREADS
        /* Always initialise every slot, even after a failed allocation, so
         * that a non-NULL thread_data array never holds uninitialised
         * synchronisation objects. */
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (alloc_failed ||
        !s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        /* Do not leave a partially allocated state behind: the caller uses
         * a NULL macroblocks_base to decide whether to allocate again. */
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    s->macroblocks        = s->macroblocks_base + 1;

    return 0;
}

/**
 * Read the segmentation part of the frame header from the header range coder.
 *
 * Stores the map-update flag, then, if the bitstream signals a feature
 * update, the absolute/delta flag followed by four quantizer values and four
 * loop-filter levels. When the map is updated, the three segment-id tree
 * probabilities are read as well; a probability that is not transmitted is
 * set to 255.
 *
 * @param s decoder context; s->c is the range coder being read
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int update_features;
    int n;

    s->segmentation.update_map = vp8_rac_get(rc);
    update_features            = vp8_rac_get(rc);

    if (update_features) {
        s->segmentation.absolute_vals = vp8_rac_get(rc);

        for (n = 0; n < 4; n++) {
            s->segmentation.base_quant[n] = vp8_rac_get_sint(rc, 7);
        }
        for (n = 0; n < 4; n++) {
            s->segmentation.filter_level[n] = vp8_rac_get_sint(rc, 6);
        }
    }

    if (!s->segmentation.update_map)
        return;

    for (n = 0; n < 3; n++) {
        if (vp8_rac_get(rc))
            s->prob->segmentid[n] = vp8_rac_get_uint(rc, 8);
        else
            s->prob->segmentid[n] = 255;
    }
}

/**
 * Read loop-filter delta updates from the header range coder.
 *
 * For each of the four reference-frame deltas, and then for each mode delta
 * from MODE_I4x4 through VP8_MVMODE_SPLIT, a flag says whether a new value
 * follows. A new value is a 6-bit magnitude followed by a sign flag.
 * Entries whose flag is clear keep their previous value.
 *
 * @param s decoder context; s->c is the range coder being read
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i;

    for (i = 0; i < 4; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.ref[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.ref[i] = -s->lf_delta.ref[i];
        }
    }

    for (i = MODE_I4x4; i <= VP8_MVMODE_SPLIT; i++) {
        if (vp8_rac_get(c)) {
            s->lf_delta.mode[i] = vp8_rac_get_uint(c, 6);

            if (vp8_rac_get(c))
                s->lf_delta.mode[i] = -s->lf_delta.mode[i];
        }
    }
}

/**
 * Set up one range decoder per coefficient partition.
 *
 * The partition count (1, 2, 4 or 8) is read as a 2-bit exponent from the
 * header range coder. @p buf starts with a table of 24-bit little-endian
 * sizes, one for every partition except the last; the partition data
 * follows the table and the last partition takes whatever remains.
 *
 * @param s        decoder context
 * @param buf      start of the partition size table
 * @param buf_size number of bytes available at @p buf
 * @return 0 on success, -1 if the size table or any partition does not fit
 *         into @p buf_size
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *sizes = buf;
    int i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    /* Skip the size table; the partition data starts right after it. */
    buf      += 3*(s->num_coeff_partitions-1);
    buf_size -= 3*(s->num_coeff_partitions-1);
    if (buf_size < 0)
        return -1;

    for (i = 0; i < s->num_coeff_partitions-1; i++) {
        int size = AV_RL24(sizes + 3*i);
        if (buf_size - size < 0)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }
    /* The last partition has no table entry and uses the remaining bytes. */
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

/**
 * Read the quantizer indices from the header and build the dequantization
 * factors for all four segments.
 *
 * The base index is either the frame-level index or, with segmentation
 * enabled, the segment's own value (added to the frame-level index unless
 * absolute values are in use). Each index plus its delta is clipped to
 * 7 bits before the table lookup.
 *
 * @param s decoder context; s->c is the range coder being read and s->qmat
 *          receives the result
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int i, base_qi;

    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (i = 0; i < 4; i++) {
        if (s->segmentation.enabled) {
            base_qi = s->segmentation.base_quant[i];
            if (!s->segmentation.absolute_vals)
                base_qi += yac_qi;
        } else
            base_qi = yac_qi;

        s->qmat[i].luma_qmul[0]    =           vp8_dc_qlookup[av_clip_uintp2(base_qi + ydc_delta , 7)];
        s->qmat[i].luma_qmul[1]    =           vp8_ac_qlookup[av_clip_uintp2(base_qi             , 7)];
        s->qmat[i].luma_dc_qmul[0] =       2 * vp8_dc_qlookup[av_clip_uintp2(base_qi + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[i].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(base_qi + y2ac_delta, 7)]) >> 16;
        s->qmat[i].chroma_qmul[0]  =           vp8_dc_qlookup[av_clip_uintp2(base_qi + uvdc_delta, 7)];
        s->qmat[i].chroma_qmul[1]  =           vp8_ac_qlookup[av_clip_uintp2(base_qi + uvac_delta, 7)];

        /* Enforce a floor of 8 on the luma-DC AC factor and a ceiling of
         * 132 on the chroma DC factor. */
        s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8);
        s->qmat[i].chroma_qmul[0]  = FFMIN(s->qmat[i].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffer a golden or altref reference is refreshed from
 * after this frame.
 * The spec isn't clear here, so this follows the original author's
 * understanding of what libvpx does.
 *
 * When @p update is set, the reference is refreshed from the current frame
 * and nothing is read from the bitstream. Otherwise a 2-bit code is read
 * from the header range coder:
 *      1: refresh from VP56_FRAME_PREVIOUS
 *      2: refresh golden from altref, or altref from golden
 *      any other value: no update (VP56_FRAME_NONE)
 *
 * @param s      decoder context; s->c is the range coder being read
 * @param update the "update with current frame" flag for this reference
 * @param ref    the reference being decided (golden or altref)
 * @return the frame the reference should be refreshed from
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *rc = &s->c;
    int copy_code;

    if (update)
        return VP56_FRAME_CURRENT;

    copy_code = vp8_rac_get_uint(rc, 2);

    if (copy_code == 1)
        return VP56_FRAME_PREVIOUS;

    if (copy_code == 2) {
        if (ref == VP56_FRAME_GOLDEN)
            return VP56_FRAME_GOLDEN2;
        return VP56_FRAME_GOLDEN;
    }

    return VP56_FRAME_NONE;
}

/**
 * Read the golden/altref refresh information from the frame header.
 *
 * Both "update with current frame" flags are read first; ref_to_update()
 * then resolves the golden source followed by the altref source, reading
 * further bits from the range coder as needed.
 *
 * @param s decoder context; s->update_golden and s->update_altref are set
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int golden_flag, altref_flag;

    /* Bitstream order: both flags precede either of the 2-bit codes. */
    golden_flag = vp8_rac_get(rc);
    altref_flag = vp8_rac_get(rc);

    s->update_golden = ref_to_update(s, golden_flag, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, altref_flag, VP56_FRAME_GOLDEN2);
}

/**
 * Parse the VP8 frame header and prepare the context for decoding a frame.
 *
 * Reads the 3-byte frame tag, the keyframe start code and dimensions (for
 * keyframes), then the range-coded header: segmentation, loop filter,
 * partition layout, quantizers, reference updates and probability updates.
 * Buffers are (re)allocated through update_dimensions() on the first frame
 * or when the frame size changes.
 *
 * @param s        decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes available at @p buf
 * @return 0 on success, AVERROR_INVALIDDATA on truncated or malformed data,
 *         or the error returned by update_dimensions()
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    /* The 3-byte frame tag is read unconditionally below. */
    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    /* Keyframes carry 7 extra bytes (start code and dimensions) before the
     * range-coded header. */
    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        /* A keyframe refreshes both references and resets the probabilities,
         * segmentation state and loop-filter deltas to their defaults. */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    /* Token probability updates: one value is applied to every coefficient
     * position that belongs to band j. */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}

J
Jason Garrett-Glaser 已提交
443
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
D
David Conrad 已提交
444
{
J
Jason Garrett-Glaser 已提交
445 446
    dst->x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    dst->y = av_clip(src->y, s->mv_min.y, s->mv_max.y);
D
David Conrad 已提交
447 448 449 450 451 452 453
}

/**
 * Motion vector coding, 17.1.
 *
 * Reads one motion vector component. p[0] selects between the long form
 * (magnitude read bit by bit) and the short form (a small tree giving a
 * value from 0 to 7). A non-zero magnitude is followed by a sign read with
 * probability p[1].
 *
 * @param c range coder to read from
 * @param p probability table for this component
 * @return the signed component value
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int bit, x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        int i;

        /* Bits 0..2 first, then bits 9 down to 4; bit 3 is handled last. */
        for (i = 0; i < 3; i++)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        for (i = 9; i > 3; i--)
            x += vp56_rac_get_prob(c, p[9 + i]) << i;
        /* Bit 3 is only read when a higher bit is set; otherwise it is
         * taken to be set without reading. */
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree
        const uint8_t *ps = p+2;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + 3*bit;
        x  += 4*bit;
        bit = vp56_rac_get_prob(c, *ps);
        ps += 1 + bit;
        x  += 2*bit;
        x  += vp56_rac_get_prob(c, *ps);
    }

    return (x && vp56_rac_get_prob(c, p[1])) ? -x : x;
}

480 481
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
D
David Conrad 已提交
482
{
483 484 485
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
D
David Conrad 已提交
486
        return vp8_submv_prob[2];
487
    return vp8_submv_prob[1-!!left];
D
David Conrad 已提交
488 489 490 491
}

/**
 * Split motion vector prediction, 16.4.
492
 * @returns the number of motion vectors parsed (2, 4 or 16)
D
David Conrad 已提交
493
 */
494
static av_always_inline
D
Daniel Kang 已提交
495
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
D
David Conrad 已提交
496
{
497 498
    int part_idx;
    int n, num;
D
Daniel Kang 已提交
499
    VP8Macroblock *top_mb;
500 501
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
D
Daniel Kang 已提交
502
                  *mbsplits_top,
503
                  *mbsplits_cur, *firstidx;
D
Daniel Kang 已提交
504
    VP56mv *top_mv;
505 506
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;
D
David Conrad 已提交
507

D
Daniel Kang 已提交
508 509 510 511 512 513 514
    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

D
David Conrad 已提交
530
    for (n = 0; n < num; n++) {
531
        int k = firstidx[n];
532
        uint32_t left, above;
533 534
        const uint8_t *submv_prob;

535 536 537 538 539 540 541 542
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
543 544

        submv_prob = get_submv_prob(left, above);
D
David Conrad 已提交
545

546 547 548 549 550 551 552 553 554 555 556 557
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
558
            AV_WN32A(&mb->bmv[n], left);
D
David Conrad 已提交
559 560
        }
    }
561 562

    return num;
D
David Conrad 已提交
563 564
}

565
static av_always_inline
D
Daniel Kang 已提交
566
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
567
{
D
Daniel Kang 已提交
568
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
569
                                  mb - 1 /* left */,
D
Daniel Kang 已提交
570
                                  0 /* top-left */ };
571
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
572
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
573 574
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
575
    int8_t *sign_bias = s->sign_bias;
576 577 578 579
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

D
Daniel Kang 已提交
580 581 582 583 584 585 586 587 588
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

589 590
    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
591
    AV_ZERO32(&near_mv[2]);
592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622

    /* Process MB on top, left and top-left */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
623
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
624 625 626 627 628 629 630 631 632 633 634 635
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
J
Jason Garrett-Glaser 已提交
636
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
637 638 639
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
640 641 642

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
D
Daniel Kang 已提交
643
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
644 645 646 647 648 649
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
J
Jason Garrett-Glaser 已提交
650
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
651 652 653
                mb->bmv[0] = mb->mv;
            }
        } else {
J
Jason Garrett-Glaser 已提交
654
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
655 656 657 658 659 660 661 662 663
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

664
static av_always_inline
665
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
D
Daniel Kang 已提交
666
                           int mb_x, int keyframe, int layout)
D
David Conrad 已提交
667
{
668 669
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

D
Daniel Kang 已提交
670 671 672 673
    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
674
    if (keyframe) {
675
        int x, y;
D
Daniel Kang 已提交
676
        uint8_t* top;
677
        uint8_t* const left = s->intra4x4_pred_mode_left;
D
Daniel Kang 已提交
678 679 680 681
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
682 683
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
684 685 686 687 688
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
D
David Conrad 已提交
689 690
            }
        }
691
    } else {
692
        int i;
693 694
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
D
David Conrad 已提交
695 696 697
    }
}

698
static av_always_inline
D
Daniel Kang 已提交
699 700
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
D
David Conrad 已提交
701 702 703 704
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
705
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
706
    else if (s->segmentation.enabled)
R
Ronald S. Bultje 已提交
707
        *segment = ref ? *ref : *segment;
708
    mb->segment = *segment;
D
David Conrad 已提交
709

710
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
D
David Conrad 已提交
711 712 713 714 715

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
D
Daniel Kang 已提交
716
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
717 718
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
D
Daniel Kang 已提交
719 720 721 722 723
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
724
        }
D
David Conrad 已提交
725

726
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
D
David Conrad 已提交
727
        mb->ref_frame = VP56_FRAME_CURRENT;
728
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
D
David Conrad 已提交
729
        // inter MB, 16.2
730 731
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
D
David Conrad 已提交
732 733 734
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
J
Jason Garrett-Glaser 已提交
735
        s->ref_count[mb->ref_frame-1]++;
D
David Conrad 已提交
736 737

        // motion vectors, 16.3
D
Daniel Kang 已提交
738
        decode_mvs(s, mb, mb_x, mb_y, layout);
D
David Conrad 已提交
739 740 741 742
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

743
        if (mb->mode == MODE_I4x4)
D
Daniel Kang 已提交
744
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
D
David Conrad 已提交
745

746
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
D
David Conrad 已提交
747
        mb->ref_frame = VP56_FRAME_CURRENT;
748
        mb->partitioning = VP8_SPLITMVMODE_NONE;
749
        AV_ZERO32(&mb->bmv[0]);
D
David Conrad 已提交
750 751 752
    }
}

753
#ifndef decode_block_coeffs_internal
D
David Conrad 已提交
754
/**
755
 * @param r arithmetic bitstream reader context
756 757
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
D
David Conrad 已提交
758
 * @param i initial coeff index, 0 unless a separate DC block is coded
759
 * @param qmul array holding the dc/ac dequant factor at position 0/1
D
David Conrad 已提交
760 761 762
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
763
static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
764
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
765
                                        int i, uint8_t *token_prob, int16_t qmul[2])
D
David Conrad 已提交
766
{
767
    VP56RangeCoder c = *r;
768
    goto skip_eob;
769
    do {
770
        int coeff;
771 772
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;
D
David Conrad 已提交
773

774
skip_eob:
775
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
776
            if (++i == 16)
777
                break; // invalid input; blocks should end with EOB
778
            token_prob = probs[i][0];
779
            goto skip_eob;
780 781
        }

782
        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
783
            coeff = 1;
784
            token_prob = probs[i+1][1];
785
        } else {
786 787
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
788
                if (coeff)
789
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
790 791 792
                coeff += 2;
            } else {
                // DCT_CAT*
793 794 795
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
796 797
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
798 799
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
800 801
                    }
                } else {    // DCT_CAT3 and up
802 803
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
804 805
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
806
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
807 808
                }
            }
809
            token_prob = probs[i+1][2];
810
        }
811
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
812
    } while (++i < 16);
813

814
    *r = c;
815
    return i;
D
David Conrad 已提交
816
}
817
#endif
D
David Conrad 已提交
818

819 820 821 822 823 824 825 826 827 828 829
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
830 831
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
832
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
833 834 835 836 837 838 839 840
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}

841
static av_always_inline
D
Daniel Kang 已提交
842
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
843
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
D
David Conrad 已提交
844 845 846
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
847
    int segment = mb->segment;
848
    int block_dc = 0;
D
David Conrad 已提交
849 850 851 852 853

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
D
Daniel Kang 已提交
854
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
D
David Conrad 已提交
855 856
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
857 858 859 860
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
D
Daniel Kang 已提交
861
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
862
            else
D
Daniel Kang 已提交
863
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
864
        }
D
David Conrad 已提交
865 866 867 868 869 870 871
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
J
Jason Garrett-Glaser 已提交
872
            nnz_pred = l_nnz[y] + t_nnz[x];
D
Daniel Kang 已提交
873
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
J
Jason Garrett-Glaser 已提交
874
                                      nnz_pred, s->qmat[segment].luma_qmul);
875
            // nnz+block_dc may be one more than the actual last index, but we don't care
D
Daniel Kang 已提交
876
            td->non_zero_count_cache[y][x] = nnz + block_dc;
D
David Conrad 已提交
877 878 879 880 881 882 883 884 885 886 887
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
D
Daniel Kang 已提交
888
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
D
David Conrad 已提交
889
                                          nnz_pred, s->qmat[segment].chroma_qmul);
D
Daniel Kang 已提交
890
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
D
David Conrad 已提交
891 892 893 894 895 896 897 898 899 900 901
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922
/**
 * Save the bottom pixel row of a macroblock into top_border:
 * 16 luma bytes at offset 0 and, unless simple is set, 8 bytes of each
 * chroma plane at offsets 16 and 24.
 */
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    uint8_t *last_luma_row = src_y + 15*linesize;

    AV_COPY128(top_border, last_luma_row);

    // chroma is not saved when simple is set
    if (simple)
        return;

    AV_COPY64(top_border + 16, src_cb + 7*uvlinesize);
    AV_COPY64(top_border + 24, src_cr + 7*uvlinesize);
}

static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    src_y  -=   linesize;
    src_cb -= uvlinesize;
    src_cr -= uvlinesize;

M
Måns Rullgård 已提交
923 924 925 926
#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)
927 928 929 930

    XCHG(top_border_m1+8, src_y-8, xchg);
    XCHG(top_border,      src_y,   xchg);
    XCHG(top_border+8,    src_y+8, 1);
931
    if (mb_x < mb_width-1)
932
        XCHG(top_border+32, src_y+16, 1);
933

934 935 936 937 938 939 940 941 942 943
    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (!simple || !mb_y) {
        XCHG(top_border_m1+16, src_cb-8, xchg);
        XCHG(top_border_m1+24, src_cr-8, xchg);
        XCHG(top_border+16,    src_cb, 1);
        XCHG(top_border+24,    src_cr, 1);
    }
}

944
static av_always_inline
945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    } else {
        return mb_y ? mode : LEFT_DC_PRED8x8;
    }
}

/**
 * Pick the replacement for TM prediction matching the macroblock
 * position: the given mode away from the frame edges, horizontal on the
 * top row, vertical on the left column and DC_129 at the origin.
 */
static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x && mb_y)
        return mode;
    if (mb_x)                   // mb_y == 0
        return HOR_PRED8x8;
    if (mb_y)                   // mb_x == 0
        return VERT_PRED8x8;
    return DC_129_PRED8x8;
}

static av_always_inline
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
D
David Conrad 已提交
966 967
{
    if (mode == DC_PRED8x8) {
968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    } else {
        return mode;
    }
}

/**
 * Adjust an intra prediction mode for the macroblock position, variant
 * used when CODEC_FLAG_EMU_EDGE is set: modes that would read a missing
 * top or left edge are replaced by a constant-DC or one-sided mode.
 */
static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    if (mode == DC_PRED8x8)
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    if (mode == VERT_PRED8x8)
        return mb_y ? mode : DC_127_PRED8x8;
    if (mode == HOR_PRED8x8)
        return mb_x ? mode : DC_129_PRED8x8;
    if (mode == PLANE_PRED8x8 /*TM*/)
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);

    return mode;
}

/**
 * 4x4 counterpart of check_tm_pred8x8_mode(): the given mode away from
 * the edges, horizontal on the top row, vertical on the left column and
 * DC_129 at the origin.
 */
static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x && mb_y)
        return mode;
    if (mb_x)                   // mb_y == 0
        return HOR_VP8_PRED;
    if (mb_y)                   // mb_x == 0
        return VERT_VP8_PRED;
    return DC_129_PRED;
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
1017
        }
1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
D
David Conrad 已提交
1030 1031 1032 1033
    }
    return mode;
}

1034
static av_always_inline
D
Daniel Kang 已提交
1035 1036
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
D
David Conrad 已提交
1037
{
1038
    AVCodecContext *avctx = s->avctx;
M
Mans Rullgard 已提交
1039 1040
    int x, y, mode, nnz;
    uint32_t tr;
D
David Conrad 已提交
1041

1042 1043
    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
D
Daniel Kang 已提交
1044
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1045 1046 1047 1048
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

D
David Conrad 已提交
1049
    if (mb->mode < MODE_I4x4) {
1050 1051 1052 1053 1054
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
D
David Conrad 已提交
1055 1056 1057
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
1058
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1059
        uint8_t tr_top[4] = { 127, 127, 127, 127 };
D
David Conrad 已提交
1060 1061 1062 1063 1064 1065 1066

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
1067 1068
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
M
Mans Rullgard 已提交
1069
            tr = tr_right[-1]*0x01010101u;
D
David Conrad 已提交
1070 1071 1072
            tr_right = (uint8_t *)&tr;
        }

1073
        if (mb->skip)
D
Daniel Kang 已提交
1074
            AV_ZERO128(td->non_zero_count_cache);
1075

D
David Conrad 已提交
1076 1077 1078
        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
1079 1080 1081 1082 1083 1084 1085
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
D
David Conrad 已提交
1086 1087
                    topright = tr_right;

1088 1089 1090 1091 1092 1093 1094
                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
R
Ronald S. Bultje 已提交
1095
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1096
                        } else {
R
Ronald S. Bultje 已提交
1097
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
R
Ronald S. Bultje 已提交
1121 1122 1123 1124
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1125
                }
D
David Conrad 已提交
1126

D
Daniel Kang 已提交
1127
                nnz = td->non_zero_count_cache[y][x];
D
David Conrad 已提交
1128 1129
                if (nnz) {
                    if (nnz == 1)
D
Daniel Kang 已提交
1130
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
D
David Conrad 已提交
1131
                    else
D
Daniel Kang 已提交
1132
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
D
David Conrad 已提交
1133 1134 1135 1136 1137
                }
                topright += 4;
            }

            ptr   += 4*s->linesize;
1138
            intra4x4 += 4;
D
David Conrad 已提交
1139 1140 1141
        }
    }

1142
    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1143
        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1144
    } else {
1145
        mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1146
    }
D
David Conrad 已提交
1147 1148
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1149

D
Daniel Kang 已提交
1150
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1151 1152 1153
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
D
David Conrad 已提交
1154 1155
}

1156 1157 1158 1159 1160 1161 1162
/* Per-subpel-position lookup used by the MC functions; the second index is
 * the fractional motion vector position (0-7), the first selects the row. */
static const uint8_t subpel_idx[3][8] = {
    { 0, 1, 2, 1, 2, 1, 2, 1 }, // nr. of left extra pixels,
                                // also function pointer index
    { 0, 3, 5, 3, 5, 3, 5, 3 }, // nr. of extra pixels required
    { 0, 2, 3, 2, 3, 2, 3, 2 }, // nr. of right extra pixels
};

D
David Conrad 已提交
1163
/**
1164
 * luma MC function
D
David Conrad 已提交
1165 1166 1167
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
1168
 * @param ref reference picture buffer at origin (0, 0)
D
David Conrad 已提交
1169 1170 1171 1172 1173 1174 1175 1176
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
1177
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
D
David Conrad 已提交
1178
 */
1179
static av_always_inline
D
Daniel Kang 已提交
1180 1181
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 AVFrame *ref, const VP56mv *mv,
1182 1183 1184
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
D
David Conrad 已提交
1185
{
R
Ronald S. Bultje 已提交
1186 1187
    uint8_t *src = ref->data[0];

1188
    if (AV_RN32A(mv)) {
1189 1190 1191 1192 1193 1194

        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;
1195 1196

        // edge emulation
R
Ronald S. Bultje 已提交
1197
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1198
        src += y_off * linesize + x_off;
1199 1200
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
D
Daniel Kang 已提交
1201
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1202 1203
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1204
            src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1205 1206
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
R
Ronald S. Bultje 已提交
1207 1208
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1209
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
R
Ronald S. Bultje 已提交
1210
    }
D
David Conrad 已提交
1211 1212
}

1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229
/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
1230
static av_always_inline
D
Daniel Kang 已提交
1231 1232
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
                   AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
1233 1234 1235
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
R
Ronald S. Bultje 已提交
1236 1237
    uint8_t *src1 = ref->data[1], *src2 = ref->data[2];

1238 1239 1240 1241 1242 1243 1244 1245 1246 1247
    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
R
Ronald S. Bultje 已提交
1248
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1249 1250
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
D
Daniel Kang 已提交
1251
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1252 1253
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1254
            src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1255 1256
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

D
Daniel Kang 已提交
1257
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1258 1259
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1260
            src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1261 1262 1263 1264 1265 1266
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
R
Ronald S. Bultje 已提交
1267
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1268 1269 1270 1271 1272
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

1273
static av_always_inline
D
Daniel Kang 已提交
1274
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1275 1276 1277 1278
                 AVFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
1279 1280 1281 1282
{
    VP56mv uvmv = *mv;

    /* Y */
D
Daniel Kang 已提交
1283
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
R
Ronald S. Bultje 已提交
1284
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1285 1286
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);
1287 1288 1289 1290 1291 1292 1293 1294 1295 1296

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
D
Daniel Kang 已提交
1297
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
R
Ronald S. Bultje 已提交
1298 1299
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
1300 1301
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
1302 1303
}

1304 1305
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
1306
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1307
{
J
Jason Garrett-Glaser 已提交
1308 1309
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
J
Jason Garrett-Glaser 已提交
1310
        int x_off = mb_x << 4, y_off = mb_y << 4;
J
Jason Garrett-Glaser 已提交
1311 1312
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
J
Jason Garrett-Glaser 已提交
1313 1314
        uint8_t **src= s->framep[ref]->data;
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
R
Ronald S. Bultje 已提交
1315 1316 1317
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
J
Jason Garrett-Glaser 已提交
1318 1319 1320 1321
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
1322 1323
}

D
David Conrad 已提交
1324 1325 1326
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
1327
static av_always_inline
D
Daniel Kang 已提交
1328 1329
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
D
David Conrad 已提交
1330 1331 1332
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
1333 1334
    AVFrame *ref = s->framep[mb->ref_frame];
    VP56mv *bmv = mb->bmv;
D
David Conrad 已提交
1335

1336 1337
    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
D
Daniel Kang 已提交
1338
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1339
                    0, 0, 16, 16, width, height, &mb->mv);
1340
        break;
1341
    case VP8_SPLITMVMODE_4x4: {
D
David Conrad 已提交
1342
        int x, y;
1343
        VP56mv uvmv;
D
David Conrad 已提交
1344 1345 1346 1347

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
D
Daniel Kang 已提交
1348
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
R
Ronald S. Bultje 已提交
1349
                            ref, &bmv[4*y + x],
1350 1351 1352
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
D
David Conrad 已提交
1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1368 1369
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
D
David Conrad 已提交
1370 1371 1372 1373
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
D
Daniel Kang 已提交
1374
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
R
Ronald S. Bultje 已提交
1375
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1376 1377 1378
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
D
David Conrad 已提交
1379 1380
            }
        }
1381 1382 1383
        break;
    }
    case VP8_SPLITMVMODE_16x8:
D
Daniel Kang 已提交
1384
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1385
                    0, 0, 16, 8, width, height, &bmv[0]);
D
Daniel Kang 已提交
1386
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1387
                    0, 8, 16, 8, width, height, &bmv[1]);
1388 1389
        break;
    case VP8_SPLITMVMODE_8x16:
D
Daniel Kang 已提交
1390
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1391
                    0, 0, 8, 16, width, height, &bmv[0]);
D
Daniel Kang 已提交
1392
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1393
                    8, 0, 8, 16, width, height, &bmv[1]);
1394 1395
        break;
    case VP8_SPLITMVMODE_8x8:
D
Daniel Kang 已提交
1396
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1397
                    0, 0, 8, 8, width, height, &bmv[0]);
D
Daniel Kang 已提交
1398
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1399
                    8, 0, 8, 8, width, height, &bmv[1]);
D
Daniel Kang 已提交
1400
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1401
                    0, 8, 8, 8, width, height, &bmv[2]);
D
Daniel Kang 已提交
1402
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1403
                    8, 8, 8, 8, width, height, &bmv[3]);
1404
        break;
D
David Conrad 已提交
1405 1406 1407
    }
}

D
Daniel Kang 已提交
1408 1409
/**
 * Run the inverse transform for the residual blocks of one macroblock and
 * add the result into the destination planes.
 *
 * Non-zero coefficient counts are read from td->non_zero_count_cache four at
 * a time as one little-endian 32-bit word (one byte per 4x4 block). Per
 * block: a count of 0 issues no call, a count of exactly 1 uses the DC-only
 * add, anything larger uses the full IDCT add. When every byte of the word
 * is 0 or 1, one batched DC-add call covers the whole group of four.
 *
 * @param s   decoder context (DSP function table and line sizes)
 * @param td  per-thread data holding coefficient blocks and nnz counts
 * @param dst plane pointers for this macroblock: dst[0] luma, dst[1..2] chroma
 * @param mb  macroblock being reconstructed; only mb->mode is read here
 */
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int x, y, ch;

    /* Luma is skipped for MODE_I4x4.
     * NOTE(review): presumably the intra 4x4 predictor applies the residual
     * block by block itself - confirm against intra_predict(). */
    if (mb->mode != MODE_I4x4) {
        uint8_t *y_dst = dst[0];
        for (y = 0; y < 4; y++) {
            uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[y]);
            if (nnz4) {
                /* any count above 1 in this row -> handle blocks one by one */
                if (nnz4&~0x01010101) {
                    for (x = 0; x < 4; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, td->block[y][x], s->linesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(y_dst+4*x, td->block[y][x], s->linesize);
                        nnz4 >>= 8;
                        /* remaining blocks of the row are all empty */
                        if (!nnz4)
                            break;
                    }
                } else {
                    s->vp8dsp.vp8_idct_dc_add4y(y_dst, td->block[y], s->linesize);
                }
            }
            y_dst += 4*s->linesize;
        }
    }

    /* Chroma: cache rows 4 and 5 each describe a 2x2 arrangement of blocks. */
    for (ch = 0; ch < 2; ch++) {
        uint32_t nnz4 = AV_RL32(td->non_zero_count_cache[4+ch]);
        if (nnz4) {
            uint8_t *ch_dst = dst[1+ch];
            if (nnz4&~0x01010101) {
                for (y = 0; y < 2; y++) {
                    for (x = 0; x < 2; x++) {
                        if ((uint8_t)nnz4 == 1)
                            s->vp8dsp.vp8_idct_dc_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        else if((uint8_t)nnz4 > 1)
                            s->vp8dsp.vp8_idct_add(ch_dst+4*x, td->block[4+ch][(y<<1)+x], s->uvlinesize);
                        nnz4 >>= 8;
                        /* nothing left in this plane: leave both loops */
                        if (!nnz4)
                            goto chroma_idct_end;
                    }
                    ch_dst += 4*s->uvlinesize;
                }
            } else {
                s->vp8dsp.vp8_idct_dc_add4uv(ch_dst, td->block[4+ch], s->uvlinesize);
            }
        }
chroma_idct_end: ;
    }
}

1461
/**
 * Compute the loop-filter parameters of one macroblock and store them in *f.
 *
 * The base level comes from the macroblock's segment (absolute, or relative
 * to the frame level) or from the frame level itself. Reference-frame and
 * mode deltas are added when enabled, and the sum is clamped with
 * av_clip_uintp2(.., 6). The interior limit is derived from the level and
 * the sharpness setting and is never below 1.
 *
 * @param s  decoder context (segmentation, lf_delta and filter settings)
 * @param mb macroblock whose segment, ref_frame, mode and skip are read
 * @param f  output: filter_level, inner_limit and inner_filter are written
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    int interior_limit, filter_level;

    if (s->segmentation.enabled) {
        filter_level = s->segmentation.filter_level[mb->segment];
        /* per-segment value is a delta on top of the frame level */
        if (!s->segmentation.absolute_vals)
            filter_level += s->filter.level;
    } else
        filter_level = s->filter.level;

    if (s->lf_delta.enabled) {
        filter_level += s->lf_delta.ref[mb->ref_frame];
        filter_level += s->lf_delta.mode[mb->mode];
    }

    /* keep the level inside the unsigned 6-bit range */
    filter_level = av_clip_uintp2(filter_level, 6);

    interior_limit = filter_level;
    if (s->filter.sharpness) {
        interior_limit >>= (s->filter.sharpness + 3) >> 2;
        interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness);
    }
    interior_limit = FFMAX(interior_limit, 1);

    f->filter_level = filter_level;
    f->inner_limit = interior_limit;
    /* inner edges are filtered unless the MB is skipped with a whole-MB mode */
    f->inner_filter = !mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT;
}

1491
/**
 * Apply the normal (non-simple) loop filter to one macroblock, luma and
 * chroma.
 *
 * Order of operations: left macroblock edge (only if mb_x != 0), the three
 * inner vertical luma edges plus the inner chroma edge (only if
 * f->inner_filter), top macroblock edge (only if mb_y != 0), then the inner
 * horizontal edges. Nothing is done when the filter level is 0.
 *
 * @param s    decoder context (DSP table, line sizes, keyframe flag)
 * @param dst  plane pointers at the top-left sample of the macroblock
 * @param f    filter parameters as filled in by filter_level_for_mb()
 * @param mb_x macroblock column; 0 disables the left-edge filter
 * @param mb_y macroblock row; 0 disables the top-edge filter
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim, hev_thresh;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;
    int uvlinesize = s->uvlinesize;
    /* high-edge-variance threshold, indexed [keyframe][filter_level] */
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };

    if (!filter_level)
        return;

    /* macroblock edges use a limit 4 higher than inner block edges */
     bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 4, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+ 8, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0]+12, linesize, bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }

    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0],     linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1],     dst[2],      uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 4*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+ 8*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0]+12*linesize,
                                             linesize,    bedge_lim,
                                             inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize,  bedge_lim,
                                             inner_limit, hev_thresh);
    }
}

1561
/**
 * Apply the simple loop filter to the luma plane of one macroblock.
 *
 * Same edge order as filter_mb(): left macroblock edge (if mb_x != 0), inner
 * vertical edges at columns 4/8/12 (if f->inner_filter), top macroblock edge
 * (if mb_y != 0), inner horizontal edges at rows 4/8/12. Nothing is done
 * when the filter level is 0.
 *
 * @param s    decoder context (DSP table and luma line size)
 * @param dst  top-left luma sample of the macroblock
 * @param f    filter parameters as filled in by filter_level_for_mb()
 * @param mb_x macroblock column; 0 disables the left-edge filter
 * @param mb_y macroblock row; 0 disables the top-edge filter
 */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int filter_level = f->filter_level;
    int inner_limit = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize = s->linesize;

    if (!filter_level)
        return;

    /* macroblock edges use a limit 4 higher than inner block edges */
     bedge_lim = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, linesize, bedge_lim);
        s->vp8dsp.vp8_h_loop_filter_simple(dst+12, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*linesize, linesize, bedge_lim);
        s->vp8dsp.vp8_v_loop_filter_simple(dst+12*linesize, linesize, bedge_lim);
    }
}

1592 1593 1594 1595 1596 1597 1598 1599
/**
 * Free segmentation maps that were queued for release.
 *
 * @param s        decoder context owning the queue
 * @param is_close non-zero to free every queued map; zero to keep one map
 *                 queued unless the maps were flagged invalid
 */
static void release_queued_segmaps(VP8Context *s, int is_close)
{
    int keep = 0;

    /* one map stays queued only on a regular call with still-valid maps */
    if (!is_close && !s->maps_are_invalid)
        keep = 1;

    while (s->num_maps_to_be_freed > keep) {
        s->num_maps_to_be_freed--;
        av_freep(&s->segmentation_maps[s->num_maps_to_be_freed]);
    }

    s->maps_are_invalid = 0;
}

1600
/* Slack added beyond the frame edge when setting up the motion-vector clamp
 * range (s->mv_min / s->mv_max). Presumably 16 pixels expressed in
 * quarter-pel units, matching the "<< 6" per-macroblock step used next to
 * it - TODO confirm. */
#define MARGIN (16 << 2)
D
Daniel Kang 已提交
1601 1602
/**
 * Decode the prediction modes / motion vectors of every macroblock of the
 * frame in one pass, in raster order. The frame decoder calls this when
 * s->mb_layout == 1.
 *
 * The motion-vector clamp range (s->mv_min / s->mv_max) is reset at the
 * start of the frame and of each row, and shifted by 64 after every
 * macroblock / row.
 *
 * @param avctx      codec context; priv_data is the VP8Context
 * @param curframe   frame being decoded; its ref_index[0] is passed on
 * @param prev_frame previous frame or NULL; its ref_index[0], when present,
 *                   is passed on as well
 */
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
                                   AVFrame *prev_frame)
{
    VP8Context *s = avctx->priv_data;
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        /* rows are mb_width+1 entries apart; skip the border row and column */
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            /* first row: seed the "top" modes of the entry one row above */
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

1630
#if HAVE_THREADS
/*
 * Row-level synchronisation between slice threads.
 *
 * A position is packed as (mb_y << 16) | (mb_x & 0xFFFF) so that positions
 * compare as plain integers.
 *
 * check_thread_pos(td, otd, mb_x_check, mb_y_check)
 *   Block until thread `otd` has published a position >= the requested one.
 *   While waiting, the requested position is stored in td->wait_mb_pos; it
 *   is reset to INT_MAX afterwards.
 *   NOTE(review): the first test of otd->thread_mb_pos happens without
 *   holding otd->lock.
 *
 * update_pos(td, mb_y, mb_x)
 *   Publish the position of `td`. Waiters are woken only when slice
 *   threading with more than one job is active and a neighbour's
 *   wait_mb_pos has been reached (or the neighbours are unknown).
 *   Expects `avctx`, `num_jobs`, `next_td` and `prev_td` in the caller's
 *   scope.
 *
 * Both macros expand to a single statement without a trailing semicolon,
 * so they are safe in an unbraced if/else.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            do {\
                if ((otd)->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            } while (1);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while(0)

#define update_pos(td, mb_y, mb_x)\
    do {\
    int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
    int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
    int is_null          = (next_td == NULL) || (prev_td == NULL);\
    int pos_check        = (is_null) ? 1 :\
                            (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                            (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
    (td)->thread_mb_pos = pos;\
    if (sliced_threading && pos_check) {\
        pthread_mutex_lock(&(td)->lock);\
        pthread_cond_broadcast(&(td)->cond);\
        pthread_mutex_unlock(&(td)->lock);\
    }\
    } while(0)
#else
/* Without thread support both macros expand to nothing. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)
#define update_pos(td, mb_y, mb_x)
#endif
D
Daniel Kang 已提交
1666 1667 1668 1669 1670 1671 1672

/**
 * Decode one macroblock row (modes if needed, coefficients, prediction and
 * residual) without running the loop filter. Filter strengths are recorded
 * per macroblock for the later filter pass when deblocking is enabled.
 *
 * The row index is taken from the upper bits of td->thread_mb_pos, which the
 * caller sets before the call. Progress is published after every macroblock
 * through update_pos() so that the threads working on neighbouring rows can
 * follow.
 *
 * NOTE(review): the thread data is looked up with threadnr here while
 * vp8_decode_mb_row_sliced() uses jobnr - this presumably relies on both
 * being equal; confirm against the execute2 implementation.
 *
 * @param avctx    codec context; priv_data is the VP8Context
 * @param tdata    unused
 * @param jobnr    job index, used to locate the neighbouring rows' threads
 * @param threadnr index into s->thread_data for this thread
 */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int i, y, mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };
    /* first / last row have no neighbour: point at ourselves instead */
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));
    // left edge of 129 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16>>!!i; y++)
                dst[i][y*curframe->linesize[i]-1] = 129;
        if (mb_y == 1) {
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                /* positions offset by mb_width+3 are the ones published by
                 * the filter pass (see vp8_filter_mb_row) */
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        /* with mb_layout == 1 the modes were decoded up front */
        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        /* only the thread with the highest index saves the unfiltered
         * bottom border when several jobs are running */
        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        /* NOTE(review): mb_x < s->mb_width inside this loop, so the first
         * branch looks unreachable - confirm the intended condition. */
        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}

/**
 * Run the loop filter over one macroblock row, using the per-macroblock
 * strengths stored in td->filter_strength.
 *
 * The row index is taken from the upper bits of td->thread_mb_pos. Before
 * each macroblock the function waits for the neighbouring rows' threads,
 * and afterwards it publishes its own position offset by mb_width+3.
 *
 * @param avctx    codec context; priv_data is the VP8Context
 * @param tdata    unused
 * @param jobnr    job index, used to locate the neighbouring rows' threads
 * @param threadnr index into s->thread_data for this thread
 */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe;
    VP8Macroblock *mb;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };

    /* NOTE(review): mb is advanced in the loop but never read here. */
    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;

    /* first / last row have no neighbour: point at ourselves instead */
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];
        /* row above must have been filtered past this column */
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        /* row below must have been decoded past this column */
        if (next_td != td)
            if (next_td != &s->thread_data[0]) {
                check_thread_pos(td, next_td, mb_x+1, mb_y+1);
            }

        /* with a single job the unfiltered border is saved here, not in
         * the decode pass */
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}

/**
 * execute2() worker: decode, and filter when deblocking is enabled, every
 * num_jobs-th macroblock row starting at row jobnr.
 *
 * @param avctx    codec context; priv_data is the VP8Context
 * @param tdata    passed through to the row functions
 * @param jobnr    job index; also the first row handled and the index of the
 *                 thread data used here
 * @param threadnr thread index, stored in td->thread_nr and passed on
 * @return always 0
 */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    /* read by update_pos(); NULL makes it signal unconditionally */
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    AVFrame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;
    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        /* the row functions read the row index back from thread_mb_pos */
        td->thread_mb_pos = mb_y<<16;
        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
        /* mark the row as completely done */
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(curframe, mb_y, 0);
    }

    return 0;
}

1858
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
D
David Conrad 已提交
1859 1860 1861
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
D
Daniel Kang 已提交
1862
    int ret, i, referenced, num_jobs;
D
David Conrad 已提交
1863
    enum AVDiscard skip_thresh;
1864
    AVFrame *av_uninit(curframe), *prev_frame;
D
David Conrad 已提交
1865

1866 1867
    release_queued_segmaps(s, 0);

D
David Conrad 已提交
1868
    if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0)
1869
        goto err;
D
David Conrad 已提交
1870

1871 1872
    prev_frame = s->framep[VP56_FRAME_CURRENT];

D
David Conrad 已提交
1873 1874 1875 1876 1877 1878 1879 1880
    referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT
                                || s->update_altref == VP56_FRAME_CURRENT;

    skip_thresh = !referenced ? AVDISCARD_NONREF :
                    !s->keyframe ? AVDISCARD_NONKEY : AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
1881
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
D
David Conrad 已提交
1882 1883
        goto skip_decode;
    }
1884
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
D
David Conrad 已提交
1885

R
Ronald S. Bultje 已提交
1886 1887 1888 1889 1890 1891 1892
    // release no longer referenced frames
    for (i = 0; i < 5; i++)
        if (s->frames[i].data[0] &&
            &s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2])
1893
            vp8_release_frame(s, &s->frames[i], 1, 0);
R
Ronald S. Bultje 已提交
1894 1895 1896 1897 1898

    // find a free buffer
    for (i = 0; i < 5; i++)
        if (&s->frames[i] != prev_frame &&
            &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
D
David Conrad 已提交
1899 1900 1901 1902 1903
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] &&
            &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i];
            break;
        }
R
Ronald S. Bultje 已提交
1904 1905 1906 1907
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
D
David Conrad 已提交
1908
    if (curframe->data[0])
1909
        vp8_release_frame(s, curframe, 1, 0);
D
David Conrad 已提交
1910

1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921
    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

D
David Conrad 已提交
1922
    curframe->key_frame = s->keyframe;
1923
    curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
D
David Conrad 已提交
1924
    curframe->reference = referenced ? 3 : 0;
1925
    if ((ret = vp8_alloc_frame(s, curframe))) {
D
David Conrad 已提交
1926
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
1927
        goto err;
D
David Conrad 已提交
1928 1929
    }

R
Ronald S. Bultje 已提交
1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949
    // check if golden and altref are swapped
    if (s->update_altref != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[s->update_altref];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN2]  = s->framep[VP56_FRAME_GOLDEN2];
    }
    if (s->update_golden != VP56_FRAME_NONE) {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[s->update_golden];
    } else {
        s->next_framep[VP56_FRAME_GOLDEN]   = s->framep[VP56_FRAME_GOLDEN];
    }
    if (s->update_last) {
        s->next_framep[VP56_FRAME_PREVIOUS] = curframe;
    } else {
        s->next_framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_PREVIOUS];
    }
    s->next_framep[VP56_FRAME_CURRENT]      = curframe;

    ff_thread_finish_setup(avctx);

D
David Conrad 已提交
1950 1951 1952
    s->linesize   = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

D
Daniel Kang 已提交
1953 1954 1955
    if (!s->thread_data[0].edge_emu_buffer)
        for (i = 0; i < MAX_THREADS; i++)
            s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
D
David Conrad 已提交
1956 1957

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
P
Pascal Massimino 已提交
1958
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
D
Daniel Kang 已提交
1959 1960 1961 1962
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
1963

D
David Conrad 已提交
1964
    // top edge of 127 for intra prediction
1965 1966
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][15] = s->top_border[0][23] = 127;
1967 1968
        s->top_border[0][31] = 127;
        memset(s->top_border[1], 127, s->mb_width*sizeof(*s->top_border));
1969
    }
J
Jason Garrett-Glaser 已提交
1970
    memset(s->ref_count, 0, sizeof(s->ref_count));
D
David Conrad 已提交
1971

J
Jason Garrett-Glaser 已提交
1972

D
Daniel Kang 已提交
1973 1974 1975 1976
    // Make sure the previous frame has read its segmentation map,
    // if we re-use the same map.
    if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
        ff_thread_await_progress(prev_frame, 1, 0);
J
Jason Garrett-Glaser 已提交
1977

D
Daniel Kang 已提交
1978 1979
    if (s->mb_layout == 1)
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
R
Ronald S. Bultje 已提交
1980

D
Daniel Kang 已提交
1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994
    if (avctx->active_thread_type == FF_THREAD_FRAME)
        num_jobs = 1;
    else
        num_jobs = FFMIN(s->num_coeff_partitions, avctx->thread_count);
    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].thread_mb_pos = 0;
        s->thread_data[i].wait_mb_pos = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);
D
David Conrad 已提交
1995

R
Ronald S. Bultje 已提交
1996
    ff_thread_report_progress(curframe, INT_MAX, 0);
1997 1998
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

D
David Conrad 已提交
1999 2000 2001 2002 2003 2004 2005
skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
R
Ronald S. Bultje 已提交
2006
        *(AVFrame*)data = *curframe;
2007
        *got_frame      = 1;
D
David Conrad 已提交
2008 2009 2010
    }

    return avpkt->size;
2011 2012 2013
err:
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
D
David Conrad 已提交
2014 2015 2016 2017 2018 2019 2020
}

/**
 * Codec init callback: bind the private context to avctx, select the output
 * pixel format and initialise the DSP / prediction function tables.
 *
 * @param avctx codec context; priv_data is the VP8Context
 * @return always 0
 */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *s = avctx->priv_data;

    s->avctx = avctx;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;

    ff_dsputil_init(&s->dsp, avctx);
    ff_h264_pred_init(&s->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&s->vp8dsp);

    return 0;
}

/**
 * Codec close callback: flush the decoder state and free every queued
 * segmentation map.
 *
 * @param avctx codec context; priv_data is the VP8Context
 * @return always 0
 */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    vp8_decode_flush_impl(avctx, 0, 1, 1);
    /* is_close = 1: nothing is left queued */
    release_queued_segmaps(avctx->priv_data, 1);
    return 0;
}

R
Ronald S. Bultje 已提交
2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052
/**
 * Per-thread init callback for frame threading: make the private context of
 * this thread copy point back at its own codec context.
 *
 * @param avctx codec context of the thread copy
 * @return always 0
 */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *const ctx = avctx->priv_data;

    ctx->avctx = avctx;
    return 0;
}

/* Translate a frame pointer into s_src->frames[] to the same slot of
 * s->frames[]; NULL stays NULL. Expects `s` and `s_src` in scope. The
 * argument and the whole expansion are parenthesized so the macro can be
 * used inside larger expressions. */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

/**
 * Frame-threading callback: copy the state the next frame depends on from
 * the source thread's context into the destination thread's context.
 *
 * Buffers are freed and the segmentation maps flagged invalid when the
 * macroblock dimensions changed. Reference pointers are taken from the
 * source's next_framep[] and rebased onto this context's frames[] array.
 *
 * @param dst codec context to update
 * @param src codec context to copy from
 * @return always 0
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->maps_are_invalid = 1;
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    /* take the saved probabilities when the source frame did not keep its
     * updated ones */
    s->prob[0] = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}

2075
AVCodec ff_vp8_decoder = {
2076 2077
    .name                  = "vp8",
    .type                  = AVMEDIA_TYPE_VIDEO,
2078
    .id                    = AV_CODEC_ID_VP8,
2079 2080 2081 2082
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp8_decode_init,
    .close                 = vp8_decode_free,
    .decode                = vp8_decode_frame,
D
Daniel Kang 已提交
2083
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2084 2085
    .flush                 = vp8_decode_flush,
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
R
Ronald S. Bultje 已提交
2086 2087
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
D
David Conrad 已提交
2088
};