/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

26
#include "libavutil/imgutils.h"
D
David Conrad 已提交
27
#include "avcodec.h"
28
#include "internal.h"
29
#include "vp8.h"
D
David Conrad 已提交
30 31
#include "vp8data.h"
#include "rectangle.h"
R
Ronald S. Bultje 已提交
32
#include "thread.h"
D
David Conrad 已提交
33

34 35 36 37
#if ARCH_ARM
#   include "arm/vp8.h"
#endif

38 39
/**
 * Release the working buffers held by the decoder context.
 *
 * The per-thread scratch buffers are released only when the thread_data
 * array exists; the array itself and the remaining tables are then freed
 * unconditionally (av_freep() also resets each pointer). s->macroblocks is
 * cleared as well.
 *
 * @param s decoder context whose buffers are released
 */
static void free_buffers(VP8Context *s)
{
    if (s->thread_data) {
        int t = 0;

        while (t < MAX_THREADS) {
            av_freep(&s->thread_data[t].filter_strength);
            av_freep(&s->thread_data[t].edge_emu_buffer);
            t++;
        }
    }

    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

55 56 57 58 59
/**
 * Obtain a picture buffer for f and attach a segmentation map to it.
 *
 * A map queued in s->segmentation_maps is reused when one is available and
 * the queued maps have not been marked invalid; otherwise a zeroed map of
 * mb_width * mb_height bytes is allocated.
 *
 * @param s decoder context
 * @param f frame to set up
 * @return 0 on success, the negative error of ff_thread_get_buffer(), or
 *         AVERROR(ENOMEM) when the map cannot be allocated (the picture
 *         buffer is released again in that case)
 */
static int vp8_alloc_frame(VP8Context *s, AVFrame *f)
{
    int err = ff_thread_get_buffer(s->avctx, f);

    if (err < 0)
        return err;

    if (s->num_maps_to_be_freed && !s->maps_are_invalid) {
        /* take the most recently queued map */
        s->num_maps_to_be_freed--;
        f->ref_index[0] = s->segmentation_maps[s->num_maps_to_be_freed];
        return 0;
    }

    f->ref_index[0] = av_mallocz(s->mb_width * s->mb_height);
    if (!f->ref_index[0]) {
        ff_thread_release_buffer(s->avctx, f);
        return AVERROR(ENOMEM);
    }

    return 0;
}

69
/**
 * Release a frame together with the segmentation map in f->ref_index[0].
 *
 * @param s                   decoder context
 * @param f                   frame to release
 * @param prefer_delayed_free non-zero: queue the map in s->segmentation_maps
 *                            instead of freeing it; zero: free it right away
 * @param can_direct_free     non-zero: at most one map is kept queued and any
 *                            further map is freed immediately
 */
static void vp8_release_frame(VP8Context *s, AVFrame *f, int prefer_delayed_free, int can_direct_free)
{
    if (f->ref_index[0] && !prefer_delayed_free) {
        /* vp8_decode_free() */
        av_free(f->ref_index[0]);
    } else if (f->ref_index[0]) {
        /* Upon a size change, we want to free the maps but other threads may
         * still be using them, so they are queued. Upon a seek, all threads
         * are inactive, so one map is cached to avoid a re-allocation in the
         * next decoding iteration and the rest can be freed directly. */
        int queue_limit = FF_ARRAY_ELEMS(s->segmentation_maps);

        if (can_direct_free)
            queue_limit = 1;

        if (s->num_maps_to_be_freed < queue_limit) {
            s->segmentation_maps[s->num_maps_to_be_freed] = f->ref_index[0];
            s->num_maps_to_be_freed++;
        } else if (can_direct_free) {
            /* vp8_decode_flush(), but our queue is full */
            av_free(f->ref_index[0]);
        }
        /* else: MEMLEAK (should never happen, but better that than crash) */

        f->ref_index[0] = NULL;
    }

    ff_thread_release_buffer(s->avctx, f);
}

91 92
/**
 * Drop all frames held by the decoder and optionally its working buffers.
 *
 * Frames are only released when this context is not a copy
 * (avctx->internal->is_copy is zero); the framep table is always cleared.
 *
 * @param avctx               codec context
 * @param prefer_delayed_free forwarded to vp8_release_frame()
 * @param can_direct_free     forwarded to vp8_release_frame()
 * @param free_mem            non-zero: also call free_buffers() and mark the
 *                            queued segmentation maps as invalid
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx,
                                  int prefer_delayed_free, int can_direct_free, int free_mem)
{
    VP8Context *s = avctx->priv_data;

    if (!avctx->internal->is_copy) {
        int n = 0;

        while (n < 5) {
            /* only frames that currently hold picture data are released */
            if (s->frames[n].data[0])
                vp8_release_frame(s, &s->frames[n], prefer_delayed_free, can_direct_free);
            n++;
        }
    }
    memset(s->framep, 0, sizeof(s->framep));

    if (!free_mem)
        return;

    free_buffers(s);
    s->maps_are_invalid = 1;
}

/**
 * Flush entry point: release all frames while keeping the working buffers.
 *
 * @param avctx codec context
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    /* spelled out so the call below documents itself */
    const int prefer_delayed_free = 1;
    const int can_direct_free     = 1;
    const int free_mem            = 0;

    vp8_decode_flush_impl(avctx, prefer_delayed_free, can_direct_free, free_mem);
}

/**
 * (Re)configure the decoder for a frame size and allocate the buffers whose
 * size depends on it.
 *
 * When the requested size differs from the one stored in the codec context,
 * the size is validated, all frames and buffers are released through
 * vp8_decode_flush_impl() and the new dimensions are installed. The
 * macroblock counts and the macroblock layout are then recomputed and the
 * working buffers are allocated.
 *
 * @param s      decoder context
 * @param width  requested frame width
 * @param height requested frame height
 * @return 0 on success, AVERROR_INVALIDDATA if av_image_check_size() rejects
 *         the size, AVERROR(ENOMEM) if any allocation fails; in the latter
 *         case every buffer allocated here is released again, so the context
 *         is never left with a partial set of tables
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i;

    if (width  != s->avctx->width ||
        height != s->avctx->height) {
        if (av_image_check_size(width, height, 0, s->avctx))
            return AVERROR_INVALIDDATA;

        vp8_decode_flush_impl(s->avctx, 1, 0, 1);

        avcodec_set_dimensions(s->avctx, width, height);
    }

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
    }
    else // Sliced threading
        s->macroblocks_base       = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
    s->top_nnz                    = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border                 = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->thread_data                = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));

    /* All allocations must be validated before thread_data is dereferenced
     * in the loop below. intra4x4_pred_mode_top is only allocated (and hence
     * only required) when mb_layout is 0. */
    if (!s->macroblocks_base || !s->top_nnz || !s->top_border ||
        !s->thread_data || (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength) {
            free_buffers(s);
            return AVERROR(ENOMEM);
        }
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    /* the first element of macroblocks_base is kept in front of macroblocks */
    s->macroblocks        = s->macroblocks_base + 1;

    return 0;
}

/**
 * Read the segmentation part of the frame header from s->c.
 *
 * Reads, in this order: the update_map flag; a flag telling whether the
 * per-segment feature data follows and, if so, the absolute_vals flag, four
 * signed 7-bit base quantizers and four signed 6-bit filter levels; and, when
 * update_map is set, three segment-id probabilities (255 when not coded).
 *
 * @param s decoder context
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int seg, node;

    s->segmentation.update_map = vp8_rac_get(rc);

    if (vp8_rac_get(rc)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(rc);

        for (seg = 0; seg < 4; seg++)
            s->segmentation.base_quant[seg] = vp8_rac_get_sint(rc, 7);

        for (seg = 0; seg < 4; seg++)
            s->segmentation.filter_level[seg] = vp8_rac_get_sint(rc, 6);
    }

    if (!s->segmentation.update_map)
        return;

    for (node = 0; node < 3; node++) {
        if (vp8_rac_get(rc))
            s->prob->segmentid[node] = vp8_rac_get_uint(rc, 8);
        else
            s->prob->segmentid[node] = 255;
    }
}

/**
 * Read loop-filter delta updates from s->c.
 *
 * For each of the four reference deltas and then for each mode delta
 * (indices MODE_I4x4 .. VP8_MVMODE_SPLIT) a flag says whether the entry is
 * updated; an updated entry is coded as a 6-bit magnitude followed by a
 * sign flag. Entries whose flag is clear keep their previous value.
 *
 * @param s decoder context
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int idx;

    for (idx = 0; idx < 4; idx++) {
        int delta;

        if (!vp8_rac_get(rc))
            continue;

        delta = vp8_rac_get_uint(rc, 6);
        if (vp8_rac_get(rc))
            delta = -delta;
        s->lf_delta.ref[idx] = delta;
    }

    for (idx = MODE_I4x4; idx <= VP8_MVMODE_SPLIT; idx++) {
        int delta;

        if (!vp8_rac_get(rc))
            continue;

        delta = vp8_rac_get_uint(rc, 6);
        if (vp8_rac_get(rc))
            delta = -delta;
        s->lf_delta.mode[idx] = delta;
    }
}

/**
 * Set up one range decoder per coefficient partition.
 *
 * The partition count is 1 << (2 bits read from s->c). buf starts with a
 * table of 3-byte little-endian sizes for all partitions but the last; the
 * partition data follows the table and the last partition takes whatever
 * remains of the buffer.
 *
 * @param s        decoder context
 * @param buf      start of the partition size table
 * @param buf_size number of bytes available at buf
 * @return 0 on success, -1 if the table or any partition does not fit into
 *         buf_size
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *size_table = buf;
    int part, table_bytes;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);
    table_bytes             = 3 * (s->num_coeff_partitions - 1);

    buf      += table_bytes;
    buf_size -= table_bytes;
    if (buf_size < 0)
        return -1;

    for (part = 0; part < s->num_coeff_partitions - 1; part++) {
        int part_size = AV_RL24(size_table + 3 * part);

        if (part_size > buf_size)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[part], buf, part_size);
        buf      += part_size;
        buf_size -= part_size;
    }
    /* the last partition has no size entry: it gets the remaining bytes */
    ff_vp56_init_range_decoder(&s->coeff_partition[part], buf, buf_size);

    return 0;
}

/**
 * Read the quantizer indices from s->c and fill s->qmat for all four
 * segments.
 *
 * A 7-bit base index and five signed 4-bit deltas are read. For each segment
 * the base index is either the frame value or, with segmentation enabled,
 * the segment's base_quant (added to the frame value unless absolute_vals is
 * set). Each index is clipped to 7 bits before the table lookup.
 *
 * @param s decoder context
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *rc = &s->c;
    int seg;

    int yac_qi     = vp8_rac_get_uint(rc, 7);
    int ydc_delta  = vp8_rac_get_sint(rc, 4);
    int y2dc_delta = vp8_rac_get_sint(rc, 4);
    int y2ac_delta = vp8_rac_get_sint(rc, 4);
    int uvdc_delta = vp8_rac_get_sint(rc, 4);
    int uvac_delta = vp8_rac_get_sint(rc, 4);

    for (seg = 0; seg < 4; seg++) {
        int qi = yac_qi;

        if (s->segmentation.enabled) {
            qi = s->segmentation.base_quant[seg];
            if (!s->segmentation.absolute_vals)
                qi += yac_qi;
        }

        s->qmat[seg].luma_qmul[0]    = vp8_dc_qlookup[av_clip_uintp2(qi + ydc_delta, 7)];
        s->qmat[seg].luma_qmul[1]    = vp8_ac_qlookup[av_clip_uintp2(qi, 7)];
        s->qmat[seg].luma_dc_qmul[0] = 2 * vp8_dc_qlookup[av_clip_uintp2(qi + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[seg].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(qi + y2ac_delta, 7)]) >> 16;
        s->qmat[seg].chroma_qmul[0]  = vp8_dc_qlookup[av_clip_uintp2(qi + uvdc_delta, 7)];
        s->qmat[seg].chroma_qmul[1]  = vp8_ac_qlookup[av_clip_uintp2(qi + uvac_delta, 7)];

        /* lower bound for the luma-DC AC factor, upper bound for chroma DC */
        s->qmat[seg].luma_dc_qmul[1] = FFMAX(s->qmat[seg].luma_dc_qmul[1], 8);
        s->qmat[seg].chroma_qmul[0]  = FFMIN(s->qmat[seg].chroma_qmul[0], 132);
    }
}

/**
 * Determine which buffer golden or altref should be updated with after this
 * frame. The spec isn't clear here, so this follows the original author's
 * understanding of what libvpx does.
 *
 * If the update flag is set, the reference is updated with the current
 * frame and nothing is read from the bitstream. Otherwise a 2-bit value is
 * read from s->c and means:
 *      1: VP56_FRAME_PREVIOUS
 *      2: update golden with altref, or update altref with golden
 *      any other value: no update
 *
 * @param s      decoder context
 * @param update update flag for this reference, already read by the caller
 * @param ref    reference being decided (VP56_FRAME_GOLDEN, or
 *               VP56_FRAME_GOLDEN2 for altref)
 * @return the frame to copy from, or VP56_FRAME_NONE
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    int code;

    if (update)
        return VP56_FRAME_CURRENT;

    code = vp8_rac_get_uint(&s->c, 2);

    if (code == 1)
        return VP56_FRAME_PREVIOUS;

    if (code == 2) {
        /* copy from the other one of the golden/altref pair */
        if (ref == VP56_FRAME_GOLDEN)
            return VP56_FRAME_GOLDEN2;
        return VP56_FRAME_GOLDEN;
    }

    return VP56_FRAME_NONE;
}

/**
 * Read the golden/altref update information from s->c.
 *
 * Both update flags are read first; ref_to_update() then resolves golden
 * followed by altref, reading further bits for each flag that is clear.
 *
 * @param s decoder context; update_golden and update_altref are written
 */
static void update_refs(VP8Context *s)
{
    int golden_flag, altref_flag;

    /* both flags precede the optional per-reference codes in the stream */
    golden_flag = vp8_rac_get(&s->c);
    altref_flag = vp8_rac_get(&s->c);

    s->update_golden = ref_to_update(s, golden_flag, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, altref_flag, VP56_FRAME_GOLDEN2);
}

/**
 * Parse the frame header and prepare the context for decoding the frame.
 *
 * The first three bytes carry the keyframe flag, profile, visibility flag
 * and the size of the first partition. Keyframes continue with a 7-byte
 * block holding the start code and the frame dimensions, and reset the
 * probabilities and segmentation state to their defaults. The remaining
 * header fields are read through the range decoder s->c.
 *
 * @param s        decoder context
 * @param buf      start of the frame data
 * @param buf_size number of bytes available at buf
 * @return 0 on success, AVERROR_INVALIDDATA on a truncated or malformed
 *         header, or the negative error returned by update_dimensions()
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    /* The 3-byte frame tag is read unconditionally below. */
    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    /* keyframes carry 7 extra bytes between the tag and the first partition */
    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        /* 14-bit dimensions; the top two bits of each 16-bit field are the scale */
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            av_log_missing_feature(s->avctx, "Upscaling", 1);

        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    /* buf now points just past the first partition */
    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    /* token probability updates: one coded value is applied to every
     * coefficient position listed for band j in vp8_coeff_band_indexes */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}

J
Jason Garrett-Glaser 已提交
442
/**
 * Clip a motion vector to the range [s->mv_min, s->mv_max], per component.
 *
 * @param s   decoder context providing the limits
 * @param dst receives the clipped vector
 * @param src vector to clip
 */
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    const int clipped_x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    const int clipped_y = av_clip(src->y, s->mv_min.y, s->mv_max.y);

    dst->x = clipped_x;
    dst->y = clipped_y;
}

/**
 * Motion vector coding, 17.1.
 *
 * Reads one motion vector component. p[0] selects between the long form
 * (magnitude bits 0-2, then bits 9 down to 4, read with p[9..18]; bit 3 is
 * set implicitly when no higher bit is set, otherwise read with p[12]) and
 * the short form (a three-level tree starting at p[2]). A non-zero
 * magnitude is followed by a sign read with p[1].
 *
 * @param c range decoder to read from
 * @param p probabilities for this component
 * @return the signed component value
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int magnitude = 0;

    if (!vp56_rac_get_prob_branchy(c, p[0])) {
        // small_mvtree
        const uint8_t *node = p + 2;
        int bit;

        bit        = vp56_rac_get_prob(c, *node);
        node      += 1 + 3*bit;
        magnitude += 4*bit;
        bit        = vp56_rac_get_prob(c, *node);
        node      += 1 + bit;
        magnitude += 2*bit;
        magnitude += vp56_rac_get_prob(c, *node);
    } else {
        int shift;

        for (shift = 0; shift < 3; shift++)
            magnitude += vp56_rac_get_prob(c, p[9 + shift]) << shift;
        for (shift = 9; shift > 3; shift--)
            magnitude += vp56_rac_get_prob(c, p[9 + shift]) << shift;
        /* bit 3 is only coded when one of the bits above it is set */
        if (!(magnitude & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            magnitude += 8;
    }

    /* the sign is only present for non-zero values */
    if (magnitude && vp56_rac_get_prob(c, p[1]))
        return -magnitude;
    return magnitude;
}

479 480
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
D
David Conrad 已提交
481
{
482 483 484
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
D
David Conrad 已提交
485
        return vp8_submv_prob[2];
486
    return vp8_submv_prob[1-!!left];
D
David Conrad 已提交
487 488 489 490
}

/**
 * Split motion vector prediction, 16.4.
491
 * @returns the number of motion vectors parsed (2, 4 or 16)
D
David Conrad 已提交
492
 */
493
static av_always_inline
D
Daniel Kang 已提交
494
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
D
David Conrad 已提交
495
{
496 497
    int part_idx;
    int n, num;
D
Daniel Kang 已提交
498
    VP8Macroblock *top_mb;
499 500
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
D
Daniel Kang 已提交
501
                  *mbsplits_top,
502
                  *mbsplits_cur, *firstidx;
D
Daniel Kang 已提交
503
    VP56mv *top_mv;
504 505
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;
D
David Conrad 已提交
506

D
Daniel Kang 已提交
507 508 509 510 511 512 513
    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

514 515 516 517 518 519 520 521 522 523 524 525 526 527 528
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

D
David Conrad 已提交
529
    for (n = 0; n < num; n++) {
530
        int k = firstidx[n];
531
        uint32_t left, above;
532 533
        const uint8_t *submv_prob;

534 535 536 537 538 539 540 541
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
542 543

        submv_prob = get_submv_prob(left, above);
D
David Conrad 已提交
544

545 546 547 548 549 550 551 552 553 554 555 556
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
557
            AV_WN32A(&mb->bmv[n], left);
D
David Conrad 已提交
558 559
        }
    }
560 561

    return num;
D
David Conrad 已提交
562 563
}

564
static av_always_inline
D
Daniel Kang 已提交
565
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
566
{
D
Daniel Kang 已提交
567
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
568
                                  mb - 1 /* left */,
D
Daniel Kang 已提交
569
                                  0 /* top-left */ };
570
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
571
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
572 573
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
574
    int8_t *sign_bias = s->sign_bias;
575 576 577 578
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

D
Daniel Kang 已提交
579 580 581 582 583 584 585 586 587
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

588 589
    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
590
    AV_ZERO32(&near_mv[2]);
591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621

    /* Process MB on top, left and top-left */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
622
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
623 624 625 626 627 628 629 630 631 632 633 634
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
J
Jason Garrett-Glaser 已提交
635
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
636 637 638
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
639 640 641

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
D
Daniel Kang 已提交
642
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
643 644 645 646 647 648
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
J
Jason Garrett-Glaser 已提交
649
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
650 651 652
                mb->bmv[0] = mb->mv;
            }
        } else {
J
Jason Garrett-Glaser 已提交
653
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
654 655 656 657 658 659 660 661 662
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

663
static av_always_inline
664
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
D
Daniel Kang 已提交
665
                           int mb_x, int keyframe, int layout)
D
David Conrad 已提交
666
{
667 668
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

D
Daniel Kang 已提交
669 670 671 672
    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
673
    if (keyframe) {
674
        int x, y;
D
Daniel Kang 已提交
675
        uint8_t* top;
676
        uint8_t* const left = s->intra4x4_pred_mode_left;
D
Daniel Kang 已提交
677 678 679 680
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
681 682
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
683 684 685 686 687
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
D
David Conrad 已提交
688 689
            }
        }
690
    } else {
691
        int i;
692 693
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
D
David Conrad 已提交
694 695 696
    }
}

697
static av_always_inline
D
Daniel Kang 已提交
698 699
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
D
David Conrad 已提交
700 701 702 703
{
    VP56RangeCoder *c = &s->c;

    if (s->segmentation.update_map)
704
        *segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid);
705
    else if (s->segmentation.enabled)
R
Ronald S. Bultje 已提交
706
        *segment = ref ? *ref : *segment;
707
    mb->segment = *segment;
D
David Conrad 已提交
708

709
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
D
David Conrad 已提交
710 711 712 713 714

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
D
Daniel Kang 已提交
715
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
716 717
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
D
Daniel Kang 已提交
718 719 720 721 722
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
723
        }
D
David Conrad 已提交
724

725
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
D
David Conrad 已提交
726
        mb->ref_frame = VP56_FRAME_CURRENT;
727
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
D
David Conrad 已提交
728
        // inter MB, 16.2
729 730
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
D
David Conrad 已提交
731 732 733
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
J
Jason Garrett-Glaser 已提交
734
        s->ref_count[mb->ref_frame-1]++;
D
David Conrad 已提交
735 736

        // motion vectors, 16.3
D
Daniel Kang 已提交
737
        decode_mvs(s, mb, mb_x, mb_y, layout);
D
David Conrad 已提交
738 739 740 741
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

742
        if (mb->mode == MODE_I4x4)
D
Daniel Kang 已提交
743
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
D
David Conrad 已提交
744

745
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
D
David Conrad 已提交
746
        mb->ref_frame = VP56_FRAME_CURRENT;
747
        mb->partitioning = VP8_SPLITMVMODE_NONE;
748
        AV_ZERO32(&mb->bmv[0]);
D
David Conrad 已提交
749 750 751
    }
}

752
#ifndef decode_block_coeffs_internal
D
David Conrad 已提交
753
/**
754
 * @param r arithmetic bitstream reader context
755 756
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
D
David Conrad 已提交
757
 * @param i initial coeff index, 0 unless a separate DC block is coded
758
 * @param qmul array holding the dc/ac dequant factor at position 0/1
D
David Conrad 已提交
759 760 761
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
762
static int decode_block_coeffs_internal(VP56RangeCoder *r, DCTELEM block[16],
763
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
764
                                        int i, uint8_t *token_prob, int16_t qmul[2])
D
David Conrad 已提交
765
{
766
    VP56RangeCoder c = *r;
767
    goto skip_eob;
768
    do {
769
        int coeff;
770 771
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;
D
David Conrad 已提交
772

773
skip_eob:
774
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
775
            if (++i == 16)
776
                break; // invalid input; blocks should end with EOB
777
            token_prob = probs[i][0];
778
            goto skip_eob;
779 780
        }

781
        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
782
            coeff = 1;
783
            token_prob = probs[i+1][1];
784
        } else {
785 786
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
787
                if (coeff)
788
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
789 790 791
                coeff += 2;
            } else {
                // DCT_CAT*
792 793 794
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
795 796
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
797 798
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
799 800
                    }
                } else {    // DCT_CAT3 and up
801 802
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
803 804
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
805
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
806 807
                }
            }
808
            token_prob = probs[i+1][2];
809
        }
810
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
811
    } while (++i < 16);
812

813
    *r = c;
814
    return i;
D
David Conrad 已提交
815
}
816
#endif
D
David Conrad 已提交
817

818 819 820 821 822 823 824 825 826 827 828
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
829 830
static av_always_inline
int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
831
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
832 833 834 835 836 837 838 839
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}

840
static av_always_inline
D
Daniel Kang 已提交
841
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
842
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
D
David Conrad 已提交
843 844 845
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
846
    int segment = mb->segment;
847
    int block_dc = 0;
D
David Conrad 已提交
848 849 850 851 852

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
D
Daniel Kang 已提交
853
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
D
David Conrad 已提交
854 855
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
856 857 858 859
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
D
Daniel Kang 已提交
860
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
861
            else
D
Daniel Kang 已提交
862
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
863
        }
D
David Conrad 已提交
864 865 866 867 868 869 870
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
J
Jason Garrett-Glaser 已提交
871
            nnz_pred = l_nnz[y] + t_nnz[x];
D
Daniel Kang 已提交
872
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
J
Jason Garrett-Glaser 已提交
873
                                      nnz_pred, s->qmat[segment].luma_qmul);
874
            // nnz+block_dc may be one more than the actual last index, but we don't care
D
Daniel Kang 已提交
875
            td->non_zero_count_cache[y][x] = nnz + block_dc;
D
David Conrad 已提交
876 877 878 879 880 881 882 883 884 885 886
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
D
Daniel Kang 已提交
887
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
D
David Conrad 已提交
888
                                          nnz_pred, s->qmat[segment].chroma_qmul);
D
Daniel Kang 已提交
889
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
D
David Conrad 已提交
890 891 892 893 894 895 896 897 898 899 900
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921
/**
 * Save the bottom pixel row of a macroblock into top_border:
 * 16 luma bytes at offset 0 and, unless simple is set, 8 Cb bytes at
 * offset 16 and 8 Cr bytes at offset 24.
 */
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    uint8_t *last_y_row = src_y + 15*linesize;

    AV_COPY128(top_border, last_y_row);
    if (simple)
        return;

    // chroma is only saved when simple is not set
    AV_COPY64(top_border+16, src_cb + 7*uvlinesize);
    AV_COPY64(top_border+24, src_cr + 7*uvlinesize);
}

/**
 * Exchange (or copy) the saved top border with the pixel row directly above
 * the macroblock in the frame.
 *
 * The 8 bytes left of the macroblock and its first 8 bytes follow the xchg
 * argument (swap when non-zero, copy from the border into the frame
 * otherwise); all remaining spans are always swapped.
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
    uint8_t *above_y  = src_y  -   linesize;
    uint8_t *above_cb = src_cb - uvlinesize;
    uint8_t *above_cr = src_cr - uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(top_border_m1+8, above_y-8, xchg);
    XCHG(top_border,      above_y,   xchg);
    XCHG(top_border+8,    above_y+8, 1);
    if (mb_x < mb_width-1)
        XCHG(top_border+32, above_y+16, 1);

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (simple && mb_y)
        return;

    XCHG(top_border_m1+16, above_cb-8, xchg);
    XCHG(top_border_m1+24, above_cr-8, xchg);
    XCHG(top_border+16,    above_cb, 1);
    XCHG(top_border+24,    above_cr, 1);
}

943
static av_always_inline
944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    } else {
        return mb_y ? mode : LEFT_DC_PRED8x8;
    }
}

/**
 * Select the replacement for TM prediction at a macroblock position:
 * the given mode when neither mb_x nor mb_y is zero, otherwise a
 * substitute chosen by which of the two is zero.
 */
static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x && mb_y)
        return mode;
    if (mb_x)                   // mb_y == 0
        return HOR_PRED8x8;
    if (mb_y)                   // mb_x == 0
        return VERT_PRED8x8;
    return DC_129_PRED8x8;      // mb_x == 0 and mb_y == 0
}

static av_always_inline
int check_intra_pred8x8_mode(int mode, int mb_x, int mb_y)
D
David Conrad 已提交
965 966
{
    if (mode == DC_PRED8x8) {
967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    } else {
        return mode;
    }
}

/**
 * Adjust a 16x16/8x8 intra prediction mode for the macroblock position
 * (variant used when CODEC_FLAG_EMU_EDGE is set): DC, vertical, horizontal
 * and TM prediction are each remapped when mb_x and/or mb_y is zero.
 */
static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    if (mode == DC_PRED8x8)
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);
    if (mode == VERT_PRED8x8)
        return mb_y ? mode : DC_127_PRED8x8;
    if (mode == HOR_PRED8x8)
        return mb_x ? mode : DC_129_PRED8x8;
    if (mode == PLANE_PRED8x8 /*TM*/)
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);
    return mode;
}

/**
 * Select the replacement for 4x4 TM prediction at a block position:
 * the given mode when neither mb_x nor mb_y is zero, otherwise a
 * substitute chosen by which of the two is zero.
 */
static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x && mb_y)
        return mode;
    if (mb_x)                   // mb_y == 0
        return HOR_VP8_PRED;
    if (mb_y)                   // mb_x == 0
        return VERT_VP8_PRED;
    return DC_129_PRED;         // mb_x == 0 and mb_y == 0
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
1016
        }
1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
D
David Conrad 已提交
1029 1030 1031 1032
    }
    return mode;
}

1033
static av_always_inline
D
Daniel Kang 已提交
1034 1035
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
D
David Conrad 已提交
1036
{
1037
    AVCodecContext *avctx = s->avctx;
M
Mans Rullgard 已提交
1038 1039
    int x, y, mode, nnz;
    uint32_t tr;
D
David Conrad 已提交
1040

1041 1042
    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
D
Daniel Kang 已提交
1043
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1044 1045 1046 1047
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

D
David Conrad 已提交
1048
    if (mb->mode < MODE_I4x4) {
1049 1050 1051 1052 1053
        if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // tested
            mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
        } else {
            mode = check_intra_pred8x8_mode(mb->mode, mb_x, mb_y);
        }
D
David Conrad 已提交
1054 1055 1056
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
1057
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1058
        uint8_t tr_top[4] = { 127, 127, 127, 127 };
D
David Conrad 已提交
1059 1060 1061 1062 1063 1064 1065

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
1066 1067
        if (!(!mb_y && avctx->flags & CODEC_FLAG_EMU_EDGE) &&
            mb_x == s->mb_width-1) {
M
Mans Rullgard 已提交
1068
            tr = tr_right[-1]*0x01010101u;
D
David Conrad 已提交
1069 1070 1071
            tr_right = (uint8_t *)&tr;
        }

1072
        if (mb->skip)
D
Daniel Kang 已提交
1073
            AV_ZERO128(td->non_zero_count_cache);
1074

D
David Conrad 已提交
1075 1076 1077
        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
1078 1079 1080 1081 1082 1083 1084
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

                if ((y == 0 || x == 3) && mb_y == 0 && avctx->flags & CODEC_FLAG_EMU_EDGE) {
                    topright = tr_top;
                } else if (x == 3)
D
David Conrad 已提交
1085 1086
                    topright = tr_right;

1087 1088 1089 1090 1091 1092 1093
                if (avctx->flags & CODEC_FLAG_EMU_EDGE) { // mb_x+x or mb_y+y is a hack but works
                    mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                    if (copy) {
                        dst = copy_dst + 12;
                        linesize = 8;
                        if (!(mb_y + y)) {
                            copy_dst[3] = 127U;
R
Ronald S. Bultje 已提交
1094
                            AV_WN32A(copy_dst+4, 127U * 0x01010101U);
1095
                        } else {
R
Ronald S. Bultje 已提交
1096
                            AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119
                            if (!(mb_x + x)) {
                                copy_dst[3] = 129U;
                            } else {
                                copy_dst[3] = ptr[4*x-s->linesize-1];
                            }
                        }
                        if (!(mb_x + x)) {
                            copy_dst[11] =
                            copy_dst[19] =
                            copy_dst[27] =
                            copy_dst[35] = 129U;
                        } else {
                            copy_dst[11] = ptr[4*x              -1];
                            copy_dst[19] = ptr[4*x+s->linesize  -1];
                            copy_dst[27] = ptr[4*x+s->linesize*2-1];
                            copy_dst[35] = ptr[4*x+s->linesize*3-1];
                        }
                    }
                } else {
                    mode = intra4x4[x];
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
R
Ronald S. Bultje 已提交
1120 1121 1122 1123
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1124
                }
D
David Conrad 已提交
1125

D
Daniel Kang 已提交
1126
                nnz = td->non_zero_count_cache[y][x];
D
David Conrad 已提交
1127 1128
                if (nnz) {
                    if (nnz == 1)
D
Daniel Kang 已提交
1129
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
D
David Conrad 已提交
1130
                    else
D
Daniel Kang 已提交
1131
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
D
David Conrad 已提交
1132 1133 1134 1135 1136
                }
                topright += 4;
            }

            ptr   += 4*s->linesize;
1137
            intra4x4 += 4;
D
David Conrad 已提交
1138 1139 1140
        }
    }

1141
    if (avctx->flags & CODEC_FLAG_EMU_EDGE) {
1142
        mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
1143
    } else {
1144
        mode = check_intra_pred8x8_mode(mb->chroma_pred_mode, mb_x, mb_y);
1145
    }
D
David Conrad 已提交
1146 1147
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1148

D
Daniel Kang 已提交
1149
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE && !mb_y) && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1150 1151 1152
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
D
David Conrad 已提交
1153 1154
}

1155 1156 1157 1158 1159 1160 1161
/* Per-subpel-position pixel counts, indexed by the subpel position (0..7). */
static const uint8_t subpel_idx[3][8] = {
    /* [0] nr. of left extra pixels, also function pointer index */
    { 0, 1, 2, 1, 2, 1, 2, 1 },
    /* [1] nr. of extra pixels required */
    { 0, 3, 5, 3, 5, 3, 5, 3 },
    /* [2] nr. of right extra pixels */
    { 0, 2, 3, 2, 3, 2, 3, 2 },
};

D
David Conrad 已提交
1162
/**
1163
 * luma MC function
D
David Conrad 已提交
1164 1165 1166
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
1167
 * @param ref reference picture buffer at origin (0, 0)
D
David Conrad 已提交
1168 1169 1170 1171 1172 1173 1174 1175
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
1176
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
D
David Conrad 已提交
1177
 */
1178
static av_always_inline
D
Daniel Kang 已提交
1179 1180
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
                 AVFrame *ref, const VP56mv *mv,
1181 1182 1183
                 int x_off, int y_off, int block_w, int block_h,
                 int width, int height, int linesize,
                 vp8_mc_func mc_func[3][3])
D
David Conrad 已提交
1184
{
R
Ronald S. Bultje 已提交
1185 1186
    uint8_t *src = ref->data[0];

1187
    if (AV_RN32A(mv)) {
1188 1189 1190 1191 1192 1193

        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;
1194 1195

        // edge emulation
R
Ronald S. Bultje 已提交
1196
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1197
        src += y_off * linesize + x_off;
1198 1199
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
D
Daniel Kang 已提交
1200
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src - my_idx * linesize - mx_idx, linesize,
1201 1202
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1203
            src = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1204 1205
        }
        mc_func[my_idx][mx_idx](dst, linesize, src, linesize, block_h, mx, my);
R
Ronald S. Bultje 已提交
1206 1207
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1208
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
R
Ronald S. Bultje 已提交
1209
    }
D
David Conrad 已提交
1210 1211
}

1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228
/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
1229
static av_always_inline
D
Daniel Kang 已提交
1230 1231
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
                   AVFrame *ref, const VP56mv *mv, int x_off, int y_off,
1232 1233 1234
                   int block_w, int block_h, int width, int height, int linesize,
                   vp8_mc_func mc_func[3][3])
{
R
Ronald S. Bultje 已提交
1235 1236
    uint8_t *src1 = ref->data[1], *src2 = ref->data[2];

1237 1238 1239 1240 1241 1242 1243 1244 1245 1246
    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
R
Ronald S. Bultje 已提交
1247
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1248 1249
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
D
Daniel Kang 已提交
1250
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src1 - my_idx * linesize - mx_idx, linesize,
1251 1252
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1253
            src1 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1254 1255
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);

D
Daniel Kang 已提交
1256
            s->dsp.emulated_edge_mc(td->edge_emu_buffer, src2 - my_idx * linesize - mx_idx, linesize,
1257 1258
                                    block_w + subpel_idx[1][mx], block_h + subpel_idx[1][my],
                                    x_off - mx_idx, y_off - my_idx, width, height);
D
Daniel Kang 已提交
1259
            src2 = td->edge_emu_buffer + mx_idx + linesize * my_idx;
1260 1261 1262 1263 1264 1265
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
R
Ronald S. Bultje 已提交
1266
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1267 1268 1269 1270 1271
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

1272
static av_always_inline
D
Daniel Kang 已提交
1273
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1274 1275 1276 1277
                 AVFrame *ref_frame, int x_off, int y_off,
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
1278 1279 1280 1281
{
    VP56mv uvmv = *mv;

    /* Y */
D
Daniel Kang 已提交
1282
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
R
Ronald S. Bultje 已提交
1283
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1284 1285
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);
1286 1287 1288 1289 1290 1291 1292 1293 1294 1295

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
D
Daniel Kang 已提交
1296
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
R
Ronald S. Bultje 已提交
1297 1298
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
1299 1300
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
1301 1302
}

1303 1304
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
1305
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1306
{
J
Jason Garrett-Glaser 已提交
1307 1308
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
J
Jason Garrett-Glaser 已提交
1309
        int x_off = mb_x << 4, y_off = mb_y << 4;
J
Jason Garrett-Glaser 已提交
1310 1311
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
J
Jason Garrett-Glaser 已提交
1312 1313
        uint8_t **src= s->framep[ref]->data;
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
R
Ronald S. Bultje 已提交
1314 1315 1316
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
J
Jason Garrett-Glaser 已提交
1317 1318 1319 1320
        s->dsp.prefetch(src[0]+off, s->linesize, 4);
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
        s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
    }
1321 1322
}

D
David Conrad 已提交
1323 1324 1325
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
1326
static av_always_inline
D
Daniel Kang 已提交
1327 1328
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
D
David Conrad 已提交
1329 1330 1331
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
1332 1333
    AVFrame *ref = s->framep[mb->ref_frame];
    VP56mv *bmv = mb->bmv;
D
David Conrad 已提交
1334

1335 1336
    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
D
Daniel Kang 已提交
1337
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1338
                    0, 0, 16, 16, width, height, &mb->mv);
1339
        break;
1340
    case VP8_SPLITMVMODE_4x4: {
D
David Conrad 已提交
1341
        int x, y;
1342
        VP56mv uvmv;
D
David Conrad 已提交
1343 1344 1345 1346

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
D
Daniel Kang 已提交
1347
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
R
Ronald S. Bultje 已提交
1348
                            ref, &bmv[4*y + x],
1349 1350 1351
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
D
David Conrad 已提交
1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1367 1368
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
D
David Conrad 已提交
1369 1370 1371 1372
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
D
Daniel Kang 已提交
1373
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
R
Ronald S. Bultje 已提交
1374
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1375 1376 1377
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
D
David Conrad 已提交
1378 1379
            }
        }
1380 1381 1382
        break;
    }
    case VP8_SPLITMVMODE_16x8:
D
Daniel Kang 已提交
1383
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1384
                    0, 0, 16, 8, width, height, &bmv[0]);
D
Daniel Kang 已提交
1385
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1386
                    0, 8, 16, 8, width, height, &bmv[1]);
1387 1388
        break;
    case VP8_SPLITMVMODE_8x16:
D
Daniel Kang 已提交
1389
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1390
                    0, 0, 8, 16, width, height, &bmv[0]);
D
Daniel Kang 已提交
1391
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1392
                    8, 0, 8, 16, width, height, &bmv[1]);
1393 1394
        break;
    case VP8_SPLITMVMODE_8x8:
D
Daniel Kang 已提交
1395
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1396
                    0, 0, 8, 8, width, height, &bmv[0]);
D
Daniel Kang 已提交
1397
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1398
                    8, 0, 8, 8, width, height, &bmv[1]);
D
Daniel Kang 已提交
1399
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1400
                    0, 8, 8, 8, width, height, &bmv[2]);
D
Daniel Kang 已提交
1401
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1402
                    8, 8, 8, 8, width, height, &bmv[3]);
1403
        break;
D
David Conrad 已提交
1404 1405 1406
    }
}

D
Daniel Kang 已提交
1407 1408
/**
 * Add the inverse-transformed residual of one macroblock onto the pixels
 * already present in dst.
 *
 * td->non_zero_count_cache[] carries one byte per 4x4 block. Each group of
 * four bytes is loaded as one 32-bit word, so that a group with no
 * coefficients is skipped outright and a group in which no byte exceeds 1 is
 * passed to the batched DC-only routine. Otherwise blocks are handled one at
 * a time until the remaining bytes of the word are all zero.
 *
 * @param s   decoder context (DSP function table, luma/chroma line sizes)
 * @param td  per-thread data holding coefficient blocks and their counts
 * @param dst destination pointers: dst[0] luma, dst[1] and dst[2] chroma
 * @param mb  current macroblock; luma is not touched here when its mode is
 *            MODE_I4x4 (presumably handled during intra prediction - confirm)
 */
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int row, col, plane, blk;

    if (mb->mode != MODE_I4x4) {
        uint8_t *luma = dst[0];

        for (row = 0; row < 4; row++, luma += 4 * s->linesize) {
            uint32_t counts = AV_RL32(td->non_zero_count_cache[row]);

            if (!counts)
                continue;

            /* every block of this row has at most a DC coefficient */
            if (!(counts & ~0x01010101)) {
                s->vp8dsp.vp8_idct_dc_add4y(luma, td->block[row], s->linesize);
                continue;
            }

            /* lowest byte belongs to the leftmost block; stop early once
             * no block further right has coefficients */
            for (col = 0; col < 4 && counts; col++, counts >>= 8) {
                uint8_t n = (uint8_t)counts;

                if (n == 1)
                    s->vp8dsp.vp8_idct_dc_add(luma + 4 * col, td->block[row][col], s->linesize);
                else if (n > 1)
                    s->vp8dsp.vp8_idct_add(luma + 4 * col, td->block[row][col], s->linesize);
            }
        }
    }

    for (plane = 0; plane < 2; plane++) {
        uint32_t counts = AV_RL32(td->non_zero_count_cache[4 + plane]);
        uint8_t *chroma = dst[1 + plane];

        if (!counts)
            continue;

        if (!(counts & ~0x01010101)) {
            s->vp8dsp.vp8_idct_dc_add4uv(chroma, td->block[4 + plane], s->uvlinesize);
            continue;
        }

        /* the four chroma blocks form a 2x2 grid in raster order:
         * bit 0 of blk selects the column, bit 1 the row */
        for (blk = 0; blk < 4 && counts; blk++, counts >>= 8) {
            uint8_t n    = (uint8_t)counts;
            uint8_t *out = chroma + (blk >> 1) * 4 * s->uvlinesize + 4 * (blk & 1);

            if (n == 1)
                s->vp8dsp.vp8_idct_dc_add(out, td->block[4 + plane][blk], s->uvlinesize);
            else if (n > 1)
                s->vp8dsp.vp8_idct_add(out, td->block[4 + plane][blk], s->uvlinesize);
        }
    }
}

1460
/**
 * Compute the loop-filter parameters of one macroblock and store them in f.
 *
 * The base level comes from the frame header or, with segmentation enabled,
 * from the macroblock's segment (either as an absolute value or as an offset
 * to the frame level). Optional per-reference and per-mode deltas are added
 * and the result is clipped to 6 unsigned bits. The interior limit is
 * derived from that level and the sharpness setting and is never below 1.
 *
 * @param s  decoder context (segmentation, lf_delta and filter settings)
 * @param mb macroblock whose segment, ref_frame, mode and skip flag are read
 * @param f  output: filter_level, inner_limit and inner_filter are written
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    int level = s->filter.level;
    int limit;

    if (s->segmentation.enabled) {
        int seg_level = s->segmentation.filter_level[mb->segment];

        level = s->segmentation.absolute_vals ? seg_level : seg_level + level;
    }

    if (s->lf_delta.enabled)
        level += s->lf_delta.ref[mb->ref_frame] + s->lf_delta.mode[mb->mode];

    level = av_clip_uintp2(level, 6);

    limit = level;
    if (s->filter.sharpness) {
        limit >>= (s->filter.sharpness + 3) >> 2;
        limit   = FFMIN(limit, 9 - s->filter.sharpness);
    }
    limit = FFMAX(limit, 1);

    f->filter_level = level;
    f->inner_limit  = limit;
    /* inner edges are skipped only for a skipped macroblock that is neither
     * I4x4 nor split-MV */
    f->inner_filter = !(mb->skip && mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT);
}

1490
/**
 * Run the normal (non-simple) loop filter over one macroblock.
 *
 * Edges are processed in this order: left macroblock edge (skipped for
 * mb_x == 0), inner vertical edges, top macroblock edge (skipped for
 * mb_y == 0), inner horizontal edges. Inner edges are filtered only when
 * f->inner_filter is set. Nothing is done when f->filter_level is 0.
 *
 * @param s    decoder context (DSP functions, line sizes, keyframe flag)
 * @param dst  pointers to the macroblock's luma and two chroma planes
 * @param f    filter strength previously computed for this macroblock
 * @param mb_x macroblock column; only tested against 0
 * @param mb_y macroblock row; only tested against 0
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    /* high-edge-variance threshold, indexed by [keyframe][filter_level] */
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };
    int level     = f->filter_level;
    int limit     = f->inner_limit;
    int do_inner  = f->inner_filter;
    int stride_y  = s->linesize;
    int stride_uv = s->uvlinesize;
    int inner_edge, outer_edge, hev, off;

    if (!level)
        return;

    inner_edge = 2 * level + limit;
    outer_edge = inner_edge + 4;
    hev        = hev_thresh_lut[s->keyframe][level];

    /* left macroblock edge */
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], stride_y,
                                       outer_edge, limit, hev);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], stride_uv,
                                       outer_edge, limit, hev);
    }

    /* vertical edges inside the macroblock: luma columns 4, 8, 12 and
     * chroma column 4 */
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + off, stride_y,
                                                 inner_edge, limit, hev);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             stride_uv, inner_edge,
                                             limit, hev);
    }

    /* top macroblock edge */
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], stride_y,
                                       outer_edge, limit, hev);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], stride_uv,
                                       outer_edge, limit, hev);
    }

    /* horizontal edges inside the macroblock: luma rows 4, 8, 12 and
     * chroma row 4 */
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + off * stride_y,
                                                 stride_y, inner_edge,
                                                 limit, hev);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * stride_uv,
                                             dst[2] + 4 * stride_uv,
                                             stride_uv, inner_edge,
                                             limit, hev);
    }
}

1560
/**
 * Run the simple loop filter over the luma plane of one macroblock.
 *
 * Same edge order as filter_mb(): left macroblock edge, inner vertical
 * edges, top macroblock edge, inner horizontal edges. Only dst (a single
 * plane) is filtered. Nothing is done when f->filter_level is 0.
 *
 * @param s    decoder context (DSP functions and luma line size)
 * @param dst  pointer to the macroblock's top-left sample
 * @param f    filter strength previously computed for this macroblock
 * @param mb_x macroblock column; only tested against 0
 * @param mb_y macroblock row; only tested against 0
 */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int level    = f->filter_level;
    int do_inner = f->inner_filter;
    int stride   = s->linesize;
    int inner_edge, outer_edge, off;

    if (!level)
        return;

    inner_edge = 2 * level + f->inner_limit;
    outer_edge = inner_edge + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, stride, outer_edge);
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_h_loop_filter_simple(dst + off, stride, inner_edge);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, stride, outer_edge);
    if (do_inner) {
        for (off = 4; off < 16; off += 4)
            s->vp8dsp.vp8_v_loop_filter_simple(dst + off * stride, stride, inner_edge);
    }
}

1591 1592 1593 1594 1595 1596 1597 1598
/**
 * Free segmentation maps that were queued for deletion.
 *
 * Entries are freed from the end of s->segmentation_maps[]. One entry is
 * left in the queue unless the decoder is closing or the maps were marked
 * invalid. The maps_are_invalid flag is cleared afterwards.
 *
 * @param s        decoder context owning the queue
 * @param is_close non-zero to free every queued map
 */
static void release_queued_segmaps(VP8Context *s, int is_close)
{
    int keep = 0;

    if (!is_close && !s->maps_are_invalid)
        keep = 1;

    while (s->num_maps_to_be_freed > keep) {
        s->num_maps_to_be_freed--;
        av_freep(&s->segmentation_maps[s->num_maps_to_be_freed]);
    }

    s->maps_are_invalid = 0;
}

1599
#define MARGIN (16 << 2)
D
Daniel Kang 已提交
1600 1601
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, AVFrame *curframe,
                                   AVFrame *prev_frame)
1602 1603
{
    VP8Context *s = avctx->priv_data;
D
Daniel Kang 已提交
1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 1);
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

1629
#if HAVE_THREADS
/*
 * Row-to-row synchronisation helpers for sliced threading.
 *
 * A position is packed into one int as (mb_y << 16) | (mb_x & 0xFFFF) and
 * published in the thread_mb_pos field of a VP8ThreadData.
 *
 * check_thread_pos(td, otd, mb_x_check, mb_y_check)
 *     Block the caller (td) until the other thread (otd) has published a
 *     position that is >= the requested one. While waiting, td->wait_mb_pos
 *     holds the awaited position; it is reset to INT_MAX afterwards.
 *
 * update_pos(td, mb_y, mb_x)
 *     Publish a new position for td. When slice threading with more than
 *     one job is active and a neighbouring thread's wait_mb_pos has been
 *     reached (or no neighbours are known), waiters on td->cond are woken.
 *     This macro reads avctx, num_jobs, next_td and prev_td from the scope
 *     in which it is expanded.
 *
 * Every argument is parenthesised and each macro expands to exactly one
 * statement WITHOUT a trailing semicolon, so a use must be followed by ';'
 * and is safe as the body of an unbraced if/else.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            do {\
                if ((otd)->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            } while (1);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while (0)

#define update_pos(td, mb_y, mb_x)\
    do {\
    int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
    int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
    int is_null          = (next_td == NULL) || (prev_td == NULL);\
    int pos_check        = (is_null) ? 1 :\
                            (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                            (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
    (td)->thread_mb_pos = pos;\
    if (sliced_threading && pos_check) {\
        pthread_mutex_lock(&(td)->lock);\
        pthread_cond_broadcast(&(td)->cond);\
        pthread_mutex_unlock(&(td)->lock);\
    }\
    } while (0)
#else
/* Without thread support both helpers are no-op statements. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) do { } while (0)
#define update_pos(td, mb_y, mb_x) do { } while (0)
#endif
D
Daniel Kang 已提交
1665 1666 1667 1668 1669 1670 1671

/**
 * Decode and reconstruct one macroblock row without loop filtering.
 *
 * The row to process is taken from the upper 16 bits of td->thread_mb_pos.
 * For every macroblock the function optionally decodes its mode (only when
 * s->mb_layout is 0), decodes coefficients, runs intra or inter prediction,
 * adds the residual, computes the loop-filter strength when deblocking is
 * enabled, and finally publishes its progress with update_pos().
 *
 * NOTE(review): td is indexed by threadnr here, while
 * vp8_decode_mb_row_sliced() stores the row position in thread_data[jobnr];
 * this relies on jobnr == threadnr - confirm against the execute2()
 * implementation in use.
 *
 * @param avctx    codec context; priv_data is the VP8Context
 * @param tdata    unused
 * @param jobnr    job index, used to locate the neighbouring jobs' data
 * @param threadnr worker index, selects this thread's VP8ThreadData
 */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *prev_td, *next_td, *td = &s->thread_data[threadnr];
    int mb_y = td->thread_mb_pos>>16;
    int i, y, mb_x, mb_xy = mb_y*s->mb_width;
    int num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe, *prev_frame = s->prev_frame;
    /* coefficient partitions are assigned to rows round-robin */
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };

    /* The first/last row has no neighbour above/below: pointing at our own
     * data disables the corresponding wait and wake-up checks. */
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    if (s->mb_layout == 1)
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    else {
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));
    // left edge of 129 for intra prediction
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        for (i = 0; i < 3; i++)
            for (y = 0; y < 16>>!!i; y++) /* 16 luma rows, 8 chroma rows */
                dst[i][y*curframe->linesize[i]-1] = 129;
        if (mb_y == 1) {
            s->top_border[0][15] = s->top_border[0][23] = s->top_border[0][31] = 129;
        }
    }

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait for previous thread to read mb_x+2, and reach mb_y-1.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                /* positions offset by mb_width+3 are the ones published by
                 * vp8_filter_mb_row() */
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        /* with mb_layout == 1 the caller has already decoded all modes */
        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->ref_index[0] + mb_xy,
                           prev_frame && prev_frame->ref_index[0] ? prev_frame->ref_index[0] + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        if (!mb->skip) {
            idct_mb(s, td, dst, mb);
        } else {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        }

        if (s->deblock_filter)
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

        /* With several jobs, the last job saves the unfiltered borders
         * here; with a single job vp8_filter_mb_row() does it. */
        if (s->deblock_filter && num_jobs != 1 && threadnr == num_jobs-1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        /* mb_x is always below s->mb_width here, so the position is
         * published unmodified. */
        update_pos(td, mb_y, mb_x);
    }
}

/**
 * Apply the loop filter to one macroblock row.
 *
 * The row to process is taken from the upper 16 bits of td->thread_mb_pos.
 * Progress is published with an offset of s->mb_width + 3 on the column, so
 * that filter positions compare greater than every decode position of the
 * same row.
 *
 * @param avctx    codec context; priv_data is the VP8Context
 * @param tdata    unused
 * @param jobnr    job index, used to locate the neighbouring jobs' data
 * @param threadnr worker index, selects this thread's VP8ThreadData
 */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    int mb_x, mb_y = td->thread_mb_pos>>16, num_jobs = s->num_jobs;
    AVFrame *curframe = s->curframe;
    VP8ThreadData *prev_td, *next_td;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };

    /* The first/last row has no neighbour above/below: pointing at our own
     * data disables the corresponding wait. */
    if (mb_y == 0) prev_td = td;
    else           prev_td = &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    if (mb_y == s->mb_height-1) next_td = td;
    else                        next_td = &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
        VP8FilterStrength *f = &td->filter_strength[mb_x];

        /* the row above must have been filtered past column mb_x+1 */
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        /* the row below must have been decoded past column mb_x+1; this
         * wait is skipped when that row belongs to thread_data[0] */
        if (next_td != td && next_td != &s->thread_data[0]) {
            check_thread_pos(td, next_td, mb_x+1, mb_y+1);
        }

        /* with a single job the unfiltered borders are saved here, right
         * before they get filtered */
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], f, mb_x, mb_y);
        else
            filter_mb(s, dst, f, mb_x, mb_y);
        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}

/**
 * Job entry point passed to execute2(): decode (and, when deblocking is
 * enabled, loop-filter) every num_jobs-th macroblock row starting at row
 * jobnr.
 *
 * @param avctx    codec context; priv_data is the VP8Context
 * @param tdata    forwarded unchanged to the per-row functions
 * @param jobnr    job index; selects the VP8ThreadData and the first row
 * @param threadnr worker index, stored in td->thread_nr and forwarded
 * @return always 0
 */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[jobnr];
    /* Both stay NULL on purpose: update_pos() then skips its neighbour
     * checks and signals whenever sliced threading is active. */
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    AVFrame *curframe = s->curframe;
    int mb_y, num_jobs = s->num_jobs;

    td->thread_nr = threadnr;
    for (mb_y = jobnr; mb_y < s->mb_height; mb_y += num_jobs) {
        /* publish the row about to be processed; column starts at 0 */
        td->thread_mb_pos = mb_y<<16;
        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
        /* mark the row as completely finished */
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(curframe, mb_y, 0);
    }

    return 0;
}

D
David Conrad 已提交
1857 1858 1859 1860
/**
 * Decode one VP8 packet.
 *
 * @param avctx     codec context; priv_data is the VP8Context
 * @param data      receives a copy of the decoded AVFrame unless the frame
 *                  is marked invisible
 * @param data_size set to sizeof(AVFrame) when a frame is written to data
 * @param avpkt     input packet
 * @return avpkt->size on success (also when decoding is skipped), otherwise
 *         the negative error code from the failing step
 */
static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
                            AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    AVFrame *av_uninit(curframe), *prev_frame;

    release_queued_segmaps(s, 0);

    ret = decode_frame_header(s, avpkt->data, avpkt->size);
    if (ret < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    /* the frame is a reference if it updates any of the three slots */
    referenced = s->update_last ||
                 s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    if (!referenced)
        skip_thresh = AVDISCARD_NONREF;
    else if (!s->keyframe)
        skip_thresh = AVDISCARD_NONKEY;
    else
        skip_thresh = AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && (avctx->skip_loop_filter < skip_thresh);

    /* release frames that are no longer referenced */
    for (i = 0; i < 5; i++) {
        AVFrame *cand = &s->frames[i];

        if (!cand->data[0])
            continue;
        if (cand == prev_frame ||
            cand == s->framep[VP56_FRAME_PREVIOUS] ||
            cand == s->framep[VP56_FRAME_GOLDEN] ||
            cand == s->framep[VP56_FRAME_GOLDEN2])
            continue;
        vp8_release_frame(s, cand, 1, 0);
    }

    /* pick the first slot that is not referenced as the current frame */
    for (i = 0; i < 5; i++) {
        AVFrame *cand = &s->frames[i];

        if (cand != prev_frame &&
            cand != s->framep[VP56_FRAME_PREVIOUS] &&
            cand != s->framep[VP56_FRAME_GOLDEN] &&
            cand != s->framep[VP56_FRAME_GOLDEN2]) {
            curframe = s->framep[VP56_FRAME_CURRENT] = cand;
            break;
        }
    }
    if (i == 5) {
        /* NOTE(review): terminates the whole process instead of returning
         * an error code */
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->data[0])
        vp8_release_frame(s, curframe, 1, 0);

    /* Given that arithmetic probabilities are updated every frame, the
     * values present on an arbitrary interframe are most likely junk if
     * decoding did not start on a keyframe. Show nothing rather than junk. */
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN] ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->key_frame = s->keyframe;
    curframe->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    curframe->reference = referenced ? 3 : 0;
    ret = vp8_alloc_frame(s, curframe);
    if (ret) {
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n");
        goto err;
    }

    /* Build the reference set the NEXT frame will see. update_altref and
     * update_golden name the slot to copy from, which also covers golden
     * and altref being swapped. */
    s->next_framep[VP56_FRAME_GOLDEN2]  =
        s->framep[s->update_altref != VP56_FRAME_NONE ? s->update_altref
                                                      : VP56_FRAME_GOLDEN2];
    s->next_framep[VP56_FRAME_GOLDEN]   =
        s->framep[s->update_golden != VP56_FRAME_NONE ? s->update_golden
                                                      : VP56_FRAME_GOLDEN];
    s->next_framep[VP56_FRAME_PREVIOUS] =
        s->update_last ? curframe : s->framep[VP56_FRAME_PREVIOUS];
    s->next_framep[VP56_FRAME_CURRENT]  = curframe;

    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->linesize[0];
    s->uvlinesize = curframe->linesize[1];

    /* NOTE(review): the av_malloc() results are not checked here */
    if (!s->thread_data[0].edge_emu_buffer) {
        for (i = 0; i < MAX_THREADS; i++)
            s->thread_data[i].edge_emu_buffer = av_malloc(21*s->linesize);
    }

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
    if (!s->mb_layout)
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
    if (!s->mb_layout && s->keyframe)
        memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);

    /* top edge of 127 for intra prediction */
    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
        s->top_border[0][23] = 127;
        s->top_border[0][15] = 127;
        memset(s->top_border[1]-1, 127, s->mb_width*sizeof(*s->top_border)+1);
    }
    memset(s->ref_count, 0, sizeof(s->ref_count));

    /* When the segmentation map is re-used, make sure the previous frame
     * has finished reading it. */
    if (prev_frame && s->segmentation.enabled && !s->segmentation.update_map)
        ff_thread_await_progress(prev_frame, 1, 0);

    if (s->mb_layout == 1)
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);

    /* under frame threading each frame is decoded by a single job */
    num_jobs = (avctx->active_thread_type == FF_THREAD_FRAME)
                   ? 1
                   : FFMIN(s->num_coeff_partitions, avctx->thread_count);

    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        VP8ThreadData *td = &s->thread_data[i];

        td->thread_mb_pos = 0;
        td->wait_mb_pos   = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);

    ff_thread_report_progress(curframe, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    /* if future frames do not use the updated probabilities, restore the
     * saved set */
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        *(AVFrame*)data = *curframe;
        *data_size = sizeof(AVFrame);
    }

    return avpkt->size;

err:
    /* undo any reference changes staged for the next frame */
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

/**
 * Codec init callback: link the private context to its codec context, set
 * the output pixel format and initialise the DSP function tables.
 *
 * @param avctx codec context; priv_data is the VP8Context
 * @return always 0
 */
static av_cold int vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *ctx = avctx->priv_data;

    ctx->avctx     = avctx;
    avctx->pix_fmt = PIX_FMT_YUV420P;

    ff_dsputil_init(&ctx->dsp, avctx);
    ff_h264_pred_init(&ctx->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&ctx->vp8dsp);

    return 0;
}

/**
 * Codec close callback: flush the decoder state and then free every
 * segmentation map still queued for deletion.
 *
 * @param avctx codec context; priv_data is the VP8Context
 * @return always 0
 */
static av_cold int vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *ctx = avctx->priv_data;

    vp8_decode_flush_impl(avctx, 0, 1, 1);
    release_queued_segmaps(ctx, 1);

    return 0;
}

R
Ronald S. Bultje 已提交
2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050
/**
 * Frame-threading callback for a per-thread context copy: point the copy's
 * private context back at its own codec context.
 *
 * @param avctx codec context of the thread copy
 * @return always 0
 */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *ctx = avctx->priv_data;

    ctx->avctx = avctx;
    return 0;
}

/*
 * Translate a pointer into s_src->frames[] to the element with the same
 * index in s->frames[]; NULL stays NULL. Reads s and s_src from the scope
 * in which it is expanded. The argument is evaluated more than once.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

/**
 * Frame-threading callback: copy the state the next frame depends on from
 * the source thread's context into the destination's.
 *
 * If the destination already has buffers allocated for different
 * macroblock dimensions, they are freed, the queued segmentation maps are
 * marked invalid and the new dimensions are adopted.
 *
 * @param dst codec context to update
 * @param src codec context to copy from
 * @return always 0
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;

    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->maps_are_invalid = 1;
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    /* take the probability set that later frames are meant to see:
     * prob[0] if the source frame's updates persist, prob[1] otherwise */
    s->prob[0] = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    /* the frame structs are copied by value, so the reference pointers
     * must be redirected into this context's own array */
    memcpy(&s->frames, &s_src->frames, sizeof(s->frames));
    s->framep[0] = REBASE(s_src->next_framep[0]);
    s->framep[1] = REBASE(s_src->next_framep[1]);
    s->framep[2] = REBASE(s_src->next_framep[2]);
    s->framep[3] = REBASE(s_src->next_framep[3]);

    return 0;
}

2073
AVCodec ff_vp8_decoder = {
2074 2075
    .name                  = "vp8",
    .type                  = AVMEDIA_TYPE_VIDEO,
2076
    .id                    = AV_CODEC_ID_VP8,
2077 2078 2079 2080
    .priv_data_size        = sizeof(VP8Context),
    .init                  = vp8_decode_init,
    .close                 = vp8_decode_free,
    .decode                = vp8_decode_frame,
D
Daniel Kang 已提交
2081
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2082 2083
    .flush                 = vp8_decode_flush,
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
R
Ronald S. Bultje 已提交
2084 2085
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
D
David Conrad 已提交
2086
};