/*
 * VP8 compatible video decoder
 *
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 * Copyright (C) 2010 Jason Garrett-Glaser
 * Copyright (C) 2012 Daniel Kang
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

26
#include "libavutil/imgutils.h"
D
David Conrad 已提交
27
#include "avcodec.h"
28
#include "internal.h"
29
#include "vp8.h"
D
David Conrad 已提交
30 31
#include "vp8data.h"
#include "rectangle.h"
R
Ronald S. Bultje 已提交
32
#include "thread.h"
D
David Conrad 已提交
33

34 35 36 37
#if ARCH_ARM
#   include "arm/vp8.h"
#endif

38 39
/**
 * Release every working buffer owned by the decoder context.
 *
 * Per-thread state is torn down first (condition variable and mutex when
 * thread support is compiled in, then the filter-strength array), followed
 * by the thread-data array itself and the remaining scratch buffers.
 * s->macroblocks is reset to NULL alongside macroblocks_base.
 */
static void free_buffers(VP8Context *s)
{
    int t;

    if (s->thread_data) {
        for (t = 0; t < MAX_THREADS; t++) {
#if HAVE_THREADS
            pthread_cond_destroy(&s->thread_data[t].cond);
            pthread_mutex_destroy(&s->thread_data[t].lock);
#endif
            av_freep(&s->thread_data[t].filter_strength);
        }
    }

    av_freep(&s->thread_data);
    av_freep(&s->macroblocks_base);
    av_freep(&s->intra4x4_pred_mode_top);
    av_freep(&s->top_nnz);
    av_freep(&s->top_border);

    s->macroblocks = NULL;
}

58
/**
 * Obtain a picture buffer and a zeroed segmentation map for a frame.
 *
 * @param s   decoder context (supplies avctx and the macroblock dimensions)
 * @param f   frame to populate
 * @param ref non-zero to request the buffer with AV_GET_BUFFER_FLAG_REF
 * @return 0 on success, the error from ff_thread_get_buffer(), or
 *         AVERROR(ENOMEM) if the segmentation map cannot be allocated
 *         (the picture buffer is released again in that case)
 */
static int vp8_alloc_frame(VP8Context *s, VP8Frame *f, int ref)
{
    int flags = ref ? AV_GET_BUFFER_FLAG_REF : 0;
    int err   = ff_thread_get_buffer(s->avctx, &f->tf, flags);

    if (err < 0)
        return err;

    /* one map byte per macroblock */
    f->seg_map = av_buffer_allocz(s->mb_width * s->mb_height);
    if (f->seg_map)
        return 0;

    ff_thread_release_buffer(s->avctx, &f->tf);
    return AVERROR(ENOMEM);
}

71
/**
 * Drop a frame's segmentation-map reference and release its picture buffer.
 */
static void vp8_release_frame(VP8Context *s, VP8Frame *f)
{
    av_buffer_unref(&f->seg_map);
    ff_thread_release_buffer(s->avctx, &f->tf);
}

/**
 * Make dst a new reference to src (picture buffer and segmentation map).
 *
 * Whatever dst held before is released first.
 *
 * @return 0 on success, the error from ff_thread_ref_frame(), or
 *         AVERROR(ENOMEM) if the segmentation map cannot be referenced
 *         (dst is released again in that case)
 */
static int vp8_ref_frame(VP8Context *s, VP8Frame *dst, VP8Frame *src)
{
    int err;

    vp8_release_frame(s, dst);

    err = ff_thread_ref_frame(&dst->tf, &src->tf);
    if (err < 0)
        return err;

    /* a source without a segmentation map leaves dst without one, too */
    if (!src->seg_map)
        return 0;

    dst->seg_map = av_buffer_ref(src->seg_map);
    if (!dst->seg_map) {
        vp8_release_frame(s, dst);
        return AVERROR(ENOMEM);
    }

    return 0;
}

94 95

/**
 * Release all frames held by the decoder and clear the frame pointer table.
 *
 * @param free_mem when non-zero, the working buffers are freed as well
 */
static void vp8_decode_flush_impl(AVCodecContext *avctx, int free_mem)
{
    VP8Context *s = avctx->priv_data;
    int n;

    for (n = 0; n < FF_ARRAY_ELEMS(s->frames); n++)
        vp8_release_frame(s, &s->frames[n]);

    memset(s->framep, 0, sizeof(s->framep));

    if (!free_mem)
        return;

    free_buffers(s);
}

/**
 * Flush entry point: releases all frames but keeps the working buffers.
 */
static void vp8_decode_flush(AVCodecContext *avctx)
{
    const int keep_buffers = 0;

    vp8_decode_flush_impl(avctx, keep_buffers);
}

/**
 * Apply a (possibly new) frame size and allocate the working buffers.
 *
 * If the size differs from the codec context, or the macroblock grid changes
 * while buffers are already allocated, all frames and buffers are flushed and
 * the new dimensions are set on the codec context before reallocating.
 *
 * On any allocation failure every buffer allocated here is released again, so
 * the context is left with no working buffers (s->macroblocks_base == NULL)
 * rather than a partially allocated set.
 *
 * @param s      decoder context
 * @param width  frame width in pixels
 * @param height frame height in pixels
 * @return 0 on success, the error from ff_set_dimensions(), or
 *         AVERROR(ENOMEM) if an allocation fails
 */
static int update_dimensions(VP8Context *s, int width, int height)
{
    AVCodecContext *avctx = s->avctx;
    int i, ret;
    int alloc_failed = 0;

    if (width  != s->avctx->width ||
        (((width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) && s->macroblocks_base) ||
        height != s->avctx->height) {
        vp8_decode_flush_impl(s->avctx, 1);

        ret = ff_set_dimensions(s->avctx, width, height);
        if (ret < 0)
            return ret;
    }

    s->mb_width  = (s->avctx->coded_width +15) / 16;
    s->mb_height = (s->avctx->coded_height+15) / 16;

    s->mb_layout = (avctx->active_thread_type == FF_THREAD_SLICE) && (FFMIN(s->num_coeff_partitions, avctx->thread_count) > 1);
    if (!s->mb_layout) { // Frame threading and one thread
        s->macroblocks_base       = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
        s->intra4x4_pred_mode_top = av_mallocz(s->mb_width*4);
    }
    else // Sliced threading
        s->macroblocks_base       = av_mallocz((s->mb_width+2)*(s->mb_height+2)*sizeof(*s->macroblocks));
    s->top_nnz                    = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
    s->top_border                 = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
    s->thread_data                = av_mallocz(MAX_THREADS*sizeof(VP8ThreadData));

    /* The per-thread loop below dereferences thread_data, so it must be
     * checked first. With thread_data NULL, free_buffers() skips the
     * per-thread teardown and only frees the other buffers. */
    if (!s->thread_data) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    /* Always run the full loop so that every mutex/cond is initialised before
     * free_buffers() may destroy them on the failure path below. */
    for (i = 0; i < MAX_THREADS; i++) {
        s->thread_data[i].filter_strength = av_mallocz(s->mb_width*sizeof(*s->thread_data[0].filter_strength));
        if (!s->thread_data[i].filter_strength)
            alloc_failed = 1;
#if HAVE_THREADS
        pthread_mutex_init(&s->thread_data[i].lock, NULL);
        pthread_cond_init(&s->thread_data[i].cond, NULL);
#endif
    }

    if (alloc_failed ||
        !s->macroblocks_base || !s->top_nnz || !s->top_border ||
        (!s->intra4x4_pred_mode_top && !s->mb_layout)) {
        free_buffers(s);
        return AVERROR(ENOMEM);
    }

    /* skip the first element of the base array */
    s->macroblocks        = s->macroblocks_base + 1;

    return 0;
}

/**
 * Read the segmentation part of the frame header.
 *
 * Reads, in bitstream order: the update-map flag, the update-data flag and,
 * if set, the absolute/delta flag plus four quantizer and four loop-filter
 * values; then, if the map is updated, the three segment-id tree
 * probabilities (255 when a probability is not transmitted).
 */
static void parse_segment_info(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int n;

    s->segmentation.update_map = vp8_rac_get(c);

    if (vp8_rac_get(c)) { // update segment feature data
        s->segmentation.absolute_vals = vp8_rac_get(c);

        for (n = 0; n < 4; n++)
            s->segmentation.base_quant[n]   = vp8_rac_get_sint(c, 7);

        for (n = 0; n < 4; n++)
            s->segmentation.filter_level[n] = vp8_rac_get_sint(c, 6);
    }

    if (!s->segmentation.update_map)
        return;

    for (n = 0; n < 3; n++) {
        if (vp8_rac_get(c))
            s->prob->segmentid[n] = vp8_rac_get_uint(c, 8);
        else
            s->prob->segmentid[n] = 255;
    }
}

/**
 * Read the loop-filter delta updates from the frame header.
 *
 * Each of the four reference-frame deltas, then each mode delta from
 * MODE_I4x4 through VP8_MVMODE_SPLIT, is preceded by an update flag.
 * An updated value is coded as a 6-bit magnitude followed by a sign bit.
 * Entries whose flag is clear keep their previous value.
 */
static void update_lf_deltas(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int idx;

    for (idx = 0; idx < 4; idx++) {
        int magnitude;

        if (!vp8_rac_get(c))
            continue;

        magnitude = vp8_rac_get_uint(c, 6);
        s->lf_delta.ref[idx] = vp8_rac_get(c) ? -magnitude : magnitude;
    }

    for (idx = MODE_I4x4; idx <= VP8_MVMODE_SPLIT; idx++) {
        int magnitude;

        if (!vp8_rac_get(c))
            continue;

        magnitude = vp8_rac_get_uint(c, 6);
        s->lf_delta.mode[idx] = vp8_rac_get(c) ? -magnitude : magnitude;
    }
}

/**
 * Set up one range decoder per DCT coefficient partition.
 *
 * The partition count (1, 2, 4 or 8) is read from the header coder. buf
 * starts with a table of 3-byte little-endian sizes for all partitions but
 * the last; the partition data follows, and the last partition takes
 * whatever bytes remain.
 *
 * @param buf      start of the partition size table
 * @param buf_size number of bytes available at buf
 * @return 0 on success, -1 if the table or a partition exceeds buf_size
 */
static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size)
{
    const uint8_t *size_entry = buf;
    int table_bytes, last, i;

    s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2);

    last        = s->num_coeff_partitions - 1;
    table_bytes = 3 * last;

    if (buf_size < table_bytes)
        return -1;
    buf      += table_bytes;
    buf_size -= table_bytes;

    for (i = 0; i < last; i++, size_entry += 3) {
        int size = AV_RL24(size_entry);

        if (size > buf_size)
            return -1;

        ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, size);
        buf      += size;
        buf_size -= size;
    }

    /* i == last here: the final partition gets the rest of the buffer */
    ff_vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size);

    return 0;
}

/**
 * Read the quantizer indices from the frame header and fill the
 * dequantization factors for all four segments.
 *
 * The base index of a segment is the frame-level index, the segment's own
 * value, or their sum, depending on whether segmentation is enabled and
 * whether segment values are absolute.
 */
static void get_quants(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;
    int seg, q;

    /* header fields, read in bitstream order */
    int yac_qi     = vp8_rac_get_uint(c, 7);
    int ydc_delta  = vp8_rac_get_sint(c, 4);
    int y2dc_delta = vp8_rac_get_sint(c, 4);
    int y2ac_delta = vp8_rac_get_sint(c, 4);
    int uvdc_delta = vp8_rac_get_sint(c, 4);
    int uvac_delta = vp8_rac_get_sint(c, 4);

    for (seg = 0; seg < 4; seg++) {
        if (!s->segmentation.enabled)
            q = yac_qi;
        else if (s->segmentation.absolute_vals)
            q = s->segmentation.base_quant[seg];
        else
            q = s->segmentation.base_quant[seg] + yac_qi;

        s->qmat[seg].luma_qmul[0]    =           vp8_dc_qlookup[av_clip_uintp2(q + ydc_delta , 7)];
        s->qmat[seg].luma_qmul[1]    =           vp8_ac_qlookup[av_clip_uintp2(q             , 7)];
        s->qmat[seg].luma_dc_qmul[0] =       2 * vp8_dc_qlookup[av_clip_uintp2(q + y2dc_delta, 7)];
        /* 101581>>16 is equivalent to 155/100 */
        s->qmat[seg].luma_dc_qmul[1] = (101581 * vp8_ac_qlookup[av_clip_uintp2(q + y2ac_delta, 7)]) >> 16;
        s->qmat[seg].chroma_qmul[0]  =           vp8_dc_qlookup[av_clip_uintp2(q + uvdc_delta, 7)];
        s->qmat[seg].chroma_qmul[1]  =           vp8_ac_qlookup[av_clip_uintp2(q + uvac_delta, 7)];

        /* lower bound on the Y2 AC factor, upper bound on the chroma DC factor */
        if (s->qmat[seg].luma_dc_qmul[1] < 8)
            s->qmat[seg].luma_dc_qmul[1] = 8;
        if (s->qmat[seg].chroma_qmul[0] > 132)
            s->qmat[seg].chroma_qmul[0] = 132;
    }
}

/**
 * Determine which buffer the golden or altref reference is updated from.
 *
 * If the update flag is set, the reference is replaced by the current frame
 * and nothing is read from the bitstream. Otherwise a 2-bit code is read:
 *      0: no update
 *      1: copy from VP56_FRAME_PREVIOUS
 *      2: copy from the other of golden/altref
 *      3: no update
 *
 * @param update the already-parsed update flag for this reference
 * @param ref    the reference being updated (VP56_FRAME_GOLDEN or
 *               VP56_FRAME_GOLDEN2)
 * @return the frame to copy from, or VP56_FRAME_NONE
 */
static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref)
{
    VP56RangeCoder *c = &s->c;
    int code;

    if (update)
        return VP56_FRAME_CURRENT;

    code = vp8_rac_get_uint(c, 2);

    if (code == 1)
        return VP56_FRAME_PREVIOUS;
    if (code == 2)
        return (ref == VP56_FRAME_GOLDEN) ? VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN;

    return VP56_FRAME_NONE;
}

/**
 * Read the golden/altref update information of an inter frame.
 *
 * Both update flags are read first; the optional copy codes follow in the
 * bitstream (golden, then altref), so the call order below matters.
 */
static void update_refs(VP8Context *s)
{
    VP56RangeCoder *c = &s->c;

    int golden_flag = vp8_rac_get(c);
    int altref_flag = vp8_rac_get(c);

    s->update_golden = ref_to_update(s, golden_flag, VP56_FRAME_GOLDEN);
    s->update_altref = ref_to_update(s, altref_flag, VP56_FRAME_GOLDEN2);
}

/**
 * Parse the VP8 frame header and set up all per-frame decoder state.
 *
 * Parses the 3-byte frame tag, the keyframe start code and dimensions,
 * segmentation, loop-filter settings, coefficient partitions, quantizers,
 * reference update flags and the probability updates. Buffers are
 * (re)allocated through update_dimensions() on the first frame or when the
 * frame size changes.
 *
 * @param s        decoder context
 * @param buf      start of the compressed frame
 * @param buf_size number of bytes available at buf
 * @return 0 on success, AVERROR_INVALIDDATA for a truncated or malformed
 *         header, or the error returned by update_dimensions()
 */
static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size)
{
    VP56RangeCoder *c = &s->c;
    int header_size, hscale, vscale, i, j, k, l, m, ret;
    int width  = s->avctx->width;
    int height = s->avctx->height;

    /* The 3-byte frame tag is read unconditionally below. */
    if (buf_size < 3) {
        av_log(s->avctx, AV_LOG_ERROR, "Insufficient data (%d) for header\n", buf_size);
        return AVERROR_INVALIDDATA;
    }

    s->keyframe  = !(buf[0] & 1);
    s->profile   =  (buf[0]>>1) & 7;
    s->invisible = !(buf[0] & 0x10);
    header_size  = AV_RL24(buf) >> 5;
    buf      += 3;
    buf_size -= 3;

    if (s->profile > 3)
        av_log(s->avctx, AV_LOG_WARNING, "Unknown profile %d\n", s->profile);

    if (!s->profile)
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_epel_pixels_tab, sizeof(s->put_pixels_tab));
    else    // profile 1-3 use bilinear, 4+ aren't defined so whatever
        memcpy(s->put_pixels_tab, s->vp8dsp.put_vp8_bilinear_pixels_tab, sizeof(s->put_pixels_tab));

    /* keyframes carry 7 extra bytes (start code + dimensions) before the header partition */
    if (header_size > buf_size - 7*s->keyframe) {
        av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n");
        return AVERROR_INVALIDDATA;
    }

    if (s->keyframe) {
        if (AV_RL24(buf) != 0x2a019d) {
            av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", AV_RL24(buf));
            return AVERROR_INVALIDDATA;
        }
        /* 14-bit dimensions; the top two bits of each 16-bit field are the scale */
        width  = AV_RL16(buf+3) & 0x3fff;
        height = AV_RL16(buf+5) & 0x3fff;
        hscale = buf[4] >> 6;
        vscale = buf[6] >> 6;
        buf      += 7;
        buf_size -= 7;

        if (hscale || vscale)
            avpriv_request_sample(s->avctx, "Upscaling");

        /* a keyframe resets references, probabilities, segmentation and filter deltas */
        s->update_golden = s->update_altref = VP56_FRAME_CURRENT;
        for (i = 0; i < 4; i++)
            for (j = 0; j < 16; j++)
                memcpy(s->prob->token[i][j], vp8_token_default_probs[i][vp8_coeff_band[j]],
                       sizeof(s->prob->token[i][j]));
        memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16));
        memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c));
        memcpy(s->prob->mvc      , vp8_mv_default_prob     , sizeof(s->prob->mvc));
        memset(&s->segmentation, 0, sizeof(s->segmentation));
        memset(&s->lf_delta, 0, sizeof(s->lf_delta));
    }

    ff_vp56_init_range_decoder(c, buf, header_size);
    buf      += header_size;
    buf_size -= header_size;

    if (s->keyframe) {
        if (vp8_rac_get(c))
            av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n");
        vp8_rac_get(c); // whether we can skip clamping in dsp functions
    }

    if ((s->segmentation.enabled = vp8_rac_get(c)))
        parse_segment_info(s);
    else
        s->segmentation.update_map = 0; // FIXME: move this to some init function?

    s->filter.simple    = vp8_rac_get(c);
    s->filter.level     = vp8_rac_get_uint(c, 6);
    s->filter.sharpness = vp8_rac_get_uint(c, 3);

    if ((s->lf_delta.enabled = vp8_rac_get(c)))
        if (vp8_rac_get(c))
            update_lf_deltas(s);

    if (setup_partitions(s, buf, buf_size)) {
        av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n");
        return AVERROR_INVALIDDATA;
    }

    if (!s->macroblocks_base || /* first frame */
        width != s->avctx->width || height != s->avctx->height || (width+15)/16 != s->mb_width || (height+15)/16 != s->mb_height) {
        if ((ret = update_dimensions(s, width, height)) < 0)
            return ret;
    }

    get_quants(s);

    if (!s->keyframe) {
        update_refs(s);
        s->sign_bias[VP56_FRAME_GOLDEN]               = vp8_rac_get(c);
        s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c);
    }

    // if we aren't saving this frame's probabilities for future frames,
    // make a copy of the current probabilities
    if (!(s->update_probabilities = vp8_rac_get(c)))
        s->prob[1] = s->prob[0];

    s->update_last = s->keyframe || vp8_rac_get(c);

    /* token probability updates: one value per band, fanned out to every
     * coefficient position belonging to that band */
    for (i = 0; i < 4; i++)
        for (j = 0; j < 8; j++)
            for (k = 0; k < 3; k++)
                for (l = 0; l < NUM_DCT_TOKENS-1; l++)
                    if (vp56_rac_get_prob_branchy(c, vp8_token_update_probs[i][j][k][l])) {
                        int prob = vp8_rac_get_uint(c, 8);
                        for (m = 0; vp8_coeff_band_indexes[j][m] >= 0; m++)
                            s->prob->token[i][vp8_coeff_band_indexes[j][m]][k][l] = prob;
                    }

    if ((s->mbskip_enabled = vp8_rac_get(c)))
        s->prob->mbskip = vp8_rac_get_uint(c, 8);

    if (!s->keyframe) {
        s->prob->intra  = vp8_rac_get_uint(c, 8);
        s->prob->last   = vp8_rac_get_uint(c, 8);
        s->prob->golden = vp8_rac_get_uint(c, 8);

        if (vp8_rac_get(c))
            for (i = 0; i < 4; i++)
                s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8);
        if (vp8_rac_get(c))
            for (i = 0; i < 3; i++)
                s->prob->pred8x8c[i]  = vp8_rac_get_uint(c, 8);

        // 17.2 MV probability update
        for (i = 0; i < 2; i++)
            for (j = 0; j < 19; j++)
                if (vp56_rac_get_prob_branchy(c, vp8_mv_update_prob[i][j]))
                    s->prob->mvc[i][j] = vp8_rac_get_nn(c);
    }

    return 0;
}

J
Jason Garrett-Glaser 已提交
440
/**
 * Clamp a motion vector to the context's [mv_min, mv_max] range,
 * component by component, and store the result in dst.
 */
static av_always_inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src)
{
    int clamped_x = av_clip(src->x, s->mv_min.x, s->mv_max.x);
    int clamped_y = av_clip(src->y, s->mv_min.y, s->mv_max.y);

    dst->x = clamped_x;
    dst->y = clamped_y;
}

/**
 * Motion vector coding, 17.1.
 *
 * Reads one motion vector component.
 *
 * @param c range coder to read from
 * @param p probability table for this component: p[0] selects the long
 *          form, p[1] is the sign, p[2..] is the short-form tree and
 *          p[9..] are the long-form magnitude bits
 * @return the signed component value
 */
static int read_mv_component(VP56RangeCoder *c, const uint8_t *p)
{
    int x = 0;

    if (vp56_rac_get_prob_branchy(c, p[0])) {
        /* long form: bits 0..2 first, then bits 9 down to 4, bit 3 last */
        int bitpos;

        for (bitpos = 0; bitpos <= 2; bitpos++)
            x += vp56_rac_get_prob(c, p[9 + bitpos]) << bitpos;
        for (bitpos = 9; bitpos >= 4; bitpos--)
            x += vp56_rac_get_prob(c, p[9 + bitpos]) << bitpos;
        /* bit 3 is only coded when a higher bit is set; otherwise it is implied */
        if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12]))
            x += 8;
    } else {
        // small_mvtree: three bits, each choosing the next tree node
        const uint8_t *node = p + 2;
        int bit;

        bit   = vp56_rac_get_prob(c, *node);
        node += 1 + 3*bit;
        x    += 4*bit;

        bit   = vp56_rac_get_prob(c, *node);
        node += 1 + bit;
        x    += 2*bit;

        x    += vp56_rac_get_prob(c, *node);
    }

    /* the sign is only coded for non-zero magnitudes */
    if (x && vp56_rac_get_prob(c, p[1]))
        return -x;
    return x;
}

477 478
static av_always_inline
const uint8_t *get_submv_prob(uint32_t left, uint32_t top)
D
David Conrad 已提交
479
{
480 481 482
    if (left == top)
        return vp8_submv_prob[4-!!left];
    if (!top)
D
David Conrad 已提交
483
        return vp8_submv_prob[2];
484
    return vp8_submv_prob[1-!!left];
D
David Conrad 已提交
485 486 487 488
}

/**
 * Split motion vector prediction, 16.4.
489
 * @returns the number of motion vectors parsed (2, 4 or 16)
D
David Conrad 已提交
490
 */
491
static av_always_inline
D
Daniel Kang 已提交
492
int decode_splitmvs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, int layout)
D
David Conrad 已提交
493
{
494 495
    int part_idx;
    int n, num;
D
Daniel Kang 已提交
496
    VP8Macroblock *top_mb;
497 498
    VP8Macroblock *left_mb = &mb[-1];
    const uint8_t *mbsplits_left = vp8_mbsplits[left_mb->partitioning],
D
Daniel Kang 已提交
499
                  *mbsplits_top,
500
                  *mbsplits_cur, *firstidx;
D
Daniel Kang 已提交
501
    VP56mv *top_mv;
502 503
    VP56mv *left_mv = left_mb->bmv;
    VP56mv *cur_mv  = mb->bmv;
D
David Conrad 已提交
504

D
Daniel Kang 已提交
505 506 507 508 509 510 511
    if (!layout) // layout is inlined, s->mb_layout is not
        top_mb = &mb[2];
    else
        top_mb = &mb[-s->mb_width-1];
    mbsplits_top = vp8_mbsplits[top_mb->partitioning];
    top_mv = top_mb->bmv;

512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
    if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[0])) {
        if (vp56_rac_get_prob_branchy(c, vp8_mbsplit_prob[1])) {
            part_idx = VP8_SPLITMVMODE_16x8 + vp56_rac_get_prob(c, vp8_mbsplit_prob[2]);
        } else {
            part_idx = VP8_SPLITMVMODE_8x8;
        }
    } else {
        part_idx = VP8_SPLITMVMODE_4x4;
    }

    num = vp8_mbsplit_count[part_idx];
    mbsplits_cur = vp8_mbsplits[part_idx],
    firstidx = vp8_mbfirstidx[part_idx];
    mb->partitioning = part_idx;

D
David Conrad 已提交
527
    for (n = 0; n < num; n++) {
528
        int k = firstidx[n];
529
        uint32_t left, above;
530 531
        const uint8_t *submv_prob;

532 533 534 535 536 537 538 539
        if (!(k & 3))
            left = AV_RN32A(&left_mv[mbsplits_left[k + 3]]);
        else
            left  = AV_RN32A(&cur_mv[mbsplits_cur[k - 1]]);
        if (k <= 3)
            above = AV_RN32A(&top_mv[mbsplits_top[k + 12]]);
        else
            above = AV_RN32A(&cur_mv[mbsplits_cur[k - 4]]);
540 541

        submv_prob = get_submv_prob(left, above);
D
David Conrad 已提交
542

543 544 545 546 547 548 549 550 551 552 553 554
        if (vp56_rac_get_prob_branchy(c, submv_prob[0])) {
            if (vp56_rac_get_prob_branchy(c, submv_prob[1])) {
                if (vp56_rac_get_prob_branchy(c, submv_prob[2])) {
                    mb->bmv[n].y = mb->mv.y + read_mv_component(c, s->prob->mvc[0]);
                    mb->bmv[n].x = mb->mv.x + read_mv_component(c, s->prob->mvc[1]);
                } else {
                    AV_ZERO32(&mb->bmv[n]);
                }
            } else {
                AV_WN32A(&mb->bmv[n], above);
            }
        } else {
555
            AV_WN32A(&mb->bmv[n], left);
D
David Conrad 已提交
556 557
        }
    }
558 559

    return num;
D
David Conrad 已提交
560 561
}

562
static av_always_inline
D
Daniel Kang 已提交
563
void decode_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int layout)
564
{
D
Daniel Kang 已提交
565
    VP8Macroblock *mb_edge[3] = { 0 /* top */,
566
                                  mb - 1 /* left */,
D
Daniel Kang 已提交
567
                                  0 /* top-left */ };
568
    enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV };
569
    enum { VP8_EDGE_TOP, VP8_EDGE_LEFT, VP8_EDGE_TOPLEFT };
570 571
    int idx = CNT_ZERO;
    int cur_sign_bias = s->sign_bias[mb->ref_frame];
572
    int8_t *sign_bias = s->sign_bias;
573 574 575 576
    VP56mv near_mv[4];
    uint8_t cnt[4] = { 0 };
    VP56RangeCoder *c = &s->c;

D
Daniel Kang 已提交
577 578 579 580 581 582 583 584 585
    if (!layout) { // layout is inlined (s->mb_layout is not)
        mb_edge[0] = mb + 2;
        mb_edge[2] = mb + 1;
    }
    else {
        mb_edge[0] = mb - s->mb_width-1;
        mb_edge[2] = mb - s->mb_width-2;
    }

586 587
    AV_ZERO32(&near_mv[0]);
    AV_ZERO32(&near_mv[1]);
588
    AV_ZERO32(&near_mv[2]);
589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619

    /* Process MB on top, left and top-left */
    #define MV_EDGE_CHECK(n)\
    {\
        VP8Macroblock *edge = mb_edge[n];\
        int edge_ref = edge->ref_frame;\
        if (edge_ref != VP56_FRAME_CURRENT) {\
            uint32_t mv = AV_RN32A(&edge->mv);\
            if (mv) {\
                if (cur_sign_bias != sign_bias[edge_ref]) {\
                    /* SWAR negate of the values in mv. */\
                    mv = ~mv;\
                    mv = ((mv&0x7fff7fff) + 0x00010001) ^ (mv&0x80008000);\
                }\
                if (!n || mv != AV_RN32A(&near_mv[idx]))\
                    AV_WN32A(&near_mv[++idx], mv);\
                cnt[idx]      += 1 + (n != 2);\
            } else\
                cnt[CNT_ZERO] += 1 + (n != 2);\
        }\
    }

    MV_EDGE_CHECK(0)
    MV_EDGE_CHECK(1)
    MV_EDGE_CHECK(2)

    mb->partitioning = VP8_SPLITMVMODE_NONE;
    if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_ZERO]][0])) {
        mb->mode = VP8_MVMODE_MV;

        /* If we have three distinct MVs, merge first and last if they're the same */
620
        if (cnt[CNT_SPLITMV] && AV_RN32A(&near_mv[1 + VP8_EDGE_TOP]) == AV_RN32A(&near_mv[1 + VP8_EDGE_TOPLEFT]))
621 622 623 624 625 626 627 628 629 630 631 632
            cnt[CNT_NEAREST] += 1;

        /* Swap near and nearest if necessary */
        if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) {
            FFSWAP(uint8_t,     cnt[CNT_NEAREST],     cnt[CNT_NEAR]);
            FFSWAP( VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]);
        }

        if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) {
            if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_NEAR]][2])) {

                /* Choose the best mv out of 0,0 and the nearest mv */
J
Jason Garrett-Glaser 已提交
633
                clamp_mv(s, &mb->mv, &near_mv[CNT_ZERO + (cnt[CNT_NEAREST] >= cnt[CNT_ZERO])]);
634 635 636
                cnt[CNT_SPLITMV] = ((mb_edge[VP8_EDGE_LEFT]->mode    == VP8_MVMODE_SPLIT) +
                                    (mb_edge[VP8_EDGE_TOP]->mode     == VP8_MVMODE_SPLIT)) * 2 +
                                    (mb_edge[VP8_EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT);
637 638 639

                if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) {
                    mb->mode = VP8_MVMODE_SPLIT;
D
Daniel Kang 已提交
640
                    mb->mv = mb->bmv[decode_splitmvs(s, c, mb, layout) - 1];
641 642 643 644 645 646
                } else {
                    mb->mv.y += read_mv_component(c, s->prob->mvc[0]);
                    mb->mv.x += read_mv_component(c, s->prob->mvc[1]);
                    mb->bmv[0] = mb->mv;
                }
            } else {
J
Jason Garrett-Glaser 已提交
647
                clamp_mv(s, &mb->mv, &near_mv[CNT_NEAR]);
648 649 650
                mb->bmv[0] = mb->mv;
            }
        } else {
J
Jason Garrett-Glaser 已提交
651
            clamp_mv(s, &mb->mv, &near_mv[CNT_NEAREST]);
652 653 654 655 656 657 658 659 660
            mb->bmv[0] = mb->mv;
        }
    } else {
        mb->mode = VP8_MVMODE_ZERO;
        AV_ZERO32(&mb->mv);
        mb->bmv[0] = mb->mv;
    }
}

661
static av_always_inline
662
void decode_intra4x4_modes(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
D
Daniel Kang 已提交
663
                           int mb_x, int keyframe, int layout)
D
David Conrad 已提交
664
{
665 666
    uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;

D
Daniel Kang 已提交
667 668 669 670
    if (layout == 1) {
        VP8Macroblock *mb_top = mb - s->mb_width - 1;
        memcpy(mb->intra4x4_pred_mode_top, mb_top->intra4x4_pred_mode_top, 4);
    }
671
    if (keyframe) {
672
        int x, y;
D
Daniel Kang 已提交
673
        uint8_t* top;
674
        uint8_t* const left = s->intra4x4_pred_mode_left;
D
Daniel Kang 已提交
675 676 677 678
        if (layout == 1)
            top = mb->intra4x4_pred_mode_top;
        else
            top = s->intra4x4_pred_mode_top + 4 * mb_x;
679 680
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
681 682 683 684 685
                const uint8_t *ctx;
                ctx = vp8_pred4x4_prob_intra[top[x]][left[y]];
                *intra4x4 = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx);
                left[y] = top[x] = *intra4x4;
                intra4x4++;
D
David Conrad 已提交
686 687
            }
        }
688
    } else {
689
        int i;
690 691
        for (i = 0; i < 16; i++)
            intra4x4[i] = vp8_rac_get_tree(c, vp8_pred4x4_tree, vp8_pred4x4_prob_inter);
D
David Conrad 已提交
692 693 694
    }
}

695
static av_always_inline
D
Daniel Kang 已提交
696 697
void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
                    uint8_t *segment, uint8_t *ref, int layout)
D
David Conrad 已提交
698 699 700
{
    VP56RangeCoder *c = &s->c;

D
David Conrad 已提交
701 702 703
    if (s->segmentation.update_map) {
        int bit  = vp56_rac_get_prob(c, s->prob->segmentid[0]);
        *segment = vp56_rac_get_prob(c, s->prob->segmentid[1+bit]) + 2*bit;
704
    } else if (s->segmentation.enabled)
R
Ronald S. Bultje 已提交
705
        *segment = ref ? *ref : *segment;
706
    mb->segment = *segment;
D
David Conrad 已提交
707

708
    mb->skip = s->mbskip_enabled ? vp56_rac_get_prob(c, s->prob->mbskip) : 0;
D
David Conrad 已提交
709 710 711 712 713

    if (s->keyframe) {
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra);

        if (mb->mode == MODE_I4x4) {
D
Daniel Kang 已提交
714
            decode_intra4x4_modes(s, c, mb, mb_x, 1, layout);
715 716
        } else {
            const uint32_t modes = vp8_pred4x4_mode[mb->mode] * 0x01010101u;
D
Daniel Kang 已提交
717 718 719 720 721
            if (s->mb_layout == 1)
                AV_WN32A(mb->intra4x4_pred_mode_top, modes);
            else
                AV_WN32A(s->intra4x4_pred_mode_top + 4 * mb_x, modes);
            AV_WN32A( s->intra4x4_pred_mode_left, modes);
722
        }
D
David Conrad 已提交
723

724
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra);
D
David Conrad 已提交
725
        mb->ref_frame = VP56_FRAME_CURRENT;
726
    } else if (vp56_rac_get_prob_branchy(c, s->prob->intra)) {
D
David Conrad 已提交
727
        // inter MB, 16.2
728 729
        if (vp56_rac_get_prob_branchy(c, s->prob->last))
            mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ?
D
David Conrad 已提交
730 731 732
                VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN;
        else
            mb->ref_frame = VP56_FRAME_PREVIOUS;
J
Jason Garrett-Glaser 已提交
733
        s->ref_count[mb->ref_frame-1]++;
D
David Conrad 已提交
734 735

        // motion vectors, 16.3
D
Daniel Kang 已提交
736
        decode_mvs(s, mb, mb_x, mb_y, layout);
D
David Conrad 已提交
737 738 739 740
    } else {
        // intra MB, 16.1
        mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16);

741
        if (mb->mode == MODE_I4x4)
D
Daniel Kang 已提交
742
            decode_intra4x4_modes(s, c, mb, mb_x, 0, layout);
D
David Conrad 已提交
743

744
        mb->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c);
D
David Conrad 已提交
745
        mb->ref_frame = VP56_FRAME_CURRENT;
746
        mb->partitioning = VP8_SPLITMVMODE_NONE;
747
        AV_ZERO32(&mb->bmv[0]);
D
David Conrad 已提交
748 749 750
    }
}

751
#ifndef decode_block_coeffs_internal
D
David Conrad 已提交
752
/**
753
 * @param r arithmetic bitstream reader context
754 755
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
D
David Conrad 已提交
756
 * @param i initial coeff index, 0 unless a separate DC block is coded
757
 * @param qmul array holding the dc/ac dequant factor at position 0/1
D
David Conrad 已提交
758 759 760
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
D
Diego Biurrun 已提交
761
static int decode_block_coeffs_internal(VP56RangeCoder *r, int16_t block[16],
762
                                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
763
                                        int i, uint8_t *token_prob, int16_t qmul[2])
D
David Conrad 已提交
764
{
765
    VP56RangeCoder c = *r;
766
    goto skip_eob;
767
    do {
768
        int coeff;
769 770
        if (!vp56_rac_get_prob_branchy(&c, token_prob[0]))   // DCT_EOB
            break;
D
David Conrad 已提交
771

772
skip_eob:
773
        if (!vp56_rac_get_prob_branchy(&c, token_prob[1])) { // DCT_0
774
            if (++i == 16)
775
                break; // invalid input; blocks should end with EOB
776
            token_prob = probs[i][0];
777
            goto skip_eob;
778 779
        }

780
        if (!vp56_rac_get_prob_branchy(&c, token_prob[2])) { // DCT_1
781
            coeff = 1;
782
            token_prob = probs[i+1][1];
783
        } else {
784 785
            if (!vp56_rac_get_prob_branchy(&c, token_prob[3])) { // DCT 2,3,4
                coeff = vp56_rac_get_prob_branchy(&c, token_prob[4]);
786
                if (coeff)
787
                    coeff += vp56_rac_get_prob(&c, token_prob[5]);
788 789 790
                coeff += 2;
            } else {
                // DCT_CAT*
791 792 793
                if (!vp56_rac_get_prob_branchy(&c, token_prob[6])) {
                    if (!vp56_rac_get_prob_branchy(&c, token_prob[7])) { // DCT_CAT1
                        coeff  = 5 + vp56_rac_get_prob(&c, vp8_dct_cat1_prob[0]);
794 795
                    } else {                                    // DCT_CAT2
                        coeff  = 7;
796 797
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[0]) << 1;
                        coeff += vp56_rac_get_prob(&c, vp8_dct_cat2_prob[1]);
798 799
                    }
                } else {    // DCT_CAT3 and up
800 801
                    int a = vp56_rac_get_prob(&c, token_prob[8]);
                    int b = vp56_rac_get_prob(&c, token_prob[9+a]);
802 803
                    int cat = (a<<1) + b;
                    coeff  = 3 + (8<<cat);
804
                    coeff += vp8_rac_get_coeff(&c, ff_vp8_dct_cat_prob[cat]);
805 806
                }
            }
807
            token_prob = probs[i+1][2];
808
        }
809
        block[zigzag_scan[i]] = (vp8_rac_get(&c) ? -coeff : coeff) * qmul[!!i];
810
    } while (++i < 16);
811

812
    *r = c;
813
    return i;
D
David Conrad 已提交
814
}
815
#endif
D
David Conrad 已提交
816

817 818 819 820 821 822 823 824 825 826 827
/**
 * @param c arithmetic bitstream reader context
 * @param block destination for block coefficients
 * @param probs probabilities to use when reading trees from the bitstream
 * @param i initial coeff index, 0 unless a separate DC block is coded
 * @param zero_nhood the initial prediction context for number of surrounding
 *                   all-zero blocks (only left/top, so 0-2)
 * @param qmul array holding the dc/ac dequant factor at position 0/1
 * @return 0 if no coeffs were decoded
 *         otherwise, the index of the last coeff decoded plus one
 */
828
static av_always_inline
D
Diego Biurrun 已提交
829
int decode_block_coeffs(VP56RangeCoder *c, int16_t block[16],
830
                        uint8_t probs[16][3][NUM_DCT_TOKENS-1],
831 832 833 834 835 836 837 838
                        int i, int zero_nhood, int16_t qmul[2])
{
    uint8_t *token_prob = probs[i][zero_nhood];
    if (!vp56_rac_get_prob_branchy(c, token_prob[0]))   // DCT_EOB
        return 0;
    return decode_block_coeffs_internal(c, block, probs, i, token_prob, qmul);
}

839
static av_always_inline
D
Daniel Kang 已提交
840
void decode_mb_coeffs(VP8Context *s, VP8ThreadData *td, VP56RangeCoder *c, VP8Macroblock *mb,
841
                      uint8_t t_nnz[9], uint8_t l_nnz[9])
D
David Conrad 已提交
842 843 844
{
    int i, x, y, luma_start = 0, luma_ctx = 3;
    int nnz_pred, nnz, nnz_total = 0;
845
    int segment = mb->segment;
846
    int block_dc = 0;
D
David Conrad 已提交
847 848 849 850 851

    if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
        nnz_pred = t_nnz[8] + l_nnz[8];

        // decode DC values and do hadamard
D
Daniel Kang 已提交
852
        nnz = decode_block_coeffs(c, td->block_dc, s->prob->token[1], 0, nnz_pred,
D
David Conrad 已提交
853 854
                                  s->qmat[segment].luma_dc_qmul);
        l_nnz[8] = t_nnz[8] = !!nnz;
855 856 857 858
        if (nnz) {
            nnz_total += nnz;
            block_dc = 1;
            if (nnz == 1)
D
Daniel Kang 已提交
859
                s->vp8dsp.vp8_luma_dc_wht_dc(td->block, td->block_dc);
860
            else
D
Daniel Kang 已提交
861
                s->vp8dsp.vp8_luma_dc_wht(td->block, td->block_dc);
862
        }
D
David Conrad 已提交
863 864 865 866 867 868 869
        luma_start = 1;
        luma_ctx = 0;
    }

    // luma blocks
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++) {
J
Jason Garrett-Glaser 已提交
870
            nnz_pred = l_nnz[y] + t_nnz[x];
D
Daniel Kang 已提交
871
            nnz = decode_block_coeffs(c, td->block[y][x], s->prob->token[luma_ctx], luma_start,
J
Jason Garrett-Glaser 已提交
872
                                      nnz_pred, s->qmat[segment].luma_qmul);
873
            // nnz+block_dc may be one more than the actual last index, but we don't care
D
Daniel Kang 已提交
874
            td->non_zero_count_cache[y][x] = nnz + block_dc;
D
David Conrad 已提交
875 876 877 878 879 880 881 882 883 884 885
            t_nnz[x] = l_nnz[y] = !!nnz;
            nnz_total += nnz;
        }

    // chroma blocks
    // TODO: what to do about dimensions? 2nd dim for luma is x,
    // but for chroma it's (y<<1)|x
    for (i = 4; i < 6; i++)
        for (y = 0; y < 2; y++)
            for (x = 0; x < 2; x++) {
                nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x];
D
Daniel Kang 已提交
886
                nnz = decode_block_coeffs(c, td->block[i][(y<<1)+x], s->prob->token[2], 0,
D
David Conrad 已提交
887
                                          nnz_pred, s->qmat[segment].chroma_qmul);
D
Daniel Kang 已提交
888
                td->non_zero_count_cache[i][(y<<1)+x] = nnz;
D
David Conrad 已提交
889 890 891 892 893 894 895 896 897 898 899
                t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz;
                nnz_total += nnz;
            }

    // if there were no coded coeffs despite the macroblock not being marked skip,
    // we MUST not do the inner loop filter and should not do IDCT
    // Since skip isn't used for bitstream prediction, just manually set it.
    if (!nnz_total)
        mb->skip = 1;
}

900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920
/**
 * Save the bottom pixel row of a macroblock into top_border: 16 luma bytes
 * from row 15 of src_y at offset 0 and, unless `simple` is set, 8 bytes each
 * from row 7 of src_cb / src_cr at offsets 16 and 24.
 */
static av_always_inline
void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                      int linesize, int uvlinesize, int simple)
{
    uint8_t *last_luma_row = src_y + 15*linesize;

    AV_COPY128(top_border, last_luma_row);

    /* chroma is not saved when `simple` is set */
    if (simple)
        return;

    AV_COPY64(top_border + 16, src_cb + 7*uvlinesize);
    AV_COPY64(top_border + 24, src_cr + 7*uvlinesize);
}

/**
 * Exchange (or copy) the pixel row directly above a macroblock with the
 * contents of the saved top-border buffer.
 *
 * With xchg != 0 all spans are swapped. With xchg == 0 the spans at and to
 * the left of the macroblock origin are copied from the border buffer into
 * the picture (AV_COPY64(picture, border)), while the remaining spans are
 * still swapped.
 *
 * Chroma is only touched when `simple` is 0 or when mb_y is 0.
 */
static av_always_inline
void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
                    int simple, int xchg)
{
    // border data 32 bytes earlier, used for top-left (TL) prediction
    // NOTE(review): presumably the entry of the previous macroblock column -- confirm
    uint8_t *const tl_border = top_border - 32;
    // the picture row immediately above the macroblock, per plane
    uint8_t *const above_y   = src_y  -   linesize;
    uint8_t *const above_cb  = src_cb - uvlinesize;
    uint8_t *const above_cr  = src_cr - uvlinesize;

#define XCHG(a,b,xchg) do {                     \
        if (xchg) AV_SWAP64(b,a);               \
        else      AV_COPY64(b,a);               \
    } while (0)

    XCHG(tl_border + 8,  above_y - 8, xchg);
    XCHG(top_border,     above_y,     xchg);
    XCHG(top_border + 8, above_y + 8, 1);
    // the span right of this macroblock only exists if it is not the last column
    if (mb_x < mb_width - 1) {
        XCHG(top_border + 32, above_y + 16, 1);
    }

    // only copy chroma for normal loop filter
    // or to initialize the top row to 127
    if (simple && mb_y)
        return;

    XCHG(tl_border + 16,  above_cb - 8, xchg);
    XCHG(tl_border + 24,  above_cr - 8, xchg);
    XCHG(top_border + 16, above_cb,     1);
    XCHG(top_border + 24, above_cr,     1);
}

942
static av_always_inline
943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004
int check_dc_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (!mb_x) {
        return mb_y ? TOP_DC_PRED8x8 : DC_128_PRED8x8;
    } else {
        return mb_y ? mode : LEFT_DC_PRED8x8;
    }
}

/**
 * Pick the substitute for TM prediction at a macroblock position:
 * DC_129 when both mb_x and mb_y are 0, VERT when only mb_x is 0,
 * HOR when only mb_y is 0, and the unchanged mode otherwise.
 */
static av_always_inline
int check_tm_pred8x8_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x)
        return mb_y ? mode : HOR_PRED8x8;

    return mb_y ? VERT_PRED8x8 : DC_129_PRED8x8;
}

/**
 * Map a 16x16 / 8x8 intra prediction mode to the mode actually used at
 * macroblock position (mb_x, mb_y), substituting edge variants when mb_x
 * and/or mb_y is 0. Modes not listed here are returned unchanged.
 */
static av_always_inline
int check_intra_pred8x8_mode_emuedge(int mode, int mb_x, int mb_y)
{
    if (mode == DC_PRED8x8)
        return check_dc_pred8x8_mode(mode, mb_x, mb_y);

    if (mode == VERT_PRED8x8)
        return mb_y ? mode : DC_127_PRED8x8;

    if (mode == HOR_PRED8x8)
        return mb_x ? mode : DC_129_PRED8x8;

    if (mode == PLANE_PRED8x8 /*TM*/)
        return check_tm_pred8x8_mode(mode, mb_x, mb_y);

    return mode;
}

/**
 * 4x4 counterpart of check_tm_pred8x8_mode(): DC_129 when both coordinates
 * are 0, VERT_VP8 when only mb_x is 0, HOR_VP8 when only mb_y is 0, and the
 * unchanged mode otherwise.
 */
static av_always_inline
int check_tm_pred4x4_mode(int mode, int mb_x, int mb_y)
{
    if (mb_x)
        return mb_y ? mode : HOR_VP8_PRED;

    return mb_y ? VERT_VP8_PRED : DC_129_PRED;
}

static av_always_inline
int check_intra_pred4x4_mode_emuedge(int mode, int mb_x, int mb_y, int *copy_buf)
{
    switch (mode) {
    case VERT_PRED:
        if (!mb_x && mb_y) {
            *copy_buf = 1;
            return mode;
        }
        /* fall-through */
    case DIAG_DOWN_LEFT_PRED:
    case VERT_LEFT_PRED:
        return !mb_y ? DC_127_PRED : mode;
    case HOR_PRED:
        if (!mb_y) {
            *copy_buf = 1;
            return mode;
1005
        }
1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
        /* fall-through */
    case HOR_UP_PRED:
        return !mb_x ? DC_129_PRED : mode;
    case TM_VP8_PRED:
        return check_tm_pred4x4_mode(mode, mb_x, mb_y);
    case DC_PRED: // 4x4 DC doesn't use the same "H.264-style" exceptions as 16x16/8x8 DC
    case DIAG_DOWN_RIGHT_PRED:
    case VERT_RIGHT_PRED:
    case HOR_DOWN_PRED:
        if (!mb_y || !mb_x)
            *copy_buf = 1;
        return mode;
D
David Conrad 已提交
1018 1019 1020 1021
    }
    return mode;
}

1022
static av_always_inline
D
Daniel Kang 已提交
1023 1024
void intra_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
D
David Conrad 已提交
1025
{
M
Mans Rullgard 已提交
1026 1027
    int x, y, mode, nnz;
    uint32_t tr;
D
David Conrad 已提交
1028

1029 1030
    // for the first row, we need to run xchg_mb_border to init the top edge to 127
    // otherwise, skip it if we aren't going to deblock
1031
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1032 1033 1034 1035
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 1);

D
David Conrad 已提交
1036
    if (mb->mode < MODE_I4x4) {
1037
        mode = check_intra_pred8x8_mode_emuedge(mb->mode, mb_x, mb_y);
D
David Conrad 已提交
1038 1039 1040
        s->hpc.pred16x16[mode](dst[0], s->linesize);
    } else {
        uint8_t *ptr = dst[0];
1041
        uint8_t *intra4x4 = mb->intra4x4_pred_mode_mb;
1042
        uint8_t tr_top[4] = { 127, 127, 127, 127 };
D
David Conrad 已提交
1043 1044 1045 1046 1047 1048 1049

        // all blocks on the right edge of the macroblock use bottom edge
        // the top macroblock for their topright edge
        uint8_t *tr_right = ptr - s->linesize + 16;

        // if we're on the right edge of the frame, said edge is extended
        // from the top macroblock
1050
        if (mb_y &&
1051
            mb_x == s->mb_width-1) {
M
Mans Rullgard 已提交
1052
            tr = tr_right[-1]*0x01010101u;
D
David Conrad 已提交
1053 1054 1055
            tr_right = (uint8_t *)&tr;
        }

1056
        if (mb->skip)
D
Daniel Kang 已提交
1057
            AV_ZERO128(td->non_zero_count_cache);
1058

D
David Conrad 已提交
1059 1060 1061
        for (y = 0; y < 4; y++) {
            uint8_t *topright = ptr + 4 - s->linesize;
            for (x = 0; x < 4; x++) {
1062 1063 1064 1065
                int copy = 0, linesize = s->linesize;
                uint8_t *dst = ptr+4*x;
                DECLARE_ALIGNED(4, uint8_t, copy_dst)[5*8];

1066
                if ((y == 0 || x == 3) && mb_y == 0) {
1067 1068
                    topright = tr_top;
                } else if (x == 3)
D
David Conrad 已提交
1069 1070
                    topright = tr_right;

1071 1072 1073 1074 1075 1076 1077 1078 1079
                mode = check_intra_pred4x4_mode_emuedge(intra4x4[x], mb_x + x, mb_y + y, &copy);
                if (copy) {
                    dst = copy_dst + 12;
                    linesize = 8;
                    if (!(mb_y + y)) {
                        copy_dst[3] = 127U;
                        AV_WN32A(copy_dst+4, 127U * 0x01010101U);
                    } else {
                        AV_COPY32(copy_dst+4, ptr+4*x-s->linesize);
1080
                        if (!(mb_x + x)) {
1081
                            copy_dst[3] = 129U;
1082
                        } else {
1083
                            copy_dst[3] = ptr[4*x-s->linesize-1];
1084 1085
                        }
                    }
1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096
                    if (!(mb_x + x)) {
                        copy_dst[11] =
                        copy_dst[19] =
                        copy_dst[27] =
                        copy_dst[35] = 129U;
                    } else {
                        copy_dst[11] = ptr[4*x              -1];
                        copy_dst[19] = ptr[4*x+s->linesize  -1];
                        copy_dst[27] = ptr[4*x+s->linesize*2-1];
                        copy_dst[35] = ptr[4*x+s->linesize*3-1];
                    }
1097 1098 1099
                }
                s->hpc.pred4x4[mode](dst, topright, linesize);
                if (copy) {
R
Ronald S. Bultje 已提交
1100 1101 1102 1103
                    AV_COPY32(ptr+4*x              , copy_dst+12);
                    AV_COPY32(ptr+4*x+s->linesize  , copy_dst+20);
                    AV_COPY32(ptr+4*x+s->linesize*2, copy_dst+28);
                    AV_COPY32(ptr+4*x+s->linesize*3, copy_dst+36);
1104
                }
D
David Conrad 已提交
1105

D
Daniel Kang 已提交
1106
                nnz = td->non_zero_count_cache[y][x];
D
David Conrad 已提交
1107 1108
                if (nnz) {
                    if (nnz == 1)
D
Daniel Kang 已提交
1109
                        s->vp8dsp.vp8_idct_dc_add(ptr+4*x, td->block[y][x], s->linesize);
D
David Conrad 已提交
1110
                    else
D
Daniel Kang 已提交
1111
                        s->vp8dsp.vp8_idct_add(ptr+4*x, td->block[y][x], s->linesize);
D
David Conrad 已提交
1112 1113 1114 1115 1116
                }
                topright += 4;
            }

            ptr   += 4*s->linesize;
1117
            intra4x4 += 4;
D
David Conrad 已提交
1118 1119 1120
        }
    }

1121
    mode = check_intra_pred8x8_mode_emuedge(mb->chroma_pred_mode, mb_x, mb_y);
D
David Conrad 已提交
1122 1123
    s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
    s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
1124

1125
    if (mb_y && (s->deblock_filter || !mb_y) && td->thread_nr == 0)
1126 1127 1128
        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
                       s->filter.simple, 0);
D
David Conrad 已提交
1129 1130
}

1131 1132 1133 1134 1135 1136 1137
/* Edge requirements of the subpel filters, indexed by the fractional motion
 * vector position (0..7) in one dimension. */
static const uint8_t subpel_idx[3][8] = {
    /* nr. of left extra pixels, also function pointer index */
    [0] = { 0, 1, 2, 1, 2, 1, 2, 1 },
    /* nr. of extra pixels required */
    [1] = { 0, 3, 5, 3, 5, 3, 5, 3 },
    /* nr. of right extra pixels */
    [2] = { 0, 2, 3, 2, 3, 2, 3, 2 },
};

D
David Conrad 已提交
1138
/**
1139
 * luma MC function
D
David Conrad 已提交
1140 1141 1142
 *
 * @param s VP8 decoding context
 * @param dst target buffer for block data at block position
1143
 * @param ref reference picture buffer at origin (0, 0)
D
David Conrad 已提交
1144 1145 1146 1147 1148 1149 1150 1151
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
1152
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
D
David Conrad 已提交
1153
 */
1154
static av_always_inline
D
Daniel Kang 已提交
1155
void vp8_mc_luma(VP8Context *s, VP8ThreadData *td, uint8_t *dst,
1156
                 ThreadFrame *ref, const VP56mv *mv,
1157
                 int x_off, int y_off, int block_w, int block_h,
1158
                 int width, int height, ptrdiff_t linesize,
1159
                 vp8_mc_func mc_func[3][3])
D
David Conrad 已提交
1160
{
1161
    uint8_t *src = ref->f->data[0];
R
Ronald S. Bultje 已提交
1162

1163
    if (AV_RN32A(mv)) {
1164
        int src_linesize = linesize;
1165 1166 1167 1168 1169
        int mx = (mv->x << 1)&7, mx_idx = subpel_idx[0][mx];
        int my = (mv->y << 1)&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 2;
        y_off += mv->y >> 2;
1170 1171

        // edge emulation
R
Ronald S. Bultje 已提交
1172
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 4, 0);
1173
        src += y_off * linesize + x_off;
1174 1175
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1176 1177
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src - my_idx * linesize - mx_idx,
1178
                                     32, linesize,
1179 1180
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
R
Ronald S. Bultje 已提交
1181
                                     x_off - mx_idx, y_off - my_idx, width, height);
1182 1183
            src = td->edge_emu_buffer + mx_idx + 32 * my_idx;
            src_linesize = 32;
1184
        }
1185
        mc_func[my_idx][mx_idx](dst, linesize, src, src_linesize, block_h, mx, my);
R
Ronald S. Bultje 已提交
1186 1187
    } else {
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 4, 0);
1188
        mc_func[0][0](dst, linesize, src + y_off * linesize + x_off, linesize, block_h, 0, 0);
R
Ronald S. Bultje 已提交
1189
    }
D
David Conrad 已提交
1190 1191
}

1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208
/**
 * chroma MC function
 *
 * @param s VP8 decoding context
 * @param dst1 target buffer for block data at block position (U plane)
 * @param dst2 target buffer for block data at block position (V plane)
 * @param ref reference picture buffer at origin (0, 0)
 * @param mv motion vector (relative to block position) to get pixel data from
 * @param x_off horizontal position of block from origin (0, 0)
 * @param y_off vertical position of block from origin (0, 0)
 * @param block_w width of block (16, 8 or 4)
 * @param block_h height of block (always same as block_w)
 * @param width width of src/dst plane data
 * @param height height of src/dst plane data
 * @param linesize size of a single line of plane data, including padding
 * @param mc_func motion compensation function pointers (bilinear or sixtap MC)
 */
1209
static av_always_inline
D
Daniel Kang 已提交
1210
void vp8_mc_chroma(VP8Context *s, VP8ThreadData *td, uint8_t *dst1, uint8_t *dst2,
1211
                   ThreadFrame *ref, const VP56mv *mv, int x_off, int y_off,
1212
                   int block_w, int block_h, int width, int height, ptrdiff_t linesize,
1213 1214
                   vp8_mc_func mc_func[3][3])
{
1215
    uint8_t *src1 = ref->f->data[1], *src2 = ref->f->data[2];
R
Ronald S. Bultje 已提交
1216

1217 1218 1219 1220 1221 1222 1223 1224 1225 1226
    if (AV_RN32A(mv)) {
        int mx = mv->x&7, mx_idx = subpel_idx[0][mx];
        int my = mv->y&7, my_idx = subpel_idx[0][my];

        x_off += mv->x >> 3;
        y_off += mv->y >> 3;

        // edge emulation
        src1 += y_off * linesize + x_off;
        src2 += y_off * linesize + x_off;
R
Ronald S. Bultje 已提交
1227
        ff_thread_await_progress(ref, (3 + y_off + block_h + subpel_idx[2][my]) >> 3, 0);
1228 1229
        if (x_off < mx_idx || x_off >= width  - block_w - subpel_idx[2][mx] ||
            y_off < my_idx || y_off >= height - block_h - subpel_idx[2][my]) {
1230 1231
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src1 - my_idx * linesize - mx_idx,
1232
                                     32, linesize,
1233 1234
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
R
Ronald S. Bultje 已提交
1235
                                     x_off - mx_idx, y_off - my_idx, width, height);
1236 1237
            src1 = td->edge_emu_buffer + mx_idx + 32 * my_idx;
            mc_func[my_idx][mx_idx](dst1, linesize, src1, 32, block_h, mx, my);
1238

1239 1240
            s->vdsp.emulated_edge_mc(td->edge_emu_buffer,
                                     src2 - my_idx * linesize - mx_idx,
1241
                                     32, linesize,
1242 1243
                                     block_w + subpel_idx[1][mx],
                                     block_h + subpel_idx[1][my],
R
Ronald S. Bultje 已提交
1244
                                     x_off - mx_idx, y_off - my_idx, width, height);
1245 1246
            src2 = td->edge_emu_buffer + mx_idx + 32 * my_idx;
            mc_func[my_idx][mx_idx](dst2, linesize, src2, 32, block_h, mx, my);
1247 1248 1249 1250 1251
        } else {
            mc_func[my_idx][mx_idx](dst1, linesize, src1, linesize, block_h, mx, my);
            mc_func[my_idx][mx_idx](dst2, linesize, src2, linesize, block_h, mx, my);
        }
    } else {
R
Ronald S. Bultje 已提交
1252
        ff_thread_await_progress(ref, (3 + y_off + block_h) >> 3, 0);
1253 1254 1255 1256 1257
        mc_func[0][0](dst1, linesize, src1 + y_off * linesize + x_off, linesize, block_h, 0, 0);
        mc_func[0][0](dst2, linesize, src2 + y_off * linesize + x_off, linesize, block_h, 0, 0);
    }
}

1258
static av_always_inline
D
Daniel Kang 已提交
1259
void vp8_mc_part(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
1260
                 ThreadFrame *ref_frame, int x_off, int y_off,
1261 1262 1263
                 int bx_off, int by_off,
                 int block_w, int block_h,
                 int width, int height, VP56mv *mv)
1264 1265 1266 1267
{
    VP56mv uvmv = *mv;

    /* Y */
D
Daniel Kang 已提交
1268
    vp8_mc_luma(s, td, dst[0] + by_off * s->linesize + bx_off,
R
Ronald S. Bultje 已提交
1269
                ref_frame, mv, x_off + bx_off, y_off + by_off,
1270 1271
                block_w, block_h, width, height, s->linesize,
                s->put_pixels_tab[block_w == 8]);
1272 1273 1274 1275 1276 1277 1278 1279 1280 1281

    /* U/V */
    if (s->profile == 3) {
        uvmv.x &= ~7;
        uvmv.y &= ~7;
    }
    x_off   >>= 1; y_off   >>= 1;
    bx_off  >>= 1; by_off  >>= 1;
    width   >>= 1; height  >>= 1;
    block_w >>= 1; block_h >>= 1;
D
Daniel Kang 已提交
1282
    vp8_mc_chroma(s, td, dst[1] + by_off * s->uvlinesize + bx_off,
R
Ronald S. Bultje 已提交
1283 1284
                  dst[2] + by_off * s->uvlinesize + bx_off, ref_frame,
                  &uvmv, x_off + bx_off, y_off + by_off,
1285 1286
                  block_w, block_h, width, height, s->uvlinesize,
                  s->put_pixels_tab[1 + (block_w == 4)]);
1287 1288
}

1289 1290
/* Fetch pixels for estimated mv 4 macroblocks ahead.
 * Optimized for 64-byte cache lines.  Inspired by ffh264 prefetch_motion. */
1291
static av_always_inline void prefetch_motion(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, int mb_xy, int ref)
1292
{
J
Jason Garrett-Glaser 已提交
1293 1294
    /* Don't prefetch refs that haven't been used very often this frame. */
    if (s->ref_count[ref-1] > (mb_xy >> 5)) {
J
Jason Garrett-Glaser 已提交
1295
        int x_off = mb_x << 4, y_off = mb_y << 4;
J
Jason Garrett-Glaser 已提交
1296 1297
        int mx = (mb->mv.x>>2) + x_off + 8;
        int my = (mb->mv.y>>2) + y_off;
1298
        uint8_t **src= s->framep[ref]->tf.f->data;
J
Jason Garrett-Glaser 已提交
1299
        int off= mx + (my + (mb_x&3)*4)*s->linesize + 64;
R
Ronald S. Bultje 已提交
1300 1301 1302
        /* For threading, a ff_thread_await_progress here might be useful, but
         * it actually slows down the decoder. Since a bad prefetch doesn't
         * generate bad decoder output, we don't run it here. */
R
Ronald S. Bultje 已提交
1303
        s->vdsp.prefetch(src[0]+off, s->linesize, 4);
J
Jason Garrett-Glaser 已提交
1304
        off= (mx>>1) + ((my>>1) + (mb_x&7))*s->uvlinesize + 64;
R
Ronald S. Bultje 已提交
1305
        s->vdsp.prefetch(src[1]+off, src[2]-src[1], 2);
J
Jason Garrett-Glaser 已提交
1306
    }
1307 1308
}

D
David Conrad 已提交
1309 1310 1311
/**
 * Apply motion vectors to prediction buffer, chapter 18.
 */
1312
static av_always_inline
D
Daniel Kang 已提交
1313 1314
void inter_predict(VP8Context *s, VP8ThreadData *td, uint8_t *dst[3],
                   VP8Macroblock *mb, int mb_x, int mb_y)
D
David Conrad 已提交
1315 1316 1317
{
    int x_off = mb_x << 4, y_off = mb_y << 4;
    int width = 16*s->mb_width, height = 16*s->mb_height;
1318
    ThreadFrame *ref = &s->framep[mb->ref_frame]->tf;
1319
    VP56mv *bmv = mb->bmv;
D
David Conrad 已提交
1320

1321 1322
    switch (mb->partitioning) {
    case VP8_SPLITMVMODE_NONE:
D
Daniel Kang 已提交
1323
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1324
                    0, 0, 16, 16, width, height, &mb->mv);
1325
        break;
1326
    case VP8_SPLITMVMODE_4x4: {
D
David Conrad 已提交
1327
        int x, y;
1328
        VP56mv uvmv;
D
David Conrad 已提交
1329 1330 1331 1332

        /* Y */
        for (y = 0; y < 4; y++) {
            for (x = 0; x < 4; x++) {
D
Daniel Kang 已提交
1333
                vp8_mc_luma(s, td, dst[0] + 4*y*s->linesize + x*4,
R
Ronald S. Bultje 已提交
1334
                            ref, &bmv[4*y + x],
1335 1336 1337
                            4*x + x_off, 4*y + y_off, 4, 4,
                            width, height, s->linesize,
                            s->put_pixels_tab[2]);
D
David Conrad 已提交
1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352
            }
        }

        /* U/V */
        x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1;
        for (y = 0; y < 2; y++) {
            for (x = 0; x < 2; x++) {
                uvmv.x = mb->bmv[ 2*y    * 4 + 2*x  ].x +
                         mb->bmv[ 2*y    * 4 + 2*x+1].x +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].x +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].x;
                uvmv.y = mb->bmv[ 2*y    * 4 + 2*x  ].y +
                         mb->bmv[ 2*y    * 4 + 2*x+1].y +
                         mb->bmv[(2*y+1) * 4 + 2*x  ].y +
                         mb->bmv[(2*y+1) * 4 + 2*x+1].y;
1353 1354
                uvmv.x = (uvmv.x + 2 + (uvmv.x >> (INT_BIT-1))) >> 2;
                uvmv.y = (uvmv.y + 2 + (uvmv.y >> (INT_BIT-1))) >> 2;
D
David Conrad 已提交
1355 1356 1357 1358
                if (s->profile == 3) {
                    uvmv.x &= ~7;
                    uvmv.y &= ~7;
                }
D
Daniel Kang 已提交
1359
                vp8_mc_chroma(s, td, dst[1] + 4*y*s->uvlinesize + x*4,
R
Ronald S. Bultje 已提交
1360
                              dst[2] + 4*y*s->uvlinesize + x*4, ref, &uvmv,
1361 1362 1363
                              4*x + x_off, 4*y + y_off, 4, 4,
                              width, height, s->uvlinesize,
                              s->put_pixels_tab[2]);
D
David Conrad 已提交
1364 1365
            }
        }
1366 1367 1368
        break;
    }
    case VP8_SPLITMVMODE_16x8:
D
Daniel Kang 已提交
1369
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1370
                    0, 0, 16, 8, width, height, &bmv[0]);
D
Daniel Kang 已提交
1371
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1372
                    0, 8, 16, 8, width, height, &bmv[1]);
1373 1374
        break;
    case VP8_SPLITMVMODE_8x16:
D
Daniel Kang 已提交
1375
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1376
                    0, 0, 8, 16, width, height, &bmv[0]);
D
Daniel Kang 已提交
1377
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1378
                    8, 0, 8, 16, width, height, &bmv[1]);
1379 1380
        break;
    case VP8_SPLITMVMODE_8x8:
D
Daniel Kang 已提交
1381
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1382
                    0, 0, 8, 8, width, height, &bmv[0]);
D
Daniel Kang 已提交
1383
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1384
                    8, 0, 8, 8, width, height, &bmv[1]);
D
Daniel Kang 已提交
1385
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1386
                    0, 8, 8, 8, width, height, &bmv[2]);
D
Daniel Kang 已提交
1387
        vp8_mc_part(s, td, dst, ref, x_off, y_off,
1388
                    8, 8, 8, 8, width, height, &bmv[3]);
1389
        break;
D
David Conrad 已提交
1390 1391 1392
    }
}

D
Daniel Kang 已提交
1393 1394
/**
 * Add the residual of a macroblock to the already predicted pixels.
 *
 * td->non_zero_count_cache holds one byte per 4x4 block and is read four
 * blocks at a time as a little-endian 32-bit word: a byte of 0 means nothing
 * to add, 1 selects the DC-only transform, anything larger the full one.
 * When no byte in the word exceeds 1, the combined "dc_add4" routine handles
 * the four blocks in one call.
 *
 * Luma is skipped for MODE_I4x4, where intra_predict() adds it per block.
 */
static av_always_inline void idct_mb(VP8Context *s, VP8ThreadData *td,
                                     uint8_t *dst[3], VP8Macroblock *mb)
{
    int row, col, plane, blk;

    if (mb->mode != MODE_I4x4) {
        uint8_t *row_dst = dst[0];

        for (row = 0; row < 4; row++, row_dst += 4*s->linesize) {
            uint32_t counts = AV_RL32(td->non_zero_count_cache[row]);

            if (!counts)
                continue;

            if (!(counts&~0x01010101)) {
                /* only DC coefficients in this row of blocks */
                s->vp8dsp.vp8_idct_dc_add4y(row_dst, td->block[row], s->linesize);
                continue;
            }

            /* consume one count byte per block; stop once the rest is zero */
            for (col = 0; col < 4 && counts; col++, counts >>= 8) {
                const uint8_t n = (uint8_t)counts;

                if (n == 1)
                    s->vp8dsp.vp8_idct_dc_add(row_dst+4*col, td->block[row][col], s->linesize);
                else if (n > 1)
                    s->vp8dsp.vp8_idct_add(row_dst+4*col, td->block[row][col], s->linesize);
            }
        }
    }

    for (plane = 0; plane < 2; plane++) {
        uint32_t counts = AV_RL32(td->non_zero_count_cache[4+plane]);
        uint8_t *plane_dst;

        if (!counts)
            continue;

        plane_dst = dst[1+plane];
        if (!(counts&~0x01010101)) {
            s->vp8dsp.vp8_idct_dc_add4uv(plane_dst, td->block[4+plane], s->uvlinesize);
            continue;
        }

        /* the four chroma blocks in raster order: blk = (y << 1) + x */
        for (blk = 0; blk < 4 && counts; blk++, counts >>= 8) {
            const uint8_t n = (uint8_t)counts;
            uint8_t *blk_dst = plane_dst + (blk >> 1) * 4*s->uvlinesize + 4*(blk & 1);

            if (n == 1)
                s->vp8dsp.vp8_idct_dc_add(blk_dst, td->block[4+plane][blk], s->uvlinesize);
            else if (n > 1)
                s->vp8dsp.vp8_idct_add(blk_dst, td->block[4+plane][blk], s->uvlinesize);
        }
    }
}

1446
/**
 * Compute the loop filter parameters of one macroblock and store them in f.
 *
 * The level starts from the frame level (or the segment's level, absolute or
 * relative), gets the per-reference and per-mode deltas added when enabled,
 * and is clipped to 6 bits. The interior limit is derived from the level and
 * the frame sharpness and is at least 1.
 */
static av_always_inline void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
{
    int level = s->filter.level;
    int limit;

    if (s->segmentation.enabled) {
        const int seg_level = s->segmentation.filter_level[mb->segment];

        /* segment values either replace the frame level or offset it */
        level = s->segmentation.absolute_vals ? seg_level : seg_level + level;
    }

    if (s->lf_delta.enabled)
        level += s->lf_delta.ref[mb->ref_frame] + s->lf_delta.mode[mb->mode];

    level = av_clip_uintp2(level, 6);

    limit = level;
    if (s->filter.sharpness) {
        limit >>= (s->filter.sharpness + 3) >> 2;
        limit = FFMIN(limit, 9 - s->filter.sharpness);
    }
    limit = FFMAX(limit, 1);

    f->filter_level = level;
    f->inner_limit  = limit;
    /* the inner edges are left alone only for skipped macroblocks that are
     * neither MODE_I4x4 nor VP8_MVMODE_SPLIT */
    f->inner_filter = !(mb->skip && mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT);
}

1476
/**
 * Apply the normal (non-simple) loop filter to one macroblock.
 *
 * Call order: left macroblock edge (skipped when mb_x is 0), inner vertical
 * edges at luma columns 4/8/12 and chroma column 4, top macroblock edge
 * (skipped when mb_y is 0), then inner horizontal edges at luma rows 4/8/12
 * and chroma row 4. Inner edges are only filtered when f->inner_filter is set.
 * Nothing is done when f->filter_level is 0.
 *
 * @param s    decoder context (linesize, uvlinesize, keyframe, vp8dsp are read)
 * @param dst  pointers to the macroblock's Y, U and V samples
 * @param f    filter parameters for this macroblock
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
static av_always_inline void filter_mb(VP8Context *s, uint8_t *dst[3], VP8FilterStrength *f, int mb_x, int mb_y)
{
    // high-edge-variance threshold, indexed by [keyframe][filter_level]
    static const uint8_t hev_thresh_lut[2][64] = {
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
          3, 3, 3, 3 },
        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
          2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
          2, 2, 2, 2 }
    };
    int mbedge_lim, bedge_lim, hev_thresh;
    int edge;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;
    int uvlinesize   = s->uvlinesize;

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;
    hev_thresh = hev_thresh_lut[s->keyframe][filter_level];

    // left macroblock edge
    if (mb_x) {
        s->vp8dsp.vp8_h_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // inner vertical edges
    if (inner_filter) {
        for (edge = 4; edge <= 12; edge += 4)
            s->vp8dsp.vp8_h_loop_filter16y_inner(dst[0] + edge, linesize,
                                                 bedge_lim, inner_limit,
                                                 hev_thresh);
        s->vp8dsp.vp8_h_loop_filter8uv_inner(dst[1] + 4, dst[2] + 4,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }

    // top macroblock edge
    if (mb_y) {
        s->vp8dsp.vp8_v_loop_filter16y(dst[0], linesize,
                                       mbedge_lim, inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv(dst[1], dst[2], uvlinesize,
                                       mbedge_lim, inner_limit, hev_thresh);
    }

    // inner horizontal edges
    if (inner_filter) {
        for (edge = 4; edge <= 12; edge += 4)
            s->vp8dsp.vp8_v_loop_filter16y_inner(dst[0] + edge*linesize,
                                                 linesize, bedge_lim,
                                                 inner_limit, hev_thresh);
        s->vp8dsp.vp8_v_loop_filter8uv_inner(dst[1] + 4 * uvlinesize,
                                             dst[2] + 4 * uvlinesize,
                                             uvlinesize, bedge_lim,
                                             inner_limit, hev_thresh);
    }
}

1546
/**
 * Apply the simple loop filter to the luma plane of one macroblock.
 *
 * Call order: left macroblock edge (skipped when mb_x is 0), inner vertical
 * edges at columns 4/8/12, top macroblock edge (skipped when mb_y is 0), inner
 * horizontal edges at rows 4/8/12. Inner edges are only filtered when
 * f->inner_filter is set. Nothing is done when f->filter_level is 0.
 *
 * @param s    decoder context (linesize and vp8dsp are read)
 * @param dst  pointer to the macroblock's luma samples
 * @param f    filter parameters for this macroblock
 * @param mb_x macroblock column
 * @param mb_y macroblock row
 */
static av_always_inline void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8FilterStrength *f, int mb_x, int mb_y)
{
    int mbedge_lim, bedge_lim;
    int edge;
    int filter_level = f->filter_level;
    int inner_limit  = f->inner_limit;
    int inner_filter = f->inner_filter;
    int linesize     = s->linesize;

    if (!filter_level)
        return;

    bedge_lim  = 2*filter_level + inner_limit;
    mbedge_lim = bedge_lim + 4;

    if (mb_x)
        s->vp8dsp.vp8_h_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        for (edge = 4; edge <= 12; edge += 4)
            s->vp8dsp.vp8_h_loop_filter_simple(dst + edge, linesize, bedge_lim);
    }

    if (mb_y)
        s->vp8dsp.vp8_v_loop_filter_simple(dst, linesize, mbedge_lim);
    if (inner_filter) {
        for (edge = 4; edge <= 12; edge += 4)
            s->vp8dsp.vp8_v_loop_filter_simple(dst + edge*linesize, linesize, bedge_lim);
    }
}

1577
#define MARGIN (16 << 2)
1578 1579
static void vp8_decode_mv_mb_modes(AVCodecContext *avctx, VP8Frame *curframe,
                                   VP8Frame *prev_frame)
1580 1581
{
    VP8Context *s = avctx->priv_data;
D
Daniel Kang 已提交
1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596
    int mb_x, mb_y;

    s->mv_min.y = -MARGIN;
    s->mv_max.y = ((s->mb_height - 1) << 6) + MARGIN;
    for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
        VP8Macroblock *mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
        int mb_xy = mb_y*s->mb_width;

        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);

        s->mv_min.x = -MARGIN;
        s->mv_max.x = ((s->mb_width - 1) << 6) + MARGIN;
        for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
            if (mb_y == 0)
                AV_WN32A((mb-s->mb_width-1)->intra4x4_pred_mode_top, DC_PRED*0x01010101);
1597 1598 1599
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 1);
D
Daniel Kang 已提交
1600 1601 1602 1603 1604 1605 1606 1607
            s->mv_min.x -= 64;
            s->mv_max.x -= 64;
        }
        s->mv_min.y -= 64;
        s->mv_max.y -= 64;
    }
}

1608
#if HAVE_THREADS
/*
 * Row-thread synchronisation helpers.
 *
 * A position is packed into one int as (mb_y << 16) | (mb_x & 0xFFFF).
 *
 * Both macros expand to a single statement WITHOUT a trailing semicolon, so
 * they must be followed by ';' at the call site and can be used safely as the
 * body of an unbraced if/else. Every macro argument is parenthesized in the
 * expansion.
 */

/*
 * Wait until otd->thread_mb_pos has reached (mb_x_check, mb_y_check).
 * While waiting, td->wait_mb_pos holds the awaited position; it is reset to
 * INT_MAX afterwards. The wait uses otd's mutex and condition variable.
 */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check)\
    do {\
        int tmp = ((mb_y_check) << 16) | ((mb_x_check) & 0xFFFF);\
        if ((otd)->thread_mb_pos < tmp) {\
            pthread_mutex_lock(&(otd)->lock);\
            (td)->wait_mb_pos = tmp;\
            do {\
                if ((otd)->thread_mb_pos >= tmp)\
                    break;\
                pthread_cond_wait(&(otd)->cond, &(otd)->lock);\
            } while (1);\
            (td)->wait_mb_pos = INT_MAX;\
            pthread_mutex_unlock(&(otd)->lock);\
        }\
    } while (0)

/*
 * Publish (mb_x, mb_y) as td's current position. With slice threading and
 * more than one job, td's condition variable is broadcast when a neighbouring
 * thread is waiting for a position that has now been reached, or
 * unconditionally when next_td or prev_td is NULL.
 *
 * Uses avctx, num_jobs, next_td and prev_td from the calling scope.
 */
#define update_pos(td, mb_y, mb_x)\
    do {\
        int pos              = ((mb_y) << 16) | ((mb_x) & 0xFFFF);\
        int sliced_threading = (avctx->active_thread_type == FF_THREAD_SLICE) && (num_jobs > 1);\
        int is_null          = (next_td == NULL) || (prev_td == NULL);\
        int pos_check        = (is_null) ? 1 :\
                                (next_td != (td) && pos >= next_td->wait_mb_pos) ||\
                                (prev_td != (td) && pos >= prev_td->wait_mb_pos);\
        (td)->thread_mb_pos = pos;\
        if (sliced_threading && pos_check) {\
            pthread_mutex_lock(&(td)->lock);\
            pthread_cond_broadcast(&(td)->cond);\
            pthread_mutex_unlock(&(td)->lock);\
        }\
    } while (0)
#else
/* Without thread support both helpers are no-op statements. */
#define check_thread_pos(td, otd, mb_x_check, mb_y_check) do { } while (0)
#define update_pos(td, mb_y, mb_x) do { } while (0)
#endif
D
Daniel Kang 已提交
1644 1645 1646 1647 1648 1649 1650

/**
 * Decode one macroblock row (modes if needed, coefficients, prediction and
 * IDCT) without running the loop filter.
 *
 * The row index is taken from the upper 16 bits of the thread's
 * thread_mb_pos. For each macroblock the filter strength is computed when
 * deblocking is enabled, and progress is published through update_pos().
 *
 * @param avctx    codec context; priv_data is the VP8Context
 * @param tdata    unused here
 * @param jobnr    job index, used to locate the neighbouring threads' data
 * @param threadnr index into s->thread_data for this thread's state
 */
static void vp8_decode_mb_row_no_filter(AVCodecContext *avctx, void *tdata,
                                        int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    // avctx, num_jobs, prev_td and next_td are referenced by name from
    // update_pos(); they must keep these names.
    VP8ThreadData *prev_td, *next_td;
    int num_jobs = s->num_jobs;
    int mb_y = td->thread_mb_pos>>16;
    int mb_x, mb_xy = mb_y*s->mb_width;
    VP8Frame *curframe = s->curframe, *prev_frame = s->prev_frame;
    VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->tf.f->data[0] + 16*mb_y*s->linesize,
        curframe->tf.f->data[1] +  8*mb_y*s->uvlinesize,
        curframe->tf.f->data[2] +  8*mb_y*s->uvlinesize
    };

    // The first row has no predecessor and the last row no successor; in
    // those cases the neighbour pointer refers to this thread itself.
    prev_td = (mb_y == 0) ? td
                          : &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    next_td = (mb_y == s->mb_height-1) ? td
                                       : &s->thread_data[(jobnr + 1)%num_jobs];

    if (s->mb_layout == 1) {
        mb = s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1);
    } else {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, mb_y, 0);
        mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
        memset(mb - 1, 0, sizeof(*mb)); // zero left macroblock
        AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
    }

    memset(td->left_nnz, 0, sizeof(td->left_nnz));

    s->mv_min.x = -MARGIN;
    s->mv_max.x = ((s->mb_width  - 1) << 6) + MARGIN;

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
        // Wait until the thread handling the previous row has reached column
        // mb_x+1 of row mb_y-1. For thread 0 the awaited column is offset by
        // mb_width+3, the same offset vp8_filter_mb_row() uses when it
        // publishes its position.
        if (prev_td != td) {
            if (threadnr != 0) {
                check_thread_pos(td, prev_td, mb_x+1, mb_y-1);
            } else {
                check_thread_pos(td, prev_td, (s->mb_width+3) + (mb_x+1), mb_y-1);
            }
        }

        s->vdsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
        s->vdsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);

        // with mb_layout == 1 the modes were decoded up front
        if (!s->mb_layout)
            decode_mb_mode(s, mb, mb_x, mb_y, curframe->seg_map->data + mb_xy,
                           prev_frame && prev_frame->seg_map ?
                           prev_frame->seg_map->data + mb_xy : NULL, 0);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);

        if (!mb->skip)
            decode_mb_coeffs(s, td, c, mb, s->top_nnz[mb_x], td->left_nnz);

        if (mb->mode <= MODE_I4x4)
            intra_predict(s, td, dst, mb, mb_x, mb_y);
        else
            inter_predict(s, td, dst, mb, mb_x, mb_y);

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN);

        // mb->skip is tested again on purpose rather than cached from above
        if (mb->skip) {
            AV_ZERO64(td->left_nnz);
            AV_WN64(s->top_nnz[mb_x], 0);   // array of 9, so unaligned

            // Reset DC block predictors if they would exist if the mb had coefficients
            if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) {
                td->left_nnz[8]     = 0;
                s->top_nnz[mb_x][8] = 0;
            }
        } else {
            idct_mb(s, td, dst, mb);
        }

        if (s->deblock_filter) {
            filter_level_for_mb(s, mb, &td->filter_strength[mb_x]);

            // With several jobs, the last thread saves the unfiltered border.
            if (num_jobs != 1 && threadnr == num_jobs-1) {
                if (s->filter.simple)
                    backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
                else
                    backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
            }
        }

        prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_GOLDEN2);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;
        s->mv_min.x -= 64;
        s->mv_max.x -= 64;

        // NOTE(review): the loop condition keeps mb_x below s->mb_width, so
        // the first branch does not appear reachable - confirm the intent.
        if (mb_x == s->mb_width+1) {
            update_pos(td, mb_y, s->mb_width+3);
        } else {
            update_pos(td, mb_y, mb_x);
        }
    }
}

D
Daniel Kang 已提交
1752 1753
/**
 * Run the loop filter over one macroblock row.
 *
 * The row index is taken from the upper 16 bits of the thread's
 * thread_mb_pos. Positions published from here are offset by mb_width+3
 * relative to the plain column index.
 *
 * @param avctx    codec context; priv_data is the VP8Context
 * @param tdata    unused here
 * @param jobnr    job index, used to locate the neighbouring threads' data
 * @param threadnr index into s->thread_data for this thread's state
 */
static void vp8_filter_mb_row(AVCodecContext *avctx, void *tdata,
                              int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    VP8ThreadData *td = &s->thread_data[threadnr];
    // avctx, num_jobs, prev_td and next_td are referenced by name from
    // update_pos(); they must keep these names.
    VP8ThreadData *prev_td, *next_td;
    int num_jobs = s->num_jobs;
    int mb_y = td->thread_mb_pos>>16;
    int mb_x;
    AVFrame *curframe = s->curframe->tf.f;
    VP8Macroblock *mb;
    uint8_t *dst[3] = {
        curframe->data[0] + 16*mb_y*s->linesize,
        curframe->data[1] +  8*mb_y*s->uvlinesize,
        curframe->data[2] +  8*mb_y*s->uvlinesize
    };

    mb = (s->mb_layout == 1)
         ? s->macroblocks_base + ((s->mb_width+1)*(mb_y + 1) + 1)
         : s->macroblocks + (s->mb_height - mb_y - 1)*2;

    // The first row has no predecessor and the last row no successor; in
    // those cases the neighbour pointer refers to this thread itself.
    prev_td = (mb_y == 0) ? td
                          : &s->thread_data[(jobnr + num_jobs - 1)%num_jobs];
    next_td = (mb_y == s->mb_height-1) ? td
                                       : &s->thread_data[(jobnr + 1)%num_jobs];

    for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb++) {
        VP8FilterStrength *strength = &td->filter_strength[mb_x];

        // wait for the row above to have been filtered past this column
        if (prev_td != td) {
            check_thread_pos(td, prev_td, (mb_x+1) + (s->mb_width+3), mb_y-1);
        }
        // wait for the row below, unless it is handled by thread 0
        if (next_td != td && next_td != &s->thread_data[0]) {
            check_thread_pos(td, next_td, mb_x+1, mb_y+1);
        }

        // with a single job the border is saved here, before filtering
        if (num_jobs == 1) {
            if (s->filter.simple)
                backup_mb_border(s->top_border[mb_x+1], dst[0], NULL, NULL, s->linesize, 0, 1);
            else
                backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
        }

        if (s->filter.simple)
            filter_mb_simple(s, dst[0], strength, mb_x, mb_y);
        else
            filter_mb(s, dst, strength, mb_x, mb_y);

        dst[0] += 16;
        dst[1] += 8;
        dst[2] += 8;

        update_pos(td, mb_y, (s->mb_width+3) + mb_x);
    }
}

D
Daniel Kang 已提交
1806 1807
/**
 * Job entry point: decode (and, if enabled, loop-filter) every num_jobs-th
 * macroblock row, starting at row jobnr.
 *
 * After each row the position (mb_y, 0xFFFF) is published, the vertical
 * motion vector clamping window is shifted by 64, and - with frame
 * threading - progress is reported on the current frame.
 *
 * @param avctx    codec context; priv_data is the VP8Context
 * @param tdata    passed through to the row functions
 * @param jobnr    job index; selects the first row and the thread data slot
 * @param threadnr thread index, stored in td->thread_nr and passed on
 * @return always 0
 */
static int vp8_decode_mb_row_sliced(AVCodecContext *avctx, void *tdata,
                                    int jobnr, int threadnr)
{
    VP8Context *s = avctx->priv_data;
    // NOTE(review): this slot is indexed by jobnr while the row functions
    // index s->thread_data by threadnr - presumably the two always match;
    // verify against the execute2() implementation.
    VP8ThreadData *td = &s->thread_data[jobnr];
    // avctx, num_jobs, next_td and prev_td are referenced by name from
    // update_pos(); with both neighbours NULL it always signals.
    VP8ThreadData *next_td = NULL, *prev_td = NULL;
    VP8Frame *curframe = s->curframe;
    int num_jobs = s->num_jobs;
    int mb_y = jobnr;

    td->thread_nr = threadnr;

    while (mb_y < s->mb_height) {
        td->thread_mb_pos = mb_y<<16;

        vp8_decode_mb_row_no_filter(avctx, tdata, jobnr, threadnr);
        if (s->deblock_filter)
            vp8_filter_mb_row(avctx, tdata, jobnr, threadnr);
        update_pos(td, mb_y, INT_MAX & 0xFFFF);

        s->mv_min.y -= 64;
        s->mv_max.y -= 64;

        if (avctx->active_thread_type == FF_THREAD_FRAME)
            ff_thread_report_progress(&curframe->tf, mb_y, 0);

        mb_y += num_jobs;
    }

    return 0;
}

J
Justin Ruggles 已提交
1833 1834
/**
 * Decode one VP8 packet.
 *
 * Parses the frame header, picks an unused frame buffer, updates the
 * reference frame pointers for the next frame, and runs the row decoding
 * jobs through avctx->execute2().
 *
 * @param avctx     codec context; priv_data is the VP8Context
 * @param data      output AVFrame, referenced from the decoded frame when the
 *                  frame is not invisible
 * @param got_frame set to 1 when a frame was stored in data
 * @param avpkt     input packet
 * @return avpkt->size on success, a negative error code otherwise
 */
int ff_vp8_decode_frame(AVCodecContext *avctx, void *data, int *got_frame,
                        AVPacket *avpkt)
{
    VP8Context *s = avctx->priv_data;
    int ret, i, referenced, num_jobs;
    enum AVDiscard skip_thresh;
    VP8Frame *av_uninit(curframe), *prev_frame;

    ret = decode_frame_header(s, avpkt->data, avpkt->size);
    if (ret < 0)
        goto err;

    prev_frame = s->framep[VP56_FRAME_CURRENT];

    // does this frame update any of the reference slots?
    referenced = s->update_last ||
                 s->update_golden == VP56_FRAME_CURRENT ||
                 s->update_altref == VP56_FRAME_CURRENT;

    if (!referenced)
        skip_thresh = AVDISCARD_NONREF;
    else if (!s->keyframe)
        skip_thresh = AVDISCARD_NONKEY;
    else
        skip_thresh = AVDISCARD_ALL;

    if (avctx->skip_frame >= skip_thresh) {
        s->invisible = 1;
        memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
        goto skip_decode;
    }
    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;

    // release no longer referenced frames
    for (i = 0; i < 5; i++) {
        VP8Frame *cand = &s->frames[i];
        int in_use = cand == prev_frame ||
                     cand == s->framep[VP56_FRAME_PREVIOUS] ||
                     cand == s->framep[VP56_FRAME_GOLDEN]   ||
                     cand == s->framep[VP56_FRAME_GOLDEN2];

        if (cand->tf.f->data[0] && !in_use)
            vp8_release_frame(s, cand);
    }

    // find a free buffer
    for (i = 0; i < 5; i++) {
        VP8Frame *cand = &s->frames[i];

        if (cand == prev_frame ||
            cand == s->framep[VP56_FRAME_PREVIOUS] ||
            cand == s->framep[VP56_FRAME_GOLDEN]   ||
            cand == s->framep[VP56_FRAME_GOLDEN2])
            continue;

        curframe = s->framep[VP56_FRAME_CURRENT] = cand;
        break;
    }
    if (i == 5) {
        av_log(avctx, AV_LOG_FATAL, "Ran out of free frames!\n");
        abort();
    }
    if (curframe->tf.f->data[0])
        vp8_release_frame(s, curframe);

    // Given that arithmetic probabilities are updated every frame, it's quite likely
    // that the values we have on a random interframe are complete junk if we didn't
    // start decode on a keyframe. So just don't display anything rather than junk.
    if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] ||
                         !s->framep[VP56_FRAME_GOLDEN]   ||
                         !s->framep[VP56_FRAME_GOLDEN2])) {
        av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n");
        ret = AVERROR_INVALIDDATA;
        goto err;
    }

    curframe->tf.f->key_frame = s->keyframe;
    curframe->tf.f->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
    ret = vp8_alloc_frame(s, curframe, referenced);
    if (ret < 0)
        goto err;

    // Work out the reference pointers for the next frame. Golden and altref
    // may be taken from another slot (e.g. swapped); otherwise they are kept.
    s->next_framep[VP56_FRAME_GOLDEN2]  = s->update_altref != VP56_FRAME_NONE
                                          ? s->framep[s->update_altref]
                                          : s->framep[VP56_FRAME_GOLDEN2];
    s->next_framep[VP56_FRAME_GOLDEN]   = s->update_golden != VP56_FRAME_NONE
                                          ? s->framep[s->update_golden]
                                          : s->framep[VP56_FRAME_GOLDEN];
    s->next_framep[VP56_FRAME_PREVIOUS] = s->update_last
                                          ? curframe
                                          : s->framep[VP56_FRAME_PREVIOUS];
    s->next_framep[VP56_FRAME_CURRENT]  = curframe;

    ff_thread_finish_setup(avctx);

    s->linesize   = curframe->tf.f->linesize[0];
    s->uvlinesize = curframe->tf.f->linesize[1];

    memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
    if (!s->mb_layout) {
        /* Zero macroblock structures for top/top-left prediction from outside the frame. */
        memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
        if (s->keyframe)
            memset(s->intra4x4_pred_mode_top, DC_PRED, s->mb_width*4);
    }

    memset(s->ref_count, 0, sizeof(s->ref_count));

    if (s->mb_layout == 1) {
        // Make sure the previous frame has read its segmentation map,
        // if we re-use the same map.
        if (prev_frame && s->segmentation.enabled &&
            !s->segmentation.update_map)
            ff_thread_await_progress(&prev_frame->tf, 1, 0);
        vp8_decode_mv_mb_modes(avctx, curframe, prev_frame);
    }

    // Frame threading decodes a frame with a single job; otherwise the job
    // count is limited by both the coefficient partitions and the threads.
    num_jobs = avctx->active_thread_type == FF_THREAD_FRAME
               ? 1
               : FFMIN(s->num_coeff_partitions, avctx->thread_count);

    s->num_jobs   = num_jobs;
    s->curframe   = curframe;
    s->prev_frame = prev_frame;
    s->mv_min.y   = -MARGIN;
    s->mv_max.y   = ((s->mb_height - 1) << 6) + MARGIN;
    for (i = 0; i < MAX_THREADS; i++) {
        VP8ThreadData *slot = &s->thread_data[i];

        slot->thread_mb_pos = 0;
        slot->wait_mb_pos   = INT_MAX;
    }
    avctx->execute2(avctx, vp8_decode_mb_row_sliced, s->thread_data, NULL, num_jobs);

    ff_thread_report_progress(&curframe->tf, INT_MAX, 0);
    memcpy(&s->framep[0], &s->next_framep[0], sizeof(s->framep[0]) * 4);

skip_decode:
    // if future frames don't use the updated probabilities,
    // reset them to the values we saved
    if (!s->update_probabilities)
        s->prob[0] = s->prob[1];

    if (!s->invisible) {
        ret = av_frame_ref(data, curframe->tf.f);
        if (ret < 0)
            return ret;
        *got_frame = 1;
    }

    return avpkt->size;

err:
    // undo any changes to the pending reference pointers
    memcpy(&s->next_framep[0], &s->framep[0], sizeof(s->framep[0]) * 4);
    return ret;
}

J
Justin Ruggles 已提交
1978
/**
 * Tear down the decoder: flush it via vp8_decode_flush_impl(avctx, 1) and
 * free the AVFrame of every entry in s->frames.
 *
 * @param avctx codec context; priv_data is the VP8Context
 * @return always 0
 */
av_cold int ff_vp8_decode_free(AVCodecContext *avctx)
{
    VP8Context *ctx = avctx->priv_data;
    int idx = 0;

    vp8_decode_flush_impl(avctx, 1);

    while (idx < FF_ARRAY_ELEMS(ctx->frames)) {
        av_frame_free(&ctx->frames[idx].tf.f);
        idx++;
    }

    return 0;
}

/**
 * Allocate an AVFrame for every entry in s->frames.
 *
 * Stops at the first failed allocation; entries allocated before that are
 * left in place for the caller to release.
 *
 * @param s decoder context
 * @return 0 on success, AVERROR(ENOMEM) if an allocation fails
 */
static av_cold int vp8_init_frames(VP8Context *s)
{
    int idx = 0;

    while (idx < FF_ARRAY_ELEMS(s->frames)) {
        s->frames[idx].tf.f = av_frame_alloc();
        if (!s->frames[idx].tf.f)
            return AVERROR(ENOMEM);
        idx++;
    }

    return 0;
}

J
Justin Ruggles 已提交
2001
/**
 * Initialise the decoder: set the output pixel format to YUV420P, request
 * progress allocation, set up the DSP contexts and allocate the frame pool.
 *
 * If the frame pool cannot be allocated, the decoder is freed again.
 *
 * @param avctx codec context; priv_data is the VP8Context
 * @return 0 on success, a negative error code from vp8_init_frames() otherwise
 */
av_cold int ff_vp8_decode_init(AVCodecContext *avctx)
{
    VP8Context *ctx = avctx->priv_data;
    int err;

    ctx->avctx = avctx;
    avctx->pix_fmt = AV_PIX_FMT_YUV420P;
    avctx->internal->allocate_progress = 1;

    ff_videodsp_init(&ctx->vdsp, 8);
    ff_h264_pred_init(&ctx->hpc, AV_CODEC_ID_VP8, 8, 1);
    ff_vp8dsp_init(&ctx->vp8dsp);

    err = vp8_init_frames(ctx);
    if (err >= 0)
        return 0;

    ff_vp8_decode_free(avctx);
    return err;
}

R
Ronald S. Bultje 已提交
2022 2023 2024
/**
 * Initialise a per-thread copy of the decoder context: record the owning
 * codec context and allocate this copy's frame pool.
 *
 * If the frame pool cannot be allocated, the copy is freed again.
 *
 * @param avctx codec context of the thread copy
 * @return 0 on success, a negative error code from vp8_init_frames() otherwise
 */
static av_cold int vp8_decode_init_thread_copy(AVCodecContext *avctx)
{
    VP8Context *ctx = avctx->priv_data;
    int err;

    ctx->avctx = avctx;

    err = vp8_init_frames(ctx);
    if (err >= 0)
        return 0;

    ff_vp8_decode_free(avctx);
    return err;
}

/*
 * Translate a pointer into s_src->frames[] to the entry with the same index
 * in s->frames[]; NULL stays NULL. Uses s and s_src from the calling scope.
 * The argument and the whole expansion are parenthesized so the macro can be
 * used inside larger expressions.
 */
#define REBASE(pic) \
    ((pic) ? (pic) - &s_src->frames[0] + &s->frames[0] : NULL)

/**
 * Copy the decoder state a following frame thread needs from src to dst.
 *
 * Copies the probabilities, segmentation, loop-filter deltas and sign bias,
 * references every source frame that holds data, and rebases the source's
 * next_framep pointers into dst's own frame array.
 *
 * @param dst destination codec context
 * @param src source codec context
 * @return 0 on success, a negative error code if referencing a frame fails
 */
static int vp8_decode_update_thread_context(AVCodecContext *dst, const AVCodecContext *src)
{
    // REBASE() refers to these two locals by name.
    VP8Context *s = dst->priv_data, *s_src = src->priv_data;
    int i;

    // dimensions changed: drop the buffers sized for the old dimensions
    if (s->macroblocks_base &&
        (s_src->mb_width != s->mb_width || s_src->mb_height != s->mb_height)) {
        free_buffers(s);
        s->mb_width  = s_src->mb_width;
        s->mb_height = s_src->mb_height;
    }

    // take the saved probabilities if the source did not update them
    s->prob[0]      = s_src->prob[!s_src->update_probabilities];
    s->segmentation = s_src->segmentation;
    s->lf_delta     = s_src->lf_delta;
    memcpy(s->sign_bias, s_src->sign_bias, sizeof(s->sign_bias));

    for (i = 0; i < FF_ARRAY_ELEMS(s_src->frames); i++) {
        int ret;

        if (!s_src->frames[i].tf.f->data[0])
            continue;

        ret = vp8_ref_frame(s, &s->frames[i], &s_src->frames[i]);
        if (ret < 0)
            return ret;
    }

    for (i = 0; i < 4; i++)
        s->framep[i] = REBASE(s_src->next_framep[i]);

    return 0;
}

2073
AVCodec ff_vp8_decoder = {
2074
    .name                  = "vp8",
2075
    .long_name             = NULL_IF_CONFIG_SMALL("On2 VP8"),
2076
    .type                  = AVMEDIA_TYPE_VIDEO,
2077
    .id                    = AV_CODEC_ID_VP8,
2078
    .priv_data_size        = sizeof(VP8Context),
J
Justin Ruggles 已提交
2079 2080 2081
    .init                  = ff_vp8_decode_init,
    .close                 = ff_vp8_decode_free,
    .decode                = ff_vp8_decode_frame,
D
Daniel Kang 已提交
2082
    .capabilities          = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS | CODEC_CAP_SLICE_THREADS,
2083
    .flush                 = vp8_decode_flush,
R
Ronald S. Bultje 已提交
2084 2085
    .init_thread_copy      = ONLY_IF_THREADS_ENABLED(vp8_decode_init_thread_copy),
    .update_thread_context = ONLY_IF_THREADS_ENABLED(vp8_decode_update_thread_context),
D
David Conrad 已提交
2086
};
M
Michael Niedermayer 已提交
2087