/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
24

M
Michael Niedermayer 已提交
25
/**
 * @file
 * DSP utils
 */
29

30
#include "libavutil/attributes.h"
31
#include "libavutil/imgutils.h"
32
#include "libavutil/internal.h"
F
Fabrice Bellard 已提交
33
#include "avcodec.h"
34
#include "copy_block.h"
35
#include "dct.h"
F
Fabrice Bellard 已提交
36
#include "dsputil.h"
37
#include "simple_idct.h"
M
Michael Niedermayer 已提交
38
#include "faandct.h"
M
Michael Niedermayer 已提交
39
#include "faanidct.h"
40
#include "imgconvert.h"
41
#include "mathops.h"
42 43
#include "mpegvideo.h"
#include "config.h"
44

M
Måns Rullgård 已提交
45
/* Lookup table with external linkage. Code in this file reads it through
 * ff_squareTbl + 256, indexed by a pixel value or by the difference of two
 * pixels (-255..255). Zero-initialised here; presumably filled with squares
 * by init code that is not part of this section -- TODO confirm. */
uint32_t ff_squareTbl[512] = {0, };
F
Fabrice Bellard 已提交
46

47
/* Instantiate dsputil_template.c twice, once per BIT_DEPTH value.
 * The 8-bit pass leaves BIT_DEPTH defined as 8 for the rest of this file.
 * NOTE(review): the *_8_c helpers called further down (e.g. put_pixels2_8_c)
 * presumably come from the 8-bit instantiation -- confirm in the template. */
#define BIT_DEPTH 16
#include "dsputil_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 8
#include "dsputil_template.c"

54 55 56
// Byte-replicated constants: ~0UL/255 is 0x01 in every byte of an unsigned long,
// so these are 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever (and likewise for 0x80),
// depending on the width of unsigned long (the cpu's native arithmetic size).
#define pb_7f (~0UL/255 * 0x7f)
#define pb_80 (~0UL/255 * 0x80)
57

58 59 60 61 62 63 64 65 66 67 68 69 70
/* Specific zigzag scan for the 248 IDCT. NOTE that, unlike the
   specification, we interleave the fields.
   64 entries, each a coefficient index in the range 0..63. */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};

M
Michael Niedermayer 已提交
71
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
72
DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
M
Michael Niedermayer 已提交
73

74
/* Alternate horizontal coefficient scan order: 64 entries, each a
 * coefficient index in 0..63, every index appearing exactly once. */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

85
/* Alternate vertical coefficient scan order: 64 entries, each a
 * coefficient index in 0..63, every index appearing exactly once. */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};

96 97
/* Input permutation for the simple_idct_mmx.
 * Copied verbatim into the IDCT permutation when the permutation type is
 * FF_SIMPLE_IDCT_PERM. Each of the 64 indices 0x00..0x3F appears once. */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

108 109
/* Column reordering within a row of 8, used for the low three index bits in
 * the FF_SSE2_IDCT_PERM case of ff_init_scantable_permutation(). */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};

110 111 112
/**
 * Fill a ScanTable from a scan order and an IDCT permutation.
 *
 * @param permutation   IDCT permutation, indexed by the entries of
 *                      src_scantable
 * @param st            table to fill; st->scantable stores the
 *                      src_scantable pointer itself (no copy is made)
 * @param src_scantable 64-entry scan order
 */
av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
                               const uint8_t *src_scantable)
{
    int i;
    int end;

    st->scantable = src_scantable;

    /* permutated[i]: scan position i mapped through the IDCT permutation */
    for (i = 0; i < 64; i++)
        st->permutated[i] = permutation[src_scantable[i]];

    /* raster_end[i]: running maximum of permutated[0..i] */
    end = -1;
    for (i = 0; i < 64; i++) {
        const int j = st->permutated[i];

        if (j > end)
            end = j;
        st->raster_end[i] = end;
    }
}

133 134
av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                           int idct_permutation_type)
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
{
    int i;

    switch(idct_permutation_type){
    case FF_NO_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= simple_mmx_permutation[i];
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= ((i&7)<<3) | (i>>3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
        break;
    case FF_SSE2_IDCT_PERM:
        for(i=0; i<64; i++)
            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
        break;
    default:
        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
    }
}

168
/**
 * Sum of all pixel values of a 16x16 block.
 *
 * @param pix       top-left pixel of the block
 * @param line_size distance in bytes between the starts of consecutive rows
 * @return the sum of the 256 pixels
 */
static int pix_sum_c(uint8_t * pix, int line_size)
{
    int sum = 0;
    int row, col;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            sum += pix[col];
        pix += line_size;
    }
    return sum;
}

190
static int pix_norm1_c(uint8_t * pix, int line_size)
M
Michael Niedermayer 已提交
191 192
{
    int s, i, j;
M
Måns Rullgård 已提交
193
    uint32_t *sq = ff_squareTbl + 256;
M
Michael Niedermayer 已提交
194 195 196

    s = 0;
    for (i = 0; i < 16; i++) {
197
        for (j = 0; j < 16; j += 8) {
198 199 200 201 202 203 204 205 206 207
#if 0
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
208
#if HAVE_FAST_64BIT
209 210 211 212 213
            register uint64_t x=*(uint64_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
214 215 216 217 218
            s += sq[(x>>32)&0xff];
            s += sq[(x>>40)&0xff];
            s += sq[(x>>48)&0xff];
            s += sq[(x>>56)&0xff];
#else
219 220 221 222 223
            register uint32_t x=*(uint32_t*)pix;
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
224 225 226 227 228
            x=*(uint32_t*)(pix+4);
            s += sq[x&0xff];
            s += sq[(x>>8)&0xff];
            s += sq[(x>>16)&0xff];
            s += sq[(x>>24)&0xff];
229
#endif
230
#endif
231 232 233
            pix += 8;
        }
        pix += line_size - 16;
M
Michael Niedermayer 已提交
234 235 236 237
    }
    return s;
}

M
Michael Niedermayer 已提交
238
/**
 * Byte-swap w 32-bit words from src into dst using av_bswap32().
 * Nothing is written when w <= 0.
 */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w){
    int i;

    for (i = 0; i < w; i++)
        dst[i] = av_bswap32(src[i]);
}
M
Michael Niedermayer 已提交
255

M
Mans Rullgard 已提交
256 257 258 259 260 261
/**
 * Byte-swap len 16-bit words from src into dst using av_bswap16().
 *
 * A non-positive len writes nothing, matching bswap_buf(). A counted-down
 * `while (len--)` loop would instead run away on a negative len.
 */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    int i;

    for (i = 0; i < len; i++)
        dst[i] = av_bswap16(src[i]);
}

M
Michael Niedermayer 已提交
262 263 264
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
{
    int s, i;
M
Måns Rullgård 已提交
265
    uint32_t *sq = ff_squareTbl + 256;
M
Michael Niedermayer 已提交
266 267 268 269 270 271 272 273 274 275 276 277 278

    s = 0;
    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

M
Michael Niedermayer 已提交
279
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
M
Michael Niedermayer 已提交
280 281
{
    int s, i;
M
Måns Rullgård 已提交
282
    uint32_t *sq = ff_squareTbl + 256;
M
Michael Niedermayer 已提交
283 284

    s = 0;
M
Michael Niedermayer 已提交
285
    for (i = 0; i < h; i++) {
M
Michael Niedermayer 已提交
286 287 288 289 290 291 292 293 294 295 296 297 298 299
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

M
Michael Niedermayer 已提交
300
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
301
{
302
    int s, i;
M
Måns Rullgård 已提交
303
    uint32_t *sq = ff_squareTbl + 256;
304 305

    s = 0;
M
Michael Niedermayer 已提交
306
    for (i = 0; i < h; i++) {
307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
        s += sq[pix1[ 0] - pix2[ 0]];
        s += sq[pix1[ 1] - pix2[ 1]];
        s += sq[pix1[ 2] - pix2[ 2]];
        s += sq[pix1[ 3] - pix2[ 3]];
        s += sq[pix1[ 4] - pix2[ 4]];
        s += sq[pix1[ 5] - pix2[ 5]];
        s += sq[pix1[ 6] - pix2[ 6]];
        s += sq[pix1[ 7] - pix2[ 7]];
        s += sq[pix1[ 8] - pix2[ 8]];
        s += sq[pix1[ 9] - pix2[ 9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];
323

324 325
        pix1 += line_size;
        pix2 += line_size;
326 327 328 329
    }
    return s;
}

D
Diego Biurrun 已提交
330
/**
 * Store the per-pixel difference s1 - s2 of two 8x8 blocks.
 *
 * @param block  output, 64 coefficients stored row by row (8 per row)
 * @param s1     minuend block
 * @param s2     subtrahend block
 * @param stride row stride in bytes, shared by s1 and s2
 */
static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride){
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        s1    += stride;
        s2    += stride;
        block += 8;
    }
}


D
Diego Biurrun 已提交
351
/**
 * Write an 8x8 block of coefficients to a picture, passing each value
 * through av_clip_uint8() so it fits the uint8_t pixel range.
 *
 * @param block     64 coefficients stored row by row (8 per row)
 * @param pixels    top-left destination pixel
 * @param line_size destination row stride in bytes
 */
static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int row, col;

    /* write the pixels */
    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(block[col]);

        pixels += line_size;
        block  += 8;
    }
}

D
Diego Biurrun 已提交
372
/**
 * Write an 8x8 block of signed coefficients to a picture with a +128 bias:
 * values below -128 become 0, values above 127 become 255, anything else
 * is stored as value + 128.
 *
 * @param block     64 coefficients stored row by row (8 per row)
 * @param pixels    top-left destination pixel
 * @param line_size destination row stride in bytes
 */
static void put_signed_pixels_clamped_c(const int16_t *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++) {
            /* bias first, then clamp to 0..255 */
            const int biased = block[col] + 128;

            if (biased < 0)
                pixels[col] = 0;
            else if (biased > 255)
                pixels[col] = 255;
            else
                pixels[col] = (uint8_t)biased;
        }
        block  += 8;
        pixels += line_size;
    }
}

393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412
/**
 * Add an 8x8 block of coefficients to a picture without clamping: each
 * sum is stored back into a uint8_t, so it wraps modulo 256.
 *
 * @param pixels    top-left pixel of the 8x8 area to update
 * @param block     64 coefficients stored row by row (8 per row)
 * @param line_size picture row stride in bytes
 */
static void add_pixels8_c(uint8_t *restrict pixels,
                          int16_t *block,
                          int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] += block[col];
        block  += 8;
        pixels += line_size;
    }
}

D
Diego Biurrun 已提交
413
/**
 * Add an 8x8 block of coefficients to a picture, passing each sum through
 * av_clip_uint8() so it fits the uint8_t pixel range.
 *
 * @param block     64 coefficients stored row by row (8 per row)
 * @param pixels    top-left pixel of the 8x8 area to update
 * @param line_size picture row stride in bytes
 */
static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int row, col;

    /* read, add and write back the pixels */
    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
        pixels += line_size;
        block  += 8;
    }
}
M
Michael Niedermayer 已提交
432

D
Diego Biurrun 已提交
433
/**
 * Sum of the absolute values (via FFABS) of the 64 coefficients of a block.
 */
static int sum_abs_dctelem_c(int16_t *block)
{
    int sum = 0, i;

    for (i = 0; i < 64; i++)
        sum += FFABS(block[i]);
    return sum;
}

K
Kostya Shishkov 已提交
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
/**
 * Set a block 16 bytes wide and h rows high to a constant value.
 *
 * @param block     top-left byte of the block
 * @param value     byte value to store
 * @param line_size row stride in bytes
 * @param h         number of rows
 */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int row = 0;

    while (row < h) {
        memset(block, value, 16);
        block += line_size;
        row++;
    }
}

/**
 * Set a block 8 bytes wide and h rows high to a constant value.
 *
 * @param block     top-left byte of the block
 * @param value     byte value to store
 * @param line_size row stride in bytes
 * @param h         number of rows
 */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    int row = 0;

    while (row < h) {
        memset(block, value, 8);
        block += line_size;
        row++;
    }
}

F
Fabrice Bellard 已提交
461 462 463
/* Rounded averages of two and of four values. Every argument is
 * parenthesized so that operands containing lower-precedence operators
 * (e.g. x|y or x>>1) are averaged as a whole. */
#define avg2(a,b) (((a)+(b)+1)>>1)
#define avg4(a,b,c,d) (((a)+(b)+(c)+(d)+2)>>2)

464
/**
 * Bilinear interpolation of a block 8 pixels wide and h rows high at a
 * fractional offset of (x16/16, y16/16) pixels.
 *
 * The four weights A..D sum to 256, hence the final shift by 8. Every row
 * reads 9 pixels from the current source row and 9 from the next one.
 *
 * @param dst     destination block
 * @param src     source block
 * @param stride  row stride in bytes, shared by dst and src
 * @param h       number of rows
 * @param x16     horizontal fraction in 1/16 pixel units
 * @param y16     vertical fraction in 1/16 pixel units
 * @param rounder value added to each weighted sum before the shift
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int row, col;

    for (row = 0; row < h; row++) {
        for (col = 0; col < 8; col++)
            dst[col] = (A * src[col]          + B * src[col + 1] +
                        C * src[stride + col] + D * src[stride + col + 1] +
                        rounder) >> 8;
        dst += stride;
        src += stride;
    }
}

487
/**
 * Motion compensation with a per-pixel source position, for a block
 * 8 pixels wide and h rows high.
 *
 * For each destination pixel the source position is (vx >> 16, vy >> 16);
 * its low `shift` bits are the fractional part and the remaining bits the
 * integer pixel position. Inside the picture the four neighbours are
 * bilinearly interpolated; where one or both coordinates fall outside,
 * that coordinate is clamped with av_clip() and interpolation is done only
 * along the in-range axis (or not at all).
 *
 * @param dst      destination block
 * @param src      source picture
 * @param stride   row stride in bytes, shared by dst and src
 * @param h        number of destination rows
 * @param ox, oy   source position of the first pixel of the first row
 * @param dxx, dyx increments of vx, vy per destination column
 * @param dxy, dyy increments of ox, oy per destination row
 * @param shift    number of fractional bits left after the >> 16
 * @param r        rounding constant added before the >> (shift * 2)
 * @param width    source width in pixels
 * @param height   source height in pixels
 */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s = 1 << shift;

    /* after the decrement, "coordinate < width/height" guarantees that the
     * +1 neighbour used by the interpolation is still inside the picture */
    width--;
    height--;

    for (y = 0; y < h; y++) {
        int x;

        vx = ox;
        vy = oy;
        for (x = 0; x < 8; x++) { //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x  = vx >> 16;
            src_y  = vy >> 16;
            frac_x = src_x & (s - 1);
            frac_y = src_y & (s - 1);
            src_x >>= shift;
            src_y >>= shift;

            /* the unsigned compare rejects negative coordinates as well */
            if ((unsigned)src_x < width) {
                if ((unsigned)src_y < height) {
                    /* fully inside: bilinear interpolation */
                    index = src_x + src_y * stride;
                    dst[y * stride + x] = (  (  src[index             ] * (s - frac_x)
                                              + src[index          + 1] *      frac_x ) * (s - frac_y)
                                           + (  src[index + stride    ] * (s - frac_x)
                                              + src[index + stride + 1] *      frac_x ) *      frac_y
                                           + r) >> (shift * 2);
                } else {
                    /* row clamped: horizontal interpolation only */
                    index = src_x + av_clip(src_y, 0, height) * stride;
                    dst[y * stride + x] = ( (  src[index    ] * (s - frac_x)
                                             + src[index + 1] *      frac_x ) * s
                                           + r) >> (shift * 2);
                }
            } else {
                if ((unsigned)src_y < height) {
                    /* column clamped: vertical interpolation only */
                    index = av_clip(src_x, 0, width) + src_y * stride;
                    dst[y * stride + x] = ( (  src[index         ] * (s - frac_y)
                                             + src[index + stride] *      frac_y ) * s
                                           + r) >> (shift * 2);
                } else {
                    /* both clamped: copy the nearest pixel */
                    index = av_clip(src_x, 0, width) + av_clip(src_y, 0, height) * stride;
                    dst[y * stride + x] = src[index];
                }
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
544 545 546

/**
 * Third-pel "put" at offset (0, 0): a plain block copy, dispatched on
 * width to the matching put_pixelsN_8_c() helper. Widths other than
 * 2, 4, 8 and 16 do nothing.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch (width) {
    case 2:  put_pixels2_8_c (dst, src, stride, height); break;
    case 4:  put_pixels4_8_c (dst, src, stride, height); break;
    case 8:  put_pixels8_8_c (dst, src, stride, height); break;
    case 16: put_pixels16_8_c(dst, src, stride, height); break;
    }
}

/**
 * Third-pel "put": each output pixel is (2*a + b + 1) / 3, where a is the
 * source pixel and b its right neighbour. The division is a multiply by
 * 683 followed by a shift by 11 (683 / 2048 ~= 1/3).
 * Reads width + 1 pixels from each source row.
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];

            dst[col] = (683 * (2 * a + b + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

/**
 * Third-pel "put": each output pixel is (a + 2*b + 1) / 3, where a is the
 * source pixel and b its right neighbour. The division is a multiply by
 * 683 followed by a shift by 11 (683 / 2048 ~= 1/3).
 * Reads width + 1 pixels from each source row.
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];

            dst[col] = (683 * (a + 2 * b + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
575

576 577 578 579
/**
 * Third-pel "put": each output pixel is (2*a + c + 1) / 3, where a is the
 * source pixel and c the pixel one row below it. The division is a
 * multiply by 683 followed by a shift by 11 (683 / 2048 ~= 1/3).
 * Reads height + 1 source rows.
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int c = src[col + stride];

            dst[col] = (683 * (2 * a + c + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}
586

587 588 589 590
/**
 * Third-pel "put": each output pixel is (4*a + 3*b + 3*c + 2*d + 6) / 12
 * over the 2x2 neighbourhood a b / c d whose top-left pixel is the source
 * pixel. The division is a multiply by 2731 followed by a shift by 15
 * (2731 / 32768 ~= 1/12).
 * Reads width + 1 pixels from each of height + 1 source rows.
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];
            const int c = src[col + stride];
            const int d = src[col + stride + 1];

            dst[col] = (2731 * (4 * a + 3 * b + 3 * c + 2 * d + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

/**
 * Third-pel "put": each output pixel is (3*a + 2*b + 4*c + 3*d + 6) / 12
 * over the 2x2 neighbourhood a b / c d whose top-left pixel is the source
 * pixel. The division is a multiply by 2731 followed by a shift by 15
 * (2731 / 32768 ~= 1/12).
 * Reads width + 1 pixels from each of height + 1 source rows.
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];
            const int c = src[col + stride];
            const int d = src[col + stride + 1];

            dst[col] = (2731 * (3 * a + 2 * b + 4 * c + 3 * d + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

/**
 * Third-pel "put": each output pixel is (a + 2*c + 1) / 3, where a is the
 * source pixel and c the pixel one row below it. The division is a
 * multiply by 683 followed by a shift by 11 (683 / 2048 ~= 1/3).
 * Reads height + 1 source rows.
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int c = src[col + stride];

            dst[col] = (683 * (a + 2 * c + 1)) >> 11;
        }
        src += stride;
        dst += stride;
    }
}

/**
 * Third-pel "put": each output pixel is (3*a + 4*b + 2*c + 3*d + 6) / 12
 * over the 2x2 neighbourhood a b / c d whose top-left pixel is the source
 * pixel. The division is a multiply by 2731 followed by a shift by 15
 * (2731 / 32768 ~= 1/12).
 * Reads width + 1 pixels from each of height + 1 source rows.
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];
            const int c = src[col + stride];
            const int d = src[col + stride + 1];

            dst[col] = (2731 * (3 * a + 4 * b + 2 * c + 3 * d + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}

/**
 * Third-pel "put": each output pixel is (2*a + 3*b + 3*c + 4*d + 6) / 12
 * over the 2x2 neighbourhood a b / c d whose top-left pixel is the source
 * pixel. The division is a multiply by 2731 followed by a shift by 15
 * (2731 / 32768 ~= 1/12).
 * Reads width + 1 pixels from each of height + 1 source rows.
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];
            const int c = src[col + stride];
            const int d = src[col + stride + 1];

            dst[col] = (2731 * (2 * a + 3 * b + 3 * c + 4 * d + 6)) >> 15;
        }
        src += stride;
        dst += stride;
    }
}
641 642 643

/**
 * Third-pel "avg" at offset (0, 0): dispatched on width to the matching
 * avg_pixelsN_8_c() helper. Widths other than 2, 4, 8 and 16 do nothing.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    switch (width) {
    case 2:  avg_pixels2_8_c (dst, src, stride, height); break;
    case 4:  avg_pixels4_8_c (dst, src, stride, height); break;
    case 8:  avg_pixels8_8_c (dst, src, stride, height); break;
    case 16: avg_pixels16_8_c(dst, src, stride, height); break;
    }
}

/**
 * Third-pel "avg": interpolates (2*a + b + 1) / 3 from the source pixel a
 * and its right neighbour b (as 683 * x >> 11), then stores the rounded
 * average of that value and the pixel already in dst.
 * Reads width + 1 pixels from each source row.
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];
            const int interp = (683 * (2 * a + b + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

/**
 * Third-pel "avg": interpolates (a + 2*b + 1) / 3 from the source pixel a
 * and its right neighbour b (as 683 * x >> 11), then stores the rounded
 * average of that value and the pixel already in dst.
 * Reads width + 1 pixels from each source row.
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];
            const int interp = (683 * (a + 2 * b + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
672

673 674 675 676
/**
 * Third-pel "avg": interpolates (2*a + c + 1) / 3 from the source pixel a
 * and the pixel c one row below it (as 683 * x >> 11), then stores the
 * rounded average of that value and the pixel already in dst.
 * Reads height + 1 source rows.
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int c = src[col + stride];
            const int interp = (683 * (2 * a + c + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
683

684 685 686 687
/**
 * Third-pel "avg": interpolates (4*a + 3*b + 3*c + 2*d + 6) / 12 over the
 * 2x2 neighbourhood a b / c d whose top-left pixel is the source pixel
 * (as 2731 * x >> 15), then stores the rounded average of that value and
 * the pixel already in dst.
 * Reads width + 1 pixels from each of height + 1 source rows.
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];
            const int c = src[col + stride];
            const int d = src[col + stride + 1];
            const int interp = (2731 * (4 * a + 3 * b + 3 * c + 2 * d + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

/**
 * Third-pel "avg": interpolates (3*a + 2*b + 4*c + 3*d + 6) / 12 over the
 * 2x2 neighbourhood a b / c d whose top-left pixel is the source pixel
 * (as 2731 * x >> 15), then stores the rounded average of that value and
 * the pixel already in dst.
 * Reads width + 1 pixels from each of height + 1 source rows.
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];
            const int c = src[col + stride];
            const int d = src[col + stride + 1];
            const int interp = (2731 * (3 * a + 2 * b + 4 * c + 3 * d + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

/**
 * Third-pel "avg": interpolates (a + 2*c + 1) / 3 from the source pixel a
 * and the pixel c one row below it (as 683 * x >> 11), then stores the
 * rounded average of that value and the pixel already in dst.
 * Reads height + 1 source rows.
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int c = src[col + stride];
            const int interp = (683 * (a + 2 * c + 1)) >> 11;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

/**
 * Third-pel "avg": interpolates (3*a + 4*b + 2*c + 3*d + 6) / 12 over the
 * 2x2 neighbourhood a b / c d whose top-left pixel is the source pixel
 * (as 2731 * x >> 15), then stores the rounded average of that value and
 * the pixel already in dst.
 * Reads width + 1 pixels from each of height + 1 source rows.
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];
            const int c = src[col + stride];
            const int d = src[col + stride + 1];
            const int interp = (2731 * (3 * a + 4 * b + 2 * c + 3 * d + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}

/**
 * Third-pel "avg": interpolates (2*a + 3*b + 3*c + 4*d + 6) / 12 over the
 * 2x2 neighbourhood a b / c d whose top-left pixel is the source pixel
 * (as 2731 * x >> 15), then stores the rounded average of that value and
 * the pixel already in dst.
 * Reads width + 1 pixels from each of height + 1 source rows.
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int row, col;

    for (row = 0; row < height; row++) {
        for (col = 0; col < width; col++) {
            const int a = src[col];
            const int b = src[col + 1];
            const int c = src[col + stride];
            const int d = src[col + stride + 1];
            const int interp = (2731 * (2 * a + 3 * b + 3 * c + 4 * d + 6)) >> 15;

            dst[col] = (dst[col] + interp + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
738

M
Michael Niedermayer 已提交
739
#define QPEL_MC(r, OPNAME, RND, OP) \
740
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
741
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
M
Michael Niedermayer 已提交
742 743 744 745 746 747 748 749 750 751 752 753 754 755
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
M
Michael Niedermayer 已提交
756 757
}\
\
758
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
M
Michael Niedermayer 已提交
759
    const int w=8;\
760
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
M
Michael Niedermayer 已提交
761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
        dst++;\
        src++;\
    }\
}\
\
786
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
787
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
M
Michael Niedermayer 已提交
788
    int i;\
M
Michael Niedermayer 已提交
789
    \
M
Michael Niedermayer 已提交
790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812
    for(i=0; i<h; i++)\
    {\
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
813
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
814
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
M
Michael Niedermayer 已提交
815
    int i;\
M
Michael Niedermayer 已提交
816
    const int w=16;\
M
Michael Niedermayer 已提交
817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        const int src9= src[9*srcStride];\
        const int src10= src[10*srcStride];\
        const int src11= src[11*srcStride];\
        const int src12= src[12*srcStride];\
        const int src13= src[13*srcStride];\
        const int src14= src[14*srcStride];\
        const int src15= src[15*srcStride];\
        const int src16= src[16*srcStride];\
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
        dst++;\
        src++;\
    }\
}\
\
857 858
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
859
    uint8_t half[64];\
M
Michael Niedermayer 已提交
860
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
861
    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
M
Michael Niedermayer 已提交
862 863
}\
\
864 865
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
M
Michael Niedermayer 已提交
866
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
M
Michael Niedermayer 已提交
867 868
}\
\
869 870
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
871
    uint8_t half[64];\
M
Michael Niedermayer 已提交
872
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
873
    OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
M
Michael Niedermayer 已提交
874 875
}\
\
876 877
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
878 879
    uint8_t full[16*9];\
    uint8_t half[64];\
M
Michael Niedermayer 已提交
880
    copy_block9(full, src, 16, stride, 9);\
M
Michael Niedermayer 已提交
881
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
882
    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
M
Michael Niedermayer 已提交
883 884
}\
\
885 886
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
887
    uint8_t full[16*9];\
M
Michael Niedermayer 已提交
888
    copy_block9(full, src, 16, stride, 9);\
M
Michael Niedermayer 已提交
889
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
M
Michael Niedermayer 已提交
890 891
}\
\
892 893
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
894 895
    uint8_t full[16*9];\
    uint8_t half[64];\
M
Michael Niedermayer 已提交
896
    copy_block9(full, src, 16, stride, 9);\
M
Michael Niedermayer 已提交
897
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
898
    OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
M
Michael Niedermayer 已提交
899
}\
900 901
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
902 903 904 905
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
906 907
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
M
Michael Niedermayer 已提交
908 909
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
910
    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
M
Michael Niedermayer 已提交
911
}\
912 913
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
914 915 916
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
917 918
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
919
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
M
Michael Niedermayer 已提交
920
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
921
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
M
Michael Niedermayer 已提交
922
}\
923 924
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
925 926 927 928
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
929 930
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
M
Michael Niedermayer 已提交
931 932
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
933
    OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
M
Michael Niedermayer 已提交
934
}\
935 936
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
937 938 939
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
940 941
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
942
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
M
Michael Niedermayer 已提交
943
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
944
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
M
Michael Niedermayer 已提交
945
}\
946 947
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
948 949 950 951
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
952 953
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
M
Michael Niedermayer 已提交
954 955
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
956
    OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
M
Michael Niedermayer 已提交
957
}\
958 959
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
960 961 962
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
963 964
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
965
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
M
Michael Niedermayer 已提交
966
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
967
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
M
Michael Niedermayer 已提交
968
}\
969 970
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
971 972 973 974
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
975 976
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
M
Michael Niedermayer 已提交
977 978
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
979
    OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
M
Michael Niedermayer 已提交
980
}\
981 982
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
983 984 985
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
986 987
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
988
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
M
Michael Niedermayer 已提交
989
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
990
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
M
Michael Niedermayer 已提交
991
}\
992 993
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
994 995
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
996
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
M
Michael Niedermayer 已提交
997
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
998
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
M
Michael Niedermayer 已提交
999
}\
1000 1001
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1002 1003
    uint8_t halfH[72];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
1004
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
M
Michael Niedermayer 已提交
1005
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1006
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
M
Michael Niedermayer 已提交
1007
}\
1008 1009
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1010 1011 1012 1013
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
1014 1015
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
M
Michael Niedermayer 已提交
1016 1017
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1018
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
M
Michael Niedermayer 已提交
1019
}\
1020 1021
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1022 1023
    uint8_t full[16*9];\
    uint8_t halfH[72];\
M
Michael Niedermayer 已提交
1024 1025
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1026
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
M
Michael Niedermayer 已提交
1027 1028
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
1029 1030
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1031 1032 1033 1034
    uint8_t full[16*9];\
    uint8_t halfH[72];\
    uint8_t halfV[64];\
    uint8_t halfHV[64];\
M
Michael Niedermayer 已提交
1035 1036
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
M
Michael Niedermayer 已提交
1037 1038
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1039
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
M
Michael Niedermayer 已提交
1040
}\
1041 1042
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1043 1044
    uint8_t full[16*9];\
    uint8_t halfH[72];\
M
Michael Niedermayer 已提交
1045 1046
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1047
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
M
Michael Niedermayer 已提交
1048 1049
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
}\
1050 1051
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1052
    uint8_t halfH[72];\
M
Michael Niedermayer 已提交
1053
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
M
Michael Niedermayer 已提交
1054
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
M
Michael Niedermayer 已提交
1055 1056
}\
\
1057 1058
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1059
    uint8_t half[256];\
M
Michael Niedermayer 已提交
1060
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1061
    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
M
Michael Niedermayer 已提交
1062 1063
}\
\
1064 1065
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
M
Michael Niedermayer 已提交
1066
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
M
Michael Niedermayer 已提交
1067
}\
M
Michael Niedermayer 已提交
1068
\
1069 1070
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1071
    uint8_t half[256];\
M
Michael Niedermayer 已提交
1072
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1073
    OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
M
Michael Niedermayer 已提交
1074 1075
}\
\
1076 1077
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1078 1079
    uint8_t full[24*17];\
    uint8_t half[256];\
M
Michael Niedermayer 已提交
1080
    copy_block17(full, src, 24, stride, 17);\
M
Michael Niedermayer 已提交
1081
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1082
    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
M
Michael Niedermayer 已提交
1083 1084
}\
\
1085 1086
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1087
    uint8_t full[24*17];\
M
Michael Niedermayer 已提交
1088
    copy_block17(full, src, 24, stride, 17);\
M
Michael Niedermayer 已提交
1089
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
M
Michael Niedermayer 已提交
1090 1091
}\
\
1092 1093
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1094 1095
    uint8_t full[24*17];\
    uint8_t half[256];\
M
Michael Niedermayer 已提交
1096
    copy_block17(full, src, 24, stride, 17);\
M
Michael Niedermayer 已提交
1097
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1098
    OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
M
Michael Niedermayer 已提交
1099
}\
1100 1101
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1102 1103 1104 1105
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1106 1107
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
M
Michael Niedermayer 已提交
1108 1109
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1110
    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
M
Michael Niedermayer 已提交
1111
}\
1112 1113
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1114 1115 1116
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1117 1118
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1119
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
M
Michael Niedermayer 已提交
1120
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1121
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
M
Michael Niedermayer 已提交
1122
}\
1123 1124
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1125 1126 1127 1128
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1129 1130
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
M
Michael Niedermayer 已提交
1131 1132
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1133
    OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
M
Michael Niedermayer 已提交
1134
}\
1135 1136
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1137 1138 1139
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1140 1141
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1142
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
M
Michael Niedermayer 已提交
1143
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1144
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
M
Michael Niedermayer 已提交
1145
}\
1146 1147
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1148 1149 1150 1151
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1152 1153
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
M
Michael Niedermayer 已提交
1154 1155
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1156
    OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
M
Michael Niedermayer 已提交
1157
}\
1158 1159
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1160 1161 1162
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1163 1164
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1165
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
M
Michael Niedermayer 已提交
1166
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1167
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
M
Michael Niedermayer 已提交
1168
}\
1169 1170
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1171 1172 1173 1174
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1175 1176
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
M
Michael Niedermayer 已提交
1177 1178
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1179
    OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
M
Michael Niedermayer 已提交
1180
}\
1181 1182
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1183 1184 1185
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1186 1187
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1188
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
M
Michael Niedermayer 已提交
1189
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1190
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
M
Michael Niedermayer 已提交
1191
}\
1192 1193
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1194 1195
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1196
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
M
Michael Niedermayer 已提交
1197
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1198
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
M
Michael Niedermayer 已提交
1199
}\
1200 1201
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1202 1203
    uint8_t halfH[272];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1204
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
M
Michael Niedermayer 已提交
1205
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1206
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
M
Michael Niedermayer 已提交
1207
}\
1208 1209
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1210 1211 1212 1213
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1214 1215
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
M
Michael Niedermayer 已提交
1216 1217
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1218
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
M
Michael Niedermayer 已提交
1219
}\
1220 1221
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1222 1223
    uint8_t full[24*17];\
    uint8_t halfH[272];\
M
Michael Niedermayer 已提交
1224 1225
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1226
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
M
Michael Niedermayer 已提交
1227 1228
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
1229 1230
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1231 1232 1233 1234
    uint8_t full[24*17];\
    uint8_t halfH[272];\
    uint8_t halfV[256];\
    uint8_t halfHV[256];\
M
Michael Niedermayer 已提交
1235 1236
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
M
Michael Niedermayer 已提交
1237 1238
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1239
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
M
Michael Niedermayer 已提交
1240
}\
1241 1242
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1243 1244
    uint8_t full[24*17];\
    uint8_t halfH[272];\
M
Michael Niedermayer 已提交
1245 1246
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1247
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
M
Michael Niedermayer 已提交
1248 1249
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
}\
1250 1251
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
1252
    uint8_t halfH[272];\
M
Michael Niedermayer 已提交
1253
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
M
Michael Niedermayer 已提交
1254
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1255
}
M
Michael Niedermayer 已提交
1256

M
Michael Niedermayer 已提交
1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269
/*
 * Store operators handed to QPEL_MC as its last argument.
 *
 * Every operator indexes a table through an identifier named `cm`, so a
 * pointer with that name must be in scope wherever the operator is expanded.
 * `b` is the unscaled filter sum: a bias is added and the result is shifted
 * right by 5 before the table lookup.
 *   - op_put / op_avg use a bias of 16,
 *   - op_put_no_rnd / op_avg_no_rnd use a bias of 15.
 * The avg forms then average the looked-up value with the value already held
 * in `a`: op_avg adds 1 before halving, op_avg_no_rnd does not.
 */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]

/* Instantiate the quarter-pel function families: put, put_no_rnd and avg. */
QPEL_MC(0, put_       , _       , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_       , _       , op_avg)
/* The avg_no_rnd family is intentionally not instantiated (line kept disabled). */
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
/* The operators are only meant for the instantiations above. */
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
M
Michael Niedermayer 已提交
1270

1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287
/**
 * Fixed-size entry point: forwards to put_pixels8_8_c() with a height of 8.
 *
 * @param dst    destination pointer, passed through unchanged
 * @param src    source pointer, passed through unchanged
 * @param stride stride value, passed through unchanged
 */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    const int rows = 8;

    put_pixels8_8_c(dst, src, stride, rows);
}
/**
 * Fixed-size entry point: forwards to avg_pixels8_8_c() with a height of 8.
 *
 * @param dst    destination pointer, passed through unchanged
 * @param src    source pointer, passed through unchanged
 * @param stride stride value, passed through unchanged
 */
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    const int rows = 8;

    avg_pixels8_8_c(dst, src, stride, rows);
}
/**
 * Fixed-size entry point: forwards to put_pixels16_8_c() with a height of 16.
 *
 * @param dst    destination pointer, passed through unchanged
 * @param src    source pointer, passed through unchanged
 * @param stride stride value, passed through unchanged
 */
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    const int rows = 16;

    put_pixels16_8_c(dst, src, stride, rows);
}
/**
 * Fixed-size entry point: forwards to avg_pixels16_8_c() with a height of 16.
 *
 * @param dst    destination pointer, passed through unchanged
 * @param src    source pointer, passed through unchanged
 * @param stride stride value, passed through unchanged
 */
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    const int rows = 16;

    avg_pixels16_8_c(dst, src, stride, rows);
}

1288 1289 1290 1291 1292
/*
 * The mc00 names of the qpel function families are aliases of the plain
 * fixed-size put/avg wrappers. Both put_no_rnd aliases resolve to the same
 * wrappers as the regular put aliases.
 */
#define put_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1294

M
Michael Niedermayer 已提交
1295
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1296
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
M
Michael Niedermayer 已提交
1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308
    int i;

    for(i=0; i<h; i++){
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
        dst+=dstStride;
1309
        src+=srcStride;
M
Michael Niedermayer 已提交
1310 1311 1312
    }
}

1313
#if CONFIG_RV40_DECODER
1314 1315
/**
 * RV40 16x16 "mc33" put function: forwards to put_pixels16_xy2_8_c() with a
 * height of 16.
 *
 * @param dst    destination pointer, passed through unchanged
 * @param src    source pointer, passed through unchanged
 * @param stride stride value, passed through unchanged
 */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
1318 1319
/**
 * RV40 16x16 "mc33" avg function: forwards to avg_pixels16_xy2_8_c() with a
 * height of 16.
 *
 * @param dst    destination pointer, passed through unchanged
 * @param src    source pointer, passed through unchanged
 * @param stride stride value, passed through unchanged
 */
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
1322 1323
/**
 * RV40 8x8 "mc33" put function: forwards to put_pixels8_xy2_8_c() with a
 * height of 8.
 *
 * @param dst    destination pointer, passed through unchanged
 * @param src    source pointer, passed through unchanged
 * @param stride stride value, passed through unchanged
 */
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
1326 1327
/**
 * RV40 8x8 "mc33" avg function: forwards to avg_pixels8_xy2_8_c() with a
 * height of 8.
 *
 * @param dst    destination pointer, passed through unchanged
 * @param src    source pointer, passed through unchanged
 * @param stride stride value, passed through unchanged
 */
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */

M
Michael Niedermayer 已提交
1332
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1333
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
M
Michael Niedermayer 已提交
1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360
    int i;

    for(i=0; i<w; i++){
        const int src_1= src[ -srcStride];
        const int src0 = src[0          ];
        const int src1 = src[  srcStride];
        const int src2 = src[2*srcStride];
        const int src3 = src[3*srcStride];
        const int src4 = src[4*srcStride];
        const int src5 = src[5*srcStride];
        const int src6 = src[6*srcStride];
        const int src7 = src[7*srcStride];
        const int src8 = src[8*srcStride];
        const int src9 = src[9*srcStride];
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
        src++;
        dst++;
    }
}

1361 1362
/**
 * 8x8 mspel "mc10" put function.
 *
 * Runs the horizontal lowpass filter over 8 rows into a temporary 8x8 buffer,
 * then hands src and that buffer to put_pixels8_l2_8() to produce dst
 * (presumably an average of the two inputs; the helper is defined elsewhere).
 *
 * @param dst    destination block, row distance stride
 * @param src    source block, row distance stride
 * @param stride row distance shared by dst and src
 */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t half[64]; /* 8 rows of 8 filtered samples */

    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}

1368 1369
/**
 * 8x8 mspel "mc20" put function: writes the horizontal lowpass filter output
 * for 8 rows directly into dst.
 *
 * @param dst    destination block, row distance stride
 * @param src    source block, row distance stride
 * @param stride row distance shared by dst and src
 */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}

1373 1374
/**
 * 8x8 mspel "mc30" put function.
 *
 * Same as the mc10 variant except that the unfiltered input handed to
 * put_pixels8_l2_8() starts one sample to the right (src + 1).
 *
 * @param dst    destination block, row distance stride
 * @param src    source block, row distance stride
 * @param stride row distance shared by dst and src
 */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t half[64]; /* 8 rows of 8 filtered samples */

    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src + 1, half, stride, stride, 8, 8);
}

1380 1381
/**
 * 8x8 mspel "mc02" put function: writes the vertical lowpass filter output
 * for 8 columns directly into dst.
 *
 * @param dst    destination block, row distance stride
 * @param src    source block, row distance stride
 * @param stride row distance shared by dst and src
 */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}

1385 1386
/**
 * 8x8 mspel "mc12" put function.
 *
 * Builds two intermediate 8x8 blocks and hands both to put_pixels8_l2_8()
 * (presumably an average; the helper is defined elsewhere):
 *   - halfV:  vertical lowpass of src,
 *   - halfHV: vertical lowpass of the horizontally filtered rows.
 *
 * The horizontal pass starts one row above src and covers 11 rows because
 * the vertical filter reads one row above and two rows below its 8 outputs.
 *
 * @param dst    destination block, row distance stride
 * @param src    source block, row distance stride
 * @param stride row distance shared by dst and src
 */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88];  /* 11 rows of 8 horizontally filtered samples */
    uint8_t halfV[64];
    uint8_t halfHV[64];

    wmv2_mspel8_h_lowpass(halfH, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    /* halfH + 8 skips the extra top row so row -1 stays addressable. */
    wmv2_mspel8_v_lowpass(halfHV, halfH + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
1395 1396
/**
 * 8x8 mspel "mc32" put function.
 *
 * Same as the mc12 variant except that the purely vertical pass reads from
 * one sample to the right (src + 1).
 *
 * The horizontal pass starts one row above src and covers 11 rows because
 * the vertical filter reads one row above and two rows below its 8 outputs.
 *
 * @param dst    destination block, row distance stride
 * @param src    source block, row distance stride
 * @param stride row distance shared by dst and src
 */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88];  /* 11 rows of 8 horizontally filtered samples */
    uint8_t halfV[64];
    uint8_t halfHV[64];

    wmv2_mspel8_h_lowpass(halfH, src - stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src + 1, 8, stride, 8);
    /* halfH + 8 skips the extra top row so row -1 stays addressable. */
    wmv2_mspel8_v_lowpass(halfHV, halfH + 8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
1405 1406
/**
 * 8x8 mspel "mc22" put function: horizontal lowpass followed by vertical
 * lowpass, with the second pass written directly into dst.
 *
 * The horizontal pass starts one row above src and covers 11 rows because
 * the vertical filter reads one row above and two rows below its 8 outputs.
 *
 * @param dst    destination block, row distance stride
 * @param src    source block, row distance stride
 * @param stride row distance shared by dst and src
 */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88]; /* 11 rows of 8 horizontally filtered samples */

    wmv2_mspel8_h_lowpass(halfH, src - stride, 8, stride, 11);
    /* halfH + 8 skips the extra top row so row -1 stays addressable. */
    wmv2_mspel8_v_lowpass(dst, halfH + 8, stride, 8, 8);
}

M
Michael Niedermayer 已提交
1412
/**
 * Sum of absolute differences between two 16-sample-wide blocks.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance between consecutive rows, used for both blocks
 * @param h         number of rows to compare
 * @return sum over h rows and 16 columns of |pix1 - pix2|; 0 when h <= 0
 */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0;
    int i, x;

    for (i = 0; i < h; i++) {
        for (x = 0; x < 16; x++) {
            s += abs(pix1[x] - pix2[x]);
        }
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

M
Michael Niedermayer 已提交
1440
/**
 * Sum of absolute differences between a 16xh block of pix1 and pix2
 * interpolated horizontally: each reference sample is avg2() of a pix2 pixel
 * and its right neighbour, so 17 columns of pix2 are read per row.
 *
 * @param v         unused
 * @param line_size distance in bytes between consecutive rows
 * @param h         number of rows
 */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0;
    int i, x;

    for (i = 0; i < h; i++) {
        for (x = 0; x < 16; x++)
            s += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

M
Michael Niedermayer 已提交
1468
/**
 * Sum of absolute differences between a 16xh block of pix1 and pix2
 * interpolated vertically: each reference sample is avg2() of a pix2 pixel
 * and the pixel directly below it, so h + 1 rows of pix2 are read.
 *
 * @param v         unused
 * @param line_size distance in bytes between consecutive rows
 * @param h         number of rows
 */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0;
    int i, x;
    uint8_t *pix3 = pix2 + line_size; /* row below the current pix2 row */

    for (i = 0; i < h; i++) {
        for (x = 0; x < 16; x++)
            s += abs(pix1[x] - avg2(pix2[x], pix3[x]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

M
Michael Niedermayer 已提交
1498
/**
 * Sum of absolute differences between a 16xh block of pix1 and pix2
 * interpolated in both directions: each reference sample is avg4() of a 2x2
 * neighbourhood of pix2, so 17 columns and h + 1 rows of pix2 are read.
 *
 * @param v         unused
 * @param line_size distance in bytes between consecutive rows
 * @param h         number of rows
 */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0;
    int i, x;
    uint8_t *pix3 = pix2 + line_size; /* row below the current pix2 row */

    for (i = 0; i < h; i++) {
        for (x = 0; x < 16; x++)
            s += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], pix3[x], pix3[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

M
Michael Niedermayer 已提交
1528
/**
 * Sum of absolute differences over a block 8 pixels wide and h rows high.
 *
 * @param v         unused
 * @param pix1      first block
 * @param pix2      second block
 * @param line_size distance in bytes between consecutive rows of both blocks
 * @param h         number of rows
 * @return sum over all rows and the 8 columns of |pix1 - pix2|
 */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0;
    int i, x;

    for (i = 0; i < h; i++) {
        for (x = 0; x < 8; x++)
            s += abs(pix1[x] - pix2[x]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

M
Michael Niedermayer 已提交
1548
/**
 * Sum of absolute differences between an 8xh block of pix1 and pix2
 * interpolated horizontally: each reference sample is avg2() of a pix2 pixel
 * and its right neighbour, so 9 columns of pix2 are read per row.
 *
 * @param v         unused
 * @param line_size distance in bytes between consecutive rows
 * @param h         number of rows
 */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0;
    int i, x;

    for (i = 0; i < h; i++) {
        for (x = 0; x < 8; x++)
            s += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

M
Michael Niedermayer 已提交
1568
/**
 * Sum of absolute differences between an 8xh block of pix1 and pix2
 * interpolated vertically: each reference sample is avg2() of a pix2 pixel
 * and the pixel directly below it, so h + 1 rows of pix2 are read.
 *
 * @param v         unused
 * @param line_size distance in bytes between consecutive rows
 * @param h         number of rows
 */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0;
    int i, x;
    uint8_t *pix3 = pix2 + line_size; /* row below the current pix2 row */

    for (i = 0; i < h; i++) {
        for (x = 0; x < 8; x++)
            s += abs(pix1[x] - avg2(pix2[x], pix3[x]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

M
Michael Niedermayer 已提交
1590
/**
 * Sum of absolute differences between an 8xh block of pix1 and pix2
 * interpolated in both directions: each reference sample is avg4() of a 2x2
 * neighbourhood of pix2, so 9 columns and h + 1 rows of pix2 are read.
 *
 * @param v         unused
 * @param line_size distance in bytes between consecutive rows
 * @param h         number of rows
 */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s = 0;
    int i, x;
    uint8_t *pix3 = pix2 + line_size; /* row below the current pix2 row */

    for (i = 0; i < h; i++) {
        for (x = 0; x < 8; x++)
            s += abs(pix1[x] - avg4(pix2[x], pix2[x + 1], pix3[x], pix3[x + 1]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

1612 1613
static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
1614 1615 1616
    int score1=0;
    int score2=0;
    int x,y;
M
Michael Niedermayer 已提交
1617

1618 1619 1620 1621 1622 1623
    for(y=0; y<h; y++){
        for(x=0; x<16; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
        }
        if(y+1<h){
            for(x=0; x<15; x++){
D
Diego Biurrun 已提交
1624
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
1625
                             - s1[x+1] + s1[x+1+stride])
D
Diego Biurrun 已提交
1626
                        -FFABS(  s2[x  ] - s2[x  +stride]
1627 1628 1629 1630 1631 1632
                             - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }
M
Michael Niedermayer 已提交
1633

D
Diego Biurrun 已提交
1634 1635
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
1636 1637
}

1638 1639
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
    MpegEncContext *c = v;
1640 1641 1642
    int score1=0;
    int score2=0;
    int x,y;
1643

1644 1645 1646 1647 1648 1649
    for(y=0; y<h; y++){
        for(x=0; x<8; x++){
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
        }
        if(y+1<h){
            for(x=0; x<7; x++){
D
Diego Biurrun 已提交
1650
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
1651
                             - s1[x+1] + s1[x+1+stride])
D
Diego Biurrun 已提交
1652
                        -FFABS(  s2[x  ] - s2[x  +stride]
1653 1654 1655 1656 1657 1658
                             - s2[x+1] + s2[x+1+stride]);
            }
        }
        s1+= stride;
        s2+= stride;
    }
1659

D
Diego Biurrun 已提交
1660 1661
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
    else  return score1 + FFABS(score2)*8;
1662 1663
}

1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683
/**
 * Evaluate the weighted squared error that would result from adding
 * basis * scale to the remainder, without modifying rem.
 *
 * For each of the 64 coefficients the scaled basis value is rounded and
 * shifted by (BASIS_SHIFT - RECON_SHIFT), added to rem, shifted down by
 * RECON_SHIFT, multiplied by the weight and squared.
 *
 * @return accumulated ((w*b)^2 >> 4) over all coefficients, shifted right by 2
 */
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
    unsigned int total = 0;
    int k;

    for (k = 0; k < 8*8; k++) {
        const int scaled = (basis[k]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
        const int b      = (rem[k] + scaled) >> RECON_SHIFT;
        const int w      = weight[k];

        assert(-512<b && b<512);

        total += (w*b)*(w*b)>>4;
    }
    return total>>2;
}

/**
 * Add basis * scale to the remainder in place.
 *
 * Each of the 64 products is rounded and shifted right by
 * (BASIS_SHIFT - RECON_SHIFT) before being accumulated into rem.
 */
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
    int i;

    for(i=0; i<8*8; i++){
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
    }
}

1687 1688 1689 1690 1691 1692
/**
 * Comparison function that ignores all of its arguments and always scores 0.
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h)
{
    const int score = 0;

    return score;
}

/**
 * Fill a table of 6 comparison functions according to a FF_CMP_* selector.
 *
 * Only the low 8 bits of type are examined. For every slot the matching
 * entry of the corresponding DSPContext table is copied; FF_CMP_ZERO installs
 * zero_cmp in every slot. For an unknown selector an error is logged (once
 * per slot) and the slot stays NULL.
 *
 * @param c    DSP context supplying the function tables
 * @param cmp  destination array; must have room for 6 entries
 * @param type FF_CMP_* value, possibly combined with flags in higher bits
 */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    /* Size the clear by the element type rather than assuming that a
     * function pointer has the same size as void*. */
    memset(cmp, 0, sizeof(*cmp)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}

M
huffyuv  
Michael Niedermayer 已提交
1743
/**
 * Byte-wise dst[i] += src[i] (modulo 256) for i in [0, w).
 *
 * The bulk of the work is done one machine word at a time: the low 7 bits of
 * every byte are added with the top bits masked off so no carry can cross a
 * byte boundary, and the top bit of each byte is then restored with an XOR.
 * The remaining tail is handled byte by byte.
 *
 * NOTE(review): the word loop accesses dst/src through long pointers without
 * any alignment check -- confirm this is acceptable on all supported targets.
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
    long i;

    /* sizeof() is unsigned: without the cast, w < sizeof(long) would make
     * the bound wrap to a huge value and the loop would run out of bounds. */
    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src+i);
        long b = *(long*)(dst+i);
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
    }
    for(; i<w; i++)
        dst[i+0] += src[i+0];
}

static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
1755
    long i;
1756
#if !HAVE_FAST_UNALIGNED
1757
    if((long)src2 & (sizeof(long)-1)){
L
indent  
Loren Merritt 已提交
1758 1759 1760 1761 1762 1763 1764 1765 1766 1767
        for(i=0; i+7<w; i+=8){
            dst[i+0] = src1[i+0]-src2[i+0];
            dst[i+1] = src1[i+1]-src2[i+1];
            dst[i+2] = src1[i+2]-src2[i+2];
            dst[i+3] = src1[i+3]-src2[i+3];
            dst[i+4] = src1[i+4]-src2[i+4];
            dst[i+5] = src1[i+5]-src2[i+5];
            dst[i+6] = src1[i+6]-src2[i+6];
            dst[i+7] = src1[i+7]-src2[i+7];
        }
1768 1769 1770 1771 1772 1773 1774
    }else
#endif
    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
        long a = *(long*)(src1+i);
        long b = *(long*)(src2+i);
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
    }
M
huffyuv  
Michael Niedermayer 已提交
1775 1776 1777 1778
    for(; i<w; i++)
        dst[i+0] = src1[i+0]-src2[i+0];
}

L
Loren Merritt 已提交
1779
/**
 * Reconstruct a row from median-predicted residuals.
 *
 * For every sample the predictor is mid_pred() of the previous output (l),
 * the sample from src1 at the same position, and their gradient
 * (l + src1[i] - lt) reduced to 8 bits; diff[i] is added to it.
 *
 * @param dst      output row
 * @param src1     reference row used by the predictor
 * @param diff     residuals
 * @param w        number of samples
 * @param left     in: initial left value; out: last output value
 * @param left_top in: initial top-left value; out: last src1 value
 *
 * Both running values are kept in uint8_t, so they wrap modulo 256.
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt= src1[i];
        dst[i]= l;
    }

    *left= l;
    *left_top= lt;
}

L
Loren Merritt 已提交
1796
/**
 * Compute median-prediction residuals for a row.
 *
 * For every sample the predictor is mid_pred() of the previous src2 sample
 * (l), the src1 sample at the same position, and their gradient
 * (l + src1[i] - lt) reduced to 8 bits; dst[i] receives src2[i] minus that
 * predictor.
 *
 * @param dst      output residuals
 * @param src1     reference row used by the predictor
 * @param src2     row being predicted
 * @param w        number of samples
 * @param left     in: initial left value; out: last src2 value
 * @param left_top in: initial top-left value; out: last src1 value
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}

1814
/**
 * Running-sum ("left") prediction: dst[i] = low 8 bits of acc after adding
 * src[0..i] to it.
 *
 * @param dst output row
 * @param src residuals
 * @param w   number of samples
 * @param acc initial accumulator value
 * @return the accumulator after the last sample (not reduced to 8 bits)
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i = 0;

    /* Two samples per iteration; the tail loop below handles an odd w. */
    while (i < w-1) {
        acc+= src[i];
        dst[i]= acc;
        i++;
        acc+= src[i];
        dst[i]= acc;
        i++;
    }

    for(; i<w; i++){
        acc+= src[i];
        dst[i]= acc;
    }

    return acc;
}

/* Byte offsets of the four channels inside one 32-bit pixel; they are
 * mirrored on big-endian hosts. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/**
 * Running-sum ("left") prediction on 4-byte pixels, one independent
 * accumulator per channel.
 *
 * @param dst   output pixels (4 bytes each)
 * @param src   residual pixels (4 bytes each)
 * @param w     number of pixels
 * @param red, green, blue, alpha
 *              in: initial accumulator per channel; out: accumulator after
 *              the last pixel (not reduced to 8 bits)
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int r,g,b,a;
    r= *red;
    g= *green;
    b= *blue;
    a= *alpha;

    for(i=0; i<w; i++){
        b+= src[4*i+B];
        g+= src[4*i+G];
        r+= src[4*i+R];
        a+= src[4*i+A];

        /* Stores keep only the low 8 bits of each accumulator. */
        dst[4*i+B]= b;
        dst[4*i+G]= g;
        dst[4*i+R]= r;
        dst[4*i+A]= a;
    }

    *red= r;
    *green= g;
    *blue= b;
    *alpha= a;
}
#undef B
#undef G
#undef R
#undef A
1873

M
Michael Niedermayer 已提交
1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886
/* o1 = i1 + i2, o2 = i1 - i2.
 * Expands to two bare statements (each with its own semicolon), so it must
 * not be used as the sole body of an unbraced if/else/loop. i1 and i2 are
 * each evaluated twice. */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

/* In-place butterfly: (x, y) <- (x + y, x - y).
 * x and y must be lvalues; the values are first copied into int temporaries
 * named a and b inside a braced block. */
#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

D
Diego Biurrun 已提交
1887
/* |x + y| + |x - y|: a butterfly whose two outputs are summed as absolute
 * values instead of being stored. x and y are evaluated more than once. */
#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))
M
Michael Niedermayer 已提交
1888

M
Michael Niedermayer 已提交
1889
/**
 * Sum of absolute values of the 8x8 Hadamard-transformed difference
 * src - dst.
 *
 * The first loop forms the pixel differences and runs three butterfly stages
 * along each row; the second loop runs two butterfly stages down each column
 * and folds the third stage into BUTTERFLYA, which adds up the absolute
 * values directly.
 *
 * @param s      unused
 * @param stride distance in bytes between consecutive rows of dst and src
 * @param h      must be 8 (asserted)
 */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}

1934
/**
 * Sum of absolute values of the 8x8 Hadamard transform of src itself, with
 * the mean (DC) term removed at the end.
 *
 * Same butterfly structure as the difference variant, but the input is the
 * source pixels rather than a difference of two blocks.
 *
 * @param s      unused
 * @param dummy  unused
 * @param stride distance in bytes between consecutive rows of src
 * @param h      must be 8 (asserted)
 */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    assert(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}

M
Michael Niedermayer 已提交
1982
static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
M
Michael Niedermayer 已提交
1983
    MpegEncContext * const s= (MpegEncContext *)c;
D
Diego Biurrun 已提交
1984
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
1985

M
Michael Niedermayer 已提交
1986
    assert(h==8);
M
Michael Niedermayer 已提交
1987 1988

    s->dsp.diff_pixels(temp, src1, src2, stride);
1989
    s->dsp.fdct(temp);
1990
    return s->dsp.sum_abs_dctelem(temp);
M
Michael Niedermayer 已提交
1991 1992
}

1993
#if CONFIG_GPL
/* One 8-point integer transform pass. The caller defines SRC(x) to read
 * input element x and DST(x,v) to consume output element x with value v. */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/**
 * Sum of absolute coefficients of the 8x8 difference src1 - src2 after the
 * DCT8_1D integer transform has been applied to rows and then to columns.
 *
 * The row pass writes its results back into dct[][]; the column pass does
 * not store anything and accumulates |value| straight into the sum.
 *
 * @param c      MpegEncContext pointer (dereferenced for dsp.diff_pixels)
 * @param stride distance in bytes between consecutive rows of both sources
 * @param h      unused
 */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    int16_t dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif

2046 2047
static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
D
Diego Biurrun 已提交
2048
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2049
    int sum=0, i;
2050

2051 2052 2053 2054 2055 2056
    assert(h==8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for(i=0; i<64; i++)
D
Diego Biurrun 已提交
2057
        sum= FFMAX(sum, FFABS(temp[i]));
2058

2059 2060 2061
    return sum;
}

M
Michael Niedermayer 已提交
2062
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
M
Michael Niedermayer 已提交
2063
    MpegEncContext * const s= (MpegEncContext *)c;
D
Diego Biurrun 已提交
2064 2065
    LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
    int16_t * const bak = temp+64;
M
Michael Niedermayer 已提交
2066 2067
    int sum=0, i;

M
Michael Niedermayer 已提交
2068
    assert(h==8);
M
Michael Niedermayer 已提交
2069
    s->mb_intra=0;
2070

M
Michael Niedermayer 已提交
2071
    s->dsp.diff_pixels(temp, src1, src2, stride);
2072

D
Diego Biurrun 已提交
2073
    memcpy(bak, temp, 64*sizeof(int16_t));
2074

2075
    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2076
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
M
Mans Rullgard 已提交
2077
    ff_simple_idct_8(temp); //FIXME
2078

M
Michael Niedermayer 已提交
2079 2080
    for(i=0; i<64; i++)
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2081

M
Michael Niedermayer 已提交
2082 2083 2084
    return sum;
}

M
Michael Niedermayer 已提交
2085
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2086
    MpegEncContext * const s= (MpegEncContext *)c;
2087
    const uint8_t *scantable= s->intra_scantable.permutated;
D
Diego Biurrun 已提交
2088
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2089 2090
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
R
Ramiro Polla 已提交
2091
    int i, last, run, bits, level, distortion, start_i;
2092 2093 2094
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
2095

M
Michael Niedermayer 已提交
2096 2097
    assert(h==8);

2098 2099
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);
2100

2101
    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2102 2103 2104 2105

    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;
2106

2107
    if (s->mb_intra) {
2108
        start_i = 1;
2109 2110
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
2111
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2112 2113 2114 2115 2116
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
2117

2118
    if(last>=start_i){
2119 2120 2121 2122
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];
2123

2124 2125 2126 2127 2128 2129 2130 2131 2132 2133 2134
            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];
2135

2136
        level= temp[i] + 64;
M
bugs  
Michael Niedermayer 已提交
2137 2138

        assert(level - 64);
2139

2140 2141 2142 2143
        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
2144

2145 2146 2147
    }

    if(last>=0){
2148 2149 2150 2151
        if(s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
2152
    }
2153

2154
    s->dsp.idct_add(lsrc2, 8, temp);
2155

2156
    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2157

R
Ramiro Polla 已提交
2158
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
2159 2160
}

M
Michael Niedermayer 已提交
2161
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2162
    MpegEncContext * const s= (MpegEncContext *)c;
2163
    const uint8_t *scantable= s->intra_scantable.permutated;
D
Diego Biurrun 已提交
2164
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2165 2166 2167 2168
    int i, last, run, bits, level, start_i;
    const int esc_length= s->ac_esc_length;
    uint8_t * length;
    uint8_t * last_length;
M
Michael Niedermayer 已提交
2169 2170

    assert(h==8);
2171

2172
    s->dsp.diff_pixels(temp, src1, src2, stride);
2173

2174 2175 2176
    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);

    bits=0;
2177

2178
    if (s->mb_intra) {
2179
        start_i = 1;
2180 2181
        length     = s->intra_ac_vlc_length;
        last_length= s->intra_ac_vlc_last_length;
2182
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2183 2184 2185 2186 2187
    } else {
        start_i = 0;
        length     = s->inter_ac_vlc_length;
        last_length= s->inter_ac_vlc_last_length;
    }
2188

2189
    if(last>=start_i){
2190 2191 2192 2193
        run=0;
        for(i=start_i; i<last; i++){
            int j= scantable[i];
            level= temp[j];
2194

2195 2196 2197 2198 2199 2200 2201 2202 2203 2204 2205
            if(level){
                level+=64;
                if((level&(~127)) == 0){
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
                }else
                    bits+= esc_length;
                run=0;
            }else
                run++;
        }
        i= scantable[last];
2206

2207
        level= temp[i] + 64;
2208

2209
        assert(level - 64);
2210

2211 2212 2213 2214 2215 2216 2217 2218 2219
        if((level&(~127)) == 0){
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
        }else
            bits+= esc_length;
    }

    return bits;
}

2220 2221 2222 2223 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236
/* Generates vsad_intra<size>_c(): the sum of absolute differences between
 * each row and the row below it, over <size> columns and rows 0..h-2 of s.
 * The c and dummy arguments are unused. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int total = 0; \
    int row, col; \
 \
    for (row = 1; row < h; row++) { \
        for (col = 0; col < size; col++) \
            total += FFABS(s[col] - s[col + stride]); \
        s += stride; \
    } \
 \
    return total; \
}
VSAD_INTRA(8)
VSAD_INTRA(16)
2237 2238 2239 2240

/**
 * Vertical SAD of the difference signal s1 - s2 over 16 columns: for every
 * pair of adjacent rows, sums |(s1 - s2) on the upper row - (s1 - s2) on the
 * lower row|.
 *
 * @param c      unused
 * @param stride distance in bytes between consecutive rows of both sources
 * @param h      number of rows; h - 1 row pairs are evaluated
 */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= FFABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}

/* Square of a; the argument is evaluated twice. */
#define SQ(a) ((a)*(a))
/* Generates vsse_intra<size>_c(): the sum of squared differences between
 * each row and the row below it, over <size> columns and rows 0..h-2 of s,
 * four columns per inner iteration. The c and dummy arguments are unused. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                                            \
    int x,y;                                                                                                \
                                                                                                            \
    for(y=1; y<h; y++){                                                                                     \
        for(x=0; x<size; x+=4){                                                                             \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])                                 \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);                                \
        }                                                                                                   \
        s+= stride;                                                                                         \
    }                                                                                                       \
                                                                                                            \
    return score;                                                                                           \
}
VSSE_INTRA(8)
VSSE_INTRA(16)
2271 2272 2273 2274

/**
 * Sum of squared differences between the vertical gradients of two
 * 16-pixel-wide blocks.
 *
 * For every pair of vertically adjacent rows, the row-to-row change of s1 is
 * compared with the row-to-row change of s2 at the same column, and the
 * square of that difference is accumulated.
 *
 * @param c      context pointer, not used by this implementation
 * @param s1     first block
 * @param s2     second block
 * @param stride offset between two consecutive rows, applied to both blocks
 * @param h      number of rows; h - 1 row pairs are evaluated
 * @return the accumulated score, 0 when h <= 1
 */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h)
{
    int score = 0;
    int x, y;

    /* Start at 1: each iteration compares the current row with the next. */
    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++) {
            /* Evaluate the four-term difference once, then square it. */
            const int grad_diff = s1[x] - s2[x] - s1[x + stride] + s2[x + stride];

            score += grad_diff * grad_diff;
        }
        s1 += stride;
        s2 += stride;
    }

    return score;
}

2287 2288
/**
 * Sum of squared differences between an int8_t array and an int16_t array.
 *
 * @param pix1 first array, size elements
 * @param pix2 second array, size elements
 * @param size number of elements to compare
 * @return sum over i of (pix1[i] - pix2[i])^2, 0 when size <= 0
 */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size)
{
    int score = 0;
    int i;

    for (i = 0; i < size; i++) {
        const int diff = pix1[i] - pix2[i];

        score += diff * diff;
    }

    return score;
}

2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309
/**
 * Define name16() on top of name8(): the 16-pixel-wide area is split into
 * 8-pixel-wide halves and the name8() results are added up.
 *
 * name8() is always called with a height of 8. The upper two halves are
 * always evaluated (left first, then right); the two halves starting
 * 8 rows further down are evaluated only when h == 16.
 */
#define WRAPPER8_16_SQ(name8, name16)                                         \
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src,     \
                  int stride, int h)                                          \
{                                                                             \
    int total = name8(s, dst, src, stride, 8);                                \
                                                                              \
    total += name8(s, dst + 8, src + 8, stride, 8);                           \
    if (h != 16)                                                              \
        return total;                                                         \
                                                                              \
    dst += 8 * stride;                                                        \
    src += 8 * stride;                                                        \
    total += name8(s, dst, src, stride, 8);                                   \
    total += name8(s, dst + 8, src + 8, stride, 8);                           \
    return total;                                                             \
}

2310 2311 2312
/* 16-wide comparison functions built from their 8x8 counterparts. */
WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
M
Michael Niedermayer 已提交
2320

2321 2322 2323 2324 2325
/**
 * Clip one value using unsigned integer comparisons only.
 *
 * As used by vector_clipf_c_opposite_sign(), all arguments are raw 32-bit
 * patterns of floats, with the lower bound negative and the upper bound
 * positive.
 * NOTE(review): the comparisons presumably rely on the IEEE-754
 * single-precision layout (sign bit in bit 31) - confirm for new targets.
 *
 * @param a        value to clip
 * @param mini     pattern of the lower bound
 * @param maxi     pattern of the upper bound
 * @param maxisign maxi with bit 31 flipped
 * @return mini if a > mini (unsigned), else maxi if a with bit 31 flipped is
 *         > maxisign (unsigned), else a unchanged
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                   uint32_t maxi, uint32_t maxisign)
{
    if (a > mini)
        return mini;

    /* Flipping bit 31 on both sides allows the same unsigned comparison to
     * be used for the upper bound. */
    if ((a ^ (1U << 31)) > maxisign)
        return maxi;

    return a;
}

2330
/**
 * Clip a float vector to [*min, *max] by comparing the raw 32-bit patterns
 * of the values with clipf_c_one().
 *
 * The only caller in this file, vector_clipf_c(), uses this path when
 * min < 0 and max > 0.
 *
 * Elements are handled in groups of eight: every started group is processed
 * completely, so a len that is not a multiple of 8 is effectively rounded up.
 *
 * NOTE(review): the floats are accessed through uint32_t pointers; this
 * assumes float is 32 bits wide and depends on the compiler tolerating the
 * type punning.
 *
 * @param dst destination vector
 * @param src source vector
 * @param min pointer to the lower bound
 * @param max pointer to the upper bound
 * @param len number of elements
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len)
{
    int i, j;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;

    for (i = 0; i < len; i += 8) {
        for (j = 0; j < 8; j++)
            dsti[i + j] = clipf_c_one(srci[i + j], mini, maxi, maxisign);
    }
}
2348
/**
 * Clip every element of a float vector to the range [min, max].
 *
 * When the bounds have opposite signs (min < 0 and max > 0) the work is
 * delegated to vector_clipf_c_opposite_sign(); otherwise av_clipf() is
 * applied to each element.
 *
 * Elements are handled in groups of eight: every started group is processed
 * completely, so a len that is not a multiple of 8 is effectively rounded up.
 *
 * @param dst destination vector
 * @param src source vector
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len)
{
    int i, j;

    if (min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
        return;
    }

    for (i = 0; i < len; i += 8) {
        for (j = 0; j < 8; j++)
            dst[i + j] = av_clipf(src[i + j], min, max);
    }
}

2366
/**
 * Dot product of two int16_t vectors.
 *
 * @param v1    first vector, order elements
 * @param v2    second vector, order elements
 * @param order number of elements; values <= 0 yield 0 without touching
 *              the vectors
 * @return sum over i of v1[i] * v2[i], accumulated in an int
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int res = 0;

    /* "> 0" keeps a negative order from walking off the arrays. */
    while (order-- > 0)
        res += *v1++ * *v2++;

    return res;
}

2376
/**
 * Compute the dot product of v1 and v2 and, in the same pass, update v1 with
 * v1[i] += mul * v3[i].
 *
 * The product for element i uses the value of v1[i] from before the update.
 *
 * @param v1    first vector, read and then modified in place
 * @param v2    second vector
 * @param v3    vector that is scaled by mul and added to v1
 * @param order number of elements; values <= 0 leave v1 untouched and yield 0
 * @param mul   scale factor applied to v3
 * @return sum over i of (old v1[i]) * v2[i], accumulated in an int
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int res = 0;

    /* "> 0" keeps a negative order from walking off the arrays. */
    while (order-- > 0) {
        res   += *v1 * *v2++;
        *v1++ += mul * *v3++;
    }

    return res;
}

2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398
/**
 * Apply a symmetric window to an int16_t signal.
 *
 * Only the first len / 2 window coefficients are read; coefficient i scales
 * both input[i] and its mirror input[len - 1 - i]. Each product is rounded
 * by adding 1 << 14 and then shifted right by 15 bits. For an odd len the
 * middle output element is not written.
 *
 * @param output destination, len elements
 * @param input  source, len elements
 * @param window window coefficients, len / 2 elements are used
 * @param len    number of samples
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const unsigned int half = len >> 1;
    unsigned int head, tail;

    /* head walks up from the start, tail walks down from the end. */
    for (head = 0, tail = len - 1; head < half; head++, tail--) {
        const int16_t coeff = window[head];

        output[head] = (MUL16(input[head], coeff) + (1 << 14)) >> 15;
        output[tail] = (MUL16(input[tail], coeff) + (1 << 14)) >> 15;
    }
}

2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414
/**
 * Clip every element of an int32_t vector to the range [min, max].
 *
 * The bulk of the vector is processed eight elements at a time; any
 * remaining elements are handled one by one. A len of 0 writes nothing.
 *
 * @param dst destination vector, len elements
 * @param src source vector, len elements
 * @param min lower bound
 * @param max upper bound
 * @param len number of elements
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    /* Testing before the first group keeps len == 0 from writing eight
     * elements, and keeps the unsigned counter from wrapping around when
     * len is not a multiple of 8. */
    while (len >= 8) {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    }

    /* Tail: fewer than eight elements left. */
    for (; len > 0; len--)
        *dst++ = av_clip(*src++, min, max);
}

2415
/**
 * Run ff_j_rev_dct() on block, then hand the transformed block to
 * put_pixels_clamped_c() to store it at dest.
 *
 * @param dest      destination pixels
 * @param line_size passed through to put_pixels_clamped_c()
 * @param block     coefficient block, transformed in place
 */
static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
2420
/**
 * Run ff_j_rev_dct() on block, then hand the transformed block to
 * add_pixels_clamped_c() to combine it with the pixels at dest.
 *
 * @param dest      destination pixels
 * @param line_size passed through to add_pixels_clamped_c()
 * @param block     coefficient block, transformed in place
 */
static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}

F
Fabrice Bellard 已提交
2426
/* init static data */
2427
av_cold void ff_dsputil_static_init(void)
2428
{
M
Michael Niedermayer 已提交
2429
    int i;
2430

F
Fabrice Bellard 已提交
2431
    for(i=0;i<512;i++) {
M
Måns Rullgård 已提交
2432
        ff_squareTbl[i] = (i - 256) * (i - 256);
F
Fabrice Bellard 已提交
2433
    }
2434

2435
    for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
F
Fabrice Bellard 已提交
2436
}
2437

2438 2439
int ff_check_alignment(void){
    static int did_fail=0;
2440
    LOCAL_ALIGNED_16(int, aligned, [4]);
2441

2442
    if((intptr_t)aligned & 15){
2443
        if(!did_fail){
2444
#if HAVE_MMX || HAVE_ALTIVEC
2445
            av_log(NULL, AV_LOG_ERROR,
2446 2447
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
2448
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2449
                "Do not report crashes to Libav developers.\n");
2450 2451 2452 2453 2454 2455 2456
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
2457

2458
/**
 * Fill a DSPContext with the C implementations of all DSP functions, then
 * give the architecture-specific init functions a chance to run.
 *
 * The forward DCT (encoders only) and the inverse DCT are selected from
 * avctx->bits_per_raw_sample, avctx->dct_algo and avctx->idct_algo.
 * The scan table permutation is built last, from c->idct_permutation_type.
 *
 * @param c     context to initialize
 * @param avctx codec context supplying bits_per_raw_sample, dct_algo and
 *              idct_algo
 */
av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    /* The return value is ignored here. */
    ff_check_alignment();

#if CONFIG_ENCODERS
    /* Forward DCT selection. */
    if (avctx->bits_per_raw_sample == 10) {
        c->fdct    = ff_jpeg_fdct_islow_10;
        c->fdct248 = ff_fdct248_islow_10;
    } else {
        if(avctx->dct_algo==FF_DCT_FASTINT) {
            c->fdct    = ff_fdct_ifast;
            c->fdct248 = ff_fdct_ifast248;
        }
        else if(avctx->dct_algo==FF_DCT_FAAN) {
            c->fdct    = ff_faandct;
            c->fdct248 = ff_faandct248;
        }
        else {
            c->fdct    = ff_jpeg_fdct_islow_8; //slow/accurate/default
            c->fdct248 = ff_fdct248_islow_8;
        }
    }
#endif //CONFIG_ENCODERS

    /* Inverse DCT selection, together with the matching permutation type. */
    if (avctx->bits_per_raw_sample == 10) {
        c->idct_put              = ff_simple_idct_put_10;
        c->idct_add              = ff_simple_idct_add_10;
        c->idct                  = ff_simple_idct_10;
        c->idct_permutation_type = FF_NO_IDCT_PERM;
    } else {
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= jref_idct_put;
            c->idct_add= jref_idct_add;
            c->idct    = ff_j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct    = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put = ff_simple_idct_put_8;
            c->idct_add = ff_simple_idct_add_8;
            c->idct     = ff_simple_idct_8;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
    }

    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;

    /* TODO [0] 16  [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* Only indices 0-2, 4-6 and 8-10 of the tpel tables are assigned. */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

    /* Assign all 16 entries of one PFX_pixels_tab[IDX] row from the
     * PFX<NUM>_mcXY_c functions; entry index is X + 4 * Y. */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

#undef dspfunc

    c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

    /* Slot 0 gets the 16-wide function, slot 1 the 8x8 one. */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsad[5]= vsad_intra8_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->vsse[5]= vsse_intra8_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    c->add_bytes= add_bytes_c;
    c->diff_bytes= diff_bytes_c;
    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
    c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
    c->bswap_buf= bswap_buf;
    c->bswap16_buf = bswap16_buf;

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

    c->vector_clipf = vector_clipf_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
    c->apply_window_int16 = apply_window_int16_c;
    c->vector_clip_int32 = vector_clip_int32_c;

    c->shrink[0]= av_image_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->add_pixels8 = add_pixels8_c;

#undef FUNC
#undef FUNCC
#define FUNC(f, depth) f ## _ ## depth
#define FUNCC(f, depth) f ## _ ## depth ## _c

    c->draw_edges                    = FUNCC(draw_edges, 8);
    c->clear_block                   = FUNCC(clear_block, 8);
    c->clear_blocks                  = FUNCC(clear_blocks, 8);

#define BIT_DEPTH_FUNCS(depth) \
    c->get_pixels                    = FUNCC(get_pixels,   depth);

    /* get_pixels uses the _16 variant for 9 and 10 bits per raw sample and
     * the _8 variant for everything else. */
    switch (avctx->bits_per_raw_sample) {
    case 9:
    case 10:
        BIT_DEPTH_FUNCS(16);
        break;
    default:
        BIT_DEPTH_FUNCS(8);
        break;
    }


    /* Architecture-specific initialization; each function receives the
     * context filled in above.
     * NOTE(review): presumably these replace some of the C function
     * pointers with optimized versions - confirm in the per-arch files. */
    if (ARCH_ALPHA)
        ff_dsputil_init_alpha(c, avctx);
    if (ARCH_ARM)
        ff_dsputil_init_arm(c, avctx);
    if (ARCH_BFIN)
        ff_dsputil_init_bfin(c, avctx);
    if (ARCH_PPC)
        ff_dsputil_init_ppc(c, avctx);
    if (ARCH_SH4)
        ff_dsputil_init_sh4(c, avctx);
    if (HAVE_VIS)
        ff_dsputil_init_vis(c, avctx);
    if (ARCH_X86)
        ff_dsputil_init_x86(c, avctx);

    /* Done after the arch-specific init, using whatever permutation type
     * is stored in the context at this point. */
    ff_init_scantable_permutation(c->idct_permutation,
                                  c->idct_permutation_type);
}