me_cmp.c 39.9 KB
Newer Older
F
Fabrice Bellard 已提交
1
/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22

23
#include "libavutil/attributes.h"
24
#include "libavutil/internal.h"
F
Fabrice Bellard 已提交
25
#include "avcodec.h"
26
#include "copy_block.h"
27
#include "simple_idct.h"
28
#include "me_cmp.h"
29 30
#include "mpegvideo.h"
#include "config.h"
31

32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
/**
 * Table of squares: ff_square_tab[i] == (i - 256) * (i - 256) for
 * i in [0, 511].
 *
 * The SSE routines in this file index it through a pointer advanced by 256
 * (ff_square_tab + 256), so that a signed difference d between two 8-bit
 * pixels can be used directly as the index to obtain d * d.
 */
const uint32_t ff_square_tab[512] = {
    65536, 65025, 64516, 64009, 63504, 63001, 62500, 62001, 61504, 61009, 60516, 60025, 59536, 59049, 58564, 58081,
    57600, 57121, 56644, 56169, 55696, 55225, 54756, 54289, 53824, 53361, 52900, 52441, 51984, 51529, 51076, 50625,
    50176, 49729, 49284, 48841, 48400, 47961, 47524, 47089, 46656, 46225, 45796, 45369, 44944, 44521, 44100, 43681,
    43264, 42849, 42436, 42025, 41616, 41209, 40804, 40401, 40000, 39601, 39204, 38809, 38416, 38025, 37636, 37249,
    36864, 36481, 36100, 35721, 35344, 34969, 34596, 34225, 33856, 33489, 33124, 32761, 32400, 32041, 31684, 31329,
    30976, 30625, 30276, 29929, 29584, 29241, 28900, 28561, 28224, 27889, 27556, 27225, 26896, 26569, 26244, 25921,
    25600, 25281, 24964, 24649, 24336, 24025, 23716, 23409, 23104, 22801, 22500, 22201, 21904, 21609, 21316, 21025,
    20736, 20449, 20164, 19881, 19600, 19321, 19044, 18769, 18496, 18225, 17956, 17689, 17424, 17161, 16900, 16641,
    16384, 16129, 15876, 15625, 15376, 15129, 14884, 14641, 14400, 14161, 13924, 13689, 13456, 13225, 12996, 12769,
    12544, 12321, 12100, 11881, 11664, 11449, 11236, 11025, 10816, 10609, 10404, 10201, 10000,  9801,  9604,  9409,
     9216,  9025,  8836,  8649,  8464,  8281,  8100,  7921,  7744,  7569,  7396,  7225,  7056,  6889,  6724,  6561,
     6400,  6241,  6084,  5929,  5776,  5625,  5476,  5329,  5184,  5041,  4900,  4761,  4624,  4489,  4356,  4225,
     4096,  3969,  3844,  3721,  3600,  3481,  3364,  3249,  3136,  3025,  2916,  2809,  2704,  2601,  2500,  2401,
     2304,  2209,  2116,  2025,  1936,  1849,  1764,  1681,  1600,  1521,  1444,  1369,  1296,  1225,  1156,  1089,
     1024,   961,   900,   841,   784,   729,   676,   625,   576,   529,   484,   441,   400,   361,   324,   289,
      256,   225,   196,   169,   144,   121,   100,    81,    64,    49,    36,    25,    16,     9,     4,     1,
        0,     1,     4,     9,    16,    25,    36,    49,    64,    81,   100,   121,   144,   169,   196,   225,
      256,   289,   324,   361,   400,   441,   484,   529,   576,   625,   676,   729,   784,   841,   900,   961,
     1024,  1089,  1156,  1225,  1296,  1369,  1444,  1521,  1600,  1681,  1764,  1849,  1936,  2025,  2116,  2209,
     2304,  2401,  2500,  2601,  2704,  2809,  2916,  3025,  3136,  3249,  3364,  3481,  3600,  3721,  3844,  3969,
     4096,  4225,  4356,  4489,  4624,  4761,  4900,  5041,  5184,  5329,  5476,  5625,  5776,  5929,  6084,  6241,
     6400,  6561,  6724,  6889,  7056,  7225,  7396,  7569,  7744,  7921,  8100,  8281,  8464,  8649,  8836,  9025,
     9216,  9409,  9604,  9801, 10000, 10201, 10404, 10609, 10816, 11025, 11236, 11449, 11664, 11881, 12100, 12321,
    12544, 12769, 12996, 13225, 13456, 13689, 13924, 14161, 14400, 14641, 14884, 15129, 15376, 15625, 15876, 16129,
    16384, 16641, 16900, 17161, 17424, 17689, 17956, 18225, 18496, 18769, 19044, 19321, 19600, 19881, 20164, 20449,
    20736, 21025, 21316, 21609, 21904, 22201, 22500, 22801, 23104, 23409, 23716, 24025, 24336, 24649, 24964, 25281,
    25600, 25921, 26244, 26569, 26896, 27225, 27556, 27889, 28224, 28561, 28900, 29241, 29584, 29929, 30276, 30625,
    30976, 31329, 31684, 32041, 32400, 32761, 33124, 33489, 33856, 34225, 34596, 34969, 35344, 35721, 36100, 36481,
    36864, 37249, 37636, 38025, 38416, 38809, 39204, 39601, 40000, 40401, 40804, 41209, 41616, 42025, 42436, 42849,
    43264, 43681, 44100, 44521, 44944, 45369, 45796, 46225, 46656, 47089, 47524, 47961, 48400, 48841, 49284, 49729,
    50176, 50625, 51076, 51529, 51984, 52441, 52900, 53361, 53824, 54289, 54756, 55225, 55696, 56169, 56644, 57121,
    57600, 58081, 58564, 59049, 59536, 60025, 60516, 61009, 61504, 62001, 62500, 63001, 63504, 64009, 64516, 65025,
};
F
Fabrice Bellard 已提交
67

68
/**
 * Sum of squared differences between two blocks 4 pixels wide and h rows
 * tall.
 *
 * Squares come from ff_square_tab, accessed through a pointer offset by 256
 * so the signed pixel difference is usable as the index.
 *
 * @param v      unused
 * @param pix1   top-left pixel of the first block
 * @param pix2   top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows to compare
 * @return accumulated squared error
 */
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                  ptrdiff_t stride, int h)
{
    int s = 0, i, x;
    const uint32_t *sq = ff_square_tab + 256;

    for (i = 0; i < h; i++) {
        for (x = 0; x < 4; x++)
            s += sq[pix1[x] - pix2[x]];
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

85
/**
 * Sum of squared differences between two blocks 8 pixels wide and h rows
 * tall.
 *
 * Squares come from ff_square_tab, accessed through a pointer offset by 256
 * so the signed pixel difference is usable as the index.
 *
 * @param v      unused
 * @param pix1   top-left pixel of the first block
 * @param pix2   top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows to compare
 * @return accumulated squared error
 */
static int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                  ptrdiff_t stride, int h)
{
    int s = 0, i, x;
    const uint32_t *sq = ff_square_tab + 256;

    for (i = 0; i < h; i++) {
        for (x = 0; x < 8; x++)
            s += sq[pix1[x] - pix2[x]];
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

106
/**
 * Sum of squared differences between two blocks 16 pixels wide and h rows
 * tall.
 *
 * Squares come from ff_square_tab, accessed through a pointer offset by 256
 * so the signed pixel difference is usable as the index.
 *
 * @param v      unused
 * @param pix1   top-left pixel of the first block
 * @param pix2   top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows to compare
 * @return accumulated squared error
 */
static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                   ptrdiff_t stride, int h)
{
    int s = 0, i, x;
    const uint32_t *sq = ff_square_tab + 256;

    for (i = 0; i < h; i++) {
        for (x = 0; x < 16; x++)
            s += sq[pix1[x] - pix2[x]];

        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

D
Diego Biurrun 已提交
136
/**
 * Sum of the absolute values of the 64 coefficients of a block.
 *
 * Each int16_t element is promoted to int before abs(), so -32768 is
 * handled without overflow.
 *
 * @param block array of 64 coefficients
 * @return sum of |block[i]| for i in [0, 63]
 */
static int sum_abs_dctelem_c(int16_t *block)
{
    int sum = 0, i;

    for (i = 0; i < 64; i++)
        sum += abs(block[i]);
    return sum;
}

145 146
/* Average of two values, rounded to nearest with halves rounded up. */
#define avg2(a, b) (((a) + (b) + 1) >> 1)
/* Average of four values, rounded to nearest with halves rounded up. */
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
F
Fabrice Bellard 已提交
147

148
/**
 * Sum of absolute differences between two blocks 16 pixels wide and h rows
 * tall.
 *
 * @param v      unused
 * @param pix1   top-left pixel of the first block
 * @param pix2   top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows to compare
 * @return accumulated absolute error
 */
static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                              ptrdiff_t stride, int h)
{
    int s = 0, i, x;

    for (i = 0; i < h; i++) {
        for (x = 0; x < 16; x++)
            s += abs(pix1[x] - pix2[x]);
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214
/**
 * Sum of absolute residuals of the pixel difference (pix1 - pix2) after
 * prediction, over a block 16 pixels wide and h rows tall.
 *
 * The first row is predicted from the left neighbour (the first sample is
 * taken as is). In later rows the first column is predicted from the sample
 * above, and every other sample from mid_pred() of the top neighbour, the
 * left neighbour, and top + left - top-left.
 *
 * @param v      unused
 * @param pix1   top-left pixel of the first block
 * @param pix2   top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows
 * @return accumulated absolute prediction residual
 */
static inline int pix_median_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                             ptrdiff_t stride, int h)
{
    int total = 0, row, col;

/* Difference signal at offset x relative to the current row start. */
#define V(x) (pix1[x] - pix2[x])

    /* Top row: no row above, so only the left neighbour is available. */
    total += abs(V(0));
    for (col = 1; col < 16; col++)
        total += abs(V(col) - V(col - 1));

    pix1 += stride;
    pix2 += stride;

    for (row = 1; row < h; row++) {
        /* Left column: only the sample above is available. */
        total += abs(V(0) - V(-stride));
        for (col = 1; col < 16; col++) {
            const int top      = V(col - stride);
            const int left     = V(col - 1);
            const int top_left = V(col - stride - 1);

            total += abs(V(col) - mid_pred(top, left, top + left - top_left));
        }
        pix1 += stride;
        pix2 += stride;
    }
#undef V
    return total;
}

215
/**
 * Sum of absolute differences between a 16-wide block and a reference
 * averaged horizontally: each reference sample is avg2() of a pixel and its
 * right neighbour.
 *
 * Reads 17 pixels per row of pix2 (indices 0..16).
 *
 * @param v      unused
 * @param pix1   top-left pixel of the block being compared
 * @param pix2   top-left pixel of the reference block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows to compare
 * @return accumulated absolute error
 */
static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          ptrdiff_t stride, int h)
{
    int s = 0, i, x;

    for (i = 0; i < h; i++) {
        for (x = 0; x < 16; x++)
            s += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

243
/**
 * Sum of absolute differences between a 16-wide block and a reference
 * averaged vertically: each reference sample is avg2() of a pixel and the
 * pixel one row below it.
 *
 * Reads h + 1 rows of pix2.
 *
 * @param v      unused
 * @param pix1   top-left pixel of the block being compared
 * @param pix2   top-left pixel of the reference block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows to compare
 * @return accumulated absolute error
 */
static int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          ptrdiff_t stride, int h)
{
    int s = 0, i, x;
    uint8_t *pix3 = pix2 + stride; /* reference row below pix2 */

    for (i = 0; i < h; i++) {
        for (x = 0; x < 16; x++)
            s += abs(pix1[x] - avg2(pix2[x], pix3[x]));
        pix1 += stride;
        pix2 += stride;
        pix3 += stride;
    }
    return s;
}

273
/**
 * Sum of absolute differences between a 16-wide block and a reference
 * averaged in both directions: each reference sample is avg4() of a pixel,
 * its right neighbour, and the two pixels below those.
 *
 * Reads 17 pixels per row and h + 1 rows of pix2.
 *
 * @param v      unused
 * @param pix1   top-left pixel of the block being compared
 * @param pix2   top-left pixel of the reference block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows to compare
 * @return accumulated absolute error
 */
static int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                           ptrdiff_t stride, int h)
{
    int s = 0, i, x;
    uint8_t *pix3 = pix2 + stride; /* reference row below pix2 */

    for (i = 0; i < h; i++) {
        for (x = 0; x < 16; x++)
            s += abs(pix1[x] - avg4(pix2[x], pix2[x + 1],
                                    pix3[x], pix3[x + 1]));
        pix1 += stride;
        pix2 += stride;
        pix3 += stride;
    }
    return s;
}

303
/**
 * Sum of absolute differences between two blocks 8 pixels wide and h rows
 * tall.
 *
 * @param v      unused
 * @param pix1   top-left pixel of the first block
 * @param pix2   top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows to compare
 * @return accumulated absolute error
 */
static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                             ptrdiff_t stride, int h)
{
    int s = 0, i, x;

    for (i = 0; i < h; i++) {
        for (x = 0; x < 8; x++)
            s += abs(pix1[x] - pix2[x]);
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
/**
 * Sum of absolute residuals of the pixel difference (pix1 - pix2) after
 * prediction, over a block 8 pixels wide and h rows tall.
 *
 * The first row is predicted from the left neighbour (the first sample is
 * taken as is). In later rows the first column is predicted from the sample
 * above, and every other sample from mid_pred() of the top neighbour, the
 * left neighbour, and top + left - top-left.
 *
 * @param v      unused
 * @param pix1   top-left pixel of the first block
 * @param pix2   top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows
 * @return accumulated absolute prediction residual
 */
static inline int pix_median_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                             ptrdiff_t stride, int h)
{
    int total = 0, row, col;

/* Difference signal at offset x relative to the current row start. */
#define V(x) (pix1[x] - pix2[x])

    /* Top row: no row above, so only the left neighbour is available. */
    total += abs(V(0));
    for (col = 1; col < 8; col++)
        total += abs(V(col) - V(col - 1));

    pix1 += stride;
    pix2 += stride;

    for (row = 1; row < h; row++) {
        /* Left column: only the sample above is available. */
        total += abs(V(0) - V(-stride));
        for (col = 1; col < 8; col++) {
            const int top      = V(col - stride);
            const int left     = V(col - 1);
            const int top_left = V(col - stride - 1);

            total += abs(V(col) - mid_pred(top, left, top + left - top_left));
        }
        pix1 += stride;
        pix2 += stride;
    }
#undef V
    return total;
}

354
/**
 * Sum of absolute differences between an 8-wide block and a reference
 * averaged horizontally: each reference sample is avg2() of a pixel and its
 * right neighbour.
 *
 * Reads 9 pixels per row of pix2 (indices 0..8).
 *
 * @param v      unused
 * @param pix1   top-left pixel of the block being compared
 * @param pix2   top-left pixel of the reference block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows to compare
 * @return accumulated absolute error
 */
static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                         ptrdiff_t stride, int h)
{
    int s = 0, i, x;

    for (i = 0; i < h; i++) {
        for (x = 0; x < 8; x++)
            s += abs(pix1[x] - avg2(pix2[x], pix2[x + 1]));
        pix1 += stride;
        pix2 += stride;
    }
    return s;
}

374
/**
 * Sum of absolute differences between an 8-wide block and a reference
 * averaged vertically: each reference sample is avg2() of a pixel and the
 * pixel one row below it.
 *
 * Reads h + 1 rows of pix2.
 *
 * @param v      unused
 * @param pix1   top-left pixel of the block being compared
 * @param pix2   top-left pixel of the reference block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows to compare
 * @return accumulated absolute error
 */
static int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                         ptrdiff_t stride, int h)
{
    int s = 0, i, x;
    uint8_t *pix3 = pix2 + stride; /* reference row below pix2 */

    for (i = 0; i < h; i++) {
        for (x = 0; x < 8; x++)
            s += abs(pix1[x] - avg2(pix2[x], pix3[x]));
        pix1 += stride;
        pix2 += stride;
        pix3 += stride;
    }
    return s;
}

396
/**
 * Sum of absolute differences between an 8-wide block and a reference
 * averaged in both directions: each reference sample is avg4() of a pixel,
 * its right neighbour, and the two pixels below those.
 *
 * Reads 9 pixels per row and h + 1 rows of pix2.
 *
 * @param v      unused
 * @param pix1   top-left pixel of the block being compared
 * @param pix2   top-left pixel of the reference block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows to compare
 * @return accumulated absolute error
 */
static int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          ptrdiff_t stride, int h)
{
    int s = 0, i, x;
    uint8_t *pix3 = pix2 + stride; /* reference row below pix2 */

    for (i = 0; i < h; i++) {
        for (x = 0; x < 8; x++)
            s += abs(pix1[x] - avg4(pix2[x], pix2[x + 1],
                                    pix3[x], pix3[x + 1]));
        pix1 += stride;
        pix2 += stride;
        pix3 += stride;
    }
    return s;
}

418 419
/**
 * Squared error between two 16-wide blocks plus a weighted penalty on the
 * difference of their local 2x2 gradients.
 *
 * score1 is the plain sum of squared differences. score2 accumulates, for
 * every 2x2 neighbourhood inside the block, the absolute gradient of s1
 * minus the absolute gradient of s2. The result is
 * score1 + |score2| * weight.
 *
 * @param c      context providing avctx->nsse_weight; may be NULL, in which
 *               case a weight of 8 is used
 * @param s1     top-left pixel of the first block
 * @param s2     top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows
 * @return combined score
 */
static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
                    ptrdiff_t stride, int h)
{
    int score1 = 0, score2 = 0, x, y;
    int weight;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
        /* The gradient term needs the next row, so skip it on the last one. */
        if (y + 1 < h) {
            for (x = 0; x < 15; x++)
                score2 += FFABS(s1[x]     - s1[x + stride] -
                                s1[x + 1] + s1[x + stride + 1]) -
                          FFABS(s2[x]     - s2[x + stride] -
                                s2[x + 1] + s2[x + stride + 1]);
        }
        s1 += stride;
        s2 += stride;
    }

    weight = c ? c->avctx->nsse_weight : 8;
    return score1 + FFABS(score2) * weight;
}

443 444
/**
 * Squared error between two 8-wide blocks plus a weighted penalty on the
 * difference of their local 2x2 gradients.
 *
 * score1 is the plain sum of squared differences. score2 accumulates, for
 * every 2x2 neighbourhood inside the block, the absolute gradient of s1
 * minus the absolute gradient of s2. The result is
 * score1 + |score2| * weight.
 *
 * @param c      context providing avctx->nsse_weight; may be NULL, in which
 *               case a weight of 8 is used
 * @param s1     top-left pixel of the first block
 * @param s2     top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      number of rows
 * @return combined score
 */
static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
                   ptrdiff_t stride, int h)
{
    int score1 = 0, score2 = 0, x, y;
    int weight;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
        /* The gradient term needs the next row, so skip it on the last one. */
        if (y + 1 < h) {
            for (x = 0; x < 7; x++)
                score2 += FFABS(s1[x]     - s1[x + stride] -
                                s1[x + 1] + s1[x + stride + 1]) -
                          FFABS(s2[x]     - s2[x + stride] -
                                s2[x + 1] + s2[x + stride + 1]);
        }
        s1 += stride;
        s2 += stride;
    }

    weight = c ? c->avctx->nsse_weight : 8;
    return score1 + FFABS(score2) * weight;
}

468
/**
 * Comparison function that ignores its inputs and always reports a score
 * of 0. Installed by ff_set_cmp() for FF_CMP_ZERO.
 */
static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
                    ptrdiff_t stride, int h)
{
    return 0;
}

474
/**
 * Fill a table of six comparison functions according to an FF_CMP_* type.
 *
 * Only the low 8 bits of type select the function family. The six entries
 * of cmp are first cleared, then each entry i is set to the i-th function
 * of the matching family in c (or to zero_cmp for FF_CMP_ZERO). For an
 * unrecognised type an error is logged and the entries stay cleared.
 *
 * @param c    source of the per-family function tables
 * @param cmp  destination array with room for 6 function pointers
 * @param type FF_CMP_* selector
 */
void ff_set_cmp(MECmpContext *c, me_cmp_func *cmp, int type)
{
    int i;

    /* Size the clear by the element type instead of assuming a function
     * pointer is as large as a void *. */
    memset(cmp, 0, sizeof(*cmp) * 6);

    for (i = 0; i < 6; i++) {
        switch (type & 0xFF) {
        case FF_CMP_SAD:
            cmp[i] = c->sad[i];
            break;
        case FF_CMP_MEDIAN_SAD:
            cmp[i] = c->median_sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i] = c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i] = c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i] = c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i] = c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i] = c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i] = c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i] = c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i] = c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i] = c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i] = c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i] = zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i] = c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i] = c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i] = c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,
                   "internal error in cmp function selection\n");
        }
    }
}

539 540 541
/* Two-output butterfly: o1 receives the sum and o2 the difference of the
 * inputs. Expands to two statements, so it must not be used as the
 * unbraced body of an if/for/while. */
#define BUTTERFLY2(o1, o2, i1, i2)              \
    o1 = (i1) + (i2);                           \
    o2 = (i1) - (i2);

/* In-place butterfly: x becomes x + y and y becomes x - y (using the old
 * values of both). Declares locals named a and b, so neither argument may
 * itself be an expression involving identifiers called a or b. */
#define BUTTERFLY1(x, y)                        \
    {                                           \
        int a, b;                               \
        a = x;                                  \
        b = y;                                  \
        x = a + b;                              \
        y = a - b;                              \
    }

/* |x + y| + |x - y|: the absolute values of both butterfly outputs added
 * together, without storing them. */
#define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))
M
Michael Niedermayer 已提交
553

554
/**
 * Sum of absolute transformed differences of an 8x8 block (installed for
 * FF_CMP_SATD via the hadamard8_diff table).
 *
 * The difference src - dst is run through three butterfly stages along each
 * row, then three along each column; the last column stage is folded into
 * the accumulation of absolute values via BUTTERFLYA.
 *
 * @param s      unused
 * @param dst    top-left pixel of the first block
 * @param src    top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      must be 8 (checked with av_assert2)
 * @return sum of the absolute values of the transformed difference
 */
static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst,
                               uint8_t *src, ptrdiff_t stride, int h)
{
    int i, temp[64], sum = 0;

    av_assert2(h == 8);

    /* Row pass. */
    for (i = 0; i < 8; i++) {
        // FIXME: try pointer walks
        BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
                   src[stride * i + 0] - dst[stride * i + 0],
                   src[stride * i + 1] - dst[stride * i + 1]);
        BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
                   src[stride * i + 2] - dst[stride * i + 2],
                   src[stride * i + 3] - dst[stride * i + 3]);
        BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
                   src[stride * i + 4] - dst[stride * i + 4],
                   src[stride * i + 5] - dst[stride * i + 5]);
        BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
                   src[stride * i + 6] - dst[stride * i + 6],
                   src[stride * i + 7] - dst[stride * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
        BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
        BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
    }

    /* Column pass; the final stage is summed directly. */
    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
        BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
        BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);

        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
        BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
        BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);

        sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) +
               BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) +
               BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) +
               BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
    }
    return sum;
}

606
/**
 * Sum of absolute transformed values of a single 8x8 block, with the DC
 * term removed.
 *
 * The pixels of src are run through three butterfly stages along each row,
 * then three along each column; the last column stage is folded into the
 * accumulation via BUTTERFLYA. Finally |temp[0] + temp[32]|, the
 * sum-of-all-pixels term contributed by column 0, is subtracted.
 *
 * @param s      unused
 * @param src    top-left pixel of the block
 * @param dummy  unused
 * @param stride byte distance between consecutive rows
 * @param h      must be 8 (checked with av_assert2)
 * @return sum of absolute transformed values excluding the DC term
 */
static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src,
                                uint8_t *dummy, ptrdiff_t stride, int h)
{
    int i, temp[64], sum = 0;

    av_assert2(h == 8);

    /* Row pass. */
    for (i = 0; i < 8; i++) {
        // FIXME: try pointer walks
        BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
                   src[stride * i + 0], src[stride * i + 1]);
        BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
                   src[stride * i + 2], src[stride * i + 3]);
        BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
                   src[stride * i + 4], src[stride * i + 5]);
        BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
                   src[stride * i + 6], src[stride * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
        BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
        BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
    }

    /* Column pass; the final stage is summed directly. */
    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
        BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
        BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);

        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
        BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
        BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);

        sum +=
            BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i])
            + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i])
            + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i])
            + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
    }

    sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean

    return sum;
}

658
/**
 * Sum of absolute DCT coefficients of the difference of two 8x8 blocks.
 *
 * The pixel difference is produced by s->pdsp.diff_pixels_unaligned,
 * transformed in place by s->fdsp.fdct, and reduced by
 * s->mecc.sum_abs_dctelem.
 *
 * @param s      context supplying the pdsp, fdsp and mecc function tables
 * @param src1   top-left pixel of the first block
 * @param src2   top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      must be 8 (checked with av_assert2)
 * @return value returned by s->mecc.sum_abs_dctelem for the transformed block
 */
static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
                        uint8_t *src2, ptrdiff_t stride, int h)
{
    LOCAL_ALIGNED_16(int16_t, temp, [64]);

    av_assert2(h == 8);

    s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
    s->fdsp.fdct(temp);
    return s->mecc.sum_abs_dctelem(temp);
}

670
#if CONFIG_GPL
671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698
/*
 * One-dimensional 8-point integer transform step.
 *
 * Reads its eight inputs through SRC(0)..SRC(7) and emits its eight outputs
 * through DST(0, v)..DST(7, v); both macros must be defined by the user
 * before DCT8_1D is expanded. The expansion is a single braced block, so it
 * can serve as the body of a loop without a trailing semicolon.
 *
 * NOTE(review): presumably the H.264-style 8x8 integer DCT, given its use by
 * dct264_sad8x8_c -- confirm against the codec specification.
 */
#define DCT8_1D                                         \
    {                                                   \
        const int s07 = SRC(0) + SRC(7);                \
        const int s16 = SRC(1) + SRC(6);                \
        const int s25 = SRC(2) + SRC(5);                \
        const int s34 = SRC(3) + SRC(4);                \
        const int a0  = s07 + s34;                      \
        const int a1  = s16 + s25;                      \
        const int a2  = s07 - s34;                      \
        const int a3  = s16 - s25;                      \
        const int d07 = SRC(0) - SRC(7);                \
        const int d16 = SRC(1) - SRC(6);                \
        const int d25 = SRC(2) - SRC(5);                \
        const int d34 = SRC(3) - SRC(4);                \
        const int a4  = d16 + d25 + (d07 + (d07 >> 1)); \
        const int a5  = d07 - d34 - (d25 + (d25 >> 1)); \
        const int a6  = d07 + d34 - (d16 + (d16 >> 1)); \
        const int a7  = d16 - d25 + (d34 + (d34 >> 1)); \
        DST(0, a0 + a1);                                \
        DST(1, a4 + (a7 >> 2));                         \
        DST(2, a2 + (a3 >> 1));                         \
        DST(3, a5 + (a6 >> 2));                         \
        DST(4, a0 - a1);                                \
        DST(5, a6 - (a5 >> 2));                         \
        DST(6, (a2 >> 1) - a3);                         \
        DST(7, (a4 >> 2) - a7);                         \
    }

699
/**
 * Sum of absolute coefficients of the DCT8_1D transform applied in both
 * directions to the difference of two 8x8 blocks.
 *
 * The pixel difference is produced by s->pdsp.diff_pixels_unaligned. The
 * first pass transforms every row in place; the second pass transforms
 * every column and, instead of storing the outputs, adds their absolute
 * values to the result.
 *
 * @param s      context supplying the pdsp function table
 * @param src1   top-left pixel of the first block
 * @param src2   top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      unused
 * @return sum of absolute transformed coefficients
 */
static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
                           uint8_t *src2, ptrdiff_t stride, int h)
{
    int16_t dct[8][8];
    int i, sum = 0;

    s->pdsp.diff_pixels_unaligned(dct[0], src1, src2, stride);

    /* Row pass: results are written back into dct. */
#define SRC(x) dct[i][x]
#define DST(x, v) dct[i][x] = v
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST

    /* Column pass: outputs are only accumulated, never stored. */
#define SRC(x) dct[x][i]
#define DST(x, v) sum += FFABS(v)
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST

    return sum;
}
#endif

724
/**
 * Largest absolute DCT coefficient of the difference of two 8x8 blocks.
 *
 * The pixel difference is produced by s->pdsp.diff_pixels_unaligned and
 * transformed in place by s->fdsp.fdct.
 *
 * @param s      context supplying the pdsp and fdsp function tables
 * @param src1   top-left pixel of the first block
 * @param src2   top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      must be 8 (checked with av_assert2)
 * @return maximum of |coefficient| over the 64 coefficients (at least 0)
 */
static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
                        uint8_t *src2, ptrdiff_t stride, int h)
{
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    int sum = 0, i;

    av_assert2(h == 8);

    s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);
    s->fdsp.fdct(temp);

    for (i = 0; i < 64; i++)
        sum = FFMAX(sum, FFABS(temp[i]));

    return sum;
}

741
/**
 * Squared error introduced by quantising the difference of two 8x8 blocks.
 *
 * The pixel difference is computed, a copy is kept, and the working copy is
 * passed through s->fast_dct_quantize, s->dct_unquantize_inter and
 * ff_simple_idct_int16_8bit. The result is the sum of squared differences
 * between the reconstructed and the saved difference block.
 *
 * Side effects on s: mb_intra is set to 0 and block_last_index[0] is
 * overwritten with the value returned by fast_dct_quantize.
 *
 * @param s      encoder context (modified, see above)
 * @param src1   top-left pixel of the first block
 * @param src2   top-left pixel of the second block
 * @param stride byte distance between consecutive rows (same for both blocks)
 * @param h      must be 8 (checked with av_assert2)
 * @return sum of squared reconstruction errors
 */
static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
                           uint8_t *src2, ptrdiff_t stride, int h)
{
    /* First 64 entries: working block; last 64: untouched backup. */
    LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]);
    int16_t *const bak = temp + 64;
    int sum = 0, i;

    av_assert2(h == 8);
    s->mb_intra = 0;

    s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);

    memcpy(bak, temp, 64 * sizeof(int16_t));

    /* i only receives the quantiser's last output argument here; it is
     * reused as the loop counter below. */
    s->block_last_index[0 /* FIXME */] =
        s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_int16_8bit(temp); // FIXME

    for (i = 0; i < 64; i++)
        sum += (temp[i] - bak[i]) * (temp[i] - bak[i]);

    return sum;
}

766
/**
 * Estimate the number of VLC bits for one quantized 8x8 block.
 *
 * Shared by rd8x8_c() and bit8x8_c(), which previously each carried an
 * identical copy of this code.
 *
 * Depending on s->mb_intra the intra or inter AC length tables are used;
 * for intra blocks the DC coefficient is costed through
 * s->luma_dc_vlc_length and the AC scan starts at position 1 instead of 0.
 * Coefficients are visited in s->intra_scantable.permutated order up to
 * and including position @p last. Levels outside [-64, 63] are charged
 * s->ac_esc_length.
 *
 * @param s     encoder context supplying scan table and length tables
 * @param block 64 quantized coefficients
 * @param last  scan position of the last coefficient, as returned by
 *              fast_dct_quantize()
 * @return estimated bit count
 */
static int me_cmp_block_vlc_bits(MpegEncContext *s, const int16_t *block,
                                 int last)
{
    const uint8_t *scantable = s->intra_scantable.permutated;
    const int esc_length     = s->ac_esc_length;
    const uint8_t *length, *last_length;
    int i, run, level, start_i;
    int bits = 0;

    if (s->mb_intra) {
        start_i     = 1;
        length      = s->intra_ac_vlc_length;
        last_length = s->intra_ac_vlc_last_length;
        bits       += s->luma_dc_vlc_length[block[0] + 256]; // FIXME: chroma
    } else {
        start_i     = 0;
        length      = s->inter_ac_vlc_length;
        last_length = s->inter_ac_vlc_last_length;
    }

    /* Nothing beyond the (possibly already costed) DC coefficient. */
    if (last < start_i)
        return bits;

    /* All coefficients before the last one use the regular table. */
    run = 0;
    for (i = start_i; i < last; i++) {
        level = block[scantable[i]];

        if (!level) {
            run++;
            continue;
        }

        /* Bias by 64 so that table-codable levels map to 0..127. */
        level += 64;
        if ((level & (~127)) == 0)
            bits += length[UNI_AC_ENC_INDEX(run, level)];
        else
            bits += esc_length;
        run = 0;
    }

    /* The final coefficient uses the dedicated "last" table. */
    level = block[scantable[last]] + 64;

    av_assert2(level - 64);

    if ((level & (~127)) == 0)
        bits += last_length[UNI_AC_ENC_INDEX(run, level)];
    else
        bits += esc_length;

    return bits;
}

/**
 * 8x8 comparison score combining distortion and estimated bit cost.
 *
 * Local copies of both blocks are made, their difference is quantized,
 * the bit cost of the quantized block is estimated, and the block is
 * unquantized and added back onto the copy of src2 through
 * s->idsp.idct_add(). The result is the sse[1] distortion between that
 * reconstruction and the copy of src1 plus a qscale^2-weighted bit term.
 *
 * Side effect: s->block_last_index[0] is overwritten.
 *
 * @param s      encoder context (s->mb_intra selects intra/inter handling)
 * @param src1   first 8x8 pixel block
 * @param src2   second 8x8 pixel block
 * @param stride line size shared by both blocks
 * @param h      block height, asserted to be 8
 * @return distortion + ((bits * qscale * qscale * 109 + 64) >> 7)
 */
static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
                   ptrdiff_t stride, int h)
{
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int ignored; /* out-argument of fast_dct_quantize(), not used here */
    int last, bits, distortion;

    av_assert2(h == 8);

    /* Work on packed (stride 8) copies so lsrc2 can be modified below. */
    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->pdsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0 /* FIXME */] =
    last                               =
        s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &ignored);

    bits = me_cmp_block_vlc_bits(s, temp, last);

    if (last >= 0) {
        if (s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->idsp.idct_add(lsrc2, 8, temp);

    distortion = s->mecc.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7);
}

/**
 * 8x8 comparison score: estimated VLC bit cost of the quantized block.
 *
 * The block produced by diff_pixels_unaligned() from src1/src2 is
 * quantized with fast_dct_quantize() and the bit estimate for the
 * resulting coefficients is returned.
 *
 * Side effect: s->block_last_index[0] is overwritten.
 *
 * @param s      encoder context (s->mb_intra selects intra/inter handling)
 * @param src1   first 8x8 pixel block
 * @param src2   second 8x8 pixel block
 * @param stride line size shared by both blocks
 * @param h      block height, asserted to be 8
 * @return estimated bit count
 */
static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
                    ptrdiff_t stride, int h)
{
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    int ignored; /* out-argument of fast_dct_quantize(), not used here */
    int last;

    av_assert2(h == 8);

    s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride);

    s->block_last_index[0 /* FIXME */] =
    last                               =
        s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &ignored);

    return me_cmp_block_vlc_bits(s, temp, last);
}

904
#define VSAD_INTRA(size)                                                \
905
static int vsad_intra ## size ## _c(MpegEncContext *c,                  \
906
                                    uint8_t *s, uint8_t *dummy,         \
907
                                    ptrdiff_t stride, int h)            \
908
{                                                                       \
909
    int score = 0, x, y;                                                \
910 911 912 913 914 915 916 917 918 919 920 921
                                                                        \
    for (y = 1; y < h; y++) {                                           \
        for (x = 0; x < size; x += 4) {                                 \
            score += FFABS(s[x]     - s[x + stride])     +              \
                     FFABS(s[x + 1] - s[x + stride + 1]) +              \
                     FFABS(s[x + 2] - s[x + 2 + stride]) +              \
                     FFABS(s[x + 3] - s[x + 3 + stride]);               \
        }                                                               \
        s += stride;                                                    \
    }                                                                   \
                                                                        \
    return score;                                                       \
922 923 924
}
VSAD_INTRA(8)
VSAD_INTRA(16)
925

926 927 928
/*
 * Generates vsad<size>_c(): for every pair of vertically adjacent lines,
 * sums the absolute change of the (s1 - s2) difference from one line to
 * the next, over a block <size> pixels wide and h lines high. The context
 * is not used.
 */
#define VSAD(size)                                                             \
static int vsad ## size ## _c(MpegEncContext *c,                               \
                              uint8_t *s1, uint8_t *s2,                        \
                              ptrdiff_t stride, int h)                         \
{                                                                              \
    const uint8_t *cur1 = s1, *cur2 = s2;                                      \
    int total = 0;                                                             \
    int col, line;                                                             \
                                                                               \
    for (line = 1; line < h; line++, cur1 += stride, cur2 += stride) {         \
        const uint8_t *next1 = cur1 + stride;                                  \
        const uint8_t *next2 = cur2 + stride;                                  \
                                                                               \
        for (col = 0; col < size; col++)                                       \
            total += FFABS((cur1[col]  - cur2[col]) -                          \
                           (next1[col] - next2[col]));                         \
    }                                                                          \
                                                                               \
    return total;                                                              \
}
VSAD(8)
VSAD(16)
944

945 946
#define SQ(a) ((a) * (a))
#define VSSE_INTRA(size)                                                \
947
static int vsse_intra ## size ## _c(MpegEncContext *c,                  \
948
                                    uint8_t *s, uint8_t *dummy,         \
949
                                    ptrdiff_t stride, int h)            \
950
{                                                                       \
951
    int score = 0, x, y;                                                \
952 953 954 955 956 957 958 959 960 961 962 963
                                                                        \
    for (y = 1; y < h; y++) {                                           \
        for (x = 0; x < size; x += 4) {                                 \
            score += SQ(s[x]     - s[x + stride]) +                     \
                     SQ(s[x + 1] - s[x + stride + 1]) +                 \
                     SQ(s[x + 2] - s[x + stride + 2]) +                 \
                     SQ(s[x + 3] - s[x + stride + 3]);                  \
        }                                                               \
        s += stride;                                                    \
    }                                                                   \
                                                                        \
    return score;                                                       \
964 965 966
}
VSSE_INTRA(8)
VSSE_INTRA(16)
967

968 969
/*
 * Generates vsse<size>_c(): for every pair of vertically adjacent lines,
 * sums the squared change of the (s1 - s2) difference from one line to
 * the next, over a block <size> pixels wide and h lines high. The context
 * is not used.
 */
#define VSSE(size)                                                             \
static int vsse ## size ## _c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,     \
                              ptrdiff_t stride, int h)                         \
{                                                                              \
    const uint8_t *cur1 = s1, *cur2 = s2;                                      \
    int total = 0;                                                             \
    int col, line;                                                             \
                                                                               \
    for (line = 1; line < h; line++, cur1 += stride, cur2 += stride) {         \
        const uint8_t *next1 = cur1 + stride;                                  \
        const uint8_t *next2 = cur2 + stride;                                  \
                                                                               \
        for (col = 0; col < size; col++) {                                     \
            const int delta = (cur1[col]  - cur2[col]) -                       \
                              (next1[col] - next2[col]);                       \
                                                                               \
            total += SQ(delta);                                                \
        }                                                                      \
    }                                                                          \
                                                                               \
    return total;                                                              \
}
VSSE(8)
VSSE(16)
985

986
/*
 * Generates name16(), a 16-pixel-wide comparison function built from the
 * 8x8 function name8(). The two 8x8 blocks of the top half are always
 * scored; the two blocks of the bottom half are added only when h == 16.
 * The calls are kept as separate statements, in left-to-right,
 * top-to-bottom order, because name8() may modify the context.
 */
#define WRAPPER8_16_SQ(name8, name16)                                   \
static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src,        \
                  ptrdiff_t stride, int h)                              \
{                                                                       \
    int total;                                                          \
                                                                        \
    total  = name8(s, dst,     src,     stride, 8);                     \
    total += name8(s, dst + 8, src + 8, stride, 8);                     \
    if (h != 16)                                                        \
        return total;                                                   \
                                                                        \
    dst   += 8 * stride;                                                \
    src   += 8 * stride;                                                \
    total += name8(s, dst,     src,     stride, 8);                     \
    total += name8(s, dst + 8, src + 8, stride, 8);                     \
                                                                        \
    return total;                                                       \
}

WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)
M
Michael Niedermayer 已提交
1013

1014 1015 1016
int ff_check_alignment(void)
{
    static int did_fail = 0;
1017
    LOCAL_ALIGNED_16(int, aligned, [4]);
1018

1019 1020
    if ((intptr_t)aligned & 15) {
        if (!did_fail) {
1021
#if HAVE_MMX || HAVE_ALTIVEC
1022
            av_log(NULL, AV_LOG_ERROR,
1023 1024
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
1025
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
1026
                "Do not report crashes to FFmpeg developers.\n");
1027 1028 1029 1030 1031 1032 1033
#endif
            did_fail=1;
        }
        return -1;
    }
    return 0;
}
1034

1035
/**
 * Populate a MECmpContext with the C implementations defined in this file,
 * then call the architecture-specific init functions that are enabled.
 *
 * @param c     context whose function pointer tables are filled in
 * @param avctx codec context, passed through to the arch-specific inits
 */
av_cold void ff_me_cmp_init(MECmpContext *c, AVCodecContext *avctx)
{
/* Store <name>16_c in slot 0 and <name>8x8_c in slot 1 of table c-><name>. */
#define SET_CMP_FUNC(name)                      \
    c->name[0] = name ## 16_c;                  \
    c->name[1] = name ## 8x8_c;

    ff_check_alignment();

    c->sum_abs_dctelem = sum_abs_dctelem_c;

    /* Pixel-domain absolute differences. */
    /* TODO [0] 16  [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    c->sad[0] = pix_abs16_c;
    c->sad[1] = pix_abs8_c;

    /* Pixel-domain squared errors. */
    c->sse[0] = sse16_c;
    c->sse[1] = sse8_c;
    c->sse[2] = sse4_c;

    c->nsse[0] = nsse16_c;
    c->nsse[1] = nsse8_c;

    /* Vertical-gradient metrics; slots 4 and 5 hold the intra variants. */
    c->vsad[0] = vsad16_c;
    c->vsad[1] = vsad8_c;
    c->vsad[4] = vsad_intra16_c;
    c->vsad[5] = vsad_intra8_c;

    c->vsse[0] = vsse16_c;
    c->vsse[1] = vsse8_c;
    c->vsse[4] = vsse_intra16_c;
    c->vsse[5] = vsse_intra8_c;

    /* Transform-based metrics; slots 4 and 5 hold the intra hadamard. */
    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4] = hadamard8_intra16_c;
    c->hadamard8_diff[5] = hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif

    /* Quantization-aware metrics. */
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)

#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
    ff_dsputil_init_dwt(c);
#endif

    /* Architecture-specific initialisation runs after the C defaults. */
    if (ARCH_ALPHA)
        ff_me_cmp_init_alpha(c, avctx);
    if (ARCH_ARM)
        ff_me_cmp_init_arm(c, avctx);
    if (ARCH_PPC)
        ff_me_cmp_init_ppc(c, avctx);
    if (ARCH_X86)
        ff_me_cmp_init_x86(c, avctx);
    if (ARCH_MIPS)
        ff_me_cmp_init_mips(c, avctx);

    /* NOTE(review): these are assigned after the arch-specific calls, so
     * any median_sad pointer set by those calls would be replaced here;
     * confirm that this ordering is intended. */
    c->median_sad[0] = pix_median_abs16_c;
    c->median_sad[1] = pix_median_abs8_c;
}