dsputil.h 8.8 KB
Newer Older
F
Fabrice Bellard 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
/*
 * DSP utils
 * Copyright (c) 2000, 2001, 2002 Fabrice Bellard.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
F
Fabrice Bellard 已提交
19 20 21 22
#ifndef DSPUTIL_H
#define DSPUTIL_H

#include "common.h"
23
#include "avcodec.h"
F
Fabrice Bellard 已提交
24

M
Michael Niedermayer 已提交
25
//#define DEBUG
F
Fabrice Bellard 已提交
26 27 28
/* dct code */
typedef short DCTELEM;

29
void fdct_ifast (DCTELEM *data);
30
void ff_jpeg_fdct_islow (DCTELEM *data);
F
Fabrice Bellard 已提交
31 32 33

void j_rev_dct (DCTELEM *data);

34
void ff_fdct_mmx(DCTELEM *block);
F
Fabrice Bellard 已提交
35

36
/* encoding scans */
37 38 39
extern const UINT8 ff_alternate_horizontal_scan[64];
extern const UINT8 ff_alternate_vertical_scan[64];
extern const UINT8 ff_zigzag_direct[64];
40

F
Fabrice Bellard 已提交
41 42 43 44 45
/* pixel operations */
#define MAX_NEG_CROP 384

/* temporary */
extern UINT32 squareTbl[512];
F
Fabrice Bellard 已提交
46
extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
F
Fabrice Bellard 已提交
47 48


M
Michael Niedermayer 已提交
49
/* minimum alignment rules ;)
50
if you notice errors in the alignment rules, need more alignment for some asm code on some CPU
M
Michael Niedermayer 已提交
51 52 53 54
or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ...

!warning these alignments might not match reality, (missing attribute((align)) stuff somewhere possible)
i (michael) didn't check them, these are just the alignments which i think could be reached easily ...
F
Fabrice Bellard 已提交
55

M
Michael Niedermayer 已提交
56 57 58
!future video codecs might need functions with less strict alignment
*/

59
/*
F
Fabrice Bellard 已提交
60
void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
61
void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
F
Fabrice Bellard 已提交
62 63
void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
64
void clear_blocks_c(DCTELEM *blocks);
65
*/
F
Fabrice Bellard 已提交
66 67

/* add and put pixel (decoding) */
M
Michael Niedermayer 已提交
68 69 70
// blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
/* Function-pointer types stored in the pixel tables of DSPContext.
   The inline "align" notes follow the minimum alignment rules described in
   this header: dst/block is aligned to the block width, the source is not. */
typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h);
typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);
M
Michael Niedermayer 已提交
71

72

M
Michael Niedermayer 已提交
73 74 75 76 77 78

/*
 * Define a static function named `a`, taking (block, pixels, line_size, h),
 * that calls `b` twice: first on block/pixels as given, then on both
 * pointers advanced by `n` bytes. line_size and h are forwarded unchanged
 * to both calls.
 */
#define CALL_2X_PIXELS(a, b, n)\
static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    b(block  , pixels  , line_size, h);\
    b(block+n, pixels+n, line_size, h);\
}
M
Michael Niedermayer 已提交
79

F
Fabrice Bellard 已提交
80 81
/* motion estimation */

M
Michael Niedermayer 已提交
82
/* Type of the pix_abs* comparison entries in DSPContext: takes two blocks
   and a line size, returns an int.
   NOTE(review): presumably a sum of absolute differences, going by the
   pix_abs naming -- confirm against the implementations. */
typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size);
83
/*
84 85 86 87
int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
*/
/**
 * Table of function pointers for the DSP primitives declared in this header.
 * A pointer to this struct is what dsputil_init() and the per-architecture
 * dsputil_init_*() functions receive.
 * NOTE(review): presumably those functions fill in the pointers -- confirm
 * against their definitions.
 * The inline "align N" notes are the minimum pointer alignments an
 * implementation may rely on for that argument.
 */
typedef struct DSPContext {
    /* pixel ops : interface with DCT */
    void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
    void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
    void (*gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
    void (*gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy,
		    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
    int (*pix_sum)(UINT8 * pix, int line_size);
    int (*pix_norm1)(UINT8 * pix, int line_size);

    /* maybe create an array for 16/8 functions */
    /* op_pixels_func tables: put/avg, each with a no_rnd variant */
    op_pixels_func put_pixels_tab[2][4];
    op_pixels_func avg_pixels_tab[2][4];
    op_pixels_func put_no_rnd_pixels_tab[2][4];
    op_pixels_func avg_no_rnd_pixels_tab[2][4];
    /* qpel_mc_func tables, same put/avg and no_rnd split */
    qpel_mc_func put_qpel_pixels_tab[2][16];
    qpel_mc_func avg_qpel_pixels_tab[2][16];
    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];

    /* motion estimation comparison functions, 16x16 and 8x8 variants */
    op_pixels_abs_func pix_abs16x16;
    op_pixels_abs_func pix_abs16x16_x2;
    op_pixels_abs_func pix_abs16x16_y2;
    op_pixels_abs_func pix_abs16x16_xy2;
    op_pixels_abs_func pix_abs8x8;
    op_pixels_abs_func pix_abs8x8_x2;
    op_pixels_abs_func pix_abs8x8_y2;
    op_pixels_abs_func pix_abs8x8_xy2;
} DSPContext;

void dsputil_init(DSPContext* p, unsigned mask);
F
Fabrice Bellard 已提交
123

M
Michael Niedermayer 已提交
124 125 126 127 128
/**
 * Permute block according to permutation.
 * @param last last non zero element in scantable order
 */
void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last);
129

130 131
#define emms_c()

F
Fabrice Bellard 已提交
132
#if defined(HAVE_MMX)
F
Fabrice Bellard 已提交
133

134 135
/* Drop the empty fallback emms_c() so the MMX-aware definition can replace it.
   #undef takes a bare identifier; a trailing "()" is extra tokens. */
#undef emms_c

F
Fabrice Bellard 已提交
136 137 138 139 140 141 142 143 144
#define MM_MMX    0x0001 /* standard MMX */
#define MM_3DNOW  0x0004 /* AMD 3DNOW */
#define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
#define MM_SSE    0x0008 /* SSE functions */
#define MM_SSE2   0x0010 /* PIV SSE2 functions */

extern int mm_flags;

int mm_support(void);
145 146
void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
F
Fabrice Bellard 已提交
147 148 149

/* Issue the x86 `emms` instruction. The asm is volatile and declares a
   "memory" clobber, so the compiler will not drop it or move memory
   accesses across it. */
static inline void emms(void)
{
150 151 152 153 154 155 156
    __asm __volatile ("emms;":::"memory");
}

/* MMX build of emms_c(): calls emms() only when mm_flags has MM_MMX set.
   The expansion is a bare { } block, not do { } while (0), so writing
   "emms_c();" as the body of an if that has an else will not parse. */
#define emms_c() \
{\
    if (mm_flags & MM_MMX)\
        emms();\
F
Fabrice Bellard 已提交
157 158 159 160
}

#define __align8 __attribute__ ((aligned (8)))

161 162
void dsputil_init_mmx(DSPContext* c, unsigned mask);
void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask);
F
Fabrice Bellard 已提交
163

F
Fabrice Bellard 已提交
164 165 166 167 168 169
#elif defined(ARCH_ARMV4L)

/* This is to use 4-byte reads on the IDCT pointers for some 'zero'
   line optimizations */
#define __align8 __attribute__ ((aligned (4)))

170
void dsputil_init_armv4l(DSPContext* c, unsigned mask);
F
Fabrice Bellard 已提交
171

F
Fabrice Bellard 已提交
172 173 174 175 176
#elif defined(HAVE_MLIB)

/* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
#define __align8 __attribute__ ((aligned (8)))

177
void dsputil_init_mlib(DSPContext* c, unsigned mask);
F
Fabrice Bellard 已提交
178

179 180 181 182
#elif defined(ARCH_ALPHA)

#define __align8 __attribute__ ((aligned (8)))

183
void dsputil_init_alpha(DSPContext* c, unsigned mask);
184

185 186 187 188
#elif defined(ARCH_POWERPC)

#define __align8 __attribute__ ((aligned (16)))

189
void dsputil_init_ppc(DSPContext* c, unsigned mask);
190

191 192 193 194
#elif defined(HAVE_MMI)

#define __align8 __attribute__ ((aligned (16)))

195
void dsputil_init_mmi(DSPContext* c, unsigned mask);
196

F
Fabrice Bellard 已提交
197 198 199 200 201 202
#else

#define __align8

#endif

203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221
/*
 * 32/64-bit memory access helpers for addresses that may be unaligned.
 *
 *   LD32(a) / LD64(a)  read a uint32_t / uint64_t from address a.
 *   ST32(a, b)         store b as a uint32_t at address a; like an
 *                      assignment, the expression yields the stored value.
 *
 * With GCC-compatible compilers the access goes through a packed
 * one-member struct. Other compilers fall back to a plain pointer cast.
 * NOTE(review): the fallback presumably relies on the target tolerating
 * unaligned accesses -- confirm for any non-GNUC port.
 */
#ifdef __GNUC__

struct unaligned_64 { uint64_t l; } __attribute__((packed));
struct unaligned_32 { uint32_t l; } __attribute__((packed));

#define LD32(a) (((const struct unaligned_32 *) (a))->l)
#define LD64(a) (((const struct unaligned_64 *) (a))->l)

/* Whole expansion is parenthesized so ST32() composes inside larger expressions. */
#define ST32(a, b) ((((struct unaligned_32 *) (a))->l) = (b))

#else /* __GNUC__ */

/* Loads keep the const qualifier, matching the GNUC branch. */
#define LD32(a) (*((const uint32_t*)(a)))
#define LD64(a) (*((const uint64_t*)(a)))

#define ST32(a, b) (*((uint32_t*)(a)) = (b))

#endif /* !__GNUC__ */

222 223 224 225
/* PSNR */
void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
              int orig_linesize[3], int coded_linesize,
              AVCodecContext *avctx);
226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266

/* FFT computation */

/* NOTE: integer code is planned, so always use the FFTSample type
   instead of spelling out float. */
typedef float FFTSample;

/* One complex sample: real part, then imaginary part. */
typedef struct FFTComplex {
    FFTSample re;
    FFTSample im;
} FFTComplex;

/* Per-instance FFT state. fft_init() and fft_end() take a pointer to it,
   and the nbits/inverse members mirror fft_init()'s parameters of the same
   names. */
typedef struct FFTContext {
    int nbits;   /* NOTE(review): presumably log2 of the transform size -- confirm in fft_init() */
    int inverse; /* NOTE(review): presumably non-zero selects the inverse transform -- confirm */
    uint16_t *revtab;
    FFTComplex *exptab;
    FFTComplex *exptab1; /* only used by SSE code */
    /* transform routine; the inline fft_calc() wrapper calls through this pointer */
    void (*fft_calc)(struct FFTContext *s, FFTComplex *z);
} FFTContext;

int fft_init(FFTContext *s, int nbits, int inverse);
void fft_permute(FFTContext *s, FFTComplex *z);
void fft_calc_c(FFTContext *s, FFTComplex *z);
void fft_calc_sse(FFTContext *s, FFTComplex *z);
/* Run the transform routine stored in the context s on the buffer z. */
static inline void fft_calc(FFTContext *s, FFTComplex *z)
{
    (*s->fft_calc)(s, z);
}
void fft_end(FFTContext *s);

/* MDCT computation */

/* Per-instance MDCT state; embeds the FFTContext it uses. Passed to
   ff_mdct_init(), ff_mdct_calc(), ff_imdct_calc() and ff_mdct_end(). */
typedef struct MDCTContext {
    int n;  /* size of MDCT (i.e. number of input data * 2) */
    int nbits; /* n = 2^nbits */
    /* pre/post rotation tables */
    FFTSample *tcos;
    FFTSample *tsin;
    FFTContext fft;
} MDCTContext;

F
Fabrice Bellard 已提交
267
int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
268
void ff_imdct_calc(MDCTContext *s, FFTSample *output,
269
                const FFTSample *input, FFTSample *tmp);
270
void ff_mdct_calc(MDCTContext *s, FFTSample *out,
271
               const FFTSample *input, FFTSample *tmp);
F
Fabrice Bellard 已提交
272
void ff_mdct_end(MDCTContext *s);
273

274
#ifndef HAVE_LRINTF
275 276
/* XXX: add ISOC specific test to avoid specific BSD testing. */
/* better than nothing implementation. */
277
/* btw, rintf() is existing on fbsd too -- alex */
278 279 280 281 282 283
/*
 * Fallback lrintf(), compiled only when HAVE_LRINTF is not defined.
 * Rounds x with rint() and converts the result to long int.
 */
static inline long int lrintf(float x)
{
    /* Convert straight to the declared return type. The previous (int)
       cast narrowed the result wherever long is wider than int. */
    return (long int)rint(x);
}
#endif

F
Fabrice Bellard 已提交
284
#endif