diff --git a/libavcodec/bit_depth_template.c b/libavcodec/bit_depth_template.c index 80184892f5a64e8e918840abe008828574500648..d44d47ea456be2f5cbb5c02b1588e4edaa8ef55b 100644 --- a/libavcodec/bit_depth_template.c +++ b/libavcodec/bit_depth_template.c @@ -29,6 +29,7 @@ # undef pixel2 # undef pixel4 # undef dctcoef +# undef idctin # undef INIT_CLIP # undef no_rnd_avg_pixel4 # undef rnd_avg_pixel4 @@ -53,6 +54,16 @@ # define pixel4 uint64_t # define dctcoef int32_t +#ifdef IN_IDCT_DEPTH +#if IN_IDCT_DEPTH == 32 +# define idctin int32_t +#else +# define idctin int16_t +#endif +#else +# define idctin int16_t +#endif + # define INIT_CLIP # define no_rnd_avg_pixel4 no_rnd_avg64 # define rnd_avg_pixel4 rnd_avg64 @@ -71,6 +82,7 @@ # define pixel2 uint16_t # define pixel4 uint32_t # define dctcoef int16_t +# define idctin int16_t # define INIT_CLIP # define no_rnd_avg_pixel4 no_rnd_avg32 @@ -87,7 +99,10 @@ # define CLIP(a) av_clip_uint8(a) #endif -#define FUNC3(a, b, c) a ## _ ## b ## c +#define FUNC3(a, b, c) a ## _ ## b ## c #define FUNC2(a, b, c) FUNC3(a, b, c) #define FUNC(a) FUNC2(a, BIT_DEPTH,) #define FUNCC(a) FUNC2(a, BIT_DEPTH, _c) +#define FUNC4(a, b, c) a ## _int ## b ## _ ## c ## bit +#define FUNC5(a, b, c) FUNC4(a, b, c) +#define FUNC6(a) FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH) diff --git a/libavcodec/idctdsp.c b/libavcodec/idctdsp.c index 0122d29efa325fad57aefa70e1ff837bce688607..1de372d2b9e267de519ef04cdfca79061b4c402f 100644 --- a/libavcodec/idctdsp.c +++ b/libavcodec/idctdsp.c @@ -256,14 +256,14 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx) c->perm_type = FF_IDCT_PERM_NONE; } else { if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) { - c->idct_put = ff_simple_idct_put_10; - c->idct_add = ff_simple_idct_add_10; - c->idct = ff_simple_idct_10; + c->idct_put = ff_simple_idct_put_int16_10bit; + c->idct_add = ff_simple_idct_add_int16_10bit; + c->idct = ff_simple_idct_int16_10bit; c->perm_type = FF_IDCT_PERM_NONE; } 
else if (avctx->bits_per_raw_sample == 12) { - c->idct_put = ff_simple_idct_put_12; - c->idct_add = ff_simple_idct_add_12; - c->idct = ff_simple_idct_12; + c->idct_put = ff_simple_idct_put_int16_12bit; + c->idct_add = ff_simple_idct_add_int16_12bit; + c->idct = ff_simple_idct_int16_12bit; c->perm_type = FF_IDCT_PERM_NONE; } else { if (avctx->idct_algo == FF_IDCT_INT) { @@ -280,9 +280,9 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx) #endif /* CONFIG_FAANIDCT */ } else { // accurate/default /* Be sure FF_IDCT_NONE will select this one, since it uses FF_IDCT_PERM_NONE */ - c->idct_put = ff_simple_idct_put_8; - c->idct_add = ff_simple_idct_add_8; - c->idct = ff_simple_idct_8; + c->idct_put = ff_simple_idct_put_int16_8bit; + c->idct_add = ff_simple_idct_add_int16_8bit; + c->idct = ff_simple_idct_int16_8bit; c->perm_type = FF_IDCT_PERM_NONE; } } diff --git a/libavcodec/me_cmp.c b/libavcodec/me_cmp.c index 465d3ccb2af162a2f9dbb2f9f0f3367858d2558c..ae248c52f82a50f811f7908d0b055bf57fc98d98 100644 --- a/libavcodec/me_cmp.c +++ b/libavcodec/me_cmp.c @@ -755,7 +755,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, s->block_last_index[0 /* FIXME */] = s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); s->dct_unquantize_inter(s, temp, 0, s->qscale); - ff_simple_idct_8(temp); // FIXME + ff_simple_idct_int16_8bit(temp); // FIXME for (i = 0; i < 64; i++) sum += (temp[i] - bak[i]) * (temp[i] - bak[i]); diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c index 1d05b2fe08cb869353e12778f2312140a38de2e6..78b29c0fe3e03d8ebee94a3966729f5770e66862 100644 --- a/libavcodec/simple_idct.c +++ b/libavcodec/simple_idct.c @@ -30,6 +30,8 @@ #include "mathops.h" #include "simple_idct.h" +#define IN_IDCT_DEPTH 16 + #define BIT_DEPTH 8 #include "simple_idct_template.c" #undef BIT_DEPTH @@ -46,6 +48,13 @@ #define BIT_DEPTH 12 #include "simple_idct_template.c" #undef BIT_DEPTH +#undef IN_IDCT_DEPTH + +#define IN_IDCT_DEPTH 32 +#define 
BIT_DEPTH 10 +#include "simple_idct_template.c" +#undef BIT_DEPTH +#undef IN_IDCT_DEPTH /* 2x4x8 idct */ @@ -115,7 +124,7 @@ void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block) /* IDCT8 on each line */ for(i=0; i<8; i++) { - idctRowCondDC_8(block + i*8, 0); + idctRowCondDC_int16_8bit(block + i*8, 0); } /* IDCT4 and store */ @@ -188,7 +197,7 @@ void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) /* IDCT8 on each line */ for(i=0; i<4; i++) { - idctRowCondDC_8(block + i*8, 0); + idctRowCondDC_int16_8bit(block + i*8, 0); } /* IDCT4 and store */ @@ -208,7 +217,7 @@ void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) /* IDCT8 and store */ for(i=0; i<4; i++){ - idctSparseColAdd_8(dest + i, line_size, block + i); + idctSparseColAdd_int16_8bit(dest + i, line_size, block + i); } } diff --git a/libavcodec/simple_idct.h b/libavcodec/simple_idct.h index 2a5e1d7f6dab83fc0b191f7930dac39e4adc4e8a..39df2308caa5a4a52c48566bdbd5800eb6548931 100644 --- a/libavcodec/simple_idct.h +++ b/libavcodec/simple_idct.h @@ -31,20 +31,24 @@ #include <stddef.h> #include <stdint.h> -void ff_simple_idct_put_8(uint8_t *dest, ptrdiff_t line_size, int16_t *block); -void ff_simple_idct_add_8(uint8_t *dest, ptrdiff_t line_size, int16_t *block); -void ff_simple_idct_8(int16_t *block); +void ff_simple_idct_put_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); +void ff_simple_idct_add_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); +void ff_simple_idct_int16_8bit(int16_t *block); -void ff_simple_idct_put_10(uint8_t *dest, ptrdiff_t line_size, int16_t *block); -void ff_simple_idct_add_10(uint8_t *dest, ptrdiff_t line_size, int16_t *block); -void ff_simple_idct_10(int16_t *block); +void ff_simple_idct_put_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); +void ff_simple_idct_add_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); +void ff_simple_idct_int16_10bit(int16_t *block); -void 
ff_simple_idct_put_12(uint8_t *dest, ptrdiff_t line_size, int16_t *block); -void ff_simple_idct_add_12(uint8_t *dest, ptrdiff_t line_size, int16_t *block); -void ff_simple_idct_12(int16_t *block); +void ff_simple_idct_put_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); +void ff_simple_idct_add_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); +void ff_simple_idct_int32_10bit(int16_t *block); + +void ff_simple_idct_put_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); +void ff_simple_idct_add_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block); +void ff_simple_idct_int16_12bit(int16_t *block); /** - * Special version of ff_simple_idct_10() which does dequantization + * Special version of ff_simple_idct_int16_10bit() which does dequantization * and scales by a factor of 2 more between the two IDCTs to account * for larger scale of input coefficients. */ diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c index f532313441c05426679a378d629819416d3540cc..904263fc7142a14b8a52632a03cffe864c2b738b 100644 --- a/libavcodec/simple_idct_template.c +++ b/libavcodec/simple_idct_template.c @@ -77,6 +77,10 @@ #define ROW_SHIFT 13 #define COL_SHIFT 18 #define DC_SHIFT 1 +# elif IN_IDCT_DEPTH == 32 +#define ROW_SHIFT 13 +#define COL_SHIFT 21 +#define DC_SHIFT 2 # else #define ROW_SHIFT 12 #define COL_SHIFT 19 @@ -109,11 +113,13 @@ #ifdef EXTRA_SHIFT static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift) #else -static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) +static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift) #endif { SUINT a0, a1, a2, a3, b0, b1, b2, b3; +// TODO: Add DC-only support for int32_t input +#if IN_IDCT_DEPTH == 16 #if HAVE_FAST_64BIT #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) { @@ -147,6 +153,7 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, 
int extra_shift) AV_WN32A(row+6, temp); return; } +#endif #endif a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1)); @@ -168,7 +175,11 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) b3 = MUL(W7, row[1]); MAC(b3, -W5, row[3]); +#if IN_IDCT_DEPTH == 32 + if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) { +#else if (AV_RN64A(row + 4)) { +#endif a0 += W4*row[4] + W6*row[6]; a1 += - W4*row[4] - W2*row[6]; a2 += - W4*row[4] + W2*row[6]; @@ -250,8 +261,8 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) #ifdef EXTRA_SHIFT static inline void FUNC(idctSparseCol_extrashift)(int16_t *col) #else -static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, - int16_t *col) +static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, + idctin *col) { SUINT a0, a1, a2, a3, b0, b1, b2, b3; @@ -274,8 +285,8 @@ static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT); } -static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, - int16_t *col) +static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, + idctin *col) { int a0, a1, a2, a3, b0, b1, b2, b3; @@ -298,7 +309,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT)); } -static inline void FUNC(idctSparseCol)(int16_t *col) +static inline void FUNC6(idctSparseCol)(idctin *col) #endif { int a0, a1, a2, a3, b0, b1, b2, b3; @@ -316,21 +327,23 @@ static inline void FUNC(idctSparseCol)(int16_t *col) } #ifndef EXTRA_SHIFT -void FUNC(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) +void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_) { + idctin *block = (idctin *)block_; pixel *dest = (pixel *)dest_; int i; line_size /= sizeof(pixel); for (i = 0; i < 8; i++) - FUNC(idctRowCondDC)(block + i*8, 0); + 
FUNC6(idctRowCondDC)(block + i*8, 0); for (i = 0; i < 8; i++) - FUNC(idctSparseColPut)(dest + i, line_size, block + i); + FUNC6(idctSparseColPut)(dest + i, line_size, block + i); } -void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) +#if IN_IDCT_DEPTH == 16 +void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) { pixel *dest = (pixel *)dest_; int i; @@ -338,20 +351,21 @@ void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *bloc line_size /= sizeof(pixel); for (i = 0; i < 8; i++) - FUNC(idctRowCondDC)(block + i*8, 0); + FUNC6(idctRowCondDC)(block + i*8, 0); for (i = 0; i < 8; i++) - FUNC(idctSparseColAdd)(dest + i, line_size, block + i); + FUNC6(idctSparseColAdd)(dest + i, line_size, block + i); } -void FUNC(ff_simple_idct)(int16_t *block) +void FUNC6(ff_simple_idct)(int16_t *block) { int i; for (i = 0; i < 8; i++) - FUNC(idctRowCondDC)(block + i*8, 0); + FUNC6(idctRowCondDC)(block + i*8, 0); for (i = 0; i < 8; i++) - FUNC(idctSparseCol)(block + i); + FUNC6(idctSparseCol)(block + i); } #endif +#endif diff --git a/libavcodec/tests/dct.c b/libavcodec/tests/dct.c index b44c66f427494ca7ba7b61afa954d6316306e0e7..e8fa4a3cc1a40faee77bef244a07dbd495f7d75f 100644 --- a/libavcodec/tests/dct.c +++ b/libavcodec/tests/dct.c @@ -82,9 +82,9 @@ static void ff_prores_idct_wrap(int16_t *dst){ static const struct algo idct_tab[] = { { "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE }, { "INT", ff_j_rev_dct, FF_IDCT_PERM_LIBMPEG2 }, - { "SIMPLE-C", ff_simple_idct_8, FF_IDCT_PERM_NONE }, - { "SIMPLE-C10", ff_simple_idct_10, FF_IDCT_PERM_NONE }, - { "SIMPLE-C12", ff_simple_idct_12, FF_IDCT_PERM_NONE, 0, 1 }, + { "SIMPLE-C", ff_simple_idct_int16_8bit, FF_IDCT_PERM_NONE }, + { "SIMPLE-C10", ff_simple_idct_int16_10bit, FF_IDCT_PERM_NONE }, + { "SIMPLE-C12", ff_simple_idct_int16_12bit, FF_IDCT_PERM_NONE, 0, 1 }, { "PR-C", ff_prores_idct_wrap, FF_IDCT_PERM_NONE, 0, 1 }, #if CONFIG_FAANIDCT { "FAANI", ff_faanidct, 
FF_IDCT_PERM_NONE }, diff --git a/libavcodec/vc1.c b/libavcodec/vc1.c index 2b9f8db3ee9773dfdc50d894e0427278265d7071..e7625070f79823d9dca12b21a062c0f23722f384 100644 --- a/libavcodec/vc1.c +++ b/libavcodec/vc1.c @@ -314,11 +314,11 @@ int ff_vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitCo v->multires = get_bits1(gb); v->res_fasttx = get_bits1(gb); if (!v->res_fasttx) { - v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct_8; + v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct_int16_8bit; v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add; v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add; v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add; - v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add_8; + v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add_int16_8bit; v->vc1dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add; v->vc1dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add; v->vc1dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add;