提交 699fa8f3 作者: Kieran Kunhya

simple_idct: Template functions to support an input bitdepth parameter

上级 22a878ec
...@@ -29,6 +29,7 @@ ...@@ -29,6 +29,7 @@
# undef pixel2 # undef pixel2
# undef pixel4 # undef pixel4
# undef dctcoef # undef dctcoef
# undef idctin
# undef INIT_CLIP # undef INIT_CLIP
# undef no_rnd_avg_pixel4 # undef no_rnd_avg_pixel4
# undef rnd_avg_pixel4 # undef rnd_avg_pixel4
...@@ -53,6 +54,16 @@ ...@@ -53,6 +54,16 @@
# define pixel4 uint64_t # define pixel4 uint64_t
# define dctcoef int32_t # define dctcoef int32_t
#ifdef IN_IDCT_DEPTH
#if IN_IDCT_DEPTH == 32
# define idctin int32_t
#else
# define idctin int16_t
#endif
#else
# define idctin int16_t
#endif
# define INIT_CLIP # define INIT_CLIP
# define no_rnd_avg_pixel4 no_rnd_avg64 # define no_rnd_avg_pixel4 no_rnd_avg64
# define rnd_avg_pixel4 rnd_avg64 # define rnd_avg_pixel4 rnd_avg64
...@@ -71,6 +82,7 @@ ...@@ -71,6 +82,7 @@
# define pixel2 uint16_t # define pixel2 uint16_t
# define pixel4 uint32_t # define pixel4 uint32_t
# define dctcoef int16_t # define dctcoef int16_t
# define idctin int16_t
# define INIT_CLIP # define INIT_CLIP
# define no_rnd_avg_pixel4 no_rnd_avg32 # define no_rnd_avg_pixel4 no_rnd_avg32
...@@ -87,7 +99,10 @@ ...@@ -87,7 +99,10 @@
# define CLIP(a) av_clip_uint8(a) # define CLIP(a) av_clip_uint8(a)
#endif #endif
#define FUNC3(a, b, c) a ## _ ## b ## c #define FUNC3(a, b, c) a ## _ ## b ## c
#define FUNC2(a, b, c) FUNC3(a, b, c) #define FUNC2(a, b, c) FUNC3(a, b, c)
#define FUNC(a) FUNC2(a, BIT_DEPTH,) #define FUNC(a) FUNC2(a, BIT_DEPTH,)
#define FUNCC(a) FUNC2(a, BIT_DEPTH, _c) #define FUNCC(a) FUNC2(a, BIT_DEPTH, _c)
#define FUNC4(a, b, c) a ## _int ## b ## _ ## c ## bit
#define FUNC5(a, b, c) FUNC4(a, b, c)
#define FUNC6(a) FUNC5(a, IN_IDCT_DEPTH, BIT_DEPTH)
...@@ -256,14 +256,14 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx) ...@@ -256,14 +256,14 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
c->perm_type = FF_IDCT_PERM_NONE; c->perm_type = FF_IDCT_PERM_NONE;
} else { } else {
if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) { if (avctx->bits_per_raw_sample == 10 || avctx->bits_per_raw_sample == 9) {
c->idct_put = ff_simple_idct_put_10; c->idct_put = ff_simple_idct_put_int16_10bit;
c->idct_add = ff_simple_idct_add_10; c->idct_add = ff_simple_idct_add_int16_10bit;
c->idct = ff_simple_idct_10; c->idct = ff_simple_idct_int16_10bit;
c->perm_type = FF_IDCT_PERM_NONE; c->perm_type = FF_IDCT_PERM_NONE;
} else if (avctx->bits_per_raw_sample == 12) { } else if (avctx->bits_per_raw_sample == 12) {
c->idct_put = ff_simple_idct_put_12; c->idct_put = ff_simple_idct_put_int16_12bit;
c->idct_add = ff_simple_idct_add_12; c->idct_add = ff_simple_idct_add_int16_12bit;
c->idct = ff_simple_idct_12; c->idct = ff_simple_idct_int16_12bit;
c->perm_type = FF_IDCT_PERM_NONE; c->perm_type = FF_IDCT_PERM_NONE;
} else { } else {
if (avctx->idct_algo == FF_IDCT_INT) { if (avctx->idct_algo == FF_IDCT_INT) {
...@@ -280,9 +280,9 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx) ...@@ -280,9 +280,9 @@ av_cold void ff_idctdsp_init(IDCTDSPContext *c, AVCodecContext *avctx)
#endif /* CONFIG_FAANIDCT */ #endif /* CONFIG_FAANIDCT */
} else { // accurate/default } else { // accurate/default
/* Be sure FF_IDCT_NONE will select this one, since it uses FF_IDCT_PERM_NONE */ /* Be sure FF_IDCT_NONE will select this one, since it uses FF_IDCT_PERM_NONE */
c->idct_put = ff_simple_idct_put_8; c->idct_put = ff_simple_idct_put_int16_8bit;
c->idct_add = ff_simple_idct_add_8; c->idct_add = ff_simple_idct_add_int16_8bit;
c->idct = ff_simple_idct_8; c->idct = ff_simple_idct_int16_8bit;
c->perm_type = FF_IDCT_PERM_NONE; c->perm_type = FF_IDCT_PERM_NONE;
} }
} }
......
...@@ -755,7 +755,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, ...@@ -755,7 +755,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
s->block_last_index[0 /* FIXME */] = s->block_last_index[0 /* FIXME */] =
s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i); s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
s->dct_unquantize_inter(s, temp, 0, s->qscale); s->dct_unquantize_inter(s, temp, 0, s->qscale);
ff_simple_idct_8(temp); // FIXME ff_simple_idct_int16_8bit(temp); // FIXME
for (i = 0; i < 64; i++) for (i = 0; i < 64; i++)
sum += (temp[i] - bak[i]) * (temp[i] - bak[i]); sum += (temp[i] - bak[i]) * (temp[i] - bak[i]);
......
...@@ -30,6 +30,8 @@ ...@@ -30,6 +30,8 @@
#include "mathops.h" #include "mathops.h"
#include "simple_idct.h" #include "simple_idct.h"
#define IN_IDCT_DEPTH 16
#define BIT_DEPTH 8 #define BIT_DEPTH 8
#include "simple_idct_template.c" #include "simple_idct_template.c"
#undef BIT_DEPTH #undef BIT_DEPTH
...@@ -46,6 +48,13 @@ ...@@ -46,6 +48,13 @@
#define BIT_DEPTH 12 #define BIT_DEPTH 12
#include "simple_idct_template.c" #include "simple_idct_template.c"
#undef BIT_DEPTH #undef BIT_DEPTH
#undef IN_IDCT_DEPTH
#define IN_IDCT_DEPTH 32
#define BIT_DEPTH 10
#include "simple_idct_template.c"
#undef BIT_DEPTH
#undef IN_IDCT_DEPTH
/* 2x4x8 idct */ /* 2x4x8 idct */
...@@ -115,7 +124,7 @@ void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block) ...@@ -115,7 +124,7 @@ void ff_simple_idct248_put(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
/* IDCT8 on each line */ /* IDCT8 on each line */
for(i=0; i<8; i++) { for(i=0; i<8; i++) {
idctRowCondDC_8(block + i*8, 0); idctRowCondDC_int16_8bit(block + i*8, 0);
} }
/* IDCT4 and store */ /* IDCT4 and store */
...@@ -188,7 +197,7 @@ void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) ...@@ -188,7 +197,7 @@ void ff_simple_idct84_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
/* IDCT8 on each line */ /* IDCT8 on each line */
for(i=0; i<4; i++) { for(i=0; i<4; i++) {
idctRowCondDC_8(block + i*8, 0); idctRowCondDC_int16_8bit(block + i*8, 0);
} }
/* IDCT4 and store */ /* IDCT4 and store */
...@@ -208,7 +217,7 @@ void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block) ...@@ -208,7 +217,7 @@ void ff_simple_idct48_add(uint8_t *dest, ptrdiff_t line_size, int16_t *block)
/* IDCT8 and store */ /* IDCT8 and store */
for(i=0; i<4; i++){ for(i=0; i<4; i++){
idctSparseColAdd_8(dest + i, line_size, block + i); idctSparseColAdd_int16_8bit(dest + i, line_size, block + i);
} }
} }
......
...@@ -31,20 +31,24 @@ ...@@ -31,20 +31,24 @@
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
void ff_simple_idct_put_8(uint8_t *dest, ptrdiff_t line_size, int16_t *block); void ff_simple_idct_put_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_add_8(uint8_t *dest, ptrdiff_t line_size, int16_t *block); void ff_simple_idct_add_int16_8bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_8(int16_t *block); void ff_simple_idct_int16_8bit(int16_t *block);
void ff_simple_idct_put_10(uint8_t *dest, ptrdiff_t line_size, int16_t *block); void ff_simple_idct_put_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_add_10(uint8_t *dest, ptrdiff_t line_size, int16_t *block); void ff_simple_idct_add_int16_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_10(int16_t *block); void ff_simple_idct_int16_10bit(int16_t *block);
void ff_simple_idct_put_12(uint8_t *dest, ptrdiff_t line_size, int16_t *block); void ff_simple_idct_put_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_add_12(uint8_t *dest, ptrdiff_t line_size, int16_t *block); void ff_simple_idct_add_int32_10bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_12(int16_t *block); void ff_simple_idct_int32_10bit(int16_t *block);
void ff_simple_idct_put_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_add_int16_12bit(uint8_t *dest, ptrdiff_t line_size, int16_t *block);
void ff_simple_idct_int16_12bit(int16_t *block);
/** /**
* Special version of ff_simple_idct_10() which does dequantization * Special version of ff_simple_idct_int16_10bit() which does dequantization
* and scales by a factor of 2 more between the two IDCTs to account * and scales by a factor of 2 more between the two IDCTs to account
* for larger scale of input coefficients. * for larger scale of input coefficients.
*/ */
......
...@@ -77,6 +77,10 @@ ...@@ -77,6 +77,10 @@
#define ROW_SHIFT 13 #define ROW_SHIFT 13
#define COL_SHIFT 18 #define COL_SHIFT 18
#define DC_SHIFT 1 #define DC_SHIFT 1
# elif IN_IDCT_DEPTH == 32
#define ROW_SHIFT 13
#define COL_SHIFT 21
#define DC_SHIFT 2
# else # else
#define ROW_SHIFT 12 #define ROW_SHIFT 12
#define COL_SHIFT 19 #define COL_SHIFT 19
...@@ -109,11 +113,13 @@ ...@@ -109,11 +113,13 @@
#ifdef EXTRA_SHIFT #ifdef EXTRA_SHIFT
static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift) static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
#else #else
static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) static inline void FUNC6(idctRowCondDC)(idctin *row, int extra_shift)
#endif #endif
{ {
SUINT a0, a1, a2, a3, b0, b1, b2, b3; SUINT a0, a1, a2, a3, b0, b1, b2, b3;
// TODO: Add DC-only support for int32_t input
#if IN_IDCT_DEPTH == 16
#if HAVE_FAST_64BIT #if HAVE_FAST_64BIT
#define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) { if (((AV_RN64A(row) & ~ROW0_MASK) | AV_RN64A(row+4)) == 0) {
...@@ -147,6 +153,7 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) ...@@ -147,6 +153,7 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
AV_WN32A(row+6, temp); AV_WN32A(row+6, temp);
return; return;
} }
#endif
#endif #endif
a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1)); a0 = (W4 * row[0]) + (1 << (ROW_SHIFT + extra_shift - 1));
...@@ -168,7 +175,11 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) ...@@ -168,7 +175,11 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
b3 = MUL(W7, row[1]); b3 = MUL(W7, row[1]);
MAC(b3, -W5, row[3]); MAC(b3, -W5, row[3]);
#if IN_IDCT_DEPTH == 32
if (AV_RN64A(row + 4) | AV_RN64A(row + 6)) {
#else
if (AV_RN64A(row + 4)) { if (AV_RN64A(row + 4)) {
#endif
a0 += W4*row[4] + W6*row[6]; a0 += W4*row[4] + W6*row[6];
a1 += - W4*row[4] - W2*row[6]; a1 += - W4*row[4] - W2*row[6];
a2 += - W4*row[4] + W2*row[6]; a2 += - W4*row[4] + W2*row[6];
...@@ -250,8 +261,8 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) ...@@ -250,8 +261,8 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
#ifdef EXTRA_SHIFT #ifdef EXTRA_SHIFT
static inline void FUNC(idctSparseCol_extrashift)(int16_t *col) static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
#else #else
static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, static inline void FUNC6(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
int16_t *col) idctin *col)
{ {
SUINT a0, a1, a2, a3, b0, b1, b2, b3; SUINT a0, a1, a2, a3, b0, b1, b2, b3;
...@@ -274,8 +285,8 @@ static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size, ...@@ -274,8 +285,8 @@ static inline void FUNC(idctSparseColPut)(pixel *dest, ptrdiff_t line_size,
dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT); dest[0] = av_clip_pixel((int)(a0 - b0) >> COL_SHIFT);
} }
static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, static inline void FUNC6(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
int16_t *col) idctin *col)
{ {
int a0, a1, a2, a3, b0, b1, b2, b3; int a0, a1, a2, a3, b0, b1, b2, b3;
...@@ -298,7 +309,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size, ...@@ -298,7 +309,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, ptrdiff_t line_size,
dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT)); dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT));
} }
static inline void FUNC(idctSparseCol)(int16_t *col) static inline void FUNC6(idctSparseCol)(idctin *col)
#endif #endif
{ {
int a0, a1, a2, a3, b0, b1, b2, b3; int a0, a1, a2, a3, b0, b1, b2, b3;
...@@ -316,21 +327,23 @@ static inline void FUNC(idctSparseCol)(int16_t *col) ...@@ -316,21 +327,23 @@ static inline void FUNC(idctSparseCol)(int16_t *col)
} }
#ifndef EXTRA_SHIFT #ifndef EXTRA_SHIFT
void FUNC(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) void FUNC6(ff_simple_idct_put)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block_)
{ {
idctin *block = (idctin *)block_;
pixel *dest = (pixel *)dest_; pixel *dest = (pixel *)dest_;
int i; int i;
line_size /= sizeof(pixel); line_size /= sizeof(pixel);
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
FUNC(idctRowCondDC)(block + i*8, 0); FUNC6(idctRowCondDC)(block + i*8, 0);
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
FUNC(idctSparseColPut)(dest + i, line_size, block + i); FUNC6(idctSparseColPut)(dest + i, line_size, block + i);
} }
void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block) #if IN_IDCT_DEPTH == 16
void FUNC6(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *block)
{ {
pixel *dest = (pixel *)dest_; pixel *dest = (pixel *)dest_;
int i; int i;
...@@ -338,20 +351,21 @@ void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *bloc ...@@ -338,20 +351,21 @@ void FUNC(ff_simple_idct_add)(uint8_t *dest_, ptrdiff_t line_size, int16_t *bloc
line_size /= sizeof(pixel); line_size /= sizeof(pixel);
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
FUNC(idctRowCondDC)(block + i*8, 0); FUNC6(idctRowCondDC)(block + i*8, 0);
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
FUNC(idctSparseColAdd)(dest + i, line_size, block + i); FUNC6(idctSparseColAdd)(dest + i, line_size, block + i);
} }
void FUNC(ff_simple_idct)(int16_t *block) void FUNC6(ff_simple_idct)(int16_t *block)
{ {
int i; int i;
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
FUNC(idctRowCondDC)(block + i*8, 0); FUNC6(idctRowCondDC)(block + i*8, 0);
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
FUNC(idctSparseCol)(block + i); FUNC6(idctSparseCol)(block + i);
} }
#endif #endif
#endif
...@@ -82,9 +82,9 @@ static void ff_prores_idct_wrap(int16_t *dst){ ...@@ -82,9 +82,9 @@ static void ff_prores_idct_wrap(int16_t *dst){
static const struct algo idct_tab[] = { static const struct algo idct_tab[] = {
{ "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE }, { "REF-DBL", ff_ref_idct, FF_IDCT_PERM_NONE },
{ "INT", ff_j_rev_dct, FF_IDCT_PERM_LIBMPEG2 }, { "INT", ff_j_rev_dct, FF_IDCT_PERM_LIBMPEG2 },
{ "SIMPLE-C", ff_simple_idct_8, FF_IDCT_PERM_NONE }, { "SIMPLE-C", ff_simple_idct_int16_8bit, FF_IDCT_PERM_NONE },
{ "SIMPLE-C10", ff_simple_idct_10, FF_IDCT_PERM_NONE }, { "SIMPLE-C10", ff_simple_idct_int16_10bit, FF_IDCT_PERM_NONE },
{ "SIMPLE-C12", ff_simple_idct_12, FF_IDCT_PERM_NONE, 0, 1 }, { "SIMPLE-C12", ff_simple_idct_int16_12bit, FF_IDCT_PERM_NONE, 0, 1 },
{ "PR-C", ff_prores_idct_wrap, FF_IDCT_PERM_NONE, 0, 1 }, { "PR-C", ff_prores_idct_wrap, FF_IDCT_PERM_NONE, 0, 1 },
#if CONFIG_FAANIDCT #if CONFIG_FAANIDCT
{ "FAANI", ff_faanidct, FF_IDCT_PERM_NONE }, { "FAANI", ff_faanidct, FF_IDCT_PERM_NONE },
......
...@@ -314,11 +314,11 @@ int ff_vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitCo ...@@ -314,11 +314,11 @@ int ff_vc1_decode_sequence_header(AVCodecContext *avctx, VC1Context *v, GetBitCo
v->multires = get_bits1(gb); v->multires = get_bits1(gb);
v->res_fasttx = get_bits1(gb); v->res_fasttx = get_bits1(gb);
if (!v->res_fasttx) { if (!v->res_fasttx) {
v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct_8; v->vc1dsp.vc1_inv_trans_8x8 = ff_simple_idct_int16_8bit;
v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add; v->vc1dsp.vc1_inv_trans_8x4 = ff_simple_idct84_add;
v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add; v->vc1dsp.vc1_inv_trans_4x8 = ff_simple_idct48_add;
v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add; v->vc1dsp.vc1_inv_trans_4x4 = ff_simple_idct44_add;
v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add_8; v->vc1dsp.vc1_inv_trans_8x8_dc = ff_simple_idct_add_int16_8bit;
v->vc1dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add; v->vc1dsp.vc1_inv_trans_8x4_dc = ff_simple_idct84_add;
v->vc1dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add; v->vc1dsp.vc1_inv_trans_4x8_dc = ff_simple_idct48_add;
v->vc1dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add; v->vc1dsp.vc1_inv_trans_4x4_dc = ff_simple_idct44_add;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册