提交 9b2c3c40 编写于 作者: K Kaustubh Raste 提交者: Michael Niedermayer

avcodec/mips: Improve vp9 mc msa functions

Load the specific destination bytes instead of MSA load and pack.
Signed-off-by: NKaustubh Raste <kaustubh.raste@imgtec.com>
Reviewed-by: NManojkumar Bhosale <Manojkumar.Bhosale@imgtec.com>
Signed-off-by: NMichael Niedermayer <michael@niedermayer.cc>
上级 c75b23cb
......@@ -1479,7 +1479,8 @@ static void avc_luma_hz_and_aver_dst_8x8_msa(const uint8_t *src,
plus20b, res0, res1, res2, res3);
SRARI_H4_SH(res0, res1, res2, res3, 5);
SAT_SH4_SH(res0, res1, res2, res3, 7);
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1, dst2, dst3,
ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1,
dst, dst_stride);
dst += (4 * dst_stride);
......@@ -1825,8 +1826,8 @@ static void avc_luma_vt_and_aver_dst_8x8_msa(const uint8_t *src,
SRARI_H4_SH(out0, out1, out2, out3, 5);
SAT_SH4_SH(out0, out1, out2, out3, 7);
LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3,
ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1,
dst, dst_stride);
dst += (4 * dst_stride);
......@@ -2229,7 +2230,8 @@ static void avc_luma_mid_and_aver_dst_8w_msa(const uint8_t *src,
res3 = AVC_CALC_DPADD_H_6PIX_2COEFF_SH(hz_out3, hz_out4, hz_out5,
hz_out6, hz_out7, hz_out8);
LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3);
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1, dst2, dst3,
ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1,
dst, dst_stride);
dst += (4 * dst_stride);
......@@ -2518,8 +2520,8 @@ static void avc_luma_midv_qrt_and_aver_dst_8w_msa(const uint8_t *src,
res1 = __msa_aver_s_h(res2, res3);
res2 = __msa_aver_s_h(res4, res5);
res3 = __msa_aver_s_h(res6, res7);
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1, dst2, dst3,
ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
CONVERT_UB_AVG_ST8x4_UB(res0, res1, res2, res3, dst0, dst1,
dst, dst_stride);
dst += (4 * dst_stride);
......@@ -2676,7 +2678,8 @@ static void avc_luma_hv_qrt_and_aver_dst_8x8_msa(const uint8_t *src_x,
out3 = __msa_srari_h((hz_out3 + vert_out3), 1);
SAT_SH4_SH(out0, out1, out2, out3, 7);
CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst2, dst3,
ILVR_D2_UB(dst1, dst0, dst3, dst2, dst0, dst1);
CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1,
dst, dst_stride);
dst += (4 * dst_stride);
......
此差异已折叠。
......@@ -2773,20 +2773,18 @@
/* Description : Converts inputs to unsigned bytes, interleave, average & store
as 8x4 unsigned byte block
Arguments : Inputs - in0, in1, in2, in3, dst0, dst1, dst2, dst3,
pdst, stride
Arguments : Inputs - in0, in1, in2, in3, dst0, dst1, pdst, stride
*/
#define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, \
dst0, dst1, dst2, dst3, pdst, stride) \
{ \
v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \
uint8_t *pdst_m = (uint8_t *) (pdst); \
\
tmp0_m = PCKEV_XORI128_UB(in0, in1); \
tmp1_m = PCKEV_XORI128_UB(in2, in3); \
ILVR_D2_UB(dst1, dst0, dst3, dst2, tmp2_m, tmp3_m); \
AVER_UB2_UB(tmp0_m, tmp2_m, tmp1_m, tmp3_m, tmp0_m, tmp1_m); \
ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \
#define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, \
dst0, dst1, pdst, stride) \
{ \
v16u8 tmp0_m, tmp1_m; \
uint8_t *pdst_m = (uint8_t *) (pdst); \
\
tmp0_m = PCKEV_XORI128_UB(in0, in1); \
tmp1_m = PCKEV_XORI128_UB(in2, in3); \
AVER_UB2_UB(tmp0_m, dst0, tmp1_m, dst1, tmp0_m, tmp1_m); \
ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \
}
/* Description : Pack even byte elements, extract 0 & 2 index words from pair
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册