From 5cf226fb090d27889ccd7768e580d46fd2f3ab47 Mon Sep 17 00:00:00 2001 From: Stefan Gehrer Date: Fri, 14 Jul 2006 19:31:58 +0000 Subject: [PATCH] avoid double copying of left border in luma prediction Originally committed as revision 5752 to svn://svn.ffmpeg.org/ffmpeg/trunk --- libavcodec/cavs.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/libavcodec/cavs.c b/libavcodec/cavs.c index 5f8f86e27f..220a731529 100644 --- a/libavcodec/cavs.c +++ b/libavcodec/cavs.c @@ -82,7 +82,8 @@ typedef struct { /** intra prediction is done with un-deblocked samples they are saved here before deblocking the MB */ uint8_t *top_border_y, *top_border_u, *top_border_v; - uint8_t left_border_y[16], left_border_u[10], left_border_v[10]; + uint8_t left_border_y[26], left_border_u[10], left_border_v[10]; + uint8_t intern_border_y[26]; uint8_t topleft_border_y, topleft_border_u, topleft_border_v; void (*intra_pred_l[8])(uint8_t *d,uint8_t *top,uint8_t *left,int stride); @@ -151,8 +152,8 @@ static void filter_mb(AVSContext *h, enum mb_t mb_type) { memcpy(&h->top_border_u[h->mbx*10+1], h->cu + 7* h->c_stride,8); memcpy(&h->top_border_v[h->mbx*10+1], h->cv + 7* h->c_stride,8); for(i=0;i<8;i++) { - h->left_border_y[i*2+0] = *(h->cy + 15 + (i*2+0)*h->l_stride); - h->left_border_y[i*2+1] = *(h->cy + 15 + (i*2+1)*h->l_stride); + h->left_border_y[i*2+1] = *(h->cy + 15 + (i*2+0)*h->l_stride); + h->left_border_y[i*2+2] = *(h->cy + 15 + (i*2+1)*h->l_stride); h->left_border_u[i+1] = *(h->cu + 7 + i*h->c_stride); h->left_border_v[i+1] = *(h->cv + 7 + i*h->c_stride); } @@ -211,25 +212,26 @@ static void filter_mb(AVSContext *h, enum mb_t mb_type) { ****************************************************************************/ static inline void load_intra_pred_luma(AVSContext *h, uint8_t *top, - uint8_t *left, int block) { + uint8_t **left, int block) { int i; switch(block) { case 0: - memcpy(&left[1],h->left_border_y,16); - left[0] = left[1]; - left[17] = left[16]; + *left = h->left_border_y; + h->left_border_y[0] = h->left_border_y[1]; + memset(&h->left_border_y[17],h->left_border_y[16],9); memcpy(&top[1],&h->top_border_y[h->mbx*16],16); top[17] = top[16]; top[0] = top[1]; if((h->flags & A_AVAIL) && (h->flags & B_AVAIL)) - left[0] = top[0] = h->topleft_border_y; + h->left_border_y[0] = top[0] = h->topleft_border_y; break; case 1: + *left = h->intern_border_y; for(i=0;i<8;i++) - left[i+1] = *(h->cy + 7 + i*h->l_stride); - memset(&left[9],left[8],9); - left[0] = left[1]; + h->intern_border_y[i+1] = *(h->cy + 7 + i*h->l_stride); + memset(&h->intern_border_y[9],h->intern_border_y[8],9); + h->intern_border_y[0] = h->intern_border_y[1]; memcpy(&top[1],&h->top_border_y[h->mbx*16+8],8); if(h->flags & C_AVAIL) memcpy(&top[9],&h->top_border_y[(h->mbx + 1)*16],8); @@ -238,22 +240,21 @@ static inline void load_intra_pred_luma(AVSContext *h, uint8_t *top, top[17] = top[16]; top[0] = top[1]; if(h->flags & B_AVAIL) - left[0] = top[0] = h->top_border_y[h->mbx*16+7]; + h->intern_border_y[0] = top[0] = h->top_border_y[h->mbx*16+7]; break; case 2: - memcpy(&left[1],&h->left_border_y[8],8); - memset(&left[9],left[8],9); + *left = &h->left_border_y[8]; memcpy(&top[1],h->cy + 7*h->l_stride,16); top[17] = top[16]; - left[0] = h->left_border_y[7]; top[0] = top[1]; if(h->flags & A_AVAIL) - top[0] = left[0]; + top[0] = h->left_border_y[8]; break; case 3: - for(i=0;i<9;i++) - left[i] = *(h->cy + 7 + (i+7)*h->l_stride); - memset(&left[9],left[8],9); + *left = &h->intern_border_y[8]; + for(i=0;i<8;i++) + h->intern_border_y[i+9] = *(h->cy + 7 + (i+8)*h->l_stride); + memset(&h->intern_border_y[17],h->intern_border_y[16],9); memcpy(&top[0],h->cy + 7 + 7*h->l_stride,9); memset(&top[9],top[8],9); break; @@ -816,7 +817,7 @@ static int decode_mb_i(AVSContext *h, int cbp_code) { GetBitContext *gb = &h->s.gb; int block, pred_mode_uv; uint8_t top[18]; - uint8_t left[18]; + uint8_t *left = NULL; uint8_t *d; init_mb(h); @@ -875,7 +876,7 @@ static int decode_mb_i(AVSContext *h, int cbp_code) { /* luma intra prediction interleaved with residual decode/transform/add */ for(block=0;block<4;block++) { d = h->cy + h->luma_scan[block]; - load_intra_pred_luma(h, top, left, block); + load_intra_pred_luma(h, top, &left, block); h->intra_pred_l[h->pred_mode_Y[scan3x3[block]]] (d, top, left, h->l_stride); if(h->cbp & (1<