From 7beddb12d48ac229a4427bb1b7c2a03d831200fb Mon Sep 17 00:00:00 2001
From: Michael Niedermayer <michaelni@gmx.at>
Date: Thu, 19 May 2005 10:48:36 +0000
Subject: [PATCH] use O(number of non zero coeffs) instead of O(number of
 coeffs) storage for the coefficient colleting/reordering

Originally committed as revision 4279 to svn://svn.ffmpeg.org/ffmpeg/trunk
---
 libavcodec/vp3.c | 117 +++++++++++++++++++++++++++++------------------
 1 file changed, 73 insertions(+), 44 deletions(-)

diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c
index b964b7da30..7491d1708c 100644
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -134,8 +134,15 @@ static inline void debug_dc_pred(const char *format, ...) { }
 static inline void debug_idct(const char *format, ...) { } 
 #endif
 
+typedef struct Coeff {
+    struct Coeff *next;
+    DCTELEM coeff;
+    uint8_t index;
+} Coeff;
+
+//FIXME split things out into their own arrays
 typedef struct Vp3Fragment {
-    DCTELEM *coeffs;
+    Coeff *next_coeff;
     /* address of first pixel taking into account which plane the fragment
      * lives on as well as the plane stride */
     int first_pixel;
@@ -143,7 +150,6 @@ typedef struct Vp3Fragment {
     uint16_t macroblock;
     uint8_t coding_method;
     uint8_t coeff_count;
-    int8_t last_coeff;
     int8_t motion_x;
     int8_t motion_y;
 } Vp3Fragment;
@@ -246,7 +252,8 @@ typedef struct Vp3DecodeContext {
     int fragment_height;
 
     Vp3Fragment *all_fragments;
-    DCTELEM *coeffs;
+    Coeff *coeffs;
+    Coeff *next_coeff;
     int u_fragment_start;
     int v_fragment_start;
     
@@ -833,16 +840,17 @@ static void unpack_token(GetBitContext *gb, int token, int *zero_run,
 static void init_frame(Vp3DecodeContext *s, GetBitContext *gb)
 {
     int i;
-    static const DCTELEM zero_block[64];
 
     /* zero out all of the fragment information */
     s->coded_fragment_list_index = 0;
     for (i = 0; i < s->fragment_count; i++) {
-        s->all_fragments[i].coeffs = zero_block;
         s->all_fragments[i].coeff_count = 0;
-        s->all_fragments[i].last_coeff = -1;
 s->all_fragments[i].motion_x = 0xbeef;
 s->all_fragments[i].motion_y = 0xbeef;
+s->all_fragments[i].next_coeff= NULL;
+        s->coeffs[i].index=
+        s->coeffs[i].coeff=0;
+        s->coeffs[i].next= NULL;
     }
 }
 
@@ -1260,6 +1268,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
     /* figure out which fragments are coded; iterate through each
      * superblock (all planes) */
     s->coded_fragment_list_index = 0;
+    s->next_coeff= s->coeffs + s->fragment_count;
     s->first_coded_y_fragment = s->first_coded_c_fragment = 0;
     s->last_coded_y_fragment = s->last_coded_c_fragment = -1;
     first_c_fragment_seen = 0;
@@ -1302,7 +1311,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
                          * the next phase */
                         s->all_fragments[current_fragment].coding_method = 
                             MODE_INTER_NO_MV;
-                        s->all_fragments[current_fragment].coeffs= s->coeffs + 64*s->coded_fragment_list_index;
+                        s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
                         s->coded_fragment_list[s->coded_fragment_list_index] = 
                             current_fragment;
                         if ((current_fragment >= s->u_fragment_start) &&
@@ -1330,7 +1339,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
                      * coding will be determined in next step */
                     s->all_fragments[current_fragment].coding_method = 
                         MODE_INTER_NO_MV;
-                    s->all_fragments[current_fragment].coeffs= s->coeffs + 64*s->coded_fragment_list_index;
+                    s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
                     s->coded_fragment_list[s->coded_fragment_list_index] = 
                         current_fragment;
                     if ((current_fragment >= s->u_fragment_start) &&
@@ -1716,15 +1725,19 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
 
         if (!eob_run) {
             fragment->coeff_count += zero_run;
-            if (fragment->coeff_count < 64)
-                fragment->coeffs[perm[fragment->coeff_count++]] = coeff;
+            if (fragment->coeff_count < 64){
+                fragment->next_coeff->coeff= coeff;
+                fragment->next_coeff->index= perm[fragment->coeff_count++]; //FIXME perm here already?
+                fragment->next_coeff->next= s->next_coeff;
+                s->next_coeff->next=NULL;
+                fragment->next_coeff= s->next_coeff++;
+            }
             debug_vlc(" fragment %d coeff = %d\n",
-                s->coded_fragment_list[i], fragment->coeffs[coeff_index]);
+                s->coded_fragment_list[i], fragment->next_coeff[coeff_index]);
         } else {
-            fragment->last_coeff = fragment->coeff_count;
-            fragment->coeff_count = 64;
+            fragment->coeff_count |= 128;
             debug_vlc(" fragment %d eob with %d coefficients\n", 
-                s->coded_fragment_list[i], fragment->last_coeff);
+                s->coded_fragment_list[i], fragment->coeff_count&127);
             eob_run--;
         }
     }
@@ -1832,6 +1845,7 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
 #define COMPATIBLE_FRAME(x) \
   (compatible_frame[s->all_fragments[x].coding_method] == current_frame_type)
 #define FRAME_CODED(x) (s->all_fragments[x].coding_method != MODE_COPY)
+#define DC_COEFF(u) (s->coeffs[u].index ? 0 : s->coeffs[u].coeff) //FIXME do somethin to simplify this
 static inline int iabs (int x) { return ((x < 0) ? -x : x); }
 
 static void reverse_dc_prediction(Vp3DecodeContext *s,
@@ -1942,7 +1956,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                 predictor_group = (x == 0) + ((y == 0) << 1) +
                     ((x + 1 == fragment_width) << 2);
                 debug_dc_pred(" frag %d: group %d, orig DC = %d, ",
-                    i, predictor_group, s->all_fragments[i].coeffs[0]);
+                    i, predictor_group, DC_COEFF(i));
 
                 switch (predictor_group) {
 
@@ -1957,10 +1971,10 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                     l = i - 1;
 
                     /* fetch the DC values for the predicting fragments */
-                    vul = s->all_fragments[ul].coeffs[0];
-                    vu = s->all_fragments[u].coeffs[0];
-                    vur = s->all_fragments[ur].coeffs[0];
-                    vl = s->all_fragments[l].coeffs[0];
+                    vul = DC_COEFF(ul);
+                    vu = DC_COEFF(u);
+                    vur = DC_COEFF(ur);
+                    vl = DC_COEFF(l);
 
                     /* figure out which fragments are valid */
                     ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul);
@@ -1982,8 +1996,8 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                     ur = i - fragment_width + 1;
 
                     /* fetch the DC values for the predicting fragments */
-                    vu = s->all_fragments[u].coeffs[0];
-                    vur = s->all_fragments[ur].coeffs[0];
+                    vu = DC_COEFF(u);
+                    vur = DC_COEFF(ur);
 
                     /* figure out which fragments are valid */
                     fur = FRAME_CODED(ur) && COMPATIBLE_FRAME(ur);
@@ -2003,7 +2017,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                     l = i - 1;
 
                     /* fetch the DC values for the predicting fragments */
-                    vl = s->all_fragments[l].coeffs[0];
+                    vl = DC_COEFF(l);
 
                     /* figure out which fragments are valid */
                     fl = FRAME_CODED(l) && COMPATIBLE_FRAME(l);
@@ -2032,9 +2046,9 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                     l = i - 1;
 
                     /* fetch the DC values for the predicting fragments */
-                    vul = s->all_fragments[ul].coeffs[0];
-                    vu = s->all_fragments[u].coeffs[0];
-                    vl = s->all_fragments[l].coeffs[0];
+                    vul = DC_COEFF(ul);
+                    vu = DC_COEFF(u);
+                    vl = DC_COEFF(l);
 
                     /* figure out which fragments are valid */
                     ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul);
@@ -2054,9 +2068,9 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
 
                     /* if there were no fragments to predict from, use last
                      * DC saved */
-                    s->all_fragments[i].coeffs[0] += last_dc[current_frame_type];
+                    predicted_dc = last_dc[current_frame_type];
                     debug_dc_pred("from last DC (%d) = %d\n", 
-                        current_frame_type, s->all_fragments[i].coeffs[0]);
+                        current_frame_type, DC_COEFF(i));
 
                 } else {
 
@@ -2086,16 +2100,26 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                             predicted_dc = vul;
                     }
 
-                    /* at long last, apply the predictor */
-                    s->all_fragments[i].coeffs[0] += predicted_dc;
                     debug_dc_pred("from pred DC = %d\n", 
-                    s->all_fragments[i].coeffs[0]);
+                    DC_COEFF(i));
                 }
 
+                /* at long last, apply the predictor */
+                if(s->coeffs[i].index){
+                    *s->next_coeff= s->coeffs[i];
+                    s->coeffs[i].index=0;
+                    s->coeffs[i].coeff=0;
+                    s->coeffs[i].next= s->next_coeff++;
+                }
+                s->coeffs[i].coeff += predicted_dc;
                 /* save the DC */
-                last_dc[current_frame_type] = s->all_fragments[i].coeffs[0];
-                if(s->all_fragments[i].coeffs[0] && s->all_fragments[i].last_coeff<0)
-                    s->all_fragments[i].last_coeff= 0;
+                last_dc[current_frame_type] = DC_COEFF(i);
+                if(DC_COEFF(i) && !(s->all_fragments[i].coeff_count&127)){
+                    s->all_fragments[i].coeff_count= 129;
+//                    s->all_fragments[i].next_coeff= s->next_coeff;
+                    s->coeffs[i].next= s->next_coeff;
+                    (s->next_coeff++)->next=NULL;
+                }
             }
         }
     }
@@ -2115,7 +2139,7 @@ static void render_fragments(Vp3DecodeContext *s,
     int m, n;
     int i = first_fragment;
     int16_t *dequantizer;
-    DCTELEM __align16 output_samples[64];
+    DCTELEM __align16 block[64];
     unsigned char *output_plane;
     unsigned char *last_plane;
     unsigned char *golden_plane;
@@ -2244,15 +2268,21 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x,
                 /* dequantize the DCT coefficients */
                 debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n", 
                     i, s->all_fragments[i].coding_method, 
-                    s->all_fragments[i].coeffs[0], dequantizer[0]);
+                    DC_COEFF(i), dequantizer[0]);
 
                 if(s->avctx->idct_algo==FF_IDCT_VP3){
-                    for (j = 0; j < 64; j++) {
-                        s->all_fragments[i].coeffs[j] *= dequantizer[j];
+                    Coeff *coeff= s->coeffs + i;
+                    memset(block, 0, sizeof(block));
+                    while(coeff->next){
+                        block[coeff->index]= coeff->coeff * dequantizer[coeff->index];
+                        coeff= coeff->next;
                     }
                 }else{
-                    for (j = 0; j < 64; j++) {
-                        s->all_fragments[i].coeffs[j]= (dequantizer[j] * s->all_fragments[i].coeffs[j] + 2) >> 2;
+                    Coeff *coeff= s->coeffs + i;
+                    memset(block, 0, sizeof(block));
+                    while(coeff->next){
+                        block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2;
+                        coeff= coeff->next;
                     }
                 }
 
@@ -2260,18 +2290,17 @@ av_log(s->avctx, AV_LOG_ERROR, " help! got beefy vector! (%X, %X)\n", motion_x,
                 
                 if (s->all_fragments[i].coding_method == MODE_INTRA) {
                     if(s->avctx->idct_algo!=FF_IDCT_VP3)
-                        s->all_fragments[i].coeffs[0] += 128<<3;
+                        block[0] += 128<<3;
                     s->dsp.idct_put(
                         output_plane + s->all_fragments[i].first_pixel,
                         stride,
-                        s->all_fragments[i].coeffs);
+                        block);
                 } else {
                     s->dsp.idct_add(
                         output_plane + s->all_fragments[i].first_pixel,
                         stride,
-                        s->all_fragments[i].coeffs);
+                        block);
                 }
-                memset(s->all_fragments[i].coeffs, 0, 64*sizeof(DCTELEM));
 
                 debug_idct("block after idct_%s():\n",
                     (s->all_fragments[i].coding_method == MODE_INTRA)?
@@ -2611,7 +2640,7 @@ static int vp3_decode_init(AVCodecContext *avctx)
         s->v_fragment_start);
 
     s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
-    s->coeffs = av_malloc(s->fragment_count * sizeof(DCTELEM) * 64);
+    s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65);
     s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int));
     s->pixel_addresses_inited = 0;
 
-- 
GitLab