diff --git a/ffmpeg.c b/ffmpeg.c
index 1d8b65cee2b150595d2a635510a96aef2533d9e5..7816da9391a7847ce7173e1bb0955335f5343c1e 100644
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -80,14 +80,28 @@ static AVOutputFormat *file_oformat;
 static int frame_width  = 160;
 static int frame_height = 128;
 static int frame_rate = 25 * FRAME_RATE_BASE;
-static int video_bit_rate = 200000;
-static int video_bit_rate_tolerance = 200000;
+static int video_bit_rate = 200*1000;
+static int video_bit_rate_tolerance = 4000*1000;
 static int video_qscale = 0;
-static int video_qmin = 3;
-static int video_qmax = 15;
+static int video_qmin = 2;
+static int video_qmax = 31;
 static int video_qdiff = 3;
 static float video_qblur = 0.5;
 static float video_qcomp = 0.5;
+static float video_rc_qsquish=1.0;
+static float video_rc_qmod_amp=0;
+static int video_rc_qmod_freq=0;
+static char *video_rc_override_string=NULL;
+static char *video_rc_eq="tex^qComp";
+static int video_rc_buffer_size=0;
+static float video_rc_buffer_aggressivity=1.0;
+static int video_rc_max_rate=0;
+static int video_rc_min_rate=0;
+static float video_rc_initial_cplx=0;
+static float video_b_qfactor = 1.25;
+static float video_b_qoffset = 1.25;
+static float video_i_qfactor = 0.8;
+static float video_i_qoffset = 0.0;
 static int me_method = 0;
 static int video_disable = 0;
 static int video_codec_id = CODEC_ID_NONE;
@@ -1334,6 +1348,22 @@ void opt_video_bitrate_tolerance(const char *arg)
     video_bit_rate_tolerance = atoi(arg) * 1000;
 }
 
+void opt_video_bitrate_max(const char *arg)
+{
+    video_rc_max_rate = atoi(arg) * 1000;
+}
+
+void opt_video_bitrate_min(const char *arg)
+{
+    video_rc_min_rate = atoi(arg) * 1000;
+}
+
+void opt_video_rc_eq(char *arg)
+{
+    video_rc_eq = arg;
+}
+
+
 void opt_workaround_bugs(const char *arg)
 {
     workaround_bugs = atoi(arg);
@@ -1754,6 +1784,14 @@ void opt_output_file(const char *filename)
             video_enc->max_qdiff = video_qdiff;
             video_enc->qblur = video_qblur;
             video_enc->qcompress = video_qcomp;
+            video_enc->rc_eq = video_rc_eq;
+            video_enc->rc_max_rate = video_rc_max_rate;
+            video_enc->rc_min_rate = video_rc_min_rate;
+            video_enc->rc_buffer_size = video_rc_buffer_size;
+            video_enc->i_quant_factor = video_i_qfactor;
+            video_enc->b_quant_factor = video_b_qfactor;
+            video_enc->i_quant_offset = video_i_qoffset;
+            video_enc->b_quant_offset = video_b_qoffset;
             
             if (do_psnr)
                 video_enc->get_psnr = 1;
@@ -2097,7 +2135,10 @@ const OptionDef options[] = {
     { "qdiff", HAS_ARG | OPT_EXPERT, {(void*)opt_qdiff}, "max difference between the quantiser scale (VBR)", "q" },
     { "qblur", HAS_ARG | OPT_EXPERT, {(void*)opt_qblur}, "video quantiser scale blur (VBR)", "blur" },
     { "qcomp", HAS_ARG | OPT_EXPERT, {(void*)opt_qcomp}, "video quantiser scale compression (VBR)", "compression" },
+    { "rc_eq", HAS_ARG | OPT_EXPERT, {(void*)opt_video_rc_eq}, "", "equation" },
     { "bt", HAS_ARG, {(void*)opt_video_bitrate_tolerance}, "set video bitrate tolerance (in kbit/s)", "tolerance" },
+    { "maxrate", HAS_ARG, {(void*)opt_video_bitrate_max}, "set max video bitrate tolerance (in kbit/s)", "bitrate" },
+    { "minrate", HAS_ARG, {(void*)opt_video_bitrate_min}, "set min video bitrate tolerance (in kbit/s)", "bitrate" },
     { "vd", HAS_ARG | OPT_EXPERT, {(void*)opt_video_device}, "set video grab device", "device" },
     { "vcodec", HAS_ARG | OPT_EXPERT, {(void*)opt_video_codec}, "force video codec", "codec" },
     { "me", HAS_ARG | OPT_EXPERT, {(void*)opt_motion_estimation}, "set motion estimation method", 
diff --git a/libavcodec/Makefile b/libavcodec/Makefile
index dc56f58425b33472d3149d52a935588a908fbd5b..2d3b490018c71f909bc9052f779b244e6a9f07a4 100644
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -15,7 +15,7 @@ OBJS= common.o utils.o mem.o allcodecs.o \
       mpegaudio.o ac3enc.o mjpeg.o resample.o dsputil.o \
       motion_est.o imgconvert.o imgresample.o msmpeg4.o \
       mpeg12.o h263dec.o svq1.o rv10.o mpegaudiodec.o pcm.o simple_idct.o \
-      ratecontrol.o adpcm.o
+      ratecontrol.o adpcm.o eval.o
 ASM_OBJS=
 
 # currently using liba52 for ac3 decoding
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index d65bfc39ab7a68a0842922eb0cd2e6bfebf7be04..a510ece213dbac33ec35743765abee05ceafa664 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -32,6 +32,8 @@ void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
 void (*clear_blocks)(DCTELEM *blocks);
+int (*pix_sum)(UINT8 * pix, int line_size);
+int (*pix_norm1)(UINT8 * pix, int line_size);
 
 op_pixels_abs_func pix_abs16x16;
 op_pixels_abs_func pix_abs16x16_x2;
@@ -159,6 +161,52 @@ static void build_zigzag_end(void)
     }
 }
 
+int pix_sum_c(UINT8 * pix, int line_size)
+{
+    int s, i, j;
+
+    s = 0;
+    for (i = 0; i < 16; i++) {
+	for (j = 0; j < 16; j += 8) {
+	    s += pix[0];
+	    s += pix[1];
+	    s += pix[2];
+	    s += pix[3];
+	    s += pix[4];
+	    s += pix[5];
+	    s += pix[6];
+	    s += pix[7];
+	    pix += 8;
+	}
+	pix += line_size - 16;
+    }
+    return s;
+}
+
+int pix_norm1_c(UINT8 * pix, int line_size)
+{
+    int s, i, j;
+    UINT32 *sq = squareTbl + 256;
+
+    s = 0;
+    for (i = 0; i < 16; i++) {
+	for (j = 0; j < 16; j += 8) {
+	    s += sq[pix[0]];
+	    s += sq[pix[1]];
+	    s += sq[pix[2]];
+	    s += sq[pix[3]];
+	    s += sq[pix[4]];
+	    s += sq[pix[5]];
+	    s += sq[pix[6]];
+	    s += sq[pix[7]];
+	    pix += 8;
+	}
+	pix += line_size - 16;
+    }
+    return s;
+}
+
+
 void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
 {
     int i;
@@ -241,7 +289,6 @@ void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
         block += 8;
     }
 }
-
 #if 0
 
 #define PIXOP2(OPNAME, OP) \
@@ -569,7 +616,6 @@ void (*OPNAME ## _no_rnd_pixels_tab[4])(uint8_t *block, const uint8_t *pixels, i
 };
 #define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
 #endif
-
 #define op_put(a, b) a = b
 
 PIXOP2(avg, op_avg)
@@ -684,8 +730,11 @@ void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_siz
 
 #define op_avg(a, b) a = avg2(a, b)
 #define op_sub(a, b) a -= b
+#define op_put(a, b) a = b
 
 PIXOP(DCTELEM, sub, op_sub, 8)
+PIXOP(uint8_t, avg, op_avg, line_size)
+PIXOP(uint8_t, put, op_put, line_size)
 
 /* not rounding primitives */
 #undef avg2
@@ -693,6 +742,8 @@ PIXOP(DCTELEM, sub, op_sub, 8)
 #define avg2(a,b) ((a+b)>>1)
 #define avg4(a,b,c,d) ((a+b+c+d+1)>>2)
 
+PIXOP(uint8_t, avg_no_rnd, op_avg, line_size)
+PIXOP(uint8_t, put_no_rnd, op_put, line_size)
 /* motion estimation */
 
 #undef avg2
@@ -1261,6 +1312,8 @@ void dsputil_init(void)
     add_pixels_clamped = add_pixels_clamped_c;
     gmc1= gmc1_c;
     clear_blocks= clear_blocks_c;
+    pix_sum= pix_sum_c;
+    pix_norm1= pix_norm1_c;
 
     pix_abs16x16     = pix_abs16x16_c;
     pix_abs16x16_x2  = pix_abs16x16_x2_c;
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 0134a242e08cdd933c213150a049eea21fe17019..2617dc666b22936f93a9071947a1eb7fb5c5016b 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -62,6 +62,9 @@ extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_
 extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
 extern void (*clear_blocks)(DCTELEM *blocks);
+extern int (*pix_sum)(UINT8 * pix, int line_size);
+extern int (*pix_norm1)(UINT8 * pix, int line_size);
+
 
 
 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
diff --git a/libavcodec/eval.c b/libavcodec/eval.c
new file mode 100644
index 0000000000000000000000000000000000000000..8d06f7fc8a5ac2cf1a9e0efc8abdcab9a95f1627
--- /dev/null
+++ b/libavcodec/eval.c
@@ -0,0 +1,241 @@
+/*
+ * simple arithmetic expression evaluator
+ *
+ * Copyright (c) 2002 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+ /*
+ * see http://joe.hotchkiss.com/programming/eval/eval.html
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+#define STACK_SIZE 100
+
+typedef struct Parser{
+    double stack[STACK_SIZE];
+    int stack_index;
+    char *s;
+    double *const_value;
+    char **const_name;          // NULL terminated
+    double (**func1)(void *, double a); // NULL terminated
+    char **func1_name;          // NULL terminated
+    double (**func2)(void *, double a, double b); // NULL terminated
+    char **func2_name;          // NULL terminated
+    void *opaque;
+} Parser;
+
+static void evalExpression(Parser *p);
+
+static void push(Parser *p, double d){
+    if(p->stack_index+1>= STACK_SIZE){
+        fprintf(stderr, "stack overflow in the parser\n");
+        return;
+    }
+    p->stack[ p->stack_index++ ]= d;
+//printf("push %f\n", d); fflush(stdout);
+}
+
+static double pop(Parser *p){
+    if(p->stack_index<=0){
+        fprintf(stderr, "stack underflow in the parser\n");
+        return NAN;
+    }
+//printf("pop\n"); fflush(stdout);
+    return p->stack[ --p->stack_index ];
+}
+
+static int strmatch(char *s, char *prefix){
+    int i;
+    for(i=0; prefix[i]; i++){
+        if(prefix[i] != s[i]) return 0;
+    }
+    return 1;
+}
+
+static void evalPrimary(Parser *p){
+    double d, d2=NAN;
+    char *next= p->s;
+    int i;
+
+    /* number */
+    d= strtod(p->s, &next);
+    if(next != p->s){
+        push(p, d);
+        p->s= next;
+        return;
+    }
+    
+    /* named constants */
+    for(i=0; p->const_name[i]; i++){
+        if(strmatch(p->s, p->const_name[i])){
+            push(p, p->const_value[i]);
+            p->s+= strlen(p->const_name[i]);
+            return;
+        }
+    }
+    
+    p->s= strchr(p->s, '(');
+    if(p->s==NULL){
+        fprintf(stderr, "Parser: missing ( in \"%s\"\n", next);
+        return;
+    }
+    p->s++; // "("
+    evalExpression(p);
+    d= pop(p);
+    p->s++; // ")" or ","
+    if(p->s[-1]== ','){
+        evalExpression(p);
+        d2= pop(p);
+        p->s++; // ")"
+    }
+    
+         if( strmatch(next, "sinh"  ) ) d= sinh(d);
+    else if( strmatch(next, "cosh"  ) ) d= cosh(d);
+    else if( strmatch(next, "tanh"  ) ) d= tanh(d);
+    else if( strmatch(next, "sin"   ) ) d= sin(d);
+    else if( strmatch(next, "cos"   ) ) d= cos(d);
+    else if( strmatch(next, "tan"   ) ) d= tan(d);
+    else if( strmatch(next, "exp"   ) ) d= exp(d);
+    else if( strmatch(next, "log"   ) ) d= log(d);
+    else if( strmatch(next, "squish") ) d= 1/(1+exp(4*d));
+    else if( strmatch(next, "gauss" ) ) d= exp(-d*d/2)/sqrt(2*M_PI);
+    else if( strmatch(next, "abs"   ) ) d= abs(d);
+    else if( strmatch(next, "max"   ) ) d= d > d2 ? d : d2;
+    else if( strmatch(next, "min"   ) ) d= d < d2 ? d : d2;
+    else if( strmatch(next, "gt"    ) ) d= d > d2 ? 1.0 : 0.0;
+    else if( strmatch(next, "lt"    ) ) d= d > d2 ? 0.0 : 1.0;
+    else if( strmatch(next, "eq"    ) ) d= d == d2 ? 1.0 : 0.0;
+//    else if( strmatch(next, "l1"    ) ) d= 1 + d2*(d - 1);
+//    else if( strmatch(next, "sq01"  ) ) d= (d >= 0.0 && d <=1.0) ? 1.0 : 0.0;
+    else{
+        int error=1;
+        for(i=0; p->func1_name && p->func1_name[i]; i++){
+            if(strmatch(next, p->func1_name[i])){
+                d= p->func1[i](p->opaque, d);
+                error=0;
+                break;
+            }
+        }
+
+        for(i=0; p->func2_name && p->func2_name[i]; i++){
+            if(strmatch(next, p->func2_name[i])){
+                d= p->func2[i](p->opaque, d, d2);
+                error=0;
+                break;
+            }
+        }
+
+        if(error){
+            fprintf(stderr, "Parser: unknown function in \"%s\"\n", next);
+            return;
+        }
+    }
+    
+    if(p->s[-1]!= ')'){
+        fprintf(stderr, "Parser: missing ) in \"%s\"\n", next);
+        return;
+    }
+    push(p, d);
+}      
+       
+static void evalPow(Parser *p){
+    int neg= 0;
+    if(p->s[0]=='+') p->s++;
+       
+    if(p->s[0]=='-'){ 
+        neg= 1;
+        p->s++;
+    }
+    
+    if(p->s[0]=='('){
+        p->s++;;
+        evalExpression(p);
+
+        if(p->s[0]!=')')
+            fprintf(stderr, "Parser: missing )\n");
+        p->s++;
+    }else{
+        evalPrimary(p);
+    }
+    
+    if(neg) push(p, -pop(p));
+}
+
+static void evalFactor(Parser *p){
+    evalPow(p);
+    while(p->s[0]=='^'){
+        double d;
+
+        p->s++;
+        evalPow(p);
+        d= pop(p);
+        push(p, pow(pop(p), d));
+    }
+}
+
+static void evalTerm(Parser *p){
+    evalFactor(p);
+    while(p->s[0]=='*' || p->s[0]=='/'){
+        int inv= p->s[0]=='/';
+        double d;
+
+        p->s++;
+        evalFactor(p);
+        d= pop(p);
+        if(inv) d= 1.0/d;
+        push(p, d * pop(p));
+    }
+}
+
+static void evalExpression(Parser *p){
+    evalTerm(p);
+    while(p->s[0]=='+' || p->s[0]=='-'){
+        int sign= p->s[0]=='-';
+        double d;
+
+        p->s++;
+        evalTerm(p);
+        d= pop(p);
+        if(sign) d= -d;
+        push(p, d + pop(p));
+    }
+}
+
+double ff_eval(char *s, double *const_value, char **const_name,
+               double (**func1)(void *, double), char **func1_name, 
+               double (**func2)(void *, double, double), char **func2_name,
+               void *opaque){
+    Parser p;
+    
+    p.stack_index=0;
+    p.s= s;
+    p.const_value= const_value;
+    p.const_name = const_name;
+    p.func1      = func1;
+    p.func1_name = func1_name;
+    p.func2      = func2;
+    p.func2_name = func2_name;
+    p.opaque     = opaque;
+    
+    evalExpression(&p);
+    return pop(&p);
+}
diff --git a/libavcodec/motion_est.c b/libavcodec/motion_est.c
index 612c22cf9fc6b90fd66ecf8df15acc3b25938869..d672642fd86b03e494b7fc79e62c5566b55dc2f9 100644
--- a/libavcodec/motion_est.c
+++ b/libavcodec/motion_est.c
@@ -41,28 +41,6 @@
 #define P_MV1 P[9]
 
 
-static int pix_sum(UINT8 * pix, int line_size)
-{
-    int s, i, j;
-
-    s = 0;
-    for (i = 0; i < 16; i++) {
-	for (j = 0; j < 16; j += 8) {
-	    s += pix[0];
-	    s += pix[1];
-	    s += pix[2];
-	    s += pix[3];
-	    s += pix[4];
-	    s += pix[5];
-	    s += pix[6];
-	    s += pix[7];
-	    pix += 8;
-	}
-	pix += line_size - 16;
-    }
-    return s;
-}
-
 static int pix_dev(UINT8 * pix, int line_size, int mean)
 {
     int s, i, j;
@@ -85,29 +63,6 @@ static int pix_dev(UINT8 * pix, int line_size, int mean)
     return s;
 }
 
-static int pix_norm1(UINT8 * pix, int line_size)
-{
-    int s, i, j;
-    UINT32 *sq = squareTbl + 256;
-
-    s = 0;
-    for (i = 0; i < 16; i++) {
-	for (j = 0; j < 16; j += 8) {
-	    s += sq[pix[0]];
-	    s += sq[pix[1]];
-	    s += sq[pix[2]];
-	    s += sq[pix[3]];
-	    s += sq[pix[4]];
-	    s += sq[pix[5]];
-	    s += sq[pix[6]];
-	    s += sq[pix[7]];
-	    pix += 8;
-	}
-	pix += line_size - 16;
-    }
-    return s;
-}
-
 static int pix_norm(UINT8 * pix1, UINT8 * pix2, int line_size)
 {
     int s, i, j;
@@ -1578,9 +1533,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
 
     fbmin= bidir_refine(s, mb_x, mb_y);
 
-    if(s->flags&CODEC_FLAG_HQ){
-        type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT;
-    }else{
+    {
         int score= dmin;
         type=MB_TYPE_DIRECT;
         
@@ -1596,9 +1549,15 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
             score=fbmin;
             type= MB_TYPE_BIDIR;
         }
+        score= (score*score)>>8;
         s->mc_mb_var_sum += score;
-        s->mc_mb_var[mb_y*s->mb_width + mb_x] = score;
+        s->mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSD
     }
+
+    if(s->flags&CODEC_FLAG_HQ){
+        type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter
+    }
+
 /*
 {
 static int count=0;
diff --git a/libavcodec/mpegvideo.c b/libavcodec/mpegvideo.c
index 04b4e394d0149effac10daaeda70d036ed5eea55..22c6fc9656d6cbea3a88f594ebc220f3beeec8b1 100644
--- a/libavcodec/mpegvideo.c
+++ b/libavcodec/mpegvideo.c
@@ -226,6 +226,8 @@ int MPV_common_init(MpegEncContext *s)
             CHECKED_ALLOCZ(s->tex_pb_buffer, PB_BUFFER_SIZE);
             CHECKED_ALLOCZ(   s->pb2_buffer, PB_BUFFER_SIZE);
         }
+        
+        CHECKED_ALLOCZ(s->avctx->stats_out, 256);
     }
     
     if (s->out_format == FMT_H263 || s->encoding) {
@@ -328,7 +330,8 @@ void MPV_common_end(MpegEncContext *s)
     av_freep(&s->pb2_buffer);
     av_freep(&s->edge_emu_buffer);
     av_freep(&s->non_b_mv4_table);
-
+    av_freep(&s->avctx->stats_out);
+    
     for(i=0;i<3;i++) {
         int j;
         if(!(s->flags&CODEC_FLAG_DR1)){
@@ -377,13 +380,10 @@ int MPV_encode_init(AVCodecContext *avctx)
     s->max_qdiff= avctx->max_qdiff;
     s->qcompress= avctx->qcompress;
     s->qblur= avctx->qblur;
-    s->b_quant_factor= avctx->b_quant_factor;
-    s->b_quant_offset= avctx->b_quant_offset;
     s->avctx = avctx;
     s->aspect_ratio_info= avctx->aspect_ratio_info;
     s->flags= avctx->flags;
     s->max_b_frames= avctx->max_b_frames;
-    s->rc_strategy= avctx->rc_strategy;
     s->b_frame_strategy= avctx->b_frame_strategy;
     s->codec_id= avctx->codec->id;
     s->luma_elim_threshold  = avctx->luma_elim_threshold;
@@ -678,7 +678,6 @@ void MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
                 avctx->dr_opaque_frame= s->next_dr_opaque;
         }
     }
-
     /* set dequantizer, we cant do it during init as it might change for mpeg4
        and we cant do it in the header decode as init isnt called for mpeg4 there yet */
     if(s->out_format == FMT_H263){
@@ -703,10 +702,9 @@ void MPV_frame_end(MpegEncContext *s)
     }
     emms_c();
     
+    s->last_pict_type    = s->pict_type;
     if(s->pict_type!=B_TYPE){
         s->last_non_b_pict_type= s->pict_type;
-        s->last_non_b_qscale= s->qscale;
-        s->last_non_b_mc_mb_var= s->mc_mb_var_sum;
         s->num_available_buffers++;
         if(s->num_available_buffers>2) s->num_available_buffers= 2;
     }
@@ -873,13 +871,21 @@ int MPV_encode_picture(AVCodecContext *avctx,
 
     flush_put_bits(&s->pb);
     s->frame_bits  = (pbBufPtr(&s->pb) - s->pb.buf) * 8;
-    if(s->pict_type==B_TYPE) s->pb_frame_bits+= s->frame_bits;
-    else                     s->pb_frame_bits= s->frame_bits;
-
+    
     s->total_bits += s->frame_bits;
     avctx->frame_bits  = s->frame_bits;
 //printf("fcode: %d, type: %d, head: %d, mv: %d, misc: %d, frame: %d, itex: %d, ptex: %d\n", 
 //s->f_code, avctx->key_frame, s->header_bits, s->mv_bits, s->misc_bits, s->frame_bits, s->i_tex_bits, s->p_tex_bits);
+#if 0 //dump some stats to stats.txt for testing/debuging
+if(s->max_b_frames==0)
+{
+    static FILE *f=NULL;
+    if(!f) f= fopen("stats.txt", "wb");
+    get_psnr(pict->data, s->current_picture,
+             pict->linesize, s->linesize, avctx);
+    fprintf(f, "%7d, %7d, %2.4f\n", pbBufPtr(&s->pb) - s->pb.buf, s->qscale, avctx->psnr_y);
+}
+#endif
 
     if (avctx->get_psnr) {
         /* At this point pict->data should have the original frame   */
@@ -2010,6 +2016,25 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         memset(s->motion_val[0], 0, sizeof(INT16)*(s->mb_width*2 + 2)*(s->mb_height*2 + 2)*2);
         memset(s->p_mv_table   , 0, sizeof(INT16)*(s->mb_width+2)*(s->mb_height+2)*2);
         memset(s->mb_type      , MB_TYPE_INTRA, sizeof(UINT8)*s->mb_width*s->mb_height);
+        
+        if(!s->fixed_qscale){
+            /* finding spatial complexity for I-frame rate control */
+            for(mb_y=0; mb_y < s->mb_height; mb_y++) {
+                for(mb_x=0; mb_x < s->mb_width; mb_x++) {
+                    int xx = mb_x * 16;
+                    int yy = mb_y * 16;
+                    uint8_t *pix = s->new_picture[0] + (yy * s->linesize) + xx;
+                    int varc;
+                    int sum = pix_sum(pix, s->linesize);
+    
+                    sum= (sum+8)>>4;
+                    varc = (pix_norm1(pix, s->linesize) - sum*sum + 500 + 128)>>8;
+
+                    s->mb_var[s->mb_width * mb_y + mb_x] = varc;
+                    s->mb_var_sum    += varc;
+                }
+            }
+        }
     }
     if(s->scene_change_score > 0 && s->pict_type == P_TYPE){
         s->pict_type= I_TYPE;
@@ -2018,7 +2043,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
             s->input_pict_type= I_TYPE;
             s->input_picture_in_gop_number=0;
         }
-//printf("Scene change detected, encoding as I Frame\n");
+//printf("Scene change detected, encoding as I Frame %d %d\n", s->mb_var_sum, s->mc_mb_var_sum);
     }
     
     if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) 
@@ -2037,9 +2062,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
 //printf("f_code %d ///\n", s->f_code);
 
 //    printf("%d %d\n", s->avg_mb_var, s->mc_mb_var);
-    if(s->flags&CODEC_FLAG_PASS2)
-        s->qscale = ff_rate_estimate_qscale_pass2(s);
-    else if (!s->fixed_qscale) 
+    if (!s->fixed_qscale) 
         s->qscale = ff_rate_estimate_qscale(s);
 
     if (s->out_format == FMT_MJPEG) {
@@ -2675,7 +2698,7 @@ void ff_conceal_past_errors(MpegEncContext *s, int unknown_pos)
     int i, intra_count=0, inter_count=0;
     int intra_conceal= s->msmpeg4_version ? 50 : 50; //FIXME finetune
     int inter_conceal= s->msmpeg4_version ? 50 : 50;
-    
+
     // for last block
     if(mb_x>=s->mb_width)  mb_x= s->mb_width -1;
     if(mb_y>=s->mb_height) mb_y= s->mb_height-1;
diff --git a/libavcodec/mpegvideo.h b/libavcodec/mpegvideo.h
index fb2487e5e46b63cb85e99b945bc5cd06d6806c51..4bbcdf77cc080e6d55f3607f6e24f3d8b4e47631 100644
--- a/libavcodec/mpegvideo.h
+++ b/libavcodec/mpegvideo.h
@@ -61,12 +61,34 @@ typedef struct RateControlEntry{
     UINT64 expected_bits;
     int new_pict_type;
     float new_qscale;
+    int mc_mb_var_sum;
+    int mb_var_sum;
+    int i_count;
+    int f_code;
+    int b_code;
 }RateControlEntry;
 
 typedef struct RateControlContext{
     FILE *stats_file;
-    int num_entries;
+    int num_entries;          /* number of RateControlEntries */
     RateControlEntry *entry;
+    int buffer_index;         /* amount of bits in the video/audio buffer */
+    Predictor pred[5];
+    double short_term_qsum;   /* sum of recent qscales */
+    double short_term_qcount; /* count of recent qscales */
+    double pass1_bits;        /* bits outputted by the pass1 code (including complexity init) */
+    double pass1_wanted_bits; /* bits which should have been outputed by the pass1 code (including complexity init) */
+    double last_qscale;
+    double last_qscale_for[5]; /* last qscale for a specific pict type */
+    double next_non_b_qscale;
+    double next_p_qscale;
+    int last_mc_mb_var_sum;
+    int last_mb_var_sum;
+    UINT64 i_cplx_sum[5];
+    UINT64 p_cplx_sum[5];
+    UINT64 mv_bits_sum[5];
+    UINT64 qscale_sum[5];
+    int frame_count[5];
 }RateControlContext;
 
 typedef struct ReorderBuffer{
@@ -107,9 +129,6 @@ typedef struct MpegEncContext {
     int flags;        /* AVCodecContext.flags (HQ, MV4, ...) */
     int force_input_type;/* 0= no force, otherwise I_TYPE, P_TYPE, ... */
     int max_b_frames; /* max number of b-frames for encoding */
-    float b_quant_factor;/* qscale factor between ips and b frames */
-    float b_quant_offset;/* qscale offset between ips and b frames */
-    int rc_strategy;
     int b_frame_strategy;
     int luma_elim_threshold;
     int chroma_elim_threshold;
@@ -170,8 +189,8 @@ typedef struct MpegEncContext {
     int input_pict_type;        /* pict_type prior to reordering of frames */
     int force_type;             /* 0= no force, otherwise I_TYPE, P_TYPE, ... */
     int qscale;                 /* QP */
-    int last_non_b_qscale;	/* QP of last non b frame used for b frame qscale*/
     int pict_type;              /* I_TYPE, P_TYPE, B_TYPE, ... */
+    int last_pict_type;
     int last_non_b_pict_type;   /* used for mpeg4 gmc b-frames & ratecontrol */
     int frame_rate_index;
     /* motion compensation */
@@ -271,18 +290,10 @@ typedef struct MpegEncContext {
     int I_frame_bits; //FIXME used in mpeg12 ...
     int mb_var_sum;          /* sum of MB variance for current frame */
     int mc_mb_var_sum;       /* motion compensated MB variance for current frame */
-    int last_non_b_mc_mb_var;/* motion compensated MB variance for last non b frame */
     INT64 wanted_bits;
     INT64 total_bits;
     int frame_bits;        /* bits used for the current frame */
-    int pb_frame_bits;     /* bits of the last b...bp group */
-    Predictor i_pred;
-    Predictor p_pred;
-    double qsum;         /* sum of qscales */
-    double qcount;       /* count of qscales */
-    double short_term_qsum;   /* sum of recent qscales */
-    double short_term_qcount; /* count of recent qscales */
-    RateControlContext rc_context;
+    RateControlContext rc_context; // contains stuff only accessed in ratecontrol.c
 
     /* statistics, used for 2-pass encoding */
     int mv_bits;
@@ -590,5 +601,10 @@ int ff_rate_estimate_qscale(MpegEncContext *s);
 int ff_rate_estimate_qscale_pass2(MpegEncContext *s);
 void ff_write_pass1_stats(MpegEncContext *s);
 void ff_rate_control_uninit(MpegEncContext *s);
+double ff_eval(char *s, double *const_value, char **const_name,
+               double (**func1)(void *, double), char **func1_name, 
+               double (**func2)(void *, double, double), char **func2_name,
+               void *opaque);
+
 
 #endif /* AVCODEC_MPEGVIDEO_H */
diff --git a/libavcodec/ratecontrol.c b/libavcodec/ratecontrol.c
index 8395eefadd04456adfb87f93af7353f4d1014837..303de82848f9f952412a9d216bbd769de7be06a3 100644
--- a/libavcodec/ratecontrol.c
+++ b/libavcodec/ratecontrol.c
@@ -17,84 +17,154 @@
  * License along with this library; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  */
+#include <math.h>
+#include "common.h"
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
 
-#define STATS_FILE "lavc_stats.txt"
+#undef NDEBUG // allways check asserts, the speed effect is far too small to disable them
+#include <assert.h>
 
 static int init_pass2(MpegEncContext *s);
+static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_factor, int frame_num);
 
 void ff_write_pass1_stats(MpegEncContext *s){
-    RateControlContext *rcc= &s->rc_context;
-//    fprintf(c->stats_file, "type:%d q:%d icount:%d pcount:%d scount:%d itex:%d ptex%d mv:%d misc:%d fcode:%d bcode:%d\")
-    fprintf(rcc->stats_file, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d\n",
+    sprintf(s->avctx->stats_out, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d;\n",
             s->picture_number, s->input_picture_number - s->max_b_frames, s->pict_type, 
-            s->qscale, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits, s->f_code, s->b_code);
+            s->qscale, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits, 
+            s->f_code, s->b_code, s->mc_mb_var_sum, s->mb_var_sum, s->i_count);
 }
 
 int ff_rate_control_init(MpegEncContext *s)
 {
     RateControlContext *rcc= &s->rc_context;
+    int i;
     emms_c();
 
-    if(s->flags&CODEC_FLAG_PASS1){
-        rcc->stats_file= fopen(STATS_FILE, "w");
-        if(!rcc->stats_file){
-            fprintf(stderr, "failed to open " STATS_FILE "\n");
-            return -1;
-        }
-    } else if(s->flags&CODEC_FLAG_PASS2){
-        int size;
+    for(i=0; i<5; i++){
+        rcc->pred[i].coeff= 7.0;
+        rcc->pred[i].count= 1.0;
+    
+        rcc->pred[i].decay= 0.4;
+        rcc->i_cplx_sum [i]=
+        rcc->p_cplx_sum [i]=
+        rcc->mv_bits_sum[i]=
+        rcc->qscale_sum [i]=
+        rcc->frame_count[i]= 1; // 1 is better cuz of 1/0 and such
+        rcc->last_qscale_for[i]=5;
+    }
+    rcc->buffer_index= s->avctx->rc_buffer_size/2;
+
+    rcc->next_non_b_qscale=10;
+    rcc->next_p_qscale=10;
+    
+    if(s->flags&CODEC_FLAG_PASS2){
         int i;
+        char *p;
 
-        rcc->stats_file= fopen(STATS_FILE, "r");
-        if(!rcc->stats_file){
-            fprintf(stderr, "failed to open " STATS_FILE "\n");
-            return -1;
+        /* find number of pics */
+        p= s->avctx->stats_in;
+        for(i=-1; p; i++){
+            p= strchr(p+1, ';');
         }
-
-        /* find number of pics without reading the file twice :) */
-        fseek(rcc->stats_file, 0, SEEK_END);
-        size= ftell(rcc->stats_file);
-        fseek(rcc->stats_file, 0, SEEK_SET);
-
-        size/= 64; // we need at least 64 byte to store a line ...
-        rcc->entry = (RateControlEntry*)av_mallocz(size*sizeof(RateControlEntry));
-
-        for(i=0; !feof(rcc->stats_file); i++){
+        i+= s->max_b_frames;
+        rcc->entry = (RateControlEntry*)av_mallocz(i*sizeof(RateControlEntry));
+        rcc->num_entries= i;
+        
+        /* init all to skiped p frames (with b frames we might have a not encoded frame at the end FIXME) */
+        for(i=0; i<rcc->num_entries; i++){
+            RateControlEntry *rce= &rcc->entry[i];
+            rce->pict_type= rce->new_pict_type=P_TYPE;
+            rce->qscale= rce->new_qscale=2;
+            rce->misc_bits= s->mb_num + 10;
+            rce->mb_var_sum= s->mb_num*100;
+        }        
+        
+        /* read stats */
+        p= s->avctx->stats_in;
+        for(i=0; i<rcc->num_entries - s->max_b_frames; i++){
             RateControlEntry *rce;
             int picture_number;
             int e;
-            
-            e= fscanf(rcc->stats_file, "in:%d ", &picture_number);
+            char *next;
+
+            next= strchr(p, ';');
+            if(next){
+                (*next)=0; //sscanf in unbelieavle slow on looong strings //FIXME copy / dont write
+                next++;
+            }
+            e= sscanf(p, " in:%d ", &picture_number);
+
+            assert(picture_number >= 0);
+            assert(picture_number < rcc->num_entries);
             rce= &rcc->entry[picture_number];
-            e+=fscanf(rcc->stats_file, "out:%*d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%*d bcode:%*d\n",
-                   &rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits, &rce->mv_bits, &rce->misc_bits);
-            if(e!=7){
-                fprintf(stderr, STATS_FILE " is damaged\n");
+
+            e+=sscanf(p, " in:%*d out:%*d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d",
+                   &rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits, &rce->mv_bits, &rce->misc_bits, 
+                   &rce->f_code, &rce->b_code, &rce->mc_mb_var_sum, &rce->mb_var_sum, &rce->i_count);
+            if(e!=12){
+                fprintf(stderr, "statistics are damaged at line %d, parser out=%d\n", i, e);
                 return -1;
             }
+            p= next;
         }
-        rcc->num_entries= i;
         
         if(init_pass2(s) < 0) return -1;
     }
      
-    /* no 2pass stuff, just normal 1-pass */
-    //initial values, they dont really matter as they will be totally different within a few frames
-    s->i_pred.coeff= s->p_pred.coeff= 7.0;
-    s->i_pred.count= s->p_pred.count= 1.0;
-    
-    s->i_pred.decay= s->p_pred.decay= 0.4;
+    if(!(s->flags&CODEC_FLAG_PASS2)){
+
+        rcc->short_term_qsum=0.001;
+        rcc->short_term_qcount=0.001;
     
-    // use more bits at the beginning, otherwise high motion at the begin will look like shit
-    s->qsum=100 * s->qmin;
-    s->qcount=100;
+        rcc->pass1_bits       =0.001;
+        rcc->pass1_wanted_bits=0.001;
+        
+        /* init stuff with the user specified complexity */
+        if(s->avctx->rc_initial_cplx){
+            for(i=0; i<60*30; i++){
+                double bits= s->avctx->rc_initial_cplx * (i/10000.0 + 1.0)*s->mb_num;
+                RateControlEntry rce;
+                double q;
+                
+                if     (i%((s->gop_size+3)/4)==0) rce.pict_type= I_TYPE;
+                else if(i%(s->max_b_frames+1))    rce.pict_type= B_TYPE;
+                else                              rce.pict_type= P_TYPE;
+
+                rce.new_pict_type= rce.pict_type;
+                rce.mc_mb_var_sum= bits*s->mb_num/100000;
+                rce.mb_var_sum   = s->mb_num;
+                rce.qscale   = 2;
+                rce.f_code   = 2;
+                rce.b_code   = 1;
+                rce.misc_bits= 1;
+
+                if(s->pict_type== I_TYPE){
+                    rce.i_count   = s->mb_num;
+                    rce.i_tex_bits= bits;
+                    rce.p_tex_bits= 0;
+                    rce.mv_bits= 0;
+                }else{
+                    rce.i_count   = 0; //FIXME we do know this approx
+                    rce.i_tex_bits= 0;
+                    rce.p_tex_bits= bits*0.9;
+                    rce.mv_bits= bits*0.1;
+                }
+                rcc->i_cplx_sum [rce.pict_type] += rce.i_tex_bits*rce.qscale;
+                rcc->p_cplx_sum [rce.pict_type] += rce.p_tex_bits*rce.qscale;
+                rcc->mv_bits_sum[rce.pict_type] += rce.mv_bits;
+                rcc->frame_count[rce.pict_type] ++;
 
-    s->short_term_qsum=0.001;
-    s->short_term_qcount=0.001;
+                bits= rce.i_tex_bits + rce.p_tex_bits;
 
+                q= get_qscale(s, &rce, rcc->pass1_wanted_bits/rcc->pass1_bits, i);
+                rcc->pass1_wanted_bits+= s->bit_rate/(s->frame_rate / (double)FRAME_RATE_BASE);
+            }
+        }
+
+    }
+    
     return 0;
 }
 
@@ -103,24 +173,257 @@ void ff_rate_control_uninit(MpegEncContext *s)
     RateControlContext *rcc= &s->rc_context;
     emms_c();
 
-    if(rcc->stats_file) 
-        fclose(rcc->stats_file);
-    rcc->stats_file = NULL;
     av_freep(&rcc->entry);
 }
 
+static inline double qp2bits(RateControlEntry *rce, double qp){
+    if(qp<=0.0){
+        fprintf(stderr, "qp<=0.0\n");
+    }
+    return rce->qscale * (double)(rce->i_tex_bits + rce->p_tex_bits+1)/ qp;
+}
+
+static inline double bits2qp(RateControlEntry *rce, double bits){
+    if(bits<0.9){
+        fprintf(stderr, "bits<0.9\n");
+    }
+    return rce->qscale * (double)(rce->i_tex_bits + rce->p_tex_bits+1)/ bits;
+}
+    
+static void update_rc_buffer(MpegEncContext *s, int frame_size){
+    RateControlContext *rcc= &s->rc_context;
+    const double fps= (double)s->frame_rate / FRAME_RATE_BASE;
+    const double buffer_size= s->avctx->rc_buffer_size;
+    const double min_rate= s->avctx->rc_min_rate/fps;
+    const double max_rate= s->avctx->rc_max_rate/fps;
+
+    if(buffer_size){
+        rcc->buffer_index-= frame_size;
+        if(rcc->buffer_index < buffer_size/2 /*FIXME /2 */ || min_rate==0){
+            rcc->buffer_index+= max_rate;
+            if(rcc->buffer_index >= buffer_size)
+                rcc->buffer_index= buffer_size-1;
+        }else{
+            rcc->buffer_index+= min_rate;
+        }
+        
+        if(rcc->buffer_index < 0)
+            fprintf(stderr, "rc buffer underflow\n");
+        if(rcc->buffer_index >= s->avctx->rc_buffer_size)
+            fprintf(stderr, "rc buffer overflow\n");
+    }
+}
+
+/**
+ * modifies the bitrate curve from pass1 for one frame
+ */
+static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_factor, int frame_num){
+    RateControlContext *rcc= &s->rc_context;
+    double q, bits;
+    const int pict_type= rce->new_pict_type;
+    const double mb_num= s->mb_num;  
+    int i;
+    const double last_q= rcc->last_qscale_for[pict_type];
+
+    double const_values[]={
+        M_PI,
+        M_E,
+        rce->i_tex_bits*rce->qscale,
+        rce->p_tex_bits*rce->qscale,
+        (rce->i_tex_bits + rce->p_tex_bits)*(double)rce->qscale,
+        rce->mv_bits/mb_num,
+        rce->pict_type == B_TYPE ? (rce->f_code + rce->b_code)*0.5 : rce->f_code,
+        rce->i_count/mb_num,
+        rce->mc_mb_var_sum/mb_num,
+        rce->mb_var_sum/mb_num,
+        rce->pict_type == I_TYPE,
+        rce->pict_type == P_TYPE,
+        rce->pict_type == B_TYPE,
+        rcc->qscale_sum[pict_type] / (double)rcc->frame_count[pict_type],
+        s->qcompress,
+/*        rcc->last_qscale_for[I_TYPE],
+        rcc->last_qscale_for[P_TYPE],
+        rcc->last_qscale_for[B_TYPE],
+        rcc->next_non_b_qscale,*/
+        rcc->i_cplx_sum[I_TYPE] / (double)rcc->frame_count[I_TYPE],
+        rcc->i_cplx_sum[P_TYPE] / (double)rcc->frame_count[P_TYPE],
+        rcc->p_cplx_sum[P_TYPE] / (double)rcc->frame_count[P_TYPE],
+        rcc->p_cplx_sum[B_TYPE] / (double)rcc->frame_count[B_TYPE],
+        (rcc->i_cplx_sum[pict_type] + rcc->p_cplx_sum[pict_type]) / (double)rcc->frame_count[pict_type],
+        0
+    };
+    char *const_names[]={
+        "PI",
+        "E",
+        "iTex",
+        "pTex",
+        "tex",
+        "mv",
+        "fCode",
+        "iCount",
+        "mcVar",
+        "var",
+        "isI",
+        "isP",
+        "isB",
+        "avgQP",
+        "qComp",
+/*        "lastIQP",
+        "lastPQP",
+        "lastBQP",
+        "nextNonBQP",*/
+        "avgIITex",
+        "avgPITex",
+        "avgPPTex",
+        "avgBPTex",
+        "avgTex",
+        NULL
+    };
+    double (*func1[])(void *, double)={
+        bits2qp,
+        qp2bits,
+        NULL
+    };
+    char *func1_names[]={
+        "bits2qp",
+        "qp2bits",
+        NULL
+    };
+
+    bits= ff_eval(s->avctx->rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
+    
+    rcc->pass1_bits+= bits;
+    bits*=rate_factor;
+    if(bits<0.0) bits=0.0;
+    bits+= 1.0; //avoid 1/0 issues
+    
+    /* user override */
+    for(i=0; i<s->avctx->rc_override_count; i++){
+        RcOverride *rco= s->avctx->rc_override;
+        if(rco[i].start_frame > frame_num) continue;
+        if(rco[i].end_frame   < frame_num) continue;
+    
+        if(rco[i].qscale) 
+            bits= qp2bits(rce, rco[i].qscale); //FIXME move at end to really force it?
+        else
+            bits*= rco[i].quality_factor;
+    }
+
+    q= bits2qp(rce, bits);
+    
+    /* I/B difference */
+    if     (pict_type==I_TYPE && s->avctx->i_quant_factor<0.0)
+        q= -q*s->avctx->i_quant_factor + s->avctx->i_quant_offset;
+    else if(pict_type==B_TYPE && s->avctx->b_quant_factor<0.0)
+        q= -q*s->avctx->b_quant_factor + s->avctx->b_quant_offset;
+    
+    /* last qscale / qdiff stuff */
+    if     (q > last_q + s->max_qdiff) q= last_q + s->max_qdiff;
+    else if(q < last_q - s->max_qdiff) q= last_q - s->max_qdiff;
+
+    rcc->last_qscale_for[pict_type]= q; //Note we cant do that after blurring
+    
+    return q;
+}
+
+/**
+ * gets the qmin & qmax for pict_type
+ */
+static void get_qminmax(int *qmin_ret, int *qmax_ret, MpegEncContext *s, int pict_type){
+    int qmin= s->qmin;                                                       
+    int qmax= s->qmax;
+
+    if(pict_type==B_TYPE){
+        qmin= (int)(qmin*ABS(s->avctx->b_quant_factor)+s->avctx->b_quant_offset + 0.5);
+        qmax= (int)(qmax*ABS(s->avctx->b_quant_factor)+s->avctx->b_quant_offset + 0.5);
+    }else if(pict_type==I_TYPE){
+        qmin= (int)(qmin*ABS(s->avctx->i_quant_factor)+s->avctx->i_quant_offset + 0.5);
+        qmax= (int)(qmax*ABS(s->avctx->i_quant_factor)+s->avctx->i_quant_offset + 0.5);
+    }
+
+    if(qmin<1) qmin=1;
+    if(qmin==1 && s->qmin>1) qmin=2; //avoid qmin=1 unless the user wants qmin=1
+
+    if(qmin<3 && s->max_qcoeff<=128 && pict_type==I_TYPE) qmin=3; //reduce cliping problems
+
+    if(qmax>31) qmax=31;
+    if(qmax<=qmin) qmax= qmin= (qmax+qmin+1)>>1;
+    
+    *qmin_ret= qmin;
+    *qmax_ret= qmax;
+}
+
+static double modify_qscale(MpegEncContext *s, RateControlEntry *rce, double q, int frame_num){
+    RateControlContext *rcc= &s->rc_context;
+    int qmin, qmax;
+    double bits;
+    const int pict_type= rce->new_pict_type;
+    const double buffer_size= s->avctx->rc_buffer_size;
+    const double min_rate= s->avctx->rc_min_rate;
+    const double max_rate= s->avctx->rc_max_rate;
+    
+    get_qminmax(&qmin, &qmax, s, pict_type);
+
+    /* modulation */
+    if(s->avctx->rc_qmod_freq && frame_num%s->avctx->rc_qmod_freq==0 && pict_type==P_TYPE)
+        q*= s->avctx->rc_qmod_amp;
+
+    bits= qp2bits(rce, q);
+
+    /* buffer overflow/underflow protection */
+    if(buffer_size){
+        double expected_size= rcc->buffer_index - bits;
+
+        if(min_rate){
+            double d= 2*(buffer_size - (expected_size + min_rate))/buffer_size;
+            if(d>1.0) d=1.0;
+            q/= pow(d, 1.0/s->avctx->rc_buffer_aggressivity);
+        }
+
+        if(max_rate){
+            double d= 2*expected_size/buffer_size;
+            if(d>1.0) d=1.0;
+            q*= pow(d, 1.0/s->avctx->rc_buffer_aggressivity);
+        }
+    }
+
+    if(s->avctx->rc_qsquish==0.0){
+        if     (q<qmin) q=qmin;
+        else if(q>qmax) q=qmax;
+    }else{
+        double min2= log(qmin);
+        double max2= log(qmax);
+        
+        q= log(q);
+        q= (q - min2)/(max2-min2) - 0.5;
+        q*= -4.0;
+        q= 1.0/(1.0 + exp(q));
+        q= q*(max2-min2) + min2;
+        
+        q= exp(q);
+    }
+
+    return q;
+}
+
 //----------------------------------
 // 1 Pass Code
 
-static double predict(Predictor *p, double q, double var)
+static double predict_size(Predictor *p, double q, double var)
 {
      return p->coeff*var / (q*p->count);
 }
 
+static double predict_qp(Predictor *p, double size, double var)
+{
+//printf("coeff:%f, count:%f, var:%f, size:%f//\n", p->coeff, p->count, var, size);
+     return p->coeff*var / (size*p->count);
+}
+
 static void update_predictor(Predictor *p, double q, double var, double size)
 {
     double new_coeff= size*q / (var + 1);
-    if(var<1000) return;
+    if(var<10) return;
 
     p->count*= p->decay;
     p->coeff*= p->decay;
@@ -130,90 +433,136 @@ static void update_predictor(Predictor *p, double q, double var, double size)
 
 int ff_rate_estimate_qscale(MpegEncContext *s)
 {
-    int qmin= s->qmin;
-    int qmax= s->qmax;
-    int rate_q=5;
     float q;
-    int qscale;
+    int qscale, qmin, qmax;
     float br_compensation;
     double diff;
     double short_term_q;
-    double long_term_q;
     double fps;
-    int picture_number= s->input_picture_number - s->max_b_frames;
+    int picture_number= s->picture_number;
     int64_t wanted_bits;
+    RateControlContext *rcc= &s->rc_context;
+    RateControlEntry local_rce, *rce;
+    double bits;
+    double rate_factor;
+    int var;
+    const int pict_type= s->pict_type;
     emms_c();
 
-    fps= (double)s->frame_rate / FRAME_RATE_BASE;
-    wanted_bits= (uint64_t)(s->bit_rate*(double)picture_number/fps);
-//    printf("%d %d %d\n", picture_number, (int)wanted_bits, (int)s->total_bits);
-    
-    if(s->pict_type==B_TYPE){
-        qmin= (int)(qmin*s->b_quant_factor+s->b_quant_offset + 0.5);
-        qmax= (int)(qmax*s->b_quant_factor+s->b_quant_offset + 0.5);
-    }
-    if(qmin<1) qmin=1;
-    if(qmax>31) qmax=31;
-    if(qmax<=qmin) qmax= qmin;
+    get_qminmax(&qmin, &qmax, s, pict_type);
 
+    fps= (double)s->frame_rate / FRAME_RATE_BASE;
+//printf("input_picture_number:%d picture_number:%d\n", s->input_picture_number, s->picture_number);
         /* update predictors */
     if(picture_number>2){
-        if(s->pict_type!=B_TYPE && s->last_non_b_pict_type == P_TYPE){
-//printf("%d %d %d %f\n", s->qscale, s->last_mc_mb_var, s->frame_bits, s->p_pred.coeff);
-            update_predictor(&s->p_pred, s->last_non_b_qscale, s->last_non_b_mc_mb_var, s->pb_frame_bits);
-        }
+        const int last_var= s->last_pict_type == I_TYPE ? rcc->last_mb_var_sum : rcc->last_mc_mb_var_sum;
+        update_predictor(&rcc->pred[s->last_pict_type], rcc->last_qscale, sqrt(last_var), s->frame_bits);
     }
 
-    if(s->pict_type == I_TYPE){
-        short_term_q= s->short_term_qsum/s->short_term_qcount;
-    
-        long_term_q= s->qsum/s->qcount*(s->total_bits+1)/(wanted_bits+1); //+1 to avoid nan & 0
+    if(s->flags&CODEC_FLAG_PASS2){
+        assert(picture_number>=0);
+        assert(picture_number<rcc->num_entries);
+        rce= &rcc->entry[picture_number];
+        wanted_bits= rce->expected_bits;
+    }else{
+        rce= &local_rce;
+        wanted_bits= (uint64_t)(s->bit_rate*(double)picture_number/fps);
+    }
 
-        q= 1/((1/long_term_q - 1/short_term_q)*s->qcompress + 1/short_term_q);
-    }else if(s->pict_type==B_TYPE){
-        q= (int)(s->last_non_b_qscale*s->b_quant_factor+s->b_quant_offset + 0.5);
-    }else{ //P Frame
-        int i;
-        int diff, best_diff=1000000000;
-        for(i=1; i<=31; i++){
-            diff= predict(&s->p_pred, i, s->mc_mb_var_sum) - (double)s->bit_rate/fps;
-            if(diff<0) diff= -diff;
-            if(diff<best_diff){
-                best_diff= diff;
-                rate_q= i;
-            }
+    diff= s->total_bits - wanted_bits;
+    br_compensation= (s->bit_rate_tolerance - diff)/s->bit_rate_tolerance;
+    if(br_compensation<=0.0) br_compensation=0.001;
+
+    var= pict_type == I_TYPE ? s->mb_var_sum : s->mc_mb_var_sum;
+    
+    if(s->flags&CODEC_FLAG_PASS2){
+        if(pict_type!=I_TYPE)
+            assert(pict_type == rce->new_pict_type);
+
+        q= rce->new_qscale / br_compensation;
+//printf("%f %f %f last:%d var:%d type:%d//\n", q, rce->new_qscale, br_compensation, s->frame_bits, var, pict_type);
+    }else{
+        rce->pict_type= 
+        rce->new_pict_type= pict_type;
+        rce->mc_mb_var_sum= s->mc_mb_var_sum;
+        rce->mb_var_sum   = s->   mb_var_sum;
+        rce->qscale   = 2;
+        rce->f_code   = s->f_code;
+        rce->b_code   = s->b_code;
+        rce->misc_bits= 1;
+
+        if(picture_number>0)
+            update_rc_buffer(s, s->frame_bits);
+
+        bits= predict_size(&rcc->pred[pict_type], rce->qscale, sqrt(var));
+        if(pict_type== I_TYPE){
+            rce->i_count   = s->mb_num;
+            rce->i_tex_bits= bits;
+            rce->p_tex_bits= 0;
+            rce->mv_bits= 0;
+        }else{
+            rce->i_count   = 0; //FIXME we do know this approx
+            rce->i_tex_bits= 0;
+            rce->p_tex_bits= bits*0.9;
+            
+            rce->mv_bits= bits*0.1;
         }
-        s->short_term_qsum*=s->qblur;
-        s->short_term_qcount*=s->qblur;
+        rcc->i_cplx_sum [pict_type] += rce->i_tex_bits*rce->qscale;
+        rcc->p_cplx_sum [pict_type] += rce->p_tex_bits*rce->qscale;
+        rcc->mv_bits_sum[pict_type] += rce->mv_bits;
+        rcc->frame_count[pict_type] ++;
 
-        s->short_term_qsum+= rate_q;
-        s->short_term_qcount++;
-        short_term_q= s->short_term_qsum/s->short_term_qcount;
+        bits= rce->i_tex_bits + rce->p_tex_bits;
+        rate_factor= rcc->pass1_wanted_bits/rcc->pass1_bits * br_compensation;
     
-        long_term_q= s->qsum/s->qcount*(s->total_bits+1)/(wanted_bits+1); //+1 to avoid nan & 0
+        q= get_qscale(s, rce, rate_factor, picture_number);
 
-//    q= (long_term_q - short_term_q)*s->qcompress + short_term_q;
-        q= 1/((1/long_term_q - 1/short_term_q)*s->qcompress + 1/short_term_q);
+//printf("%f ", q);
+        if     (pict_type==I_TYPE && s->avctx->i_quant_factor>0.0)
+            q= rcc->next_p_qscale*s->avctx->i_quant_factor + s->avctx->i_quant_offset;
+        else if(pict_type==B_TYPE && s->avctx->b_quant_factor>0.0)
+            q= rcc->next_non_b_qscale*s->avctx->b_quant_factor + s->avctx->b_quant_offset;
+            
+//printf("%f ", q);
+        assert(q>0.0);
+
+        if(pict_type==P_TYPE || s->intra_only){ //FIXME type dependant blur like in 2-pass
+            rcc->short_term_qsum*=s->qblur;
+            rcc->short_term_qcount*=s->qblur;
+
+            rcc->short_term_qsum+= q;
+            rcc->short_term_qcount++;
+//printf("%f ", q);
+            q= short_term_q= rcc->short_term_qsum/rcc->short_term_qcount;
+//printf("%f ", q);
+        }
+        q= modify_qscale(s, rce, q, picture_number);
+
+        rcc->pass1_wanted_bits+= s->bit_rate/fps;
+
+        if(pict_type != B_TYPE) rcc->next_non_b_qscale= q;
+        if(pict_type == P_TYPE) rcc->next_p_qscale= q;
     }
+//printf("qmin:%d, qmax:%d, q:%f\n", qmin, qmax, q);
+    
+
+    if     (q<qmin) q=qmin; 
+    else if(q>qmax) q=qmax;
+        
+//    printf("%f %d %d %d\n", q, picture_number, (int)wanted_bits, (int)s->total_bits);
+    
 
-    diff= s->total_bits - wanted_bits;
-    br_compensation= (s->bit_rate_tolerance - diff)/s->bit_rate_tolerance;
-    if(br_compensation<=0.0) br_compensation=0.001;
-    q/=br_compensation;
 //printf("%f %f %f\n", q, br_compensation, short_term_q);
     qscale= (int)(q + 0.5);
-    if     (qscale<qmin) qscale=qmin;
-    else if(qscale>qmax) qscale=qmax;
+//printf("%d ", qscale);
     
-    if(s->pict_type!=B_TYPE){
-        s->qsum+= qscale;
-        s->qcount++;
-        if     (qscale<s->last_non_b_qscale-s->max_qdiff) qscale=s->last_non_b_qscale-s->max_qdiff;
-        else if(qscale>s->last_non_b_qscale+s->max_qdiff) qscale=s->last_non_b_qscale+s->max_qdiff;
-    }
 //printf("q:%d diff:%d comp:%f rate_q:%d st_q:%f fvar:%d last_size:%d\n", qscale, (int)diff, br_compensation, 
 //       rate_q, short_term_q, s->mc_mb_var, s->frame_bits);
 //printf("%d %d\n", s->bit_rate, (int)fps);
+
+    rcc->last_qscale= qscale;
+    rcc->last_mc_mb_var_sum= s->mc_mb_var_sum;
+    rcc->last_mb_var_sum= s->mb_var_sum;
     return qscale;
 }
 
@@ -231,10 +580,12 @@ static int init_pass2(MpegEncContext *s)
     uint64_t available_bits[5];
     uint64_t all_const_bits;
     uint64_t all_available_bits= (uint64_t)(s->bit_rate*(double)rcc->num_entries/fps);
-    int num_frames[5]={0,0,0,0,0};
     double rate_factor=0;
     double step;
     int last_i_frame=-10000000;
+    const int filter_size= (int)(s->qblur*4) | 1;  
+    double expected_bits;
+    double *qscale, *blured_qscale;
 
     /* find complexity & const_bits & decide the pict_types */
     for(i=0; i<rcc->num_entries; i++){
@@ -272,10 +623,13 @@ static int init_pass2(MpegEncContext *s)
                 break;
             }
         }
+        rcc->i_cplx_sum [rce->pict_type] += rce->i_tex_bits*rce->qscale;
+        rcc->p_cplx_sum [rce->pict_type] += rce->p_tex_bits*rce->qscale;
+        rcc->mv_bits_sum[rce->pict_type] += rce->mv_bits;
+        rcc->frame_count[rce->pict_type] ++;
 
         complexity[rce->new_pict_type]+= (rce->i_tex_bits+ rce->p_tex_bits)*(double)rce->qscale;
         const_bits[rce->new_pict_type]+= rce->mv_bits + rce->misc_bits;
-        num_frames[rce->new_pict_type]++;
     }
     all_const_bits= const_bits[I_TYPE] + const_bits[P_TYPE] + const_bits[B_TYPE];
     
@@ -283,120 +637,108 @@ static int init_pass2(MpegEncContext *s)
         fprintf(stderr, "requested bitrate is to low\n");
         return -1;
     }
-
-//    avg_complexity= complexity/rcc->num_entries;
-    avg_quantizer[P_TYPE]= 
-    avg_quantizer[I_TYPE]=   (complexity[I_TYPE]+complexity[P_TYPE] + complexity[B_TYPE]/s->b_quant_factor) 
-                           / (all_available_bits - all_const_bits);
-    avg_quantizer[B_TYPE]= avg_quantizer[P_TYPE]*s->b_quant_factor + s->b_quant_offset;
-//printf("avg quantizer: %f %f\n", avg_quantizer[P_TYPE], avg_quantizer[B_TYPE]);
+    
+    /* find average quantizers */
+    avg_quantizer[P_TYPE]=0;
+    for(step=256*256; step>0.0000001; step*=0.5){
+        double expected_bits=0;
+        avg_quantizer[P_TYPE]+= step;
+        
+        avg_quantizer[I_TYPE]= avg_quantizer[P_TYPE]*ABS(s->avctx->i_quant_factor) + s->avctx->i_quant_offset;
+        avg_quantizer[B_TYPE]= avg_quantizer[P_TYPE]*ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
+        
+        expected_bits= 
+            + all_const_bits 
+            + complexity[I_TYPE]/avg_quantizer[I_TYPE]
+            + complexity[P_TYPE]/avg_quantizer[P_TYPE]
+            + complexity[B_TYPE]/avg_quantizer[B_TYPE];
+            
+        if(expected_bits < all_available_bits) avg_quantizer[P_TYPE]-= step;
+//printf("%f %lld %f\n", expected_bits, all_available_bits, avg_quantizer[P_TYPE]);
+    }
+printf("qp_i:%f, qp_p:%f, qp_b:%f\n", avg_quantizer[I_TYPE],avg_quantizer[P_TYPE],avg_quantizer[B_TYPE]);
 
     for(i=0; i<5; i++){
         available_bits[i]= const_bits[i] + complexity[i]/avg_quantizer[i];
     }
 //printf("%lld %lld %lld %lld\n", available_bits[I_TYPE], available_bits[P_TYPE], available_bits[B_TYPE], all_available_bits);
-    
+        
+    qscale= malloc(sizeof(double)*rcc->num_entries);
+    blured_qscale= malloc(sizeof(double)*rcc->num_entries);
+
     for(step=256*256; step>0.0000001; step*=0.5){
-        uint64_t expected_bits=0;
+        expected_bits=0;
         rate_factor+= step;
+        
+        rcc->buffer_index= s->avctx->rc_buffer_size/2;
+
         /* find qscale */
         for(i=0; i<rcc->num_entries; i++){
+            qscale[i]= get_qscale(s, &rcc->entry[i], rate_factor, i);
+        }
+        assert(filter_size%2==1);
+
+        /* fixed I/B QP relative to P mode */
+        rcc->next_non_b_qscale= 10;
+        rcc->next_p_qscale= 10;
+        for(i=rcc->num_entries-1; i>=0; i--){
             RateControlEntry *rce= &rcc->entry[i];
-            double short_term_q, q, bits_left;
             const int pict_type= rce->new_pict_type;
-            int qmin= s->qmin;
-            int qmax= s->qmax;
+        
+            if     (pict_type==I_TYPE && s->avctx->i_quant_factor>0.0)
+                qscale[i]= rcc->next_p_qscale*s->avctx->i_quant_factor + s->avctx->i_quant_offset;
+            else if(pict_type==B_TYPE && s->avctx->b_quant_factor>0.0)
+                qscale[i]= rcc->next_non_b_qscale*s->avctx->b_quant_factor + s->avctx->b_quant_offset;
+
+            if(pict_type!=B_TYPE) 
+                rcc->next_non_b_qscale= qscale[i];
+            if(pict_type==P_TYPE) 
+                rcc->next_p_qscale= qscale[i];
+        }
 
-            if(pict_type==B_TYPE){
-                qmin= (int)(qmin*s->b_quant_factor+s->b_quant_offset + 0.5);
-                qmax= (int)(qmax*s->b_quant_factor+s->b_quant_offset + 0.5);
-            }
-            if(qmin<1) qmin=1;
-            if(qmax>31) qmax=31;
-            if(qmax<=qmin) qmax= qmin;
+        /* smooth curve */
+        for(i=0; i<rcc->num_entries; i++){
+            RateControlEntry *rce= &rcc->entry[i];
+            const int pict_type= rce->new_pict_type;
+            int j;
+            double q=0.0, sum=0.0;
+        
+            for(j=0; j<filter_size; j++){
+                int index= i+j-filter_size/2;
+                double d= index-i;
+                double coeff= s->qblur==0 ? 1.0 : exp(-d*d/(s->qblur * s->qblur));
             
-            switch(s->rc_strategy){
-            case 0:
-                bits_left= available_bits[pict_type]/num_frames[pict_type]*rate_factor - rce->misc_bits - rce->mv_bits;
-                if(bits_left<1.0) bits_left=1.0;
-                short_term_q= rce->qscale*(rce->i_tex_bits + rce->p_tex_bits)/bits_left;
-                break;
-            case 1:
-                bits_left= (available_bits[pict_type] - const_bits[pict_type])/num_frames[pict_type]*rate_factor;
-                if(bits_left<1.0) bits_left=1.0;
-                short_term_q= rce->qscale*(rce->i_tex_bits + rce->p_tex_bits)/bits_left;
-                break;
-            case 2:
-                bits_left= available_bits[pict_type]/num_frames[pict_type]*rate_factor;
-                if(bits_left<1.0) bits_left=1.0;
-                short_term_q= rce->qscale*(rce->i_tex_bits + rce->p_tex_bits + rce->misc_bits + rce->mv_bits)/bits_left;
-                break;
-            default:
-                fprintf(stderr, "unknown strategy\n");
-                short_term_q=3; //gcc warning fix
+                if(index < 0 || index >= rcc->num_entries) continue;
+                if(pict_type != rcc->entry[index].new_pict_type) continue;
+                q+= qscale[index] * coeff;
+                sum+= coeff;
             }
-
-            if(short_term_q>31.0) short_term_q=31.0;
-            else if (short_term_q<1.0) short_term_q=1.0;
-
-            q= 1/((1/avg_quantizer[pict_type] - 1/short_term_q)*s->qcompress + 1/short_term_q);
-            if     (q<qmin) q=qmin;
-            else if(q>qmax) q=qmax;
-//printf("lq:%f, sq:%f t:%f q:%f\n", avg_quantizer[rce->pict_type], short_term_q, bits_left, q);
-            rce->new_qscale= q;
+            blured_qscale[i]= q/sum;
         }
-
-        /* smooth curve */
     
         /* find expected bits */
         for(i=0; i<rcc->num_entries; i++){
             RateControlEntry *rce= &rcc->entry[i];
-            double factor= rce->qscale / rce->new_qscale;
-            
+            double bits;
+            rce->new_qscale= modify_qscale(s, rce, blured_qscale[i], i);
+            bits= qp2bits(rce, rce->new_qscale);
+//printf("%d %f\n", rce->new_bits, blured_qscale[i]);
+            update_rc_buffer(s, bits);
+
             rce->expected_bits= expected_bits;
-            expected_bits += (int)(rce->misc_bits + rce->mv_bits + (rce->i_tex_bits + rce->p_tex_bits)*factor + 0.5);
+            expected_bits += bits;
         }
 
-//        printf("%d %d %f\n", (int)expected_bits, (int)all_available_bits, rate_factor);
+//        printf("%f %d %f\n", expected_bits, (int)all_available_bits, rate_factor);
         if(expected_bits > all_available_bits) rate_factor-= step;
     }
+    free(qscale);
+    free(blured_qscale);
 
-    return 0;
-}
-
-int ff_rate_estimate_qscale_pass2(MpegEncContext *s)
-{
-    int qmin= s->qmin;
-    int qmax= s->qmax;
-    float q;
-    int qscale;
-    float br_compensation;
-    double diff;
-    int picture_number= s->picture_number;
-    RateControlEntry *rce= &s->rc_context.entry[picture_number];
-    int64_t wanted_bits= rce->expected_bits;
-    emms_c();
-
-//    printf("%d %d %d\n", picture_number, (int)wanted_bits, (int)s->total_bits);
-    
-    if(s->pict_type==B_TYPE){
-        qmin= (int)(qmin*s->b_quant_factor+s->b_quant_offset + 0.5);
-        qmax= (int)(qmax*s->b_quant_factor+s->b_quant_offset + 0.5);
+    if(abs(expected_bits/all_available_bits - 1.0) > 0.01 ){
+        fprintf(stderr, "Error: 2pass curve failed to converge\n");
+        return -1;
     }
-    if(qmin<1) qmin=1;
-    if(qmax>31) qmax=31;
-    if(qmax<=qmin) qmax= qmin;
 
-    q= rce->new_qscale;
-
-    diff= s->total_bits - wanted_bits;
-    br_compensation= (s->bit_rate_tolerance - diff)/s->bit_rate_tolerance;
-    if(br_compensation<=0.0) br_compensation=0.001;
-    q/=br_compensation;
-
-    qscale= (int)(q + 0.5);
-    if     (qscale<qmin) qscale=qmin;
-    else if(qscale>qmax) qscale=qmax;
-//    printf("%d %d %d %d type:%d\n", qmin, qscale, qmax, picture_number, s->pict_type); fflush(stdout);
-    return qscale;
+    return 0;
 }