diff --git a/CMakeLists.txt b/CMakeLists.txt
index 27bc208bae35f2f10b4331512c3668993adf1a80..b54878b365463d4daae8b6e52e81dd58d7d89817 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -9,6 +9,7 @@ option(LOG_PROFILE "log profile" ON)
 option(CPU "armv7 with neon" ON)
 option(MALI_GPU "mali gpu" OFF)
 option(FPGA "fpga" OFF)
+option(QUANTI "quantification" OFF)
 
 file(GLOB_RECURSE PADDLE_MOBILE_CC src/*.cc src/*.cpp src/*.c src/*.mm)
 file(GLOB_RECURSE PADDLE_MOBILE_H src/*.h)
@@ -152,3 +153,7 @@ if(DEBUGING)
     endif()
 endif()
 
+if (QUANTI)
+  add_subdirectory(tools/quantification)
+endif ()
+
diff --git a/src/framework/program/program.h b/src/framework/program/program.h
index 5760efc826667d805695118b12e41efa0305553b..e500d500344d83204bf388401541259b90ea2f78 100644
--- a/src/framework/program/program.h
+++ b/src/framework/program/program.h
@@ -30,6 +30,7 @@ class Program {
   std::string model_path;
   std::string para_path;
   bool combined = false;
+  bool quantification = false;
 
  private:
 };
diff --git a/src/io/executor.cpp b/src/io/executor.cpp
index 480f48290cc1bbf4888832d76187a13a4915ec40..65f019d1e3c3f6f6bdb8a18a9ff99bb7ecb2012c 100644
--- a/src/io/executor.cpp
+++ b/src/io/executor.cpp
@@ -154,7 +154,7 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
 
   tensor->Resize(framework::make_ddim(desc.Dims()));
 
-  void *memory = tensor;
+  void *memory = nullptr;
   int type_size = 0;
   switch (desc.DataType()) {
     case framework::VARTYPE_TYPE_FP16:
@@ -179,11 +179,25 @@ void Executor<Dtype, P>::LoadMemory(const framework::VarDesc var_desc,
     default:
       break;
   }
-
-  for (int n = 0; n < memory_size * type_size; ++n) {
-    static_cast<char *>(memory)[n] = (*data)[n];
+  if (program_.quantification) {
+    float min_value;
+    float max_value;
+
+    memcpy(&min_value, *data, sizeof(float));
+    memcpy(&max_value, *data + sizeof(float), sizeof(float));
+    *data += 2 * sizeof(float);
+    const float factor = (max_value - min_value) / 255.0;
+    uint8_t *uint8_data = (uint8_t *)(*data);
+    for (int k = 0; k < memory_size; ++k) {
+      static_cast<float *>(memory)[k] = uint8_data[k] * factor + min_value;
+    }
+    *data += (memory_size * sizeof(uint8_t));
+  } else {
+    for (int n = 0; n < memory_size * type_size; ++n) {
+      static_cast<char *>(memory)[n] = (*data)[n];
+    }
+    (*data) += (sizeof(char) * memory_size * type_size);
   }
-  (*data) += (sizeof(char) * memory_size * type_size);
 }
 
 template <typename Dtype, Precision P>
diff --git a/src/io/loader.cpp b/src/io/loader.cpp
index 51e007a6ab4bce415628649a40f711903bceee92..9ed877d05d51dfbe7139ea2289fdb6480c62f88f 100644
--- a/src/io/loader.cpp
+++ b/src/io/loader.cpp
@@ -44,26 +44,29 @@ static size_t ReadBuffer(const char *file_name, uint8_t **out) {
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &dirname, bool optimize, bool can_add_split) {
-  auto program =
-      this->LoadProgram(dirname + "/__model__", optimize, can_add_split);
+    const std::string &dirname, bool optimize, bool quantification,
+    bool can_add_split) {
+  auto program = this->LoadProgram(dirname + "/__model__", optimize,
+                                   quantification, can_add_split);
   program.model_path = dirname;
   return program;
 }
 
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::Load(
-    const std::string &model_path, const std::string &para_path,
-    bool optimize) {
+    const std::string &model_path, const std::string &para_path, bool optimize,
+    bool quantification) {
   auto program = this->LoadProgram(model_path, optimize);
 
   program.para_path = para_path;
   program.combined = true;
+  program.quantification = quantification;
   return program;
 }
 
 template <typename Dtype, Precision P>
 const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
-    const std::string &model_path, bool optimize, bool can_add_split) {
+    const std::string &model_path, bool optimize, bool quantification,
+    bool can_add_split) {
   std::string model_filename = model_path;
   PaddleMobile__Framework__Proto__ProgramDesc *c_program;
   uint8_t *buf = NULL;
@@ -82,6 +85,7 @@ const framework::Program<Dtype, P> Loader<Dtype, P>::LoadProgram(
 
   framework::Program<Dtype, P> program;
   program.originProgram = originProgramDesc;
+  program.quantification = quantification;
 
   auto scope = std::make_shared<framework::Scope>();
   program.scope = scope;
diff --git a/src/io/loader.h b/src/io/loader.h
index 5e3c53dc9db858f506a13d2105339038340344a6..512cee831f0a09f8223c07c531eb9d1c74e75d92 100644
--- a/src/io/loader.h
+++ b/src/io/loader.h
@@ -30,6 +30,7 @@ class Loader {
    * */
  const framework::Program<Dtype, P> Load(const std::string &dirname,
                                          bool optimize = false,
+                                         bool quantification = false,
                                          bool can_add_split = false);
 
  /*
@@ -38,11 +39,13 @@ class Loader {
    * */
  const framework::Program<Dtype, P> Load(const std::string &model_path,
                                          const std::string &para_path,
-                                         bool optimize = false);
+                                         bool optimize = false,
+                                         bool quantification = false);
 
 private:
  const framework::Program<Dtype, P> LoadProgram(const std::string &model_path,
                                                 bool optimize = false,
+                                                bool quantification = false,
                                                 bool can_add_split = false);
};
diff --git a/src/io/paddle_mobile.cpp b/src/io/paddle_mobile.cpp
index cabdd799a0e7d561d8bc56c0913f1389c38f8907..5e2e209d64aa7a00b56a5bdbbff88cb3097b7b94 100644
--- a/src/io/paddle_mobile.cpp
+++ b/src/io/paddle_mobile.cpp
@@ -26,7 +26,7 @@ void PaddleMobile<Dtype, P>::SetThreadNum(int num) {
 
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
-                                  int batch_size) {
+                                  bool quantification, int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
   } else {
@@ -35,7 +35,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
 
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(dirname, optimize), batch_size, optimize);
+        loader_->Load(dirname, optimize, quantification), batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
@@ -46,7 +46,7 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &dirname, bool optimize,
 template <typename Dtype, Precision P>
 bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
                                   const std::string &para_path, bool optimize,
-                                  int batch_size) {
+                                  bool quantification, int batch_size) {
   if (loader_.get() == nullptr) {
     loader_ = std::make_shared<Loader<Dtype, P>>();
   } else {
@@ -55,7 +55,8 @@ bool PaddleMobile<Dtype, P>::Load(const std::string &model_path,
 
   if (executor_.get() == nullptr) {
     executor_ = std::make_shared<Executor<Dtype, P>>(
-        loader_->Load(model_path, para_path, optimize), batch_size, optimize);
+        loader_->Load(model_path, para_path, optimize, quantification),
+        batch_size, optimize);
   } else {
     LOG(kLOG_INFO) << "executor inited";
   }
diff --git a/src/io/paddle_mobile.h b/src/io/paddle_mobile.h
index 0c4dfa9782458b03c7d5c4660e15e89937142dc7..5dc3ccb21dd7e67fbe9b5032d01046b12728dc64 100644
--- a/src/io/paddle_mobile.h
+++ b/src/io/paddle_mobile.h
@@ -39,15 +39,15 @@ class PaddleMobile {
    * @b 加载分开形式的 fluid 模型
    * */
   bool Load(const std::string &dirname, bool optimize = false,
-            int batch_size = 1);
+            bool quantification = false, int batch_size = 1);
 
   /*
    * @b load combine format fluid mode
    * @b 加载结合在一起格式的模型
    * */
   bool Load(const std::string &model_path, const std::string &para_path,
-            bool optimize = false, int batch_size = 1);
-
+            bool optimize = false, bool quantification = false,
+            int batch_size = 1);
   /*
    * @b 设置线程数, 当 cmake 中开启 openmp 时生效
    * */
diff --git a/tools/quantification/CMakeLists.txt b/tools/quantification/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1dfb9ee056a4126f65c2ab6fac4c1417039f66ec
--- /dev/null
+++ b/tools/quantification/CMakeLists.txt
@@ -0,0 +1,5 @@
+set(dir ${CMAKE_CURRENT_SOURCE_DIR})
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${dir}/build")
+
+ADD_EXECUTABLE(convert convert.cpp)
+target_link_libraries(convert paddle-mobile)
\ No newline at end of file
diff --git a/tools/quantification/convert.cpp b/tools/quantification/convert.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7a9511a654f3de9ac9ace5d3b9621c360bd86ad9
--- /dev/null
+++ b/tools/quantification/convert.cpp
@@ -0,0 +1,202 @@
+
+
+#include "io/paddle_mobile.h"
+#include <string>
+using std::string;
+
+static const std::string g_googlenet_combine = "../models/googlenet_combine";
+static const std::string g_googlenet = "../models/googlenet";
+using paddle_mobile::Executor;
+using paddle_mobile::framework::Program;
+
+char *Get_binary_data(std::string filename) {
+    FILE *file = fopen(filename.c_str(), "rb");
+    PADDLE_MOBILE_ENFORCE(file != nullptr, "can't open file: %s ",
+                          filename.c_str());
+    fseek(file, 0, SEEK_END);
+    int64_t size = ftell(file);
+    PADDLE_MOBILE_ENFORCE(size > 0, "size is too small");
+    rewind(file);
+    char *data = new char[size];
+    size_t bytes_read = fread(data, 1, size, file);
+    PADDLE_MOBILE_ENFORCE(bytes_read == size,
+                          "read binary file bytes do not match with fseek");
+    DLOG << "Get_binary_data end";
+    fclose(file);
+    return data;
+}
+
+void LoadWithDump(const paddle_mobile::framework::VarDesc var_desc,
+                  paddle_mobile::framework::LoDTensor *tensor, char **data,
+                  FILE *out_file) {
+    // 1. version
+    uint32_t version = *reinterpret_cast<uint32_t *>(*data);
+    // write version
+    fwrite(&version, sizeof(uint32_t), 1, out_file);
+    (*data) += sizeof(uint32_t);
+    // 2. lod information
+    uint64_t *lod_level_ptr = new uint64_t();
+    memcpy(lod_level_ptr, (*data), sizeof(uint64_t));
+    uint64_t lod_level = 0;
+    // write lod information
+    fwrite(&lod_level, sizeof(uint64_t), 1, out_file);
+    delete lod_level_ptr;
+    (*data) += sizeof(uint64_t);
+    auto &lod = *tensor->mutable_lod();
+    lod.resize(lod_level);
+    for (uint64_t i = 0; i < lod_level; ++i) {
+        uint64_t size = *reinterpret_cast<uint64_t *>(*data);
+        // write lod size
+        fwrite(&size, sizeof(uint64_t), 1, out_file);
+        (*data) += sizeof(uint64_t);
+        std::vector<size_t> tmp(size / sizeof(size_t));
+        for (int k = 0; k < tmp.size(); ++k) {
+            tmp[k] = *reinterpret_cast<size_t *>(*data);
+            (*data) += sizeof(size_t);
+        }
+        // write lod size vector
+        fwrite(&tmp, sizeof(size_t), tmp.size(), out_file);
+
+        lod[i] = tmp;
+    }
+
+    // 3. tensor version
+    uint32_t tensor_version = *reinterpret_cast<uint32_t *>(*data);
+    // write tensor version
+    fwrite(&tensor_version, sizeof(uint32_t), 1, out_file);
+    (*data) += sizeof(uint32_t);
+
+    // 4. tensor desc
+    int32_t size = *reinterpret_cast<int32_t *>(*data);
+    // write tensor desc
+    fwrite(&size, sizeof(int32_t), 1, out_file);
+    (*data) += sizeof(int32_t);
+
+    std::unique_ptr<char[]> buf(new char[size]);
+    for (int m = 0; m < size; ++m) {
+        buf.get()[m] = (*data)[m];
+    }
+    fwrite(buf.get(), sizeof(char), size, out_file);
+    (*data) += (sizeof(char) * size);
+
+    const paddle_mobile::framework::TensorDesc &desc = var_desc.Tensor_desc();
+    int memory_size = 1;
+    for (auto l : desc.Dims()) {
+        memory_size *= l;
+    }
+    tensor->Resize(paddle_mobile::framework::make_ddim(desc.Dims()));
+
+    void *memory = tensor;
+    int type_size = 0;
+    switch (desc.DataType()) {
+        case paddle_mobile::framework::VARTYPE_TYPE_FP16:
+            type_size = 2;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_FP32:
+            type_size = 4;
+            memory = tensor->mutable_data<float>();
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_FP64:
+            type_size = 8;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_INT32:
+            type_size = 4;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_INT64:
+            type_size = 8;
+            break;
+        case paddle_mobile::framework::VARTYPE_TYPE_BOOL:
+            type_size = 1;
+            break;
+        default:
+            break;
+    }
+    for (int n = 0; n < memory_size * type_size; ++n) {
+        static_cast<char *>(memory)[n] = (*data)[n];
+    }
+    (*data) += (sizeof(char) * memory_size * type_size);
+    // for float 32
+    float min_value = std::numeric_limits<float>::max();
+    float max_value = std::numeric_limits<float>::min();
+    for (int k = 0; k < memory_size; ++k) {
+        min_value = std::min(min_value, static_cast<float *>(memory)[k]);
+        max_value = std::max(max_value, static_cast<float *>(memory)[k]);
+    }
+    fwrite(&min_value, sizeof(float), 1, out_file);
+    fwrite(&max_value, sizeof(float), 1, out_file);
+    for (int g = 0; g < memory_size; ++g) {
+        float value = static_cast<float *>(memory)[g];
+        uint8_t factor =
+            (uint8_t) round((value - min_value) / (max_value - min_value) * 255);
+        fwrite(&factor, sizeof(uint8_t), 1, out_file);
+    }
+}
+
+void quantificate_combined(std::string model_path, std::string param_path,
+                           std::string param_min_path) {
+    paddle_mobile::Loader<paddle_mobile::CPU, paddle_mobile::Precision::FP32> loader;
+    bool optimize = true;
+    auto program = loader.Load(model_path, param_path, optimize);
+    char *origin_data = Get_binary_data(program.para_path);
+    char *data = origin_data;
+    FILE *out_file = fopen(param_min_path.c_str(), "wb");
+    for (const auto &block : program.originProgram->Blocks()) {
+        for (const auto &var_desc : block->Vars()) {
+            auto var = program.scope->Var(var_desc->Name());
+            if (var_desc->Persistable()) {
+                auto tensor =
+                    var->template GetMutable<paddle_mobile::framework::LoDTensor>();
+                if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+                    continue;
+                }
+                LoadWithDump(*var_desc, tensor, &data, out_file);
+            }
+        }
+    }
+    fclose(out_file);
+    delete origin_data;
+}
+
+void quantificate_seperated(std::string model_dir, std::string param_min_path) {
+    paddle_mobile::Loader<paddle_mobile::CPU, paddle_mobile::Precision::FP32> loader;
+    bool optimize = true;
+    auto program = loader.Load(model_dir, optimize);
+    std::string shell_command = "mkdir " + param_min_path;
+    system(shell_command.c_str());
+    for (const auto &block : program.originProgram->Blocks()) {
+        for (const auto &var_desc : block->Vars()) {
+            auto var = program.scope->Var(var_desc->Name());
+            if (var_desc->Persistable()) {
+                auto tensor =
+                    var->template GetMutable<paddle_mobile::framework::LoDTensor>();
+                if (var_desc->Name() == "feed" || var_desc->Name() == "fetch") {
+                    continue;
+                }
+                std::string file_name = param_min_path + "/" + var_desc->Name();
+
+                FILE *out_file = fopen(file_name.c_str(), "wb");
+                char *origin_data =
+                    Get_binary_data(program.model_path + "/" + var_desc->Name());
+                char *data = origin_data;
+                LoadWithDump(*var_desc, tensor, &data, out_file);
+                delete origin_data;
+                fclose(out_file);
+            }
+        }
+    }
+
+}
+int main() {
+    std::string filename = "params_min";
+    std::string model_path = g_googlenet_combine + "/model";
+    std::string param_path = g_googlenet_combine + "/params";
+    std::string dirname = "param_min_dir";
+    std::string model_dir = g_googlenet;
+//  quantificate_combined(model_path, param_path, filename);
+    quantificate_seperated(model_dir, dirname);
+
+    return 0;
+}
+
+
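
Note on the storage format introduced above: for each persistable tensor, convert.cpp writes the original tensor header, then the float32 min and max of the weights, then one uint8 per weight scaled into [0, 255]; Executor::LoadMemory reverses this with factor = (max - min) / 255. The snippet below is a minimal standalone sketch of that round trip, for reference only. The helper names QuantizeWeights/DequantizeWeights are illustrative and not part of the patch, and a constant tensor (max == min) would need special handling, which the patch itself also does not provide.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <vector>

// Encode the way tools/quantification/convert.cpp does: the caller persists
// min_v and max_v (two floats) followed by one uint8 per weight.
std::vector<uint8_t> QuantizeWeights(const std::vector<float> &w,
                                     float *min_v, float *max_v) {
    *min_v = *std::min_element(w.begin(), w.end());
    *max_v = *std::max_element(w.begin(), w.end());
    std::vector<uint8_t> q(w.size());
    for (size_t i = 0; i < w.size(); ++i) {
        q[i] = static_cast<uint8_t>(
            std::round((w[i] - *min_v) / (*max_v - *min_v) * 255.0f));
    }
    return q;
}

// Decode the way Executor::LoadMemory does when program_.quantification is set.
std::vector<float> DequantizeWeights(const std::vector<uint8_t> &q,
                                     float min_v, float max_v) {
    const float factor = (max_v - min_v) / 255.0f;
    std::vector<float> w(q.size());
    for (size_t i = 0; i < q.size(); ++i) {
        w[i] = q[i] * factor + min_v;
    }
    return w;
}

Each weight therefore costs 1 byte on disk instead of 4, at the price of a worst-case reconstruction error of roughly (max - min) / 510 per element.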
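For completeness, a possible caller-side flow once the tool has been built with the new QUANTI option and run over a combined-format model: pass quantification = true through the new Load overload so that LoadMemory takes the 8-bit path. This is only a usage sketch; the explicit template arguments and the paths (taken from the defaults in convert.cpp's main) are assumptions, not something the patch prescribes.

#include "io/paddle_mobile.h"

int main() {
    // Instantiation assumed to mirror the one used by the convert tool above.
    paddle_mobile::PaddleMobile<paddle_mobile::CPU, paddle_mobile::Precision::FP32> engine;

    // Combined-format model whose params were rewritten by tools/quantification/convert:
    // optimize = true, quantification = true, batch_size = 1.
    bool ok = engine.Load("../models/googlenet_combine/model",  // combined model file
                          "params_min",                         // 8-bit params from convert
                          true, true, 1);
    return ok ? 0 : 1;
}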