diff --git a/core/predictor/common/constant.h b/core/predictor/common/constant.h
index ec0c5b4ee292f840b6fd8638b891f9d341463dd5..dd4c5733c410864f3bf8449891bd90e1aec457b1 100644
--- a/core/predictor/common/constant.h
+++ b/core/predictor/common/constant.h
@@ -43,6 +43,8 @@ DECLARE_bool(enable_model_toolkit);
 DECLARE_string(enable_protocol_list);
 DECLARE_bool(enable_cube);
 DECLARE_bool(enable_general_model);
+DECLARE_string(precision);
+DECLARE_bool(use_calib);
 
 // STATIC Variables
 extern const char* START_OP_NAME;
diff --git a/core/predictor/common/utils.h b/core/predictor/common/utils.h
index 052f90b166f04a28d0e7aeb427884921abdcab5e..0230fca2e9033866f4187d4ce4f37f37d567e1d6 100644
--- a/core/predictor/common/utils.h
+++ b/core/predictor/common/utils.h
@@ -13,8 +13,8 @@
 // limitations under the License.
 
 #pragma once
 
-#include
 #include
+#include
 #include "core/predictor/common/inner_common.h"
 #include "core/predictor/common/macros.h"
@@ -26,6 +26,41 @@ namespace predictor {
 namespace butil = base;
 #endif
+enum class Precision {
+  kFloat32 = 0,  ///< fp32
+  kInt8,         ///< int8
+  kHalf,         ///< fp16
+  kBfloat16,     ///< bf16
+};
+
+static std::string PrecisionTypeString(const Precision data_type) {
+  switch (data_type) {
+    case Precision::kFloat32:
+      return "kFloat32";
+    case Precision::kInt8:
+      return "kInt8";
+    case Precision::kHalf:
+      return "kHalf";
+    case Precision::kBfloat16:
+      return "kBfloat16";
+    default:
+      return "unknown";
+  }
+}
+
+static Precision GetPrecision(const std::string& precision_data) {
+  if (precision_data == "fp32") {
+    return Precision::kFloat32;
+  } else if (precision_data == "int8") {
+    return Precision::kInt8;
+  } else if (precision_data == "fp16") {
+    return Precision::kHalf;
+  } else if (precision_data == "bf16") {
+    return Precision::kBfloat16;
+  }
+  return Precision::kFloat32;  // unrecognized string: fall back to fp32
+}
+
 
 class TimerFlow {
  public:
   static const int MAX_SIZE = 1024;
diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h
index 599d5e5e5477da72927f76c0189a82721db3c6b4..eac3b25beabdb51940a4168319c1c61583af6850 100644
--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -147,7 +147,8 @@ class PaddleInferenceEngine : public PaddleEngineBase {
           min_subgraph_size,
           Config::Precision::kFloat32,
           false,
-          false);
+          use_calib);
+      // EnableMkldnnBfloat16();
       LOG(INFO) << "create TensorRT predictor";
     }
 
@@ -171,7 +172,6 @@ class PaddleInferenceEngine : public PaddleEngineBase {
       config.EnableMemoryOptim();
     }
 
-    predictor::AutoLock lock(predictor::GlobalCreateMutex::instance());
     _predictor = CreatePredictor(config);
     if (NULL == _predictor.get()) {
diff --git a/paddle_inference/paddle/src/paddle_engine.cpp b/paddle_inference/paddle/src/paddle_engine.cpp
index 94ed4b9ae92df3c8f407590f9c24f351bf7ec6a3..1dcbb168312625e54f7469c07cf05e58d8efa2b8 100644
--- a/paddle_inference/paddle/src/paddle_engine.cpp
+++ b/paddle_inference/paddle/src/paddle_engine.cpp
@@ -20,11 +20,14 @@ namespace paddle_serving {
 namespace inference {
 
 DEFINE_int32(gpuid, 0, "GPU device id to use");
+DEFINE_string(precision, "fp32", "precision to deploy, default is fp32");
+DEFINE_bool(use_calib, false, "calibration mode, default is false");
 
-REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
-    ::baidu::paddle_serving::predictor::FluidInferEngine<PaddleInferenceEngine>,
-    ::baidu::paddle_serving::predictor::InferEngine,
-    "PADDLE_INFER");
+REGIST_FACTORY_OBJECT_IMPL_WITH_NAME(
+    ::baidu::paddle_serving::predictor::FluidInferEngine<
+        PaddleInferenceEngine>,
+    ::baidu::paddle_serving::predictor::InferEngine,
+    "PADDLE_INFER");
 
 }  // namespace inference
 }  // namespace paddle_serving
diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py
index d96253b592f70956591c345606eeb0d01e1e4b43..3f37c47a01060ec088fbb7b31110746760b8f091 100755
--- a/python/paddle_serving_server/server.py
+++ b/python/paddle_serving_server/server.py
@@ -44,17 +44,22 @@ class Server(object):
     def __init__(self):
         self.server_handle_ = None
         self.infer_service_conf = None
-        self.model_toolkit_conf = []#The quantity is equal to the InferOp quantity,Engine--OP
+        self.model_toolkit_conf = [
+        ]  #The quantity is equal to the InferOp quantity,Engine--OP
         self.resource_conf = None
         self.memory_optimization = False
         self.ir_optimization = False
-        self.model_conf = collections.OrderedDict()# save the serving_server_conf.prototxt content (feed and fetch information) this is a map for multi-model in a workflow
-        self.workflow_fn = "workflow.prototxt"#only one for one Service,Workflow--Op
-        self.resource_fn = "resource.prototxt"#only one for one Service,model_toolkit_fn and general_model_config_fn is recorded in this file
-        self.infer_service_fn = "infer_service.prototxt"#only one for one Service,Service--Workflow
-        self.model_toolkit_fn = []#["general_infer_0/model_toolkit.prototxt"]The quantity is equal to the InferOp quantity,Engine--OP
-        self.general_model_config_fn = []#["general_infer_0/general_model.prototxt"]The quantity is equal to the InferOp quantity,Feed and Fetch --OP
-        self.subdirectory = []#The quantity is equal to the InferOp quantity, and name = node.name = engine.name
+        # save the serving_server_conf.prototxt content (feed and fetch information) this is a map for multi-model in a workflow
+        self.model_conf = collections.OrderedDict()
+        self.workflow_fn = "workflow.prototxt"  #only one for one Service,Workflow--Op
+        self.resource_fn = "resource.prototxt"  #only one for one Service,model_toolkit_fn and general_model_config_fn is recorded in this file
+        self.infer_service_fn = "infer_service.prototxt"  #only one for one Service,Service--Workflow
+        #["general_infer_0/model_toolkit.prototxt"]The quantity is equal to the InferOp quantity,Engine--OP
+        self.model_toolkit_fn = []
+        #["general_infer_0/general_model.prototxt"]The quantity is equal to the InferOp quantity,Feed and Fetch --OP
+        self.general_model_config_fn = []
+        #The quantity is equal to the InferOp quantity, and name = node.name = engine.name
+        self.subdirectory = []
         self.cube_config_fn = "cube.conf"
         self.workdir = ""
         self.max_concurrency = 0
@@ -71,12 +76,16 @@ class Server(object):
         self.use_trt = False
         self.use_lite = False
         self.use_xpu = False
-        self.model_config_paths = collections.OrderedDict() # save the serving_server_conf.prototxt path (feed and fetch information) this is a map for multi-model in a workflow
+        # save the serving_server_conf.prototxt path (feed and fetch information) this is a map for multi-model in a workflow
+        self.model_config_paths = collections.OrderedDict()
         self.product_name = None
         self.container_id = None
 
-    def get_fetch_list(self,infer_node_idx = -1 ):
-        fetch_names = [var.alias_name for var in list(self.model_conf.values())[infer_node_idx].fetch_var]
+    def get_fetch_list(self, infer_node_idx=-1):
+        fetch_names = [
+            var.alias_name
+            for var in list(self.model_conf.values())[infer_node_idx].fetch_var
+        ]
         return fetch_names
 
     def set_max_concurrency(self, concurrency):
@@ -172,6 +181,10 @@ class Server(object):
             engine.use_trt = self.use_trt
             engine.use_lite = self.use_lite
             engine.use_xpu = self.use_xpu
+            engine.use_gpu = False
+            if self.device == "gpu":
+                engine.use_gpu = True
+
             if os.path.exists('{}/__params__'.format(model_config_path)):
                 engine.combined_model = True
             else:
@@ -195,9 +208,10 @@ class Server(object):
         self.workdir = workdir
         if self.resource_conf == None:
             self.resource_conf = server_sdk.ResourceConf()
-        for idx, op_general_model_config_fn in enumerate(self.general_model_config_fn):
+        for idx, op_general_model_config_fn in enumerate(
+                self.general_model_config_fn):
             with open("{}/{}".format(workdir, op_general_model_config_fn),
-                       "w") as fout:
+                      "w") as fout:
                 fout.write(str(list(self.model_conf.values())[idx]))
         for workflow in self.workflow_conf.workflows:
             for node in workflow.nodes:
@@ -212,9 +226,11 @@ class Server(object):
                     if "quant" in node.name:
                         self.resource_conf.cube_quant_bits = 8
             self.resource_conf.model_toolkit_path.extend([workdir])
-            self.resource_conf.model_toolkit_file.extend([self.model_toolkit_fn[idx]])
+            self.resource_conf.model_toolkit_file.extend(
+                [self.model_toolkit_fn[idx]])
             self.resource_conf.general_model_path.extend([workdir])
-            self.resource_conf.general_model_file.extend([op_general_model_config_fn])
+            self.resource_conf.general_model_file.extend(
+                [op_general_model_config_fn])
             #TODO:figure out the meaning of product_name and container_id.
             if self.product_name != None:
                 self.resource_conf.auth_product_name = self.product_name
@@ -237,15 +253,18 @@ class Server(object):
             if os.path.isdir(single_model_config):
                 pass
            elif os.path.isfile(single_model_config):
-                raise ValueError("The input of --model should be a dir not file.")
-
+                raise ValueError(
+                    "The input of --model should be a dir not file.")
+
         if isinstance(model_config_paths_args, list):
             # If there is only one model path, use the default infer_op.
             # Because there are several infer_op type, we need to find
             # it from workflow_conf.
             default_engine_types = [
-                'GeneralInferOp', 'GeneralDistKVInferOp',
-                'GeneralDistKVQuantInferOp','GeneralDetectionOp',
+                'GeneralInferOp',
+                'GeneralDistKVInferOp',
+                'GeneralDistKVQuantInferOp',
+                'GeneralDetectionOp',
             ]
             # now only support single-workflow.
             # TODO:support multi-workflow
@@ -256,16 +275,24 @@ class Server(object):
                         raise Exception(
                             "You have set the engine_name of Op. Please use the form {op: model_path} to configure model path"
                         )
-
+
                     f = open("{}/serving_server_conf.prototxt".format(
-                        model_config_paths_args[model_config_paths_list_idx]), 'r')
-                    self.model_conf[node.name] = google.protobuf.text_format.Merge(str(f.read()), m_config.GeneralModelConfig())
-                    self.model_config_paths[node.name] = model_config_paths_args[model_config_paths_list_idx]
-                    self.general_model_config_fn.append(node.name+"/general_model.prototxt")
-                    self.model_toolkit_fn.append(node.name+"/model_toolkit.prototxt")
+                        model_config_paths_args[model_config_paths_list_idx]),
+                             'r')
+                    self.model_conf[
+                        node.name] = google.protobuf.text_format.Merge(
+                            str(f.read()), m_config.GeneralModelConfig())
+                    self.model_config_paths[
+                        node.name] = model_config_paths_args[
+                            model_config_paths_list_idx]
+                    self.general_model_config_fn.append(
+                        node.name + "/general_model.prototxt")
+                    self.model_toolkit_fn.append(node.name +
+                                                 "/model_toolkit.prototxt")
                     self.subdirectory.append(node.name)
                     model_config_paths_list_idx += 1
-                    if model_config_paths_list_idx == len(model_config_paths_args):
+                    if model_config_paths_list_idx == len(
+                            model_config_paths_args):
                         break
         #Right now, this is not useful.
        elif isinstance(model_config_paths_args, dict):
@@ -278,11 +305,12 @@ class Server(object):
                     "that the input and output of multiple models are the same.")
                 f = open("{}/serving_server_conf.prototxt".format(path), 'r')
                 self.model_conf[node.name] = google.protobuf.text_format.Merge(
-                     str(f.read()), m_config.GeneralModelConfig())
+                    str(f.read()), m_config.GeneralModelConfig())
         else:
-            raise Exception("The type of model_config_paths must be str or list or "
-                            "dict({op: model_path}), not {}.".format(
-                                type(model_config_paths_args)))
+            raise Exception(
+                "The type of model_config_paths must be str or list or "
+                "dict({op: model_path}), not {}.".format(
+                    type(model_config_paths_args)))
 
         # check config here
         # print config here
@@ -409,7 +437,7 @@ class Server(object):
         resource_fn = "{}/{}".format(workdir, self.resource_fn)
         self._write_pb_str(resource_fn, self.resource_conf)
 
-        for idx,single_model_toolkit_fn in enumerate(self.model_toolkit_fn):
+        for idx, single_model_toolkit_fn in enumerate(self.model_toolkit_fn):
             model_toolkit_fn = "{}/{}".format(workdir, single_model_toolkit_fn)
             self._write_pb_str(model_toolkit_fn, self.model_toolkit_conf[idx])
 
@@ -498,6 +526,7 @@ class Server(object):
 
         os.system(command)
 
+
 class MultiLangServer(object):
     def __init__(self):
         self.bserver_ = Server()
@@ -553,22 +582,23 @@ class MultiLangServer(object):
     def set_gpuid(self, gpuid=0):
         self.bserver_.set_gpuid(gpuid)
 
-    def load_model_config(self, server_config_dir_paths, client_config_path=None):
+    def load_model_config(self,
+                          server_config_dir_paths,
+                          client_config_path=None):
         if isinstance(server_config_dir_paths, str):
             server_config_dir_paths = [server_config_dir_paths]
         elif isinstance(server_config_dir_paths, list):
             pass
         else:
             raise Exception("The type of model_config_paths must be str or list"
-                            ", not {}.".format(
-                                type(server_config_dir_paths)))
-
+                            ", not {}.".format(type(server_config_dir_paths)))
         for single_model_config in server_config_dir_paths:
             if os.path.isdir(single_model_config):
                 pass
             elif os.path.isfile(single_model_config):
-                raise ValueError("The input of --model should be a dir not file.")
+                raise ValueError(
+                    "The input of --model should be a dir not file.")
 
         self.bserver_.load_model_config(server_config_dir_paths)
 
        if client_config_path is None:
@@ -576,27 +606,30 @@ class MultiLangServer(object):
         if isinstance(server_config_dir_paths, dict):
             self.is_multi_model_ = True
             client_config_path = []
-            for server_config_path_items in list(server_config_dir_paths.items()):
-                client_config_path.append( server_config_path_items[1] )
+            for server_config_path_items in list(
+                    server_config_dir_paths.items()):
+                client_config_path.append(server_config_path_items[1])
         elif isinstance(server_config_dir_paths, list):
             self.is_multi_model_ = False
             client_config_path = server_config_dir_paths
         else:
-            raise Exception("The type of model_config_paths must be str or list or "
-                            "dict({op: model_path}), not {}.".format(
-                                type(server_config_dir_paths)))
+            raise Exception(
+                "The type of model_config_paths must be str or list or "
+                "dict({op: model_path}), not {}.".format(
+                    type(server_config_dir_paths)))
         if isinstance(client_config_path, str):
             client_config_path = [client_config_path]
         elif isinstance(client_config_path, list):
             pass
-        else:# dict is not support right now.
-            raise Exception("The type of client_config_path must be str or list or "
-                            "dict({op: model_path}), not {}.".format(
-                                type(client_config_path)))
+        else:  # dict is not supported right now.
+            raise Exception(
+                "The type of client_config_path must be str or list or "
+                "dict({op: model_path}), not {}.".format(
+                    type(client_config_path)))
         if len(client_config_path) != len(server_config_dir_paths):
-            raise Warning("The len(client_config_path) is {}, != len(server_config_dir_paths) {}."
-                          .format( len(client_config_path), len(server_config_dir_paths) )
-                          )
+            raise Warning(
+                "The len(client_config_path) is {}, != len(server_config_dir_paths) {}."
+                .format(len(client_config_path), len(server_config_dir_paths)))
         self.bclient_config_path_list = client_config_path
 
     def prepare_server(self,