DLTP-36497 [Bug] 【develop】detection示例检测框偏离

9318c46a · felixhjh · eb16b33f · 9318c46a · 9318c46a · 9318c46a
25 changed files
--- a/README.md
+++ b/README.md
@@ -188,7 +188,7 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --p
 | `use_lite` (Only for Intel x86 CPU or ARM CPU) | -    | -       | Run PaddleLite inference                              |
 | `use_xpu`                                      | -    | -       | Run PaddleLite inference with Baidu Kunlun XPU        |
 | `precision`                                    | str  | FP32    | Precision Mode, support FP32, FP16, INT8              |
-| `use_calib`                                    | bool | False   | Only for deployment with TensorRT                     |
+| `use_calib`                                    | bool | False   | Use TRT int8 calibration                              |
 | `gpu_multi_stream`                             | bool | False   | EnableGpuMultiStream to get larger QPS                |

 #### Description of asynchronous model

--- a/README_CN.md
+++ b/README_CN.md
@@ -187,7 +187,7 @@ python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --p
 | `use_lite` (Only for Intel x86 CPU or ARM CPU) | -    | -       | Run PaddleLite inference                              |
 | `use_xpu`                                      | -    | -       | Run PaddleLite inference with Baidu Kunlun XPU        |
 | `precision`                                    | str  | FP32    | Precision Mode, support FP32, FP16, INT8              |
-| `use_calib`                                    | bool | False   | Only for deployment with TensorRT                     |
+| `use_calib`                                    | bool | False   | Use TRT int8 calibration                              |
 | `gpu_multi_stream`                             | bool | False   | EnableGpuMultiStream to get larger QPS                |

 #### 异步模型的说明

--- a/core/cube/cube-api/go-api/conf/cube.conf
+++ b/core/cube/cube-api/go-api/conf/cube.conf
+[{
+    "dict_name": "test",
+    "shard": 2,
+    "nodes": [{
+        "ip": "127.0.0.1",
+        "port": 8731
+    },{
+        "ip": "127.0.0.1",
+        "port": 8730
+    }]
+}]
--- a/core/cube/cube-api/go-api/demo.go
+++ b/core/cube/cube-api/go-api/demo.go
+package main
+
+import (
+	"encoding/json"
+	"flag"
+	"fmt"
+	"io/ioutil"
+)
+
+// main is the command-line entry point of the cube lookup demo.
+//
+// It parses the -n (dictionary name), -c (configuration path), -i (key file)
+// and -o (result file) flags, decodes the JSON server list from the
+// configuration file into Meta.Servers, and runs the lookup. Every failure is
+// printed to standard output and the program then returns normally.
+func main() {
+	dictName := flag.String("n", "test", "cube name")
+	confPath := flag.String("c", "./conf/cube.conf", "cube conf path")
+	inputPath := flag.String("i", "./input.json", "keys to seek")
+	outputPath := flag.String("o", "./output.json", "result to save")
+	flag.Parse()
+
+	raw, readErr := ioutil.ReadFile(*confPath)
+	if readErr != nil {
+		fmt.Println("读取配置文件失败", readErr)
+		return
+	}
+
+	// The configuration file is a bare JSON array, so it is decoded straight
+	// into the Servers slice rather than into Meta itself.
+	var meta Meta
+	if parseErr := json.Unmarshal(raw, &meta.Servers); parseErr != nil {
+		fmt.Println("解析数据失败", parseErr)
+		return
+	}
+
+	if seekErr := meta.Seek(*dictName, *inputPath, *outputPath); seekErr != nil {
+		fmt.Println(seekErr)
+	}
+}
--- a/core/cube/cube-api/go-api/input.json
+++ b/core/cube/cube-api/go-api/input.json
+{"keys": [0,1,2,3,4,5,6,7]}
+{"keys": [1]}
--- a/core/cube/cube-api/go-api/meta.go
+++ b/core/cube/cube-api/go-api/meta.go
+package main
+
+import "fmt"
+
+// Meta holds the cube servers known to this client. Seek selects one of them
+// by dictionary name.
+type Meta struct {
+	// Servers lists every configured cube dictionary together with its shards.
+	Servers []CubeServer `json:"servers,omitempty"`
+}
+
+func (meta *Meta) Seek(dict_name string, input string, output string) (err error) {
+	var server CubeServer
+
+	for _, s := range meta.Servers {
+		if s.Name == dict_name {
+			server = s
+			break
+		}
+	}
+	if server.Name != dict_name {
+		err = fmt.Errorf("%s server not exist", dict_name)
+		return err
+	}
+	err = server.Seek(input, output)
+	return err
+}
--- a/core/cube/cube-api/go-api/server.go
+++ b/core/cube/cube-api/go-api/server.go
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"net/http"
+	"os"
+)
+
+// Input is one lookup request: the keys to fetch. It is decoded from each
+// line of the key file and JSON-encoded as the body of the request sent to a
+// shard.
+type Input struct {
+	Keys []uint64 `json:"keys"`
+}
+
+// SingleValue is the result for a single key as decoded from a shard's JSON
+// response.
+type SingleValue struct {
+	Status uint32 `json:"status"`
+	Value  string `json:"value"`
+}
+// Output is a list of per-key results. It is both the shape decoded from a
+// shard's response and the shape written to the result file.
+type Output struct {
+	Values []SingleValue `json:"values"`
+}
+
+// ServerNode is the address of one shard; Ip and Port are used to build the
+// request URL.
+type ServerNode struct {
+	Ip   string `json:"ip"`
+	Port uint64 `json:"port"`
+}
+
+// CubeServer describes one cube dictionary and the nodes that serve it.
+type CubeServer struct {
+	// Name is the dictionary name matched against the requested dict_name.
+	Name string `json:"dict_name"`
+	// Shard is the modulus used to map a key to a shard id (key % Shard).
+	Shard uint64 `json:"shard"`
+	// Nodes is indexed by shard id when a request is posted.
+	Nodes []ServerNode `json:"nodes"`
+}
+
+// SplitKeys partitions keys by shard id, where the shard id of a key is
+// key % server.Shard.
+//
+// splited_keys maps each shard id to an Input holding that shard's keys in
+// their original relative order. offset maps each shard id to the positions
+// those keys occupied in the keys argument, so per-shard results can be put
+// back in request order. A Shard of 0 makes the modulo panic at run time.
+func (server *CubeServer) SplitKeys(keys []uint64) (splited_keys map[uint64]Input, offset map[uint64][]uint64) {
+	splited_keys = map[uint64]Input{}
+	offset = map[uint64][]uint64{}
+
+	for pos, key := range keys {
+		shard := key % server.Shard
+
+		// Input is stored by value, so the entry is read, extended and
+		// written back.
+		bucket := splited_keys[shard]
+		bucket.Keys = append(bucket.Keys, key)
+		splited_keys[shard] = bucket
+
+		offset[shard] = append(offset[shard], uint64(pos))
+	}
+
+	return splited_keys, offset
+}
+
+// Seek answers every query in the file at input and appends the results to
+// the file at output_path.
+//
+// The input file holds one JSON object per line in the form {"keys": [...]}.
+// Blank lines are skipped, and a final line without a trailing newline is
+// still processed. For each query one JSON object {"values": [...]} is
+// appended to output_path, followed by a newline, with exactly one value per
+// requested key in request order.
+//
+// The first failure aborts the run and is returned: an unreadable input, an
+// output file that cannot be opened or written, a malformed query line, or a
+// failed shard request. A Shard count of 0 is rejected up front because the
+// key split would otherwise divide by zero.
+func (server *CubeServer) Seek(input string, output_path string) (err error) {
+	if server.Shard == 0 {
+		return fmt.Errorf("%s: shard count must be positive", server.Name)
+	}
+
+	file, err := os.Open(input)
+	if err != nil {
+		return err
+	}
+	defer file.Close()
+
+	// Open the output once instead of once per line; a deferred Close inside
+	// the loop would keep every handle open until the function returns.
+	fp, err := os.OpenFile(output_path, os.O_RDWR|os.O_APPEND|os.O_CREATE, 0755)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		// A failed Close can mean lost data, so surface it unless an earlier
+		// error is already being returned.
+		if cerr := fp.Close(); cerr != nil && err == nil {
+			err = cerr
+		}
+	}()
+
+	buf := bufio.NewReader(file)
+	for lineNo := 1; ; lineNo++ {
+		line, readErr := buf.ReadBytes('\n')
+		if readErr != nil && readErr != io.EOF {
+			return readErr
+		}
+		// ReadBytes may return data together with io.EOF when the last line
+		// has no trailing newline, so handle the data before checking EOF.
+		if query := bytes.TrimSpace(line); len(query) > 0 {
+			if err = server.seekLine(query, lineNo, fp); err != nil {
+				return err
+			}
+		}
+		if readErr == io.EOF {
+			return nil
+		}
+	}
+}
+
+// seekLine resolves the single JSON query in line against the shards and
+// writes the merged result, plus a trailing newline, to w.
+func (server *CubeServer) seekLine(line []byte, lineNo int, w io.Writer) error {
+	var query Input
+	if err := json.Unmarshal(line, &query); err != nil {
+		return fmt.Errorf("input line %d: %v", lineNo, err)
+	}
+
+	var output Output
+	output.Values = make([]SingleValue, len(query.Keys))
+
+	splitedKeys, offset := server.SplitKeys(query.Keys)
+	for shardID, keys := range splitedKeys {
+		curOutput, err := server.Post(shardID, keys)
+		if err != nil {
+			return fmt.Errorf("input line %d: shard %d: %v", lineNo, shardID, err)
+		}
+
+		positions := offset[shardID]
+		if len(curOutput.Values) != len(positions) {
+			log.Printf("input line %d: shard %d returned %d values for %d keys",
+				lineNo, shardID, len(curOutput.Values), len(positions))
+		}
+		for index, singleValue := range curOutput.Values {
+			if index >= len(positions) {
+				// Ignore surplus values rather than index out of range.
+				break
+			}
+			output.Values[positions[index]] = singleValue
+		}
+	}
+
+	jsonStr, err := json.Marshal(output)
+	if err != nil {
+		return err
+	}
+	_, err = w.Write(append(jsonStr, '\n'))
+	return err
+}
+
+// Post sends input as a JSON POST request to the node at index shard_id, at
+// the path /DictService/seek, and decodes the JSON response into output.
+//
+// It returns an error when shard_id is not a valid index into server.Nodes,
+// when the request cannot be encoded or sent, when the response status is
+// not 2xx, or when the response body is not valid JSON for Output. On error
+// the returned Output is the zero value.
+func (server *CubeServer) Post(shard_id uint64, input Input) (output Output, err error) {
+	if shard_id >= uint64(len(server.Nodes)) {
+		err = fmt.Errorf("have no shard:%v", shard_id)
+		return output, err
+	}
+	node := server.Nodes[shard_id]
+
+	json_str, err := json.Marshal(input)
+	if err != nil {
+		return output, err
+	}
+	URL := fmt.Sprintf("http://%s:%v/DictService/seek", node.Ip, node.Port)
+	req, err := http.NewRequest("POST", URL, bytes.NewBuffer(json_str))
+	if err != nil {
+		return output, err
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	client := &http.Client{}
+	resp, err := client.Do(req)
+	if err != nil {
+		return output, err
+	}
+	// The body must be closed or the underlying connection is leaked.
+	defer resp.Body.Close()
+
+	// Read the body before checking the status so the connection can be
+	// reused even when the request is rejected.
+	body, err := ioutil.ReadAll(resp.Body)
+	if err != nil {
+		return output, err
+	}
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		return output, fmt.Errorf("%s: unexpected status %s", URL, resp.Status)
+	}
+
+	if err = json.Unmarshal(body, &output); err != nil {
+		return Output{}, err
+	}
+	return output, nil
+}
--- a/core/cube/cube-api/python-api/conf/cube.conf
+++ b/core/cube/cube-api/python-api/conf/cube.conf
+[{
+    "dict_name": "test",
+    "shard": 2,
+    "nodes": [{
+        "ip": "127.0.0.1",
+        "port": 8731
+    },{
+        "ip": "127.0.0.1",
+        "port": 8730
+    }]
+}]
--- a/core/cube/cube-api/python-api/demo.py
+++ b/core/cube/cube-api/python-api/demo.py
+#coding=utf-8
+
+import requests
+import sys
+import json
+
+class Meta(object):
+    """记录cube分片server路由"""
+    def __init__(self, conf_path):
+        """根据配置文件初始化路由"""
+        self.server_api = "/DictService/seek"
+        self.server_meta = {}
+        with open(conf_path, "r", encoding="utf8") as fp:
+            cube_servcers = json.load(fp)
+            for server in cube_servcers:
+                self.server_meta[server["dict_name"]] = server
+            fp.close()
+
+    def seek(self, dict_name, keys_path, save_path):
+        """查询"""
+        save_file = open(save_path, 'w')
+        with open(keys_path, "r", encoding="utf8") as fp:
+            lines = fp.readlines()
+            for line in lines:
+                json_line = json.loads(line)
+                values = [{} for i in range(len(json_line["keys"]))]
+                splited_keys, offset = self.split_keys(json_line)
+                for shard_id, keys in splited_keys.items():
+                    results = self.post(dict_name, shard_id, keys)
+                    for i, result in enumerate(results["values"]):
+                        values[offset[shard_id][i]] = result
+                cur_line_results = {}
+                cur_line_results["values"] = values
+                
+                json.dump(cur_line_results, save_file)
+                save_file.write("\n")
+                
+            fp.close()
+        save_file.close()
+
+    def split_keys(self, json_line):
+        """根据key值及分片数判断去哪一个分片上查询"""
+        keys_split = {}
+        offset = {}
+        i = 0
+        for key in json_line["keys"]:
+            shard_id = key % self.server_meta[dict_name]["shard"]
+            if shard_id not in keys_split:
+                keys_split[shard_id] = []
+            keys_split[shard_id].append(key)
+            if shard_id not in offset:
+                offset[shard_id] = []
+            offset[shard_id].append(i)
+            i += 1
+        return keys_split, offset
+
+    def post(self, dict_name, shard_id, keys):
+        """向分片server发送post请求"""
+        api = "http://%s:%s%s" % (self.server_meta[dict_name]["nodes"][shard_id]["ip"],
+            self.server_meta[dict_name]["nodes"][shard_id]["port"],
+            self.server_api)
+        data = {"keys": keys}
+        response = requests.post(api, json.dumps(data))
+        return response.json()
+
+
+if __name__ == '__main__':
+    if len(sys.argv) != 5:
+        print('please usage: python demo.py conf_path dict_name keys_path save_path')
+        exit(0)
+    conf_path = sys.argv[1]
+    dict_name = sys.argv[2]
+    keys_path = sys.argv[3]
+    save_path = sys.argv[4]
+    meta = Meta(conf_path)
+    meta.seek(dict_name, keys_path, save_path)
--- a/core/cube/cube-api/python-api/input.json
+++ b/core/cube/cube-api/python-api/input.json
+{"keys": [0,1,2,3,4,5,6,7]}
+{"keys": [1]}
\ No newline at end of file
--- a/core/cube/cube-api/python-api/ptyhon_api.md
+++ b/core/cube/cube-api/python-api/ptyhon_api.md
+# cube python api说明文档
+参考[大规模稀疏参数服务Cube的部署和使用](https://github.com/PaddlePaddle/Serving/blob/master/doc/DEPLOY.md#2-大规模稀疏参数服务cube的部署和使用)文档进行cube的部署。
+使用python api，可替代上述文档中第3节预测服务的部署、使用
+
+## 配置说明
+conf/cube.conf 以json格式，设置各个分片cube server的ip以及port，shard与分片数一致，示例：
+```json
+[{
+    "dict_name": "test",
+    "shard": 2,
+    "nodes": [{
+        "ip": "127.0.0.1",
+        "port": 8731
+    },{
+        "ip": "127.0.0.1",
+        "port": 8730
+    }]
+}]
+```
+
+## 数据格式
+```json
+{"keys": [0,1,2,3,4,5,6,7]}
+{"keys": [1]}
+```
+支持批量查询，每次查询一行
+
+## 使用
+```bash
+cd ./python-api
+python3 demo.py conf/cube.conf test input.json result.json
+```
\ No newline at end of file
--- a/core/cube/cube-api/python-api/result.json
+++ b/core/cube/cube-api/python-api/result.json
+{"values": [{"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}, {"status": 4294967295, "value": ""}]}
+{"values": [{"status": 4294967295, "value": ""}]}
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -266,10 +266,14 @@ class PredictorClient {
                    const std::vector<std::string>& float_feed_name,
                    const std::vector<std::vector<int>>& float_shape,
                    const std::vector<std::vector<int>>& float_lod_slot_batch,
-                    const std::vector<py::array_t<int64_t>>& int_feed,
-                    const std::vector<std::string>& int_feed_name,
-                    const std::vector<std::vector<int>>& int_shape,
-                    const std::vector<std::vector<int>>& int_lod_slot_batch,
+                    const std::vector<py::array_t<int32_t>> &int32_feed,
+                    const std::vector<std::string> &int32_feed_name,
+                    const std::vector<std::vector<int>> &int32_shape,
+                    const std::vector<std::vector<int>> &int32_lod_slot_batch,
+                    const std::vector<py::array_t<int64_t>> &int64_feed,
+                    const std::vector<std::string> &int64_feed_name,
+                    const std::vector<std::vector<int>> &int64_shape,
+                    const std::vector<std::vector<int>> &int64_lod_slot_batch,
                    const std::vector<std::string>& string_feed,
                    const std::vector<std::string>& string_feed_name,
                    const std::vector<std::vector<int>>& string_shape,

--- a/core/general-client/src/brpc_client.cpp
+++ b/core/general-client/src/brpc_client.cpp
@@ -184,10 +184,12 @@ int ServingBrpcClient::predict(const PredictorInputs& inputs,
      oss << "op" << i << "=" << t << "ms,";
    }
  }
-  int i = op_num - 1;
-  double server_cost = (res.profile_time(i * 2 + 1)
-               - res.profile_time(i * 2)) / 1000.0;
-  oss << "server_cost=" << server_cost << "ms.";
+  if (op_num > 0) {
+    int i = op_num - 1;
+    double server_cost = (res.profile_time(i * 2 + 1)
+                 - res.profile_time(i * 2)) / 1000.0;
+    oss << "server_cost=" << server_cost << "ms.";
+  }
  LOG(INFO) << oss.str();

  return 0;

--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -168,10 +168,14 @@ int PredictorClient::numpy_predict(
    const std::vector<std::string> &float_feed_name,
    const std::vector<std::vector<int>> &float_shape,
    const std::vector<std::vector<int>> &float_lod_slot_batch,
-    const std::vector<py::array_t<int64_t>> &int_feed,
-    const std::vector<std::string> &int_feed_name,
-    const std::vector<std::vector<int>> &int_shape,
-    const std::vector<std::vector<int>> &int_lod_slot_batch,
+    const std::vector<py::array_t<int32_t>> &int32_feed,
+    const std::vector<std::string> &int32_feed_name,
+    const std::vector<std::vector<int>> &int32_shape,
+    const std::vector<std::vector<int>> &int32_lod_slot_batch,
+    const std::vector<py::array_t<int64_t>> &int64_feed,
+    const std::vector<std::string> &int64_feed_name,
+    const std::vector<std::vector<int>> &int64_shape,
+    const std::vector<std::vector<int>> &int64_lod_slot_batch,
    const std::vector<std::string> &string_feed,
    const std::vector<std::string> &string_feed_name,
    const std::vector<std::vector<int>> &string_shape,
@@ -190,7 +194,8 @@ int PredictorClient::numpy_predict(
  predict_res_batch.set_variant_tag(variant_tag);
  VLOG(2) << "fetch general model predictor done.";
  VLOG(2) << "float feed name size: " << float_feed_name.size();
-  VLOG(2) << "int feed name size: " << int_feed_name.size();
+  VLOG(2) << "int feed name size: " << int32_feed_name.size();
+  VLOG(2) << "int feed name size: " << int64_feed_name.size();
  VLOG(2) << "string feed name size: " << string_feed_name.size();
  VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
  Request req;
@@ -207,7 +212,11 @@ int PredictorClient::numpy_predict(
    tensor_vec.push_back(req.add_tensor());
  }

-  for (auto &name : int_feed_name) {
+  for (auto &name : int32_feed_name) {
+    tensor_vec.push_back(req.add_tensor());
+  }
+
+  for (auto &name : int64_feed_name) {
    tensor_vec.push_back(req.add_tensor());
  }

@@ -247,34 +256,58 @@ int PredictorClient::numpy_predict(
  }

  vec_idx = 0;
-  for (auto &name : int_feed_name) {
+  for (auto &name : int32_feed_name) {
    int idx = _feed_name_to_idx[name];
    if (idx >= tensor_vec.size()) {
      LOG(ERROR) << "idx > tensor_vec.size()";
      return -1;
    }
    Tensor *tensor = tensor_vec[idx];
-    int nbytes = int_feed[vec_idx].nbytes();
-    void *rawdata_ptr = (void *)(int_feed[vec_idx].data(0));
-    int total_number = int_feed[vec_idx].size();
+    int nbytes = int32_feed[vec_idx].nbytes();
+    void *rawdata_ptr = (void *)(int32_feed[vec_idx].data(0));
+    int total_number = int32_feed[vec_idx].size();

-    for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
-      tensor->add_shape(int_shape[vec_idx][j]);
+    for (uint32_t j = 0; j < int32_shape[vec_idx].size(); ++j) {
+      tensor->add_shape(int32_shape[vec_idx][j]);
    }
-    for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
-      tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
+    for (uint32_t j = 0; j < int32_lod_slot_batch[vec_idx].size(); ++j) {
+      tensor->add_lod(int32_lod_slot_batch[vec_idx][j]);
    }
    tensor->set_elem_type(_type[idx]);
    tensor->set_name(_feed_name[idx]);
    tensor->set_alias_name(name);

-    if (_type[idx] == P_INT64) {
-      tensor->mutable_int64_data()->Resize(total_number, 0);
-      memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
-    } else {
-      tensor->mutable_int_data()->Resize(total_number, 0);
-      memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
+    tensor->mutable_int_data()->Resize(total_number, 0);
+    memcpy(tensor->mutable_int_data()->mutable_data(), rawdata_ptr, nbytes);
+    vec_idx++;
+  }
+
+
+  // Individual INT_64 feed data of int_input to tensor_content
+  vec_idx = 0;
+  for (auto &name : int64_feed_name) {
+    int idx = _feed_name_to_idx[name];
+    if (idx >= tensor_vec.size()) {
+      LOG(ERROR) << "idx > tensor_vec.size()";
+      return -1;
+    }
+    Tensor *tensor = tensor_vec[idx];
+    int nbytes = int64_feed[vec_idx].nbytes();
+    void *rawdata_ptr = (void *)(int64_feed[vec_idx].data(0));
+    int total_number = int64_feed[vec_idx].size();
+
+    for (uint32_t j = 0; j < int64_shape[vec_idx].size(); ++j) {
+      tensor->add_shape(int64_shape[vec_idx][j]);
+    }
+    for (uint32_t j = 0; j < int64_lod_slot_batch[vec_idx].size(); ++j) {
+      tensor->add_lod(int64_lod_slot_batch[vec_idx][j]);
    }
+    tensor->set_elem_type(_type[idx]);
+    tensor->set_name(_feed_name[idx]);
+    tensor->set_alias_name(name);
+
+    tensor->mutable_int64_data()->Resize(total_number, 0);
+    memcpy(tensor->mutable_int64_data()->mutable_data(), rawdata_ptr, nbytes);
    vec_idx++;
  }

@@ -449,10 +482,12 @@ int PredictorClient::numpy_predict(
      oss << "op" << i << "=" << t << "ms,";
    }
  }
-  int i = op_num - 1;
-  double server_cost = (res.profile_time(i * 2 + 1)
-               - res.profile_time(i * 2)) / 1000.0;
-  oss << "server_cost=" << server_cost << "ms.";
+  if (op_num > 0) {
+    int i = op_num - 1;
+    double server_cost = (res.profile_time(i * 2 + 1)
+                 - res.profile_time(i * 2)) / 1000.0;
+    oss << "server_cost=" << server_cost << "ms.";
+  }
  LOG(INFO) << oss.str();
  return 0;
 }

--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -117,10 +117,14 @@ PYBIND11_MODULE(serving_client, m) {
              const std::vector<std::string> &float_feed_name,
              const std::vector<std::vector<int>> &float_shape,
              const std::vector<std::vector<int>> &float_lod_slot_batch,
-              const std::vector<py::array_t<int64_t>> &int_feed,
-              const std::vector<std::string> &int_feed_name,
-              const std::vector<std::vector<int>> &int_shape,
-              const std::vector<std::vector<int>> &int_lod_slot_batch,
+              const std::vector<py::array_t<int32_t>> &int32_feed,
+              const std::vector<std::string> &int32_feed_name,
+              const std::vector<std::vector<int>> &int32_shape,
+              const std::vector<std::vector<int>> &int32_lod_slot_batch,
+              const std::vector<py::array_t<int64_t>> &int64_feed,
+              const std::vector<std::string> &int64_feed_name,
+              const std::vector<std::vector<int>> &int64_shape,
+              const std::vector<std::vector<int>> &int64_lod_slot_batch,
              const std::vector<std::string> &string_feed,
              const std::vector<std::string> &string_feed_name,
              const std::vector<std::vector<int>> &string_shape,
@@ -133,10 +137,14 @@ PYBIND11_MODULE(serving_client, m) {
                                       float_feed_name,
                                       float_shape,
                                       float_lod_slot_batch,
-                                       int_feed,
-                                       int_feed_name,
-                                       int_shape,
-                                       int_lod_slot_batch,
+                                       int32_feed,
+                                       int32_feed_name,
+                                       int32_shape,
+                                       int32_lod_slot_batch,
+                                       int64_feed,
+                                       int64_feed_name,
+                                       int64_shape,
+                                       int64_lod_slot_batch,
                                       string_feed,
                                       string_feed_name,
                                       string_shape,

--- a/python/examples/cascade_rcnn/get_data.sh
+++ b/python/examples/cascade_rcnn/get_data.sh
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_rcnn_r50_fpx_1x_serving.tar.gz
-tar xf cascade_rcnn_r50_fpx_1x_serving.tar.gz
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco_serving.tar.gz
+tar xf cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco_serving.tar.gz
--- a/python/examples/cascade_rcnn/test_client.py
+++ b/python/examples/cascade_rcnn/test_client.py
@@ -12,29 +12,32 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import sys
+import numpy as np
 from paddle_serving_client import Client
 from paddle_serving_app.reader import *
-import numpy as np
-
+import cv2
 preprocess = Sequential([
-    File2Image(), BGR2RGB(), Div(255.0),
-    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
-    Resize(800, 1333), Transpose((2, 0, 1)), PadStride(32)
+    File2Image(), BGR2RGB(), Resize(
+        (608, 608), interpolation=cv2.INTER_LINEAR), Div(255.0), Transpose(
+            (2, 0, 1))
 ])
-postprocess = RCNNPostprocess("label_list.txt", "output")
+
+postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608])
 client = Client()
+
 client.load_client_config("serving_client/serving_client_conf.prototxt")
 client.connect(['127.0.0.1:9292'])
+
 im = preprocess('000000570688.jpg')
 fetch_map = client.predict(
    feed={
        "image": im,
-        "im_info": np.array(list(im.shape[1:]) + [1.0]),
-        "im_shape": np.array(list(im.shape[1:]) + [1.0])
+        "im_shape": np.array(list(im.shape[1:])).reshape(-1),
+        "scale_factor": np.array([1.0, 1.0]).reshape(-1),
    },
-    fetch=["multiclass_nms_0.tmp_0"],
+    fetch=["save_infer_model/scale_0.tmp_1"],
    batch=False)
-fetch_map["image"] = '000000570688.jpg'
 print(fetch_map)
+fetch_map["image"] = '000000570688.jpg'
 postprocess(fetch_map)
-print(fetch_map)
--- a/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README.md
+++ b/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README.md
@@ -4,7 +4,7 @@

 ### Get The Faster RCNN HRNet Model
 ```
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/faster_rcnn_hrnetv2p_w18_1x.tar
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/faster_rcnn_hrnetv2p_w18_1x.tar.gz
 ```

 ### Start the service

--- a/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README_CN.md
+++ b/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README_CN.md
@@ -4,7 +4,7 @@

 ## 获得Faster RCNN HRNet模型
 ```
-wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/faster_rcnn_hrnetv2p_w18_1x.tar
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/faster_rcnn_hrnetv2p_w18_1x.tar.gz
 ```



--- a/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/test_client.py
+++ b/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/test_client.py
-from paddle_serving_client import Client
-from paddle_serving_app.reader import *
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 import sys
 import numpy as np
-
+from paddle_serving_client import Client
+from paddle_serving_app.reader import *
+import cv2
 preprocess = Sequential([
-    File2Image(), BGR2RGB(), Div(255.0),
-    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
-    Resize(640, 640), Transpose((2, 0, 1))
+    File2Image(), BGR2RGB(), Resize(
+        (608, 608), interpolation=cv2.INTER_LINEAR), Div(255.0), Transpose(
+            (2, 0, 1))
 ])

-postprocess = RCNNPostprocess("label_list.txt", "output")
+postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608])
 client = Client()

 client.load_client_config("serving_client/serving_client_conf.prototxt")
@@ -19,9 +33,11 @@ im = preprocess(sys.argv[1])
 fetch_map = client.predict(
    feed={
        "image": im,
-        "im_info": np.array(list(im.shape[1:]) + [1.0]),
-        "im_shape": np.array(list(im.shape[1:]) + [1.0])
+        "im_shape": np.array(list(im.shape[1:])).reshape(-1),
+        "scale_factor": np.array([1.0, 1.0]).reshape(-1),
    },
-    fetch=["multiclass_nms_0.tmp_0"],
+    fetch=["save_infer_model/scale_0.tmp_1"],
    batch=False)
 print(fetch_map)
+fetch_map["image"] = sys.argv[1]
+postprocess(fetch_map)
--- a/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/test_client.py
+++ b/python/examples/detection/ppyolo_r50vd_dcn_1x_coco/test_client.py
@@ -12,18 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from paddle_serving_client import Client
-from paddle_serving_app.reader import *
 import sys
 import numpy as np
-
+from paddle_serving_client import Client
+from paddle_serving_app.reader import *
+import cv2
 preprocess = Sequential([
-    File2Image(), BGR2RGB(), Div(255.0),
-    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
-    Resize((608, 608)), Transpose((2, 0, 1))
+    File2Image(), BGR2RGB(), Resize(
+        (608, 608), interpolation=cv2.INTER_LINEAR), Div(255.0), Transpose(
+            (2, 0, 1))
 ])

-postprocess = RCNNPostprocess("label_list.txt", "output")
+postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608])
 client = Client()

 client.load_client_config("serving_client/serving_client_conf.prototxt")

--- a/python/examples/detection/yolov3_darknet53_270e_coco/test_client.py
+++ b/python/examples/detection/yolov3_darknet53_270e_coco/test_client.py
@@ -12,18 +12,18 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from paddle_serving_client import Client
-from paddle_serving_app.reader import *
 import sys
 import numpy as np
-
+from paddle_serving_client import Client
+from paddle_serving_app.reader import *
+import cv2
 preprocess = Sequential([
-    File2Image(), BGR2RGB(), Div(255.0),
-    Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
-    Resize((608, 608)), Transpose((2, 0, 1))
+    File2Image(), BGR2RGB(), Resize(
+        (608, 608), interpolation=cv2.INTER_LINEAR), Div(255.0), Transpose(
+            (2, 0, 1))
 ])

-postprocess = RCNNPostprocess("label_list.txt", "output")
+postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608])
 client = Client()

 client.load_client_config("serving_client/serving_client_conf.prototxt")

--- a/python/paddle_serving_client/client.py
+++ b/python/paddle_serving_client/client.py
@@ -335,10 +335,15 @@ class Client(object):
        if len(feed_batch) != 1:
            raise ValueError("len of feed_batch can only be 1.")

-        int_slot = []
-        int_feed_names = []
-        int_shape = []
-        int_lod_slot_batch = []
+        int32_slot = []
+        int32_feed_names = []
+        int32_shape = []
+        int32_lod_slot_batch = []
+
+        int64_slot = []
+        int64_feed_names = []
+        int64_shape = []
+        int64_lod_slot_batch = []

        float_slot = []
        float_feed_names = []
@@ -364,27 +369,39 @@ class Client(object):

            self.shape_check(feed_dict, key)
            if self.feed_types_[key] in int_type:
-                int_feed_names.append(key)
                shape_lst = []
                if batch == False:
                    feed_dict[key] = np.expand_dims(feed_dict[key], 0).repeat(
                        1, axis=0)
-                if isinstance(feed_dict[key], np.ndarray):
-                    shape_lst.extend(list(feed_dict[key].shape))
-                    int_shape.append(shape_lst)
-                else:
-                    int_shape.append(self.feed_shapes_[key])
-                if "{}.lod".format(key) in feed_dict:
-                    int_lod_slot_batch.append(feed_dict["{}.lod".format(key)])
+                # verify different input int_type
+                if(self.feed_types_[key] == int64_type):
+                    int64_feed_names.append(key)
+                    if isinstance(feed_dict[key], np.ndarray):
+                        shape_lst.extend(list(feed_dict[key].shape))
+                        int64_shape.append(shape_lst)
+                        self.has_numpy_input = True
+                    else:
+                        int64_shape.append(self.feed_shapes_[key])
+                        self.all_numpy_input = False
+                    if "{}.lod".format(key) in feed_dict:
+                        int64_lod_slot_batch.append(feed_dict["{}.lod".format(key)])
+                    else:
+                        int64_lod_slot_batch.append([])
+                    int64_slot.append(np.ascontiguousarray(feed_dict[key]))
                else:
-                    int_lod_slot_batch.append([])
-
-                if isinstance(feed_dict[key], np.ndarray):
-                    int_slot.append(np.ascontiguousarray(feed_dict[key]))
-                    self.has_numpy_input = True
-                else:
-                    int_slot.append(np.ascontiguousarray(feed_dict[key]))
-                    self.all_numpy_input = False
+                    int32_feed_names.append(key)
+                    if isinstance(feed_dict[key], np.ndarray):
+                        shape_lst.extend(list(feed_dict[key].shape))
+                        int32_shape.append(shape_lst)
+                        self.has_numpy_input = True
+                    else:
+                        int32_shape.append(self.feed_shapes_[key])
+                        self.all_numpy_input = False
+                    if "{}.lod".format(key) in feed_dict:
+                        int32_lod_slot_batch.append(feed_dict["{}.lod".format(key)])
+                    else:
+                        int32_lod_slot_batch.append([])
+                    int32_slot.append(np.ascontiguousarray(feed_dict[key]))

            elif self.feed_types_[key] in float_type:
                float_feed_names.append(key)
@@ -430,7 +447,8 @@ class Client(object):
        if self.all_numpy_input:
            res = self.client_handle_.numpy_predict(
                float_slot, float_feed_names, float_shape, float_lod_slot_batch,
-                int_slot, int_feed_names, int_shape, int_lod_slot_batch,
+                int32_slot, int32_feed_names, int32_shape, int32_lod_slot_batch,
+                int64_slot, int64_feed_names, int64_shape, int64_lod_slot_batch,
                string_slot, string_feed_names, string_shape,
                string_lod_slot_batch, fetch_names, result_batch_handle,
                self.pid, log_id)

--- a/python/paddle_serving_server/server.py
+++ b/python/paddle_serving_server/server.py
@@ -564,7 +564,7 @@ class Server(object):
                    "-num_threads {} " \
                    "-port {} " \
                    "-precision {} " \
-                    "-use_calib {} " \
+                    "-use_calib={} " \
                    "-reload_interval_s {} " \
                    "-resource_path {} " \
                    "-resource_file {} " \