Merge branch 'develop' into fix_parallel

fb0cabc7 · Thomas Young · GitHub · 6cc22b73 · e8133c26 · fb0cabc7
12 changed files
--- a/core/general-server/op/general_detection_op.cpp
+++ b/core/general-server/op/general_detection_op.cpp
@@ -191,42 +191,64 @@ int GeneralDetectionOp::inference() {
    boxes = post_processor_.FilterTagDetRes(boxes, ratio_h, ratio_w, srcimg);
-    for (int i = boxes.size() - 1; i >= 0; i--) {
+    float max_wh_ratio = 0.0f;
-      crop_img = GetRotateCropImage(img, boxes[i]);
+    std::vector<cv::Mat> crop_imgs;
+    std::vector<cv::Mat> resize_imgs;
-      float wh_ratio = float(crop_img.cols) / float(crop_img.rows);
+    int max_resize_w = 0;
+    int max_resize_h = 0;
+    int box_num = boxes.size();
+    std::vector<std::vector<float>> output_rec;
+    for (int i = 0; i < box_num; ++i) {
+      cv::Mat line_img = GetRotateCropImage(img, boxes[i]);
+      float wh_ratio = float(line_img.cols) / float(line_img.rows);
+      max_wh_ratio = max_wh_ratio > wh_ratio ? max_wh_ratio : wh_ratio;
+      crop_imgs.push_back(line_img);
+    }
+    for (int i = 0; i < box_num; ++i) {
+      cv::Mat resize_img;
+      crop_img = crop_imgs[i];
      this->resize_op_rec.Run(
-          crop_img, resize_img_rec, wh_ratio, this->use_tensorrt_);
+          crop_img, resize_img, max_wh_ratio, this->use_tensorrt_);
      this->normalize_op_.Run(
-          &resize_img_rec, this->mean_rec, this->scale_rec, this->is_scale_);
+          &resize_img, this->mean_rec, this->scale_rec, this->is_scale_);
-      std::vector<float> output_rec(
+      max_resize_w = std::max(max_resize_w, resize_img.cols);
-          1 * 3 * resize_img_rec.rows * resize_img_rec.cols, 0.0f);
+      max_resize_h = std::max(max_resize_h, resize_img.rows);
+      resize_imgs.push_back(resize_img);
-      this->permute_op_.Run(&resize_img_rec, output_rec.data());
+    }
+    int buf_size = 3 * max_resize_h * max_resize_w;
-      // Inference.
+    output_rec = std::vector<std::vector<float>>(box_num,
-      output_shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols};
+                     std::vector<float>(buf_size, 0.0f));
-      out_num = std::accumulate(
+    for (int i = 0; i < box_num; ++i) {
-          output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
+      resize_img_rec = resize_imgs[i];
-      databuf_size_out = out_num * sizeof(float);
-      databuf_data_out = MempoolWrapper::instance().malloc(databuf_size_out);
+      this->permute_op_.Run(&resize_img_rec, output_rec[i].data());
-      if (!databuf_data_out) {
+    }
-        LOG(ERROR) << "Malloc failed, size: " << databuf_size_out;
-        return -1;
+    // Inference.
-      }
+    output_shape = {box_num, 3, max_resize_h, max_resize_w};
-      memcpy(databuf_data_out, output_rec.data(), databuf_size_out);
+    out_num = std::accumulate(
-      databuf_char_out = reinterpret_cast<char*>(databuf_data_out);
+        output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
-      paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out);
+    databuf_size_out = out_num * sizeof(float);
-      paddle::PaddleTensor tensor_out;
+    databuf_data_out = MempoolWrapper::instance().malloc(databuf_size_out);
-      tensor_out.name = "image";
+    if (!databuf_data_out) {
-      tensor_out.dtype = paddle::PaddleDType::FLOAT32;
+      LOG(ERROR) << "Malloc failed, size: " << databuf_size_out;
-      tensor_out.shape = {1, 3, resize_img_rec.rows, resize_img_rec.cols};
+      return -1;
-      tensor_out.data = paddleBuf;
+    }
-      out->push_back(tensor_out);
+    int offset = buf_size * sizeof(float);
+    for (int i = 0; i < box_num; ++i) {
+      memcpy(databuf_data_out + i * offset, output_rec[i].data(), offset);
    }
+    databuf_char_out = reinterpret_cast<char*>(databuf_data_out);
+    paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out);
+    paddle::PaddleTensor tensor_out;
+    tensor_out.name = "image";
+    tensor_out.dtype = paddle::PaddleDType::FLOAT32;
+    tensor_out.shape = output_shape;
+    tensor_out.data = paddleBuf;
+    out->push_back(tensor_out);
  }
  out->erase(out->begin(), out->begin() + infer_outnum);

--- a/core/general-server/op/general_detection_op.h
+++ b/core/general-server/op/general_detection_op.h
@@ -63,7 +63,7 @@ class GeneralDetectionOp
    double det_db_thresh_ = 0.3;
    double det_db_box_thresh_ = 0.5;
-    double det_db_unclip_ratio_ = 2.0;
+    double det_db_unclip_ratio_ = 1.5;
    std::vector<float> mean_det = {0.485f, 0.456f, 0.406f};
    std::vector<float> scale_det = {1 / 0.229f, 1 / 0.224f, 1 / 0.225f};

--- a/core/general-server/op/general_dist_kv_infer_op.cpp
+++ b/core/general-server/op/general_dist_kv_infer_op.cpp
@@ -186,9 +186,9 @@ int GeneralDistKVInferOp::inference() {
  if (values.size() != keys.size() || values[0].buff.size() == 0) {
    LOG(ERROR) << "cube value return null";
  }
-  // size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
+   size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
  // size_t EMBEDDING_SIZE = (values[0].buff.size() - 10) / sizeof(float);
-  size_t EMBEDDING_SIZE = 9;
+  //size_t EMBEDDING_SIZE = 9;
  TensorVector sparse_out;
  sparse_out.resize(sparse_count);
  TensorVector dense_out;
@@ -241,7 +241,7 @@ int GeneralDistKVInferOp::inference() {
      // The data generated by pslib has 10 bytes of information to be filtered
      // out
-      memcpy(data_ptr, cur_val->buff.data() + 10, cur_val->buff.size() - 10);
+      memcpy(data_ptr, cur_val->buff.data(), cur_val->buff.size() );
      // VLOG(3) <<  keys[cube_val_idx] << ":" << data_ptr[0] << ", " <<
      // data_ptr[1] << ", " <<data_ptr[2] << ", " <<data_ptr[3] << ", "
      // <<data_ptr[4] << ", " <<data_ptr[5] << ", " <<data_ptr[6] << ", "

--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -277,7 +277,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
    LOG(WARNING) << "Loading cube cache[" << next_idx << "] ...";
    std::string model_path = conf.model_dir();
    if (access(model_path.c_str(), F_OK) == 0) {
-      std::string cube_cache_path = model_path + "/" + "cube_cache";
+      std::string cube_cache_path = model_path + "/cube_cache";
      int reload_cache_ret = md->caches[next_idx]->reload_data(cube_cache_path);
      LOG(WARNING) << "Loading cube cache[" << next_idx << "] done.";
    } else {
@@ -437,7 +437,7 @@ class CloneDBReloadableInferEngine
      // create caches
      std::string model_path = conf.model_dir();
      if (access(model_path.c_str(), F_OK) == 0) {
-        std::string cube_cache_path = model_path + "cube_cache";
+        std::string cube_cache_path = model_path + "/cube_cache";
        int reload_cache_ret =
            md->caches[next_idx]->reload_data(cube_cache_path);
        LOG(WARNING) << "create cube cache[" << next_idx << "] done.";

--- a/core/predictor/tools/ocrtools/preprocess_op.cpp
+++ b/core/predictor/tools/ocrtools/preprocess_op.cpp
@@ -82,14 +82,14 @@ void ResizeImgType0::Run(const cv::Mat &img, cv::Mat &resize_img,
  else if (resize_h / 32 < 1 + 1e-5)
    resize_h = 32;
  else
-    resize_h = (resize_h / 32) * 32;
+    resize_h = (resize_h / 32 - 1) * 32;
  if (resize_w % 32 == 0)
    resize_w = resize_w;
  else if (resize_w / 32 < 1 + 1e-5)
    resize_w = 32;
  else
-    resize_w = (resize_w / 32) * 32;
+    resize_w = (resize_w / 32 - 1) * 32;
  if (!use_tensorrt) {
    cv::resize(img, resize_img, cv::Size(resize_w, resize_h));
    ratio_h = float(resize_h) / float(h);

--- a/python/examples/criteo_ctr_with_cube/cube/conf/cube.conf
+++ b/python/examples/criteo_ctr_with_cube/cube/conf/cube.conf
+[{
+    "dict_name": "test_dict",
+    "shard": 1,
+    "dup": 1,
+    "timeout": 200,
+    "retry": 3,
+    "backup_request": 100,
+    "type": "ipport_list",
+    "load_balancer": "rr",
+    "nodes": [{
+        "ipport_list": "list://127.0.0.1:8027"
+    }]
+}]
--- a/python/examples/criteo_ctr_with_cube/cube/conf/gflags.conf
+++ b/python/examples/criteo_ctr_with_cube/cube/conf/gflags.conf
+--port=8027
+--dict_split=1
+--in_mem=true
+--log_dir=./log/
--- a/python/examples/criteo_ctr_with_cube/cube/keys
+++ b/python/examples/criteo_ctr_with_cube/cube/keys
+1
+2
+3
+4
+5
+6
+7
+8
+9
+10
--- a/python/examples/criteo_ctr_with_cube/test_client.py
+++ b/python/examples/criteo_ctr_with_cube/test_client.py
@@ -16,7 +16,7 @@
 from paddle_serving_client import Client
 import sys
 import os
-import criteo as criteo
+import criteo_reader as criteo
 import time
 from paddle_serving_client.metric import auc
 import numpy as np
@@ -35,22 +35,23 @@ reader = dataset.infer_reader(test_filelists, batch, buf_size)
 label_list = []
 prob_list = []
 start = time.time()
-for ei in range(10000):
+for ei in range(100):
    if py_version == 2:
        data = reader().next()
    else:
        data = reader().__next__()
    feed_dict = {}
-    feed_dict['dense_input'] = data[0][0]
+    feed_dict['dense_input'] = np.array(data[0][0]).reshape(1, len(data[0][0]))
    for i in range(1, 27):
-        feed_dict["embedding_{}.tmp_0".format(i - 1)] = np.array(data[0][i]).reshape(-1)
+        feed_dict["embedding_{}.tmp_0".format(i - 1)] = np.array(data[0][i]).reshape(len(data[0][i]))
        feed_dict["embedding_{}.tmp_0.lod".format(i - 1)] = [0, len(data[0][i])]
-    fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
+    fetch_map = client.predict(feed=feed_dict, fetch=["prob"],batch=True)
    print(fetch_map)
    prob_list.append(fetch_map['prob'][0][1])
    label_list.append(data[0][-1][0])
-print(auc(label_list, prob_list))
 end = time.time()
 print(end - start)
--- a/python/examples/pipeline/PaddleDetection/faster_rcnn/web_service.py
+++ b/python/examples/pipeline/PaddleDetection/faster_rcnn/web_service.py
@@ -25,7 +25,7 @@ class FasterRCNNOp(Op):
        self.img_preprocess = Sequential([
            BGR2RGB(), Div(255.0),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
-            Resize((640, 640)), Transpose((2, 0, 1))
+            Resize(640, 640), Transpose((2, 0, 1))
        ])
        self.img_postprocess = RCNNPostprocess("label_list.txt", "output")

--- a/python/requirements.txt
+++ b/python/requirements.txt
@@ -7,7 +7,7 @@ protobuf>=3.12.2
 grpcio-tools>=1.28.1
 grpcio>=1.28.1
 func-timeout>=4.3.5
-pyyaml>=1.3.0
+pyyaml>=1.3.0, <6.0
 flask>=1.1.2
 click==7.1.2
 itsdangerous==1.1.0

--- a/python/requirements_mac.txt
+++ b/python/requirements_mac.txt
@@ -6,7 +6,7 @@ google>=2.0.3
 opencv-python==4.2.0.32
 protobuf>=3.12.2
 func-timeout>=4.3.5
-pyyaml>=1.3.0
+pyyaml>=1.3.0, <6.0
 flask>=1.1.2
 click==7.1.2
 itsdangerous==1.1.0