Tipc add serving cpp infer test shell (#6203)

* [TIPC] update serving/python, add keypoint * [TIPC] update serving/cpp * [TIPC] add serving_cpp_infer test shell * [TIPC] remove gpu infer in fleet train, test=document_fix

Tipc add serving cpp infer test shell (#6203)
* [TIPC] update serving/python, add keypoint * [TIPC] update serving/cpp * [TIPC] add serving_cpp_infer test shell * [TIPC] remove gpu infer in fleet train, test=document_fix
ce38daed · shangliang Xu · GitHub · 145d1556 · ce38daed · ce38daed
28 changed file
--- a/deploy/serving/cpp/build_server.sh
+++ b/deploy/serving/cpp/build_server.sh
@@ -47,8 +47,8 @@ export CUDA_CUDART_LIBRARY='/usr/local/cuda/lib64/'
 export TENSORRT_LIBRARY_PATH='/usr/local/TensorRT6-cuda10.1-cudnn7/targets/x86_64-linux-gnu/'

 # cp 自定义OP代码
-\cp ../deploy/serving/cpp/preprocess/ppyoloe_op.* ${Serving_repo_path}/core/general-server/op
-\cp ../deploy/serving/cpp/preprocess/yolov3_op.* ${Serving_repo_path}/core/general-server/op
+\cp ../deploy/serving/cpp/preprocess/*.h ${Serving_repo_path}/core/general-server/op
+\cp ../deploy/serving/cpp/preprocess/*.cpp ${Serving_repo_path}/core/general-server/op

 # 编译Server, export SERVING_BIN
 mkdir server-build-gpu-opencv && cd server-build-gpu-opencv

--- a/deploy/serving/cpp/preprocess/mask_rcnn_r50_fpn_1x_coco.cpp
+++ b/deploy/serving/cpp/preprocess/mask_rcnn_r50_fpn_1x_coco.cpp
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "core/general-server/op/mask_rcnn_r50_fpn_1x_coco.h"
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
+#include "core/util/include/timer.h"
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::Timer;
+using baidu::paddle_serving::predictor::InferManager;
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::Response;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+
+// Handles one request: decodes the base64 image provided by the single
+// predecessor op, preprocesses it, and runs the engine with the "im_shape",
+// "image" and "scale_factor" input tensors. The engine output is written to
+// this op's output blob.
+//
+// Returns 0 on success, -1 on failure (the cause is logged).
+int mask_rcnn_r50_fpn_1x_coco::inference() {
+  VLOG(2) << "Going to run inference";
+  const std::vector<std::string> pre_node_names = pre_names();
+  if (pre_node_names.size() != 1) {
+    LOG(ERROR) << "This op(" << op_name()
+               << ") can only have one predecessor op, but received "
+               << pre_node_names.size();
+    return -1;
+  }
+  const std::string pre_name = pre_node_names[0];
+
+  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
+  if (!input_blob) {
+    LOG(ERROR) << "input_blob is nullptr,error";
+    return -1;
+  }
+  uint64_t log_id = input_blob->GetLogId();
+  VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
+
+  GeneralBlob *output_blob = mutable_data<GeneralBlob>();
+  if (!output_blob) {
+    LOG(ERROR) << "output_blob is nullptr,error";
+    return -1;
+  }
+  output_blob->SetLogId(log_id);
+
+  const TensorVector *in = &input_blob->tensor_vector;
+  TensorVector *out = &output_blob->tensor_vector;
+  if (in->empty()) {
+    LOG(ERROR) << "(logid=" << log_id << ") Input tensor vector is empty";
+    return -1;
+  }
+
+  int batch_size = input_blob->_batch_size;
+  output_blob->_batch_size = batch_size;
+  VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
+
+  Timer timeline;
+  int64_t start = timeline.TimeStampUS();
+  timeline.Start();
+
+  // only support string type
+  char *total_input_ptr = static_cast<char *>(in->at(0).data.data());
+  if (!total_input_ptr) {
+    LOG(ERROR) << "(logid=" << log_id << ") Input tensor has no data";
+    return -1;
+  }
+  std::string base64str = total_input_ptr;
+
+  cv::Mat img = Base2Mat(base64str);
+  if (img.empty()) {
+    // cvtColor and resize cannot operate on an empty matrix.
+    LOG(ERROR) << "(logid=" << log_id << ") Failed to decode input image";
+    return -1;
+  }
+  cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
+
+  // preprocess
+  // preprocess_det() overwrites its shape and scale arguments. Hand it
+  // per-request copies so the configured target size stored in the members is
+  // not replaced by the size of whatever image was processed last.
+  int resized_h = im_shape_h;
+  int resized_w = im_shape_w;
+  float factor_h = scale_factor_h;
+  float factor_w = scale_factor_w;
+
+  // preprocess_det() pads the resized image up to a multiple of 32 per
+  // dimension before writing it out; this stride must match the one it uses.
+  const int pad_stride = 32;
+  const auto pad_to_stride = [pad_stride](int v) {
+    return (v + pad_stride - 1) / pad_stride * pad_stride;
+  };
+
+  // The aspect-preserving resize keeps the long/short sides within the long/
+  // short target sides, so the padded target bounds the padded result in
+  // either orientation.
+  const size_t max_plane = static_cast<size_t>(pad_to_stride(resized_h)) *
+                           static_cast<size_t>(pad_to_stride(resized_w));
+  std::vector<float> input(3 * max_plane, 0.0f);
+  preprocess_det(img, input.data(), factor_h, factor_w, resized_h, resized_w,
+                 mean_, scale_, is_scale_);
+
+  // resized_h / resized_w now hold the resized (unpadded) size; the pixel data
+  // in `input` is laid out with the padded size.
+  const int padded_h = pad_to_stride(resized_h);
+  const int padded_w = pad_to_stride(resized_w);
+  const size_t image_count =
+      3 * static_cast<size_t>(padded_h) * static_cast<size_t>(padded_w);
+  if (image_count > input.size()) {
+    // Defensive: never copy more than the staging buffer holds.
+    LOG(ERROR) << "(logid=" << log_id << ") Unexpected preprocessed size "
+               << padded_h << "x" << padded_w;
+    return -1;
+  }
+
+  // create real_in
+  // A local vector is enough: the tensors only reference mempool buffers, and
+  // nothing in this function hands ownership of the vector to anyone else.
+  TensorVector real_in;
+
+  // Copies `count` floats into a mempool buffer and appends a FLOAT32 tensor
+  // referencing that buffer to real_in. Returns false if allocation fails.
+  const auto append_float_tensor =
+      [&](const char *name, const float *values, size_t count,
+          const std::vector<int> &shape) -> bool {
+    const size_t databuf_size = count * sizeof(float);
+    void *databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+    if (!databuf_data) {
+      LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+      return false;
+    }
+    memcpy(databuf_data, values, databuf_size);
+    char *databuf_char = reinterpret_cast<char *>(databuf_data);
+    paddle::PaddleBuf paddle_buf(databuf_char, databuf_size);
+    paddle::PaddleTensor tensor;
+    tensor.name = name;
+    tensor.dtype = paddle::PaddleDType::FLOAT32;
+    tensor.shape = shape;
+    tensor.lod = in->at(0).lod;
+    tensor.data = paddle_buf;
+    real_in.push_back(tensor);
+    return true;
+  };
+
+  // im_shape: size after resize, before padding
+  const std::vector<float> im_shape{static_cast<float>(resized_h),
+                                    static_cast<float>(resized_w)};
+  if (!append_float_tensor("im_shape", im_shape.data(), im_shape.size(),
+                           {1, 2})) {
+    return -1;
+  }
+
+  // image: padded size, matching the layout written by preprocess_det()
+  if (!append_float_tensor("image", input.data(), image_count,
+                           {1, 3, padded_h, padded_w})) {
+    return -1;
+  }
+
+  // scale_factor
+  const std::vector<float> scale_factor{factor_h, factor_w};
+  if (!append_float_tensor("scale_factor", scale_factor.data(),
+                           scale_factor.size(), {1, 2})) {
+    return -1;
+  }
+
+  if (InferManager::instance().infer(engine_name().c_str(), &real_in, out,
+                                     batch_size)) {
+    LOG(ERROR) << "(logid=" << log_id
+               << ") Failed do infer in fluid model: " << engine_name().c_str();
+    return -1;
+  }
+
+  int64_t end = timeline.TimeStampUS();
+  CopyBlobInfo(input_blob, output_blob);
+  AddBlobInfo(output_blob, start);
+  AddBlobInfo(output_blob, end);
+  return 0;
+}
+
+// Prepares `img` for the model and writes it to `data` as float planes, one
+// channel after another:
+//   1. aspect-preserving resize so the image fits the target size passed in
+//      through im_shape_h / im_shape_w,
+//   2. optional 1/255 scaling followed by per-channel (x - mean) / scale,
+//   3. zero padding on the bottom/right up to a multiple of 32.
+//
+// Outputs: scale_factor_h / scale_factor_w receive the resize ratio, and
+// im_shape_h / im_shape_w are overwritten with the resized (pre-padding) size.
+// `data` must have room for channels * padded_rows * padded_cols floats.
+void mask_rcnn_r50_fpn_1x_coco::preprocess_det(const cv::Mat &img, float *data,
+                                               float &scale_factor_h,
+                                               float &scale_factor_w,
+                                               int &im_shape_h, int &im_shape_w,
+                                               const std::vector<float> &mean,
+                                               const std::vector<float> &scale,
+                                               const bool is_scale) {
+  // keep_ratio: pick the largest ratio for which neither side exceeds its
+  // target side.
+  const int src_long = std::max(img.rows, img.cols);
+  const int src_short = std::min(img.rows, img.cols);
+  const int dst_long = std::max(im_shape_h, im_shape_w);
+  const int dst_short = std::min(im_shape_h, im_shape_w);
+  const float ratio_short =
+      static_cast<float>(dst_short) / static_cast<float>(src_short);
+  const float ratio_long =
+      static_cast<float>(dst_long) / static_cast<float>(src_long);
+  const float scale_ratio = std::min(ratio_short, ratio_long);
+
+  // scale_factor
+  scale_factor_h = scale_ratio;
+  scale_factor_w = scale_ratio;
+
+  // Resize (bicubic interpolation)
+  cv::Mat resized;
+  cv::resize(img, resized, cv::Size(), scale_ratio, scale_ratio,
+             cv::INTER_CUBIC);
+  im_shape_h = resized.rows;
+  im_shape_w = resized.cols;
+
+  // Normalize
+  const double e = is_scale ? 1.0 / 255.0 : 1.0;
+  cv::Mat img_fp;
+  resized.convertTo(img_fp, CV_32FC3, e);
+  for (int row = 0; row < im_shape_h; ++row) {
+    cv::Vec3f *pixels = img_fp.ptr<cv::Vec3f>(row);
+    for (int col = 0; col < im_shape_w; ++col) {
+      cv::Vec3f &px = pixels[col];
+      for (int c = 0; c < 3; ++c) {
+        px[c] = (px[c] - mean[c]) / scale[c];
+      }
+    }
+  }
+
+  // PadStride: round each dimension up to the next multiple of the stride.
+  const int stride = 32;
+  const int padded_h = (im_shape_h + stride - 1) / stride * stride;
+  const int padded_w = (im_shape_w + stride - 1) / stride * stride;
+  cv::Mat img_pad;
+  cv::copyMakeBorder(img_fp, img_pad, 0, padded_h - im_shape_h, 0,
+                     padded_w - im_shape_w, cv::BORDER_CONSTANT,
+                     cv::Scalar(0));
+
+  // Permute: interleaved pixels -> one contiguous plane per channel
+  const int plane_rows = img_pad.rows;
+  const int plane_cols = img_pad.cols;
+  const int channel_count = img_pad.channels();
+  for (int c = 0; c < channel_count; ++c) {
+    cv::Mat plane(plane_rows, plane_cols, CV_32FC1,
+                  data + c * plane_rows * plane_cols);
+    cv::extractChannel(img_pad, plane, c);
+  }
+}
+
+// Decodes a base64-encoded image file into a color cv::Mat
+// (cv::IMREAD_COLOR).
+//
+// Returns an empty matrix when the payload decodes to zero bytes; callers
+// should check `.empty()` before using the result.
+cv::Mat mask_rcnn_r50_fpn_1x_coco::Base2Mat(std::string &base64_data) {
+  const std::string s_mat = base64Decode(
+      base64_data.data(), static_cast<int>(base64_data.size()));
+  if (s_mat.empty()) {
+    // Nothing to decode; do not hand cv::imdecode an empty buffer.
+    return cv::Mat();
+  }
+  std::vector<char> base64_img(s_mat.begin(), s_mat.end());
+  return cv::imdecode(base64_img, cv::IMREAD_COLOR); // CV_LOAD_IMAGE_COLOR
+}
+
+// Decodes `DataByte` bytes of base64 text starting at `Data` and returns the
+// decoded bytes.
+//
+// CR and LF between 4-character groups are skipped. Decoding stops at the
+// first '=' padding character or when the input is exhausted; a trailing
+// group with fewer than two characters is dropped. Characters outside the
+// base64 alphabet are not rejected: they decode as zero bits. The function
+// never reads beyond Data + DataByte.
+std::string mask_rcnn_r50_fpn_1x_coco::base64Decode(const char *Data,
+                                                    int DataByte) {
+  static const char DecodeTable[] = {
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,
+      62, // '+'
+      0,  0,  0,
+      63,                                     // '/'
+      52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // '0'-'9'
+      0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+      10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'Z'
+      0,  0,  0,  0,  0,  0,  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
+      37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 'a'-'z'
+  };
+  const size_t table_size = sizeof(DecodeTable) / sizeof(DecodeTable[0]);
+
+  // Table lookup that is safe for any byte value: `char` may be signed, and
+  // the table only covers codes up to 'z'.
+  const auto sextet = [table_size](char c) -> int {
+    const unsigned char index = static_cast<unsigned char>(c);
+    return index < table_size ? DecodeTable[index] : 0;
+  };
+
+  std::string strDecode;
+  if (!Data || DataByte <= 0) {
+    return strDecode;
+  }
+
+  const char *const end = Data + DataByte;
+  while (Data < end) {
+    if (*Data == '\r' || *Data == '\n') {
+      // line break between groups: skip
+      ++Data;
+      continue;
+    }
+    if (*Data == '=' || end - Data < 2) {
+      // padding reached, or too few characters left to form a byte
+      break;
+    }
+
+    int nValue = sextet(*Data++) << 18;
+    nValue += sextet(*Data++) << 12;
+    strDecode += static_cast<char>((nValue & 0x00FF0000) >> 16);
+    if (Data == end || *Data == '=') {
+      break;
+    }
+
+    nValue += sextet(*Data++) << 6;
+    strDecode += static_cast<char>((nValue & 0x0000FF00) >> 8);
+    if (Data == end || *Data == '=') {
+      break;
+    }
+
+    nValue += sextet(*Data++);
+    strDecode += static_cast<char>(nValue & 0x000000FF);
+  }
+  return strDecode;
+}
+
+DEFINE_OP(mask_rcnn_r50_fpn_1x_coco);
+
+} // namespace serving
+} // namespace paddle_serving
+} // namespace baidu
--- a/deploy/serving/cpp/preprocess/mask_rcnn_r50_fpn_1x_coco.h
+++ b/deploy/serving/cpp/preprocess/mask_rcnn_r50_fpn_1x_coco.h
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "core/general-server/general_model_service.pb.h"
+#include "core/general-server/op/general_infer_helper.h"
+#include "paddle_inference_api.h" // NOLINT
+#include <string>
+#include <vector>
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/imgproc.hpp"
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <ostream>
+#include <vector>
+
+#include <cstring>
+#include <fstream>
+#include <numeric>
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+// Serving op that decodes a base64 image, preprocesses it and runs the
+// inference engine with the resulting tensors.
+class mask_rcnn_r50_fpn_1x_coco
+    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
+public:
+  using TensorVector = std::vector<paddle::PaddleTensor>;
+
+  DECLARE_OP(mask_rcnn_r50_fpn_1x_coco);
+
+  // Processes one request; returns 0 on success and -1 on failure.
+  int inference();
+
+private:
+  // ---- preprocessing configuration ----
+  // Per-channel normalization: (pixel - mean_) / scale_.
+  std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
+  std::vector<float> scale_ = {0.229f, 0.224f, 0.225f};
+  // When true, pixel values are multiplied by 1/255 before normalization.
+  bool is_scale_ = true;
+  // Target size handed to preprocess_det().
+  int im_shape_h = 1333;
+  int im_shape_w = 800;
+  // Resize ratios reported by preprocess_det().
+  float scale_factor_h = 1.0f;
+  float scale_factor_w = 1.0f;
+
+  // Resizes, normalizes and pads `img`, writing the float planes to `data`.
+  // The scale-factor and im_shape arguments are non-const references that
+  // the function writes to.
+  void preprocess_det(const cv::Mat &img, float *data, float &scale_factor_h,
+                      float &scale_factor_w, int &im_shape_h, int &im_shape_w,
+                      const std::vector<float> &mean,
+                      const std::vector<float> &scale, const bool is_scale);
+
+  // ---- image loading ----
+  // Turns a base64-encoded image into a cv::Mat.
+  cv::Mat Base2Mat(std::string &base64_data);
+  // Decodes `DataByte` bytes of base64 text into raw bytes.
+  std::string base64Decode(const char *Data, int DataByte);
+};
+
+} // namespace serving
+} // namespace paddle_serving
+} // namespace baidu
--- a/deploy/serving/cpp/preprocess/ppyoloe_op.cpp
+++ b/deploy/serving/cpp/preprocess/ppyoloe_op.cpp
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "core/general-server/op/ppyoloe_op.h"
+#include "core/general-server/op/picodet_lcnet_1_5x_416_coco.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
 #include "core/predictor/framework/resource.h"
@@ -34,7 +34,7 @@ using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;

-int PPYOLOEOp::inference() {
+int picodet_lcnet_1_5x_416_coco::inference() {
  VLOG(2) << "Going to run inference";
  const std::vector<std::string> pre_node_names = pre_names();
  if (pre_node_names.size() != 1) {
@@ -157,12 +157,11 @@ int PPYOLOEOp::inference() {
  return 0;
 }

-void PPYOLOEOp::preprocess_det(const cv::Mat &img, float *data,
-                                   float &scale_factor_h, float &scale_factor_w,
-                                   int im_shape_h, int im_shape_w,
-                                   const std::vector<float> &mean,
-                                   const std::vector<float> &scale,
-                                   const bool is_scale) {
+void picodet_lcnet_1_5x_416_coco::preprocess_det(
+    const cv::Mat &img, float *data, float &scale_factor_h,
+    float &scale_factor_w, int im_shape_h, int im_shape_w,
+    const std::vector<float> &mean, const std::vector<float> &scale,
+    const bool is_scale) {
  // scale_factor
  scale_factor_h =
      static_cast<float>(im_shape_h) / static_cast<float>(img.rows);
@@ -201,7 +200,7 @@ void PPYOLOEOp::preprocess_det(const cv::Mat &img, float *data,
  }
 }

-cv::Mat PPYOLOEOp::Base2Mat(std::string &base64_data) {
+cv::Mat picodet_lcnet_1_5x_416_coco::Base2Mat(std::string &base64_data) {
  cv::Mat img;
  std::string s_mat;
  s_mat = base64Decode(base64_data.data(), base64_data.size());
@@ -210,7 +209,8 @@ cv::Mat PPYOLOEOp::Base2Mat(std::string &base64_data) {
  return img;
 }

-std::string PPYOLOEOp::base64Decode(const char *Data, int DataByte) {
+std::string picodet_lcnet_1_5x_416_coco::base64Decode(const char *Data,
+                                                      int DataByte) {
  const char DecodeTable[] = {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
@@ -251,7 +251,7 @@ std::string PPYOLOEOp::base64Decode(const char *Data, int DataByte) {
  return strDecode;
 }

-DEFINE_OP(PPYOLOEOp);
+DEFINE_OP(picodet_lcnet_1_5x_416_coco);

 } // namespace serving
 } // namespace paddle_serving

--- a/deploy/serving/cpp/preprocess/picodet_lcnet_1_5x_416_coco.h
+++ b/deploy/serving/cpp/preprocess/picodet_lcnet_1_5x_416_coco.h
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "core/general-server/general_model_service.pb.h"
+#include "core/general-server/op/general_infer_helper.h"
+#include "paddle_inference_api.h" // NOLINT
+#include <string>
+#include <vector>
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/imgproc.hpp"
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <ostream>
+#include <vector>
+
+#include <cstring>
+#include <fstream>
+#include <numeric>
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+// Serving op declaration for picodet_lcnet_1_5x_416_coco: image decoding,
+// preprocessing helpers and the per-request inference() entry point.
+class picodet_lcnet_1_5x_416_coco
+    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
+public:
+  using TensorVector = std::vector<paddle::PaddleTensor>;
+
+  DECLARE_OP(picodet_lcnet_1_5x_416_coco);
+
+  // Processes one request.
+  int inference();
+
+private:
+  // ---- preprocessing configuration ----
+  // Per-channel normalization constants.
+  std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
+  std::vector<float> scale_ = {0.229f, 0.224f, 0.225f};
+  bool is_scale_ = true;
+  // Input size handed to preprocess_det().
+  int im_shape_h = 416;
+  int im_shape_w = 416;
+  // Resize ratios; preprocess_det() takes them by non-const reference.
+  float scale_factor_h = 1.0f;
+  float scale_factor_w = 1.0f;
+
+  // Preprocesses `img` into the float buffer `data`.
+  void preprocess_det(const cv::Mat &img, float *data, float &scale_factor_h,
+                      float &scale_factor_w, int im_shape_h, int im_shape_w,
+                      const std::vector<float> &mean,
+                      const std::vector<float> &scale, const bool is_scale);
+
+  // ---- image loading ----
+  // Turns a base64-encoded image into a cv::Mat.
+  cv::Mat Base2Mat(std::string &base64_data);
+  // Decodes `DataByte` bytes of base64 text into raw bytes.
+  std::string base64Decode(const char *Data, int DataByte);
+};
+
+} // namespace serving
+} // namespace paddle_serving
+} // namespace baidu
--- a/deploy/serving/cpp/preprocess/yolov3_op.cpp
+++ b/deploy/serving/cpp/preprocess/yolov3_op.cpp
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "core/general-server/op/yolov3_op.h"
+#include "core/general-server/op/ppyolo_mbv3_large_coco.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
 #include "core/predictor/framework/resource.h"
@@ -34,7 +34,7 @@ using baidu::paddle_serving::predictor::general_model::Request;
 using baidu::paddle_serving::predictor::general_model::Response;
 using baidu::paddle_serving::predictor::general_model::Tensor;

-int YOLOv3Op::inference() {
+int ppyolo_mbv3_large_coco::inference() {
  VLOG(2) << "Going to run inference";
  const std::vector<std::string> pre_node_names = pre_names();
  if (pre_node_names.size() != 1) {
@@ -179,12 +179,13 @@ int YOLOv3Op::inference() {
  return 0;
 }

-void YOLOv3Op::preprocess_det(const cv::Mat &img, float *data,
-                                   float &scale_factor_h, float &scale_factor_w,
-                                   int im_shape_h, int im_shape_w,
-                                   const std::vector<float> &mean,
-                                   const std::vector<float> &scale,
-                                   const bool is_scale) {
+void ppyolo_mbv3_large_coco::preprocess_det(const cv::Mat &img, float *data,
+                                            float &scale_factor_h,
+                                            float &scale_factor_w,
+                                            int im_shape_h, int im_shape_w,
+                                            const std::vector<float> &mean,
+                                            const std::vector<float> &scale,
+                                            const bool is_scale) {
  // scale_factor
  scale_factor_h =
      static_cast<float>(im_shape_h) / static_cast<float>(img.rows);
@@ -223,7 +224,7 @@ void YOLOv3Op::preprocess_det(const cv::Mat &img, float *data,
  }
 }

-cv::Mat YOLOv3Op::Base2Mat(std::string &base64_data) {
+cv::Mat ppyolo_mbv3_large_coco::Base2Mat(std::string &base64_data) {
  cv::Mat img;
  std::string s_mat;
  s_mat = base64Decode(base64_data.data(), base64_data.size());
@@ -232,7 +233,8 @@ cv::Mat YOLOv3Op::Base2Mat(std::string &base64_data) {
  return img;
 }

-std::string YOLOv3Op::base64Decode(const char *Data, int DataByte) {
+std::string ppyolo_mbv3_large_coco::base64Decode(const char *Data,
+                                                 int DataByte) {
  const char DecodeTable[] = {
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
@@ -273,7 +275,7 @@ std::string YOLOv3Op::base64Decode(const char *Data, int DataByte) {
  return strDecode;
 }

-DEFINE_OP(YOLOv3Op);
+DEFINE_OP(ppyolo_mbv3_large_coco);

 } // namespace serving
 } // namespace paddle_serving

--- a/deploy/serving/cpp/preprocess/yolov3_op.h
+++ b/deploy/serving/cpp/preprocess/yolov3_op.h
@@ -36,17 +36,17 @@ namespace baidu {
 namespace paddle_serving {
 namespace serving {

-class YOLOv3Op
+class ppyolo_mbv3_large_coco
    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
 public:
  typedef std::vector<paddle::PaddleTensor> TensorVector;

-  DECLARE_OP(YOLOv3Op);
+  DECLARE_OP(ppyolo_mbv3_large_coco);

  int inference();

 private:
-  // yolov3, ppyolo preprocess
+  // preprocess
  std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
  std::vector<float> scale_ = {0.229f, 0.224f, 0.225f};
  bool is_scale_ = true;

--- a/deploy/serving/cpp/preprocess/ppyoloe_crn_s_300e_coco.cpp
+++ b/deploy/serving/cpp/preprocess/ppyoloe_crn_s_300e_coco.cpp
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "core/general-server/op/ppyoloe_crn_s_300e_coco.h"
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
+#include "core/util/include/timer.h"
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::Timer;
+using baidu::paddle_serving::predictor::InferManager;
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::Response;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+
+// Handles one request: decodes the base64 image provided by the single
+// predecessor op, preprocesses it to im_shape_h x im_shape_w, and runs the
+// engine with the "image" and "scale_factor" input tensors. The engine output
+// is written to this op's output blob.
+//
+// Returns 0 on success, -1 on failure (the cause is logged).
+int ppyoloe_crn_s_300e_coco::inference() {
+  VLOG(2) << "Going to run inference";
+  const std::vector<std::string> pre_node_names = pre_names();
+  if (pre_node_names.size() != 1) {
+    LOG(ERROR) << "This op(" << op_name()
+               << ") can only have one predecessor op, but received "
+               << pre_node_names.size();
+    return -1;
+  }
+  const std::string pre_name = pre_node_names[0];
+
+  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
+  if (!input_blob) {
+    LOG(ERROR) << "input_blob is nullptr,error";
+    return -1;
+  }
+  uint64_t log_id = input_blob->GetLogId();
+  VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
+
+  GeneralBlob *output_blob = mutable_data<GeneralBlob>();
+  if (!output_blob) {
+    LOG(ERROR) << "output_blob is nullptr,error";
+    return -1;
+  }
+  output_blob->SetLogId(log_id);
+
+  const TensorVector *in = &input_blob->tensor_vector;
+  TensorVector *out = &output_blob->tensor_vector;
+  if (in->empty()) {
+    LOG(ERROR) << "(logid=" << log_id << ") Input tensor vector is empty";
+    return -1;
+  }
+
+  int batch_size = input_blob->_batch_size;
+  output_blob->_batch_size = batch_size;
+  VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
+
+  Timer timeline;
+  int64_t start = timeline.TimeStampUS();
+  timeline.Start();
+
+  // only support string type
+  char *total_input_ptr = static_cast<char *>(in->at(0).data.data());
+  if (!total_input_ptr) {
+    LOG(ERROR) << "(logid=" << log_id << ") Input tensor has no data";
+    return -1;
+  }
+  std::string base64str = total_input_ptr;
+
+  cv::Mat img = Base2Mat(base64str);
+  if (img.empty()) {
+    // cvtColor and resize cannot operate on an empty matrix.
+    LOG(ERROR) << "(logid=" << log_id << ") Failed to decode input image";
+    return -1;
+  }
+  cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
+
+  // preprocess
+  const size_t image_count = static_cast<size_t>(3) *
+                             static_cast<size_t>(im_shape_h) *
+                             static_cast<size_t>(im_shape_w);
+  std::vector<float> input(image_count, 0.0f);
+  preprocess_det(img, input.data(), scale_factor_h, scale_factor_w, im_shape_h,
+                 im_shape_w, mean_, scale_, is_scale_);
+
+  // create real_in
+  // A local vector is enough: the tensors only reference mempool buffers, and
+  // nothing in this function hands ownership of the vector to anyone else.
+  TensorVector real_in;
+
+  // Copies `count` floats into a mempool buffer and appends a FLOAT32 tensor
+  // referencing that buffer to real_in. Returns false if allocation fails.
+  const auto append_float_tensor =
+      [&](const char *name, const float *values, size_t count,
+          const std::vector<int> &shape) -> bool {
+    const size_t databuf_size = count * sizeof(float);
+    void *databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+    if (!databuf_data) {
+      LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+      return false;
+    }
+    memcpy(databuf_data, values, databuf_size);
+    char *databuf_char = reinterpret_cast<char *>(databuf_data);
+    paddle::PaddleBuf paddle_buf(databuf_char, databuf_size);
+    paddle::PaddleTensor tensor;
+    tensor.name = name;
+    tensor.dtype = paddle::PaddleDType::FLOAT32;
+    tensor.shape = shape;
+    tensor.lod = in->at(0).lod;
+    tensor.data = paddle_buf;
+    real_in.push_back(tensor);
+    return true;
+  };
+
+  // image
+  if (!append_float_tensor("image", input.data(), image_count,
+                           {1, 3, im_shape_h, im_shape_w})) {
+    return -1;
+  }
+
+  // scale_factor
+  const std::vector<float> scale_factor{scale_factor_h, scale_factor_w};
+  if (!append_float_tensor("scale_factor", scale_factor.data(),
+                           scale_factor.size(), {1, 2})) {
+    return -1;
+  }
+
+  if (InferManager::instance().infer(engine_name().c_str(), &real_in, out,
+                                     batch_size)) {
+    LOG(ERROR) << "(logid=" << log_id
+               << ") Failed do infer in fluid model: " << engine_name().c_str();
+    return -1;
+  }
+
+  int64_t end = timeline.TimeStampUS();
+  CopyBlobInfo(input_blob, output_blob);
+  AddBlobInfo(output_blob, start);
+  AddBlobInfo(output_blob, end);
+  return 0;
+}
+
+// Prepares `img` for the model and writes it to `data` as float planes, one
+// channel after another:
+//   1. resize to exactly im_shape_w x im_shape_h (aspect ratio not kept),
+//   2. optional 1/255 scaling followed by per-channel (x - mean) / scale.
+//
+// scale_factor_h / scale_factor_w receive target size / source size for each
+// axis. `data` must have room for channels * im_shape_h * im_shape_w floats.
+void ppyoloe_crn_s_300e_coco::preprocess_det(const cv::Mat &img, float *data,
+                                             float &scale_factor_h,
+                                             float &scale_factor_w,
+                                             int im_shape_h, int im_shape_w,
+                                             const std::vector<float> &mean,
+                                             const std::vector<float> &scale,
+                                             const bool is_scale) {
+  // scale_factor
+  const float src_rows = static_cast<float>(img.rows);
+  const float src_cols = static_cast<float>(img.cols);
+  scale_factor_h = static_cast<float>(im_shape_h) / src_rows;
+  scale_factor_w = static_cast<float>(im_shape_w) / src_cols;
+
+  // Resize (bicubic interpolation)
+  cv::Mat resized;
+  cv::resize(img, resized, cv::Size(im_shape_w, im_shape_h), 0, 0,
+             cv::INTER_CUBIC);
+
+  // Normalize
+  const double e = is_scale ? 1.0 / 255.0 : 1.0;
+  cv::Mat img_fp;
+  resized.convertTo(img_fp, CV_32FC3, e);
+  for (int row = 0; row < im_shape_h; ++row) {
+    cv::Vec3f *pixels = img_fp.ptr<cv::Vec3f>(row);
+    for (int col = 0; col < im_shape_w; ++col) {
+      cv::Vec3f &px = pixels[col];
+      for (int c = 0; c < 3; ++c) {
+        px[c] = (px[c] - mean[c]) / scale[c];
+      }
+    }
+  }
+
+  // Permute: interleaved pixels -> one contiguous plane per channel
+  const int plane_rows = img_fp.rows;
+  const int plane_cols = img_fp.cols;
+  const int channel_count = img_fp.channels();
+  for (int c = 0; c < channel_count; ++c) {
+    cv::Mat plane(plane_rows, plane_cols, CV_32FC1,
+                  data + c * plane_rows * plane_cols);
+    cv::extractChannel(img_fp, plane, c);
+  }
+}
+
+// Decodes a base64-encoded image file into a color cv::Mat
+// (cv::IMREAD_COLOR).
+//
+// Returns an empty matrix when the payload decodes to zero bytes; callers
+// should check `.empty()` before using the result.
+cv::Mat ppyoloe_crn_s_300e_coco::Base2Mat(std::string &base64_data) {
+  const std::string s_mat = base64Decode(
+      base64_data.data(), static_cast<int>(base64_data.size()));
+  if (s_mat.empty()) {
+    // Nothing to decode; do not hand cv::imdecode an empty buffer.
+    return cv::Mat();
+  }
+  std::vector<char> base64_img(s_mat.begin(), s_mat.end());
+  return cv::imdecode(base64_img, cv::IMREAD_COLOR); // CV_LOAD_IMAGE_COLOR
+}
+
+// Decodes `DataByte` bytes of base64 text starting at `Data` and returns the
+// decoded bytes.
+//
+// CR and LF between 4-character groups are skipped. Decoding stops at the
+// first '=' padding character or when the input is exhausted; a trailing
+// group with fewer than two characters is dropped. Characters outside the
+// base64 alphabet are not rejected: they decode as zero bits. The function
+// never reads beyond Data + DataByte.
+std::string ppyoloe_crn_s_300e_coco::base64Decode(const char *Data,
+                                                  int DataByte) {
+  static const char DecodeTable[] = {
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,
+      62, // '+'
+      0,  0,  0,
+      63,                                     // '/'
+      52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // '0'-'9'
+      0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+      10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'Z'
+      0,  0,  0,  0,  0,  0,  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
+      37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 'a'-'z'
+  };
+  const size_t table_size = sizeof(DecodeTable) / sizeof(DecodeTable[0]);
+
+  // Table lookup that is safe for any byte value: `char` may be signed, and
+  // the table only covers codes up to 'z'.
+  const auto sextet = [table_size](char c) -> int {
+    const unsigned char index = static_cast<unsigned char>(c);
+    return index < table_size ? DecodeTable[index] : 0;
+  };
+
+  std::string strDecode;
+  if (!Data || DataByte <= 0) {
+    return strDecode;
+  }
+
+  const char *const end = Data + DataByte;
+  while (Data < end) {
+    if (*Data == '\r' || *Data == '\n') {
+      // line break between groups: skip
+      ++Data;
+      continue;
+    }
+    if (*Data == '=' || end - Data < 2) {
+      // padding reached, or too few characters left to form a byte
+      break;
+    }
+
+    int nValue = sextet(*Data++) << 18;
+    nValue += sextet(*Data++) << 12;
+    strDecode += static_cast<char>((nValue & 0x00FF0000) >> 16);
+    if (Data == end || *Data == '=') {
+      break;
+    }
+
+    nValue += sextet(*Data++) << 6;
+    strDecode += static_cast<char>((nValue & 0x0000FF00) >> 8);
+    if (Data == end || *Data == '=') {
+      break;
+    }
+
+    nValue += sextet(*Data++);
+    strDecode += static_cast<char>(nValue & 0x000000FF);
+  }
+  return strDecode;
+}
+
+DEFINE_OP(ppyoloe_crn_s_300e_coco);
+
+} // namespace serving
+} // namespace paddle_serving
+} // namespace baidu
--- a/deploy/serving/cpp/preprocess/ppyoloe_op.h
+++ b/deploy/serving/cpp/preprocess/ppyoloe_op.h
@@ -36,17 +36,17 @@ namespace baidu {
 namespace paddle_serving {
 namespace serving {

-class PPYOLOEOp
+class ppyoloe_crn_s_300e_coco
    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
 public:
  typedef std::vector<paddle::PaddleTensor> TensorVector;

-  DECLARE_OP(PPYOLOEOp);
+  DECLARE_OP(ppyoloe_crn_s_300e_coco);

  int inference();

 private:
-  // ppyoloe, picodet preprocess
+  // preprocess
  std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
  std::vector<float> scale_ = {0.229f, 0.224f, 0.225f};
  bool is_scale_ = true;
@@ -55,10 +55,9 @@ private:
  float scale_factor_h = 1.0f;
  float scale_factor_w = 1.0f;
  void preprocess_det(const cv::Mat &img, float *data, float &scale_factor_h,
-                      float &scale_factor_w, int im_shape_h,
-                      int im_shape_w, const std::vector<float> &mean,
-                      const std::vector<float> &scale,
-                      const bool is_scale);
+                      float &scale_factor_w, int im_shape_h, int im_shape_w,
+                      const std::vector<float> &mean,
+                      const std::vector<float> &scale, const bool is_scale);

  // read pics
  cv::Mat Base2Mat(std::string &base64_data);

--- a/deploy/serving/cpp/preprocess/tinypose_128x96.cpp
+++ b/deploy/serving/cpp/preprocess/tinypose_128x96.cpp
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "core/general-server/op/tinypose_128x96.h"
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
+#include "core/util/include/timer.h"
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::Timer;
+using baidu::paddle_serving::predictor::InferManager;
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::Response;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+
+// Handles one request: reads the base64 image string produced by the single
+// predecessor op, decodes and preprocesses it, and runs the inference engine
+// on the resulting "image" tensor. Engine outputs go to this op's output blob.
+//
+// Returns 0 on success and -1 on failure (wrong predecessor count, missing
+// blobs, empty input, undecodable image, allocation failure, engine error).
+int tinypose_128x96::inference() {
+  VLOG(2) << "Going to run inference";
+  const std::vector<std::string> pre_node_names = pre_names();
+  if (pre_node_names.size() != 1) {
+    LOG(ERROR) << "This op(" << op_name()
+               << ") can only have one predecessor op, but received "
+               << pre_node_names.size();
+    return -1;
+  }
+  const std::string pre_name = pre_node_names[0];
+
+  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
+  if (!input_blob) {
+    LOG(ERROR) << "input_blob is nullptr,error";
+    return -1;
+  }
+  uint64_t log_id = input_blob->GetLogId();
+  VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
+
+  GeneralBlob *output_blob = mutable_data<GeneralBlob>();
+  if (!output_blob) {
+    LOG(ERROR) << "output_blob is nullptr,error";
+    return -1;
+  }
+  output_blob->SetLogId(log_id);
+
+  const TensorVector *in = &input_blob->tensor_vector;
+  TensorVector *out = &output_blob->tensor_vector;
+  if (in->empty()) {
+    LOG(ERROR) << "(logid=" << log_id << ") input tensor vector is empty";
+    return -1;
+  }
+
+  int batch_size = input_blob->_batch_size;
+  output_blob->_batch_size = batch_size;
+  VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
+
+  Timer timeline;
+  int64_t start = timeline.TimeStampUS();
+  timeline.Start();
+
+  // only support string type
+  // NOTE(review): the buffer is read as a NUL-terminated C string — confirm the
+  // upstream reader op always terminates it.
+  char *total_input_ptr = static_cast<char *>(in->at(0).data.data());
+  if (!total_input_ptr) {
+    LOG(ERROR) << "(logid=" << log_id << ") input tensor has no data";
+    return -1;
+  }
+  std::string base64str = total_input_ptr;
+
+  cv::Mat img = Base2Mat(base64str);
+  // cv::imdecode yields an empty matrix for data it cannot decode; cvtColor
+  // must not be called on that.
+  if (img.empty()) {
+    LOG(ERROR) << "(logid=" << log_id << ") Failed to decode input image";
+    return -1;
+  }
+  cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
+
+  // preprocess
+  std::vector<float> input(1 * 3 * im_shape_h * im_shape_w, 0.0f);
+  preprocess_det(img, input.data(), scale_factor_h, scale_factor_w, im_shape_h,
+                 im_shape_w, mean_, scale_, is_scale_);
+
+  // Engine inputs. Kept on the stack so nothing is leaked per request.
+  // NOTE(review): assumes InferManager::infer() does not keep the pointer after
+  // it returns — confirm against the framework.
+  TensorVector real_in;
+
+  // image
+  const size_t databuf_size = input.size() * sizeof(float);
+  void *databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+  if (!databuf_data) {
+    LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+    return -1;
+  }
+
+  memcpy(databuf_data, input.data(), databuf_size);
+  char *databuf_char = reinterpret_cast<char *>(databuf_data);
+  paddle::PaddleBuf paddleBuf(databuf_char, databuf_size);
+  paddle::PaddleTensor tensor_in;
+  tensor_in.name = "image";
+  tensor_in.dtype = paddle::PaddleDType::FLOAT32;
+  tensor_in.shape = {1, 3, im_shape_h, im_shape_w};
+  tensor_in.lod = in->at(0).lod;
+  tensor_in.data = paddleBuf;
+  real_in.push_back(tensor_in);
+
+  if (InferManager::instance().infer(engine_name().c_str(), &real_in, out,
+                                     batch_size)) {
+    LOG(ERROR) << "(logid=" << log_id
+               << ") Failed do infer in fluid model: " << engine_name().c_str();
+    return -1;
+  }
+
+  int64_t end = timeline.TimeStampUS();
+  CopyBlobInfo(input_blob, output_blob);
+  AddBlobInfo(output_blob, start);
+  AddBlobInfo(output_blob, end);
+  return 0;
+}
+
+// Resizes img to im_shape_w x im_shape_h, normalizes each channel as
+// (value - mean) / scale and writes the result to data one channel plane after
+// another.
+//
+// data must have room for channels * im_shape_h * im_shape_w floats.
+// scale_factor_h and scale_factor_w are accepted but not modified here.
+void tinypose_128x96::preprocess_det(const cv::Mat &img, float *data,
+                                     float &scale_factor_h,
+                                     float &scale_factor_w, int im_shape_h,
+                                     int im_shape_w,
+                                     const std::vector<float> &mean,
+                                     const std::vector<float> &scale,
+                                     const bool is_scale) {
+  // Resize with bilinear interpolation (flag value 1).
+  cv::Mat resized;
+  cv::resize(img, resized, cv::Size(im_shape_w, im_shape_h), 0, 0,
+             cv::INTER_LINEAR);
+
+  // Convert to float; with is_scale the values are multiplied by 1/255.
+  const double alpha = is_scale ? 1.0 / 255.0 : 1.0;
+  cv::Mat normalized;
+  resized.convertTo(normalized, CV_32FC3, alpha);
+
+  // Per-channel normalization.
+  for (int row = 0; row < im_shape_h; ++row) {
+    for (int col = 0; col < im_shape_w; ++col) {
+      cv::Vec3f &pixel = normalized.at<cv::Vec3f>(row, col);
+      for (int ch = 0; ch < 3; ++ch) {
+        pixel[ch] = (pixel[ch] - mean[ch]) / scale[ch];
+      }
+    }
+  }
+
+  // Interleaved pixels -> one plane per channel, written straight into data.
+  const int plane_rows = normalized.rows;
+  const int plane_cols = normalized.cols;
+  const int channel_count = normalized.channels();
+  for (int ch = 0; ch < channel_count; ++ch) {
+    cv::Mat plane(plane_rows, plane_cols, CV_32FC1,
+                  data + ch * plane_rows * plane_cols);
+    cv::extractChannel(normalized, plane, ch);
+  }
+}
+
+// Decodes base64 text into raw bytes and passes them to cv::imdecode with
+// cv::IMREAD_COLOR; the result of cv::imdecode is returned unchanged.
+cv::Mat tinypose_128x96::Base2Mat(std::string &base64_data) {
+  const std::string decoded =
+      base64Decode(base64_data.data(), base64_data.size());
+  std::vector<char> encoded_image(decoded.begin(), decoded.end());
+  return cv::imdecode(encoded_image, cv::IMREAD_COLOR); // CV_LOAD_IMAGE_COLOR
+}
+
+// Decodes standard-alphabet base64 text into its raw bytes.
+//
+// Data:     pointer to the encoded characters; exactly DataByte of them are
+//           read, never more.
+// DataByte: number of characters available at Data; values <= 0 yield "".
+//
+// CR and LF are skipped wherever they occur, and the first '=' ends the
+// payload. Any other character outside the alphabet contributes six zero
+// bits. A trailing group of two or three characters produces one or two
+// bytes; a single leftover character produces none.
+std::string tinypose_128x96::base64Decode(const char *Data, int DataByte) {
+  // Indexed by character code; every entry without a comment is 0.
+  static const char DecodeTable[] = {
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,
+      62, // '+'
+      0,  0,  0,
+      63,                                     // '/'
+      52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // '0'-'9'
+      0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+      10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'Z'
+      0,  0,  0,  0,  0,  0,  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
+      37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 'a'-'z'
+  };
+  const unsigned int kTableSize = sizeof(DecodeTable);
+
+  std::string strDecode;
+  if (DataByte > 0) {
+    strDecode.reserve(
+        (static_cast<std::string::size_type>(DataByte) / 4 + 1) * 3);
+  }
+
+  int sextets[4] = {0, 0, 0, 0};
+  int filled = 0;
+  for (int i = 0; i < DataByte; ++i) {
+    // Go through unsigned char so bytes >= 0x80 cannot become a negative index.
+    const unsigned char ch = static_cast<unsigned char>(Data[i]);
+    if (ch == '\r' || ch == '\n') {
+      continue; // line breaks carry no data
+    }
+    if (ch == '=') {
+      break; // padding marks the end of the encoded payload
+    }
+    // Codes past the end of the table ('{' and above) decode as 0 instead of
+    // reading outside the array.
+    sextets[filled++] = ch < kTableSize ? DecodeTable[ch] : 0;
+    if (filled == 4) {
+      const int nValue = (sextets[0] << 18) | (sextets[1] << 12) |
+                         (sextets[2] << 6) | sextets[3];
+      strDecode += static_cast<char>((nValue >> 16) & 0xFF);
+      strDecode += static_cast<char>((nValue >> 8) & 0xFF);
+      strDecode += static_cast<char>(nValue & 0xFF);
+      filled = 0;
+    }
+  }
+
+  // Final, shorter group: 2 characters -> 1 byte, 3 characters -> 2 bytes.
+  if (filled >= 2) {
+    int nValue = (sextets[0] << 18) | (sextets[1] << 12);
+    strDecode += static_cast<char>((nValue >> 16) & 0xFF);
+    if (filled == 3) {
+      nValue |= sextets[2] << 6;
+      strDecode += static_cast<char>((nValue >> 8) & 0xFF);
+    }
+  }
+  return strDecode;
+}
+
+// NOTE(review): DEFINE_OP is defined outside this file; presumably it registers
+// the op class with the serving framework under this name — confirm.
+DEFINE_OP(tinypose_128x96);
+
+} // namespace serving
+} // namespace paddle_serving
+} // namespace baidu
--- a/deploy/serving/cpp/preprocess/tinypose_128x96.h
+++ b/deploy/serving/cpp/preprocess/tinypose_128x96.h
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "core/general-server/general_model_service.pb.h"
+#include "core/general-server/op/general_infer_helper.h"
+#include "paddle_inference_api.h" // NOLINT
+#include <string>
+#include <vector>
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/imgproc.hpp"
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <ostream>
+#include <vector>
+
+#include <cstring>
+#include <fstream>
+#include <numeric>
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+// Serving op for the tinypose_128x96 model: inference() decodes the request's
+// base64 image, preprocesses it to im_shape_h x im_shape_w and runs the engine.
+class tinypose_128x96
+    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
+public:
+  typedef std::vector<paddle::PaddleTensor> TensorVector;
+
+  DECLARE_OP(tinypose_128x96);
+
+  // Handles one request; the implementation returns 0 on success, -1 on error.
+  int inference();
+
+private:
+  // preprocess
+  // Per-channel mean and divisor, applied as (value - mean_) / scale_.
+  std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
+  std::vector<float> scale_ = {0.229f, 0.224f, 0.225f};
+  // When true, pixel values are multiplied by 1/255 before normalization.
+  bool is_scale_ = true;
+  // Size the image is resized to before it is fed to the model.
+  int im_shape_h = 128;
+  int im_shape_w = 96;
+  // Passed to preprocess_det by reference; this op's preprocess_det does not
+  // modify them.
+  float scale_factor_h = 1.0f;
+  float scale_factor_w = 1.0f;
+  // Resizes and normalizes img, then writes it to data one channel plane after
+  // another.
+  void preprocess_det(const cv::Mat &img, float *data, float &scale_factor_h,
+                      float &scale_factor_w, int im_shape_h, int im_shape_w,
+                      const std::vector<float> &mean,
+                      const std::vector<float> &scale, const bool is_scale);
+
+  // read pics
+  // Base64 text -> decoded image (base64Decode followed by cv::imdecode).
+  cv::Mat Base2Mat(std::string &base64_data);
+  // Base64 text (DataByte characters starting at Data) -> raw bytes.
+  std::string base64Decode(const char *Data, int DataByte);
+};
+
+} // namespace serving
+} // namespace paddle_serving
+} // namespace baidu
--- a/deploy/serving/cpp/preprocess/yolov3_darknet53_270e_coco.cpp
+++ b/deploy/serving/cpp/preprocess/yolov3_darknet53_270e_coco.cpp
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "core/general-server/op/yolov3_darknet53_270e_coco.h"
+#include "core/predictor/framework/infer.h"
+#include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/resource.h"
+#include "core/util/include/timer.h"
+#include <algorithm>
+#include <iostream>
+#include <memory>
+#include <sstream>
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+using baidu::paddle_serving::Timer;
+using baidu::paddle_serving::predictor::InferManager;
+using baidu::paddle_serving::predictor::MempoolWrapper;
+using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::Response;
+using baidu::paddle_serving::predictor::general_model::Tensor;
+
+// Handles one request: reads the base64 image string produced by the single
+// predecessor op, decodes and preprocesses it, and runs the inference engine
+// on three float tensors, in this order: "im_shape", "image", "scale_factor".
+// Engine outputs go to this op's output blob.
+//
+// Returns 0 on success and -1 on failure (wrong predecessor count, missing
+// blobs, empty input, undecodable image, allocation failure, engine error).
+int yolov3_darknet53_270e_coco::inference() {
+  VLOG(2) << "Going to run inference";
+  const std::vector<std::string> pre_node_names = pre_names();
+  if (pre_node_names.size() != 1) {
+    LOG(ERROR) << "This op(" << op_name()
+               << ") can only have one predecessor op, but received "
+               << pre_node_names.size();
+    return -1;
+  }
+  const std::string pre_name = pre_node_names[0];
+
+  const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
+  if (!input_blob) {
+    LOG(ERROR) << "input_blob is nullptr,error";
+    return -1;
+  }
+  uint64_t log_id = input_blob->GetLogId();
+  VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
+
+  GeneralBlob *output_blob = mutable_data<GeneralBlob>();
+  if (!output_blob) {
+    LOG(ERROR) << "output_blob is nullptr,error";
+    return -1;
+  }
+  output_blob->SetLogId(log_id);
+
+  const TensorVector *in = &input_blob->tensor_vector;
+  TensorVector *out = &output_blob->tensor_vector;
+  if (in->empty()) {
+    LOG(ERROR) << "(logid=" << log_id << ") input tensor vector is empty";
+    return -1;
+  }
+
+  int batch_size = input_blob->_batch_size;
+  output_blob->_batch_size = batch_size;
+  VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
+
+  Timer timeline;
+  int64_t start = timeline.TimeStampUS();
+  timeline.Start();
+
+  // only support string type
+  // NOTE(review): the buffer is read as a NUL-terminated C string — confirm the
+  // upstream reader op always terminates it.
+  char *total_input_ptr = static_cast<char *>(in->at(0).data.data());
+  if (!total_input_ptr) {
+    LOG(ERROR) << "(logid=" << log_id << ") input tensor has no data";
+    return -1;
+  }
+  std::string base64str = total_input_ptr;
+
+  cv::Mat img = Base2Mat(base64str);
+  // cv::imdecode yields an empty matrix for data it cannot decode; cvtColor
+  // must not be called on that.
+  if (img.empty()) {
+    LOG(ERROR) << "(logid=" << log_id << ") Failed to decode input image";
+    return -1;
+  }
+  cv::cvtColor(img, img, cv::COLOR_BGR2RGB);
+
+  // preprocess (also updates scale_factor_h / scale_factor_w)
+  std::vector<float> input(1 * 3 * im_shape_h * im_shape_w, 0.0f);
+  preprocess_det(img, input.data(), scale_factor_h, scale_factor_w, im_shape_h,
+                 im_shape_w, mean_, scale_, is_scale_);
+
+  // Engine inputs. Kept on the stack so nothing is leaked per request.
+  // NOTE(review): assumes InferManager::infer() does not keep the pointer after
+  // it returns — confirm against the framework.
+  TensorVector real_in;
+
+  // Copies values into a buffer from MempoolWrapper and appends a FLOAT32
+  // tensor with the given name and shape to real_in. Returns false when the
+  // allocation fails.
+  auto append_float_tensor = [&](const char *name,
+                                 const std::vector<int> &shape,
+                                 const std::vector<float> &values) -> bool {
+    const size_t databuf_size = values.size() * sizeof(float);
+    void *databuf_data = MempoolWrapper::instance().malloc(databuf_size);
+    if (!databuf_data) {
+      LOG(ERROR) << "Malloc failed, size: " << databuf_size;
+      return false;
+    }
+    memcpy(databuf_data, values.data(), databuf_size);
+    char *databuf_char = reinterpret_cast<char *>(databuf_data);
+    paddle::PaddleBuf paddle_buf(databuf_char, databuf_size);
+    paddle::PaddleTensor tensor;
+    tensor.name = name;
+    tensor.dtype = paddle::PaddleDType::FLOAT32;
+    tensor.shape = shape;
+    tensor.lod = in->at(0).lod;
+    tensor.data = paddle_buf;
+    real_in.push_back(tensor);
+    return true;
+  };
+
+  const std::vector<float> im_shape{static_cast<float>(im_shape_h),
+                                    static_cast<float>(im_shape_w)};
+  const std::vector<float> scale_factor{scale_factor_h, scale_factor_w};
+
+  // The tensors are appended in the same order as before: im_shape, image,
+  // scale_factor.
+  if (!append_float_tensor("im_shape", {1, 2}, im_shape) ||
+      !append_float_tensor("image", {1, 3, im_shape_h, im_shape_w}, input) ||
+      !append_float_tensor("scale_factor", {1, 2}, scale_factor)) {
+    return -1;
+  }
+
+  if (InferManager::instance().infer(engine_name().c_str(), &real_in, out,
+                                     batch_size)) {
+    LOG(ERROR) << "(logid=" << log_id
+               << ") Failed do infer in fluid model: " << engine_name().c_str();
+    return -1;
+  }
+
+  int64_t end = timeline.TimeStampUS();
+  CopyBlobInfo(input_blob, output_blob);
+  AddBlobInfo(output_blob, start);
+  AddBlobInfo(output_blob, end);
+  return 0;
+}
+
+// Stores the resize ratios (target size / source size) in scale_factor_h and
+// scale_factor_w, resizes img to im_shape_w x im_shape_h, normalizes each
+// channel as (value - mean) / scale and writes the result to data one channel
+// plane after another.
+//
+// data must have room for channels * im_shape_h * im_shape_w floats.
+void yolov3_darknet53_270e_coco::preprocess_det(const cv::Mat &img, float *data,
+                                                float &scale_factor_h,
+                                                float &scale_factor_w,
+                                                int im_shape_h, int im_shape_w,
+                                                const std::vector<float> &mean,
+                                                const std::vector<float> &scale,
+                                                const bool is_scale) {
+  // scale_factor
+  const float src_rows = static_cast<float>(img.rows);
+  const float src_cols = static_cast<float>(img.cols);
+  scale_factor_h = static_cast<float>(im_shape_h) / src_rows;
+  scale_factor_w = static_cast<float>(im_shape_w) / src_cols;
+
+  // Resize with bicubic interpolation (flag value 2).
+  cv::Mat resized;
+  cv::resize(img, resized, cv::Size(im_shape_w, im_shape_h), 0, 0,
+             cv::INTER_CUBIC);
+
+  // Convert to float; with is_scale the values are multiplied by 1/255.
+  const double alpha = is_scale ? 1.0 / 255.0 : 1.0;
+  cv::Mat normalized;
+  resized.convertTo(normalized, CV_32FC3, alpha);
+
+  // Per-channel normalization.
+  for (int row = 0; row < im_shape_h; ++row) {
+    for (int col = 0; col < im_shape_w; ++col) {
+      cv::Vec3f &pixel = normalized.at<cv::Vec3f>(row, col);
+      for (int ch = 0; ch < 3; ++ch) {
+        pixel[ch] = (pixel[ch] - mean[ch]) / scale[ch];
+      }
+    }
+  }
+
+  // Interleaved pixels -> one plane per channel, written straight into data.
+  const int plane_rows = normalized.rows;
+  const int plane_cols = normalized.cols;
+  const int channel_count = normalized.channels();
+  for (int ch = 0; ch < channel_count; ++ch) {
+    cv::Mat plane(plane_rows, plane_cols, CV_32FC1,
+                  data + ch * plane_rows * plane_cols);
+    cv::extractChannel(normalized, plane, ch);
+  }
+}
+
+// Decodes base64 text into raw bytes and passes them to cv::imdecode with
+// cv::IMREAD_COLOR; the result of cv::imdecode is returned unchanged.
+cv::Mat yolov3_darknet53_270e_coco::Base2Mat(std::string &base64_data) {
+  const std::string decoded =
+      base64Decode(base64_data.data(), base64_data.size());
+  std::vector<char> encoded_image(decoded.begin(), decoded.end());
+  return cv::imdecode(encoded_image, cv::IMREAD_COLOR); // CV_LOAD_IMAGE_COLOR
+}
+
+// Decodes standard-alphabet base64 text into its raw bytes.
+//
+// Data:     pointer to the encoded characters; exactly DataByte of them are
+//           read, never more.
+// DataByte: number of characters available at Data; values <= 0 yield "".
+//
+// CR and LF are skipped wherever they occur, and the first '=' ends the
+// payload. Any other character outside the alphabet contributes six zero
+// bits. A trailing group of two or three characters produces one or two
+// bytes; a single leftover character produces none.
+std::string yolov3_darknet53_270e_coco::base64Decode(const char *Data,
+                                                     int DataByte) {
+  // Indexed by character code; every entry without a comment is 0.
+  static const char DecodeTable[] = {
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+      0,  0,  0,  0,  0,  0,  0,  0,  0,
+      62, // '+'
+      0,  0,  0,
+      63,                                     // '/'
+      52, 53, 54, 55, 56, 57, 58, 59, 60, 61, // '0'-'9'
+      0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
+      10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, // 'A'-'Z'
+      0,  0,  0,  0,  0,  0,  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
+      37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, // 'a'-'z'
+  };
+  const unsigned int kTableSize = sizeof(DecodeTable);
+
+  std::string strDecode;
+  if (DataByte > 0) {
+    strDecode.reserve(
+        (static_cast<std::string::size_type>(DataByte) / 4 + 1) * 3);
+  }
+
+  int sextets[4] = {0, 0, 0, 0};
+  int filled = 0;
+  for (int i = 0; i < DataByte; ++i) {
+    // Go through unsigned char so bytes >= 0x80 cannot become a negative index.
+    const unsigned char ch = static_cast<unsigned char>(Data[i]);
+    if (ch == '\r' || ch == '\n') {
+      continue; // line breaks carry no data
+    }
+    if (ch == '=') {
+      break; // padding marks the end of the encoded payload
+    }
+    // Codes past the end of the table ('{' and above) decode as 0 instead of
+    // reading outside the array.
+    sextets[filled++] = ch < kTableSize ? DecodeTable[ch] : 0;
+    if (filled == 4) {
+      const int nValue = (sextets[0] << 18) | (sextets[1] << 12) |
+                         (sextets[2] << 6) | sextets[3];
+      strDecode += static_cast<char>((nValue >> 16) & 0xFF);
+      strDecode += static_cast<char>((nValue >> 8) & 0xFF);
+      strDecode += static_cast<char>(nValue & 0xFF);
+      filled = 0;
+    }
+  }
+
+  // Final, shorter group: 2 characters -> 1 byte, 3 characters -> 2 bytes.
+  if (filled >= 2) {
+    int nValue = (sextets[0] << 18) | (sextets[1] << 12);
+    strDecode += static_cast<char>((nValue >> 16) & 0xFF);
+    if (filled == 3) {
+      nValue |= sextets[2] << 6;
+      strDecode += static_cast<char>((nValue >> 8) & 0xFF);
+    }
+  }
+  return strDecode;
+}
+
+// NOTE(review): DEFINE_OP is defined outside this file; presumably it registers
+// the op class with the serving framework under this name — confirm.
+DEFINE_OP(yolov3_darknet53_270e_coco);
+
+} // namespace serving
+} // namespace paddle_serving
+} // namespace baidu
--- a/deploy/serving/cpp/preprocess/yolov3_darknet53_270e_coco.h
+++ b/deploy/serving/cpp/preprocess/yolov3_darknet53_270e_coco.h
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+#include "core/general-server/general_model_service.pb.h"
+#include "core/general-server/op/general_infer_helper.h"
+#include "paddle_inference_api.h" // NOLINT
+#include <string>
+#include <vector>
+
+#include "opencv2/core.hpp"
+#include "opencv2/imgcodecs.hpp"
+#include "opencv2/imgproc.hpp"
+#include <chrono>
+#include <iomanip>
+#include <iostream>
+#include <ostream>
+#include <vector>
+
+#include <cstring>
+#include <fstream>
+#include <numeric>
+
+namespace baidu {
+namespace paddle_serving {
+namespace serving {
+
+// Serving op for the yolov3_darknet53_270e_coco model: inference() decodes the
+// request's base64 image, preprocesses it to im_shape_h x im_shape_w and runs
+// the engine on the "im_shape", "image" and "scale_factor" tensors.
+class yolov3_darknet53_270e_coco
+    : public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
+public:
+  typedef std::vector<paddle::PaddleTensor> TensorVector;
+
+  DECLARE_OP(yolov3_darknet53_270e_coco);
+
+  // Handles one request; the implementation returns 0 on success, -1 on error.
+  int inference();
+
+private:
+  // preprocess
+  // Per-channel mean and divisor, applied as (value - mean_) / scale_.
+  std::vector<float> mean_ = {0.485f, 0.456f, 0.406f};
+  std::vector<float> scale_ = {0.229f, 0.224f, 0.225f};
+  // When true, pixel values are multiplied by 1/255 before normalization.
+  bool is_scale_ = true;
+  // Size the image is resized to before it is fed to the model.
+  int im_shape_h = 608;
+  int im_shape_w = 608;
+  // Overwritten by preprocess_det with target size / source image size, then
+  // sent to the engine as the "scale_factor" tensor.
+  float scale_factor_h = 1.0f;
+  float scale_factor_w = 1.0f;
+  // Computes the scale factors, resizes and normalizes img, then writes it to
+  // data one channel plane after another.
+  void preprocess_det(const cv::Mat &img, float *data, float &scale_factor_h,
+                      float &scale_factor_w, int im_shape_h, int im_shape_w,
+                      const std::vector<float> &mean,
+                      const std::vector<float> &scale, const bool is_scale);
+
+  // read pics
+  // Base64 text -> decoded image (base64Decode followed by cv::imdecode).
+  cv::Mat Base2Mat(std::string &base64_data);
+  // Base64 text (DataByte characters starting at Data) -> raw bytes.
+  std::string base64Decode(const char *Data, int DataByte);
+};
+
+} // namespace serving
+} // namespace paddle_serving
+} // namespace baidu
--- a/deploy/serving/cpp/serving_client.py
+++ b/deploy/serving/cpp/serving_client.py
@@ -12,20 +12,21 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-import base64
-import glob
 import os
+import glob
+import base64
+import argparse
 from paddle_serving_client import Client
 from paddle_serving_client.proto import general_model_config_pb2 as m_config
 import google.protobuf.text_format

-import argparse

 parser = argparse.ArgumentParser(description="args for paddleserving")
 parser.add_argument(
    "--serving_client", type=str, help="the directory of serving_client")
 parser.add_argument("--image_dir", type=str)
 parser.add_argument("--image_file", type=str)
+parser.add_argument("--http_port", type=int, default=9997)
 parser.add_argument(
    "--threshold", type=float, default=0.5, help="Threshold of score.")
 args = parser.parse_args()
@@ -63,13 +64,20 @@ def get_test_images(infer_dir, infer_img):


 def postprocess(fetch_dict, draw_threshold=0.5):
-    bboxes = fetch_dict["multiclass_nms3_0.tmp_0"]
-    bboxes_num = fetch_dict["multiclass_nms3_0.tmp_2"]
-    for bbox in bboxes:
-        if bbox[0] > -1 and bbox[1] > draw_threshold:
-            print(f"{int(bbox[0])} {bbox[1]} "
-                  f"{bbox[2]} {bbox[3]} {bbox[4]} {bbox[5]}")
-    return fetch_dict
+    result = []
+    if "conv2d_441.tmp_1" in fetch_dict:
+        heatmap = fetch_dict["conv2d_441.tmp_1"]
+        print(heatmap)
+        result.append(heatmap)
+    else:
+        bboxes = fetch_dict["multiclass_nms3_0.tmp_0"]
+        for bbox in bboxes:
+            if bbox[0] > -1 and bbox[1] > draw_threshold:
+                print(f"{int(bbox[0])} {bbox[1]} "
+                      f"{bbox[2]} {bbox[3]} {bbox[4]} {bbox[5]}")
+                result.append(f"{int(bbox[0])} {bbox[1]} "
+                      f"{bbox[2]} {bbox[3]} {bbox[4]} {bbox[5]}")
+    return result


 def get_model_vars(client_config_dir):
@@ -99,7 +107,7 @@ def get_model_vars(client_config_dir):


 if __name__ == '__main__':
-    url = "127.0.0.1:9997"
+    url = f"127.0.0.1:{args.http_port}"
    logid = 10000
    img_list = get_test_images(args.image_dir, args.image_file)
    feed_vars, fetch_vars = get_model_vars(args.serving_client)

--- a/deploy/serving/cpp/preprocess/serving_client_conf.prototxt
+++ b/deploy/serving/cpp/preprocess/serving_client_conf.prototxt
--- a/deploy/serving/python/postprocess_ops.py
+++ b/deploy/serving/python/postprocess_ops.py
+import cv2
+import math
+import numpy as np
+from preprocess_ops import get_affine_transform
+
+
+class HRNetPostProcess(object):
+    """Convert keypoint heatmaps into keypoint coordinates and scores.
+
+    Args:
+        use_dark (bool): when True, peak locations are refined with
+            ``dark_postprocess``; otherwise each peak is shifted a quarter
+            pixel towards its higher neighbour.
+    """
+
+    def __init__(self, use_dark=True):
+        self.use_dark = use_dark
+
+    def flip_back(self, output_flipped, matched_parts):
+        """Mirror heatmaps along the width axis and swap paired joints.
+
+        Args:
+            output_flipped: numpy.ndarray([batch_size, num_joints, height, width])
+            matched_parts: iterable of (joint_a, joint_b) index pairs to swap
+
+        Returns:
+            The width-reversed view of ``output_flipped`` with each pair of
+            channels exchanged. The swap writes through the view, so the
+            caller's array is modified as well.
+        """
+        assert output_flipped.ndim == 4,\
+                'output_flipped should be [batch_size, num_joints, height, width]'
+
+        output_flipped = output_flipped[:, :, :, ::-1]
+
+        for pair in matched_parts:
+            tmp = output_flipped[:, pair[0], :, :].copy()
+            output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :]
+            output_flipped[:, pair[1], :, :] = tmp
+
+        return output_flipped
+
+    def get_max_preds(self, heatmaps):
+        """get predictions from score maps
+
+        Args:
+            heatmaps: numpy.ndarray([batch_size, num_joints, height, width])
+
+        Returns:
+            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
+                as (x, y); set to 0 where the maximum is not positive
+            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
+        """
+        assert isinstance(heatmaps,
+                          np.ndarray), 'heatmaps should be numpy.ndarray'
+        assert heatmaps.ndim == 4, 'batch_images should be 4-ndim'
+
+        batch_size = heatmaps.shape[0]
+        num_joints = heatmaps.shape[1]
+        width = heatmaps.shape[3]
+        heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1))
+        idx = np.argmax(heatmaps_reshaped, 2)
+        maxvals = np.amax(heatmaps_reshaped, 2)
+
+        maxvals = maxvals.reshape((batch_size, num_joints, 1))
+        idx = idx.reshape((batch_size, num_joints, 1))
+
+        preds = np.tile(idx, (1, 1, 2)).astype(np.float32)
+
+        # Flat index -> (x, y).
+        preds[:, :, 0] = (preds[:, :, 0]) % width
+        preds[:, :, 1] = np.floor((preds[:, :, 1]) / width)
+
+        pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2))
+        pred_mask = pred_mask.astype(np.float32)
+
+        preds *= pred_mask
+
+        return preds, maxvals
+
+    def gaussian_blur(self, heatmap, kernel):
+        """Blur every heatmap in place, keeping each map's original maximum.
+
+        Args:
+            heatmap: numpy.ndarray([batch_size, num_joints, height, width]);
+                modified in place
+            kernel (int): Gaussian kernel size passed to cv2.GaussianBlur
+
+        Returns:
+            The same ``heatmap`` array.
+        """
+        border = (kernel - 1) // 2
+        batch_size = heatmap.shape[0]
+        num_joints = heatmap.shape[1]
+        height = heatmap.shape[2]
+        width = heatmap.shape[3]
+        for i in range(batch_size):
+            for j in range(num_joints):
+                origin_max = np.max(heatmap[i, j])
+                dr = np.zeros((height + 2 * border, width + 2 * border))
+                # Explicit end indices: "border:-border" selects nothing when
+                # border is 0 (kernel == 1).
+                dr[border:border + height, border:border + width] = heatmap[
+                    i, j]
+                dr = cv2.GaussianBlur(dr, (kernel, kernel), 0)
+                heatmap[i, j] = dr[border:border + height, border:border +
+                                   width]
+                blurred_max = np.max(heatmap[i, j])
+                # Skip the rescale for a zero maximum to avoid dividing by 0.
+                if blurred_max != 0:
+                    heatmap[i, j] *= origin_max / blurred_max
+        return heatmap
+
+    def dark_parse(self, hm, coord):
+        """Refine one peak with a second-order Taylor step on ``hm``.
+
+        Args:
+            hm: numpy.ndarray([height, width]), a single heatmap
+            coord: numpy.ndarray([2]), the (x, y) peak; updated in place
+
+        Returns:
+            ``coord``, shifted by -H^-1 * g when the peak lies at least two
+            pixels from every border and the Hessian determinant is non-zero;
+            unchanged otherwise.
+        """
+        heatmap_height = hm.shape[0]
+        heatmap_width = hm.shape[1]
+        px = int(coord[0])
+        py = int(coord[1])
+        if 1 < px < heatmap_width - 2 and 1 < py < heatmap_height - 2:
+            dx = 0.5 * (hm[py][px + 1] - hm[py][px - 1])
+            dy = 0.5 * (hm[py + 1][px] - hm[py - 1][px])
+            dxx = 0.25 * (hm[py][px + 2] - 2 * hm[py][px] + hm[py][px - 2])
+            dxy = 0.25 * (hm[py+1][px+1] - hm[py-1][px+1] - hm[py+1][px-1] \
+                + hm[py-1][px-1])
+            dyy = 0.25 * (
+                hm[py + 2 * 1][px] - 2 * hm[py][px] + hm[py - 2 * 1][px])
+            derivative = np.array([dx, dy])
+            hessian = np.array([[dxx, dxy], [dxy, dyy]])
+            if dxx * dyy - dxy**2 != 0:
+                offset = -np.dot(np.linalg.inv(hessian), derivative)
+                coord += offset
+        return coord
+
+    def dark_postprocess(self, hm, coords, kernelsize):
+        """
+        refer to https://github.com/ilovepose/DarkPose/lib/core/inference.py
+
+        Blurs ``hm`` in place, takes its log (floored at 1e-10) and refines
+        every entry of ``coords`` with ``dark_parse``.
+        """
+        hm = self.gaussian_blur(hm, kernelsize)
+        hm = np.maximum(hm, 1e-10)
+        hm = np.log(hm)
+        for n in range(coords.shape[0]):
+            for p in range(coords.shape[1]):
+                coords[n, p] = self.dark_parse(hm[n][p], coords[n][p])
+        return coords
+
+    def get_final_preds(self, heatmaps, center, scale, kernelsize=3):
+        """the highest heatvalue location with a quarter offset in the
+        direction from the highest response to the second highest response.
+
+        Args:
+            heatmaps (numpy.ndarray): The predicted heatmaps
+            center (numpy.ndarray): The boxes center
+            scale (numpy.ndarray): The scale factor
+            kernelsize (int): Gaussian kernel size used when use_dark is True
+
+        Returns:
+            preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords
+            maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints
+        """
+
+        coords, maxvals = self.get_max_preds(heatmaps)
+
+        heatmap_height = heatmaps.shape[2]
+        heatmap_width = heatmaps.shape[3]
+
+        if self.use_dark:
+            # Note: this blurs ``heatmaps`` in place.
+            coords = self.dark_postprocess(heatmaps, coords, kernelsize)
+        else:
+            for n in range(coords.shape[0]):
+                for p in range(coords.shape[1]):
+                    hm = heatmaps[n][p]
+                    px = int(math.floor(coords[n][p][0] + 0.5))
+                    py = int(math.floor(coords[n][p][1] + 0.5))
+                    if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1:
+                        diff = np.array([
+                            hm[py][px + 1] - hm[py][px - 1],
+                            hm[py + 1][px] - hm[py - 1][px]
+                        ])
+                        coords[n][p] += np.sign(diff) * .25
+        preds = coords.copy()
+
+        # Transform back
+        for i in range(coords.shape[0]):
+            preds[i] = transform_preds(coords[i], center[i], scale[i],
+                                       [heatmap_width, heatmap_height])
+
+        return preds, maxvals
+
+    def __call__(self, output, center, scale):
+        """Return ([x, y, score] per joint, mean score over joints)."""
+        preds, maxvals = self.get_final_preds(output, center, scale)
+        return np.concatenate(
+            (preds, maxvals), axis=-1), np.mean(
+                maxvals, axis=1)
+
+
+def transform_preds(coords, center, scale, output_size):
+    """Map keypoints through the inverse affine transform of a box.
+
+    The transform is get_affine_transform(center, scale * 200, 0,
+    output_size, inv=1). Only the first two columns of each row of ``coords``
+    are transformed; any further columns of the result stay 0.
+    """
+    target_coords = np.zeros(coords.shape)
+    trans = get_affine_transform(center, scale * 200, 0, output_size, inv=1)
+    for idx, point in enumerate(coords):
+        target_coords[idx, 0:2] = affine_transform(point[0:2], trans)
+    return target_coords
+
+
+def affine_transform(pt, t):
+    """Apply the affine matrix ``t`` to the 2-D point ``pt``.
+
+    The point is extended to (x, y, 1), multiplied by ``t`` and the first two
+    components of the product are returned.
+    """
+    homogeneous = np.array([pt[0], pt[1], 1.])
+    return np.dot(t, homogeneous)[:2]
--- a/deploy/serving/python/preprocess_ops.py
+++ b/deploy/serving/python/preprocess_ops.py
@@ -395,6 +395,81 @@ class WarpAffine(object):
        return inp, im_info


+# keypoint preprocess
+def get_warp_matrix(theta, size_input, size_dst, size_target):
+    """Build the 2x3 affine matrix used for unbiased data processing.
+
+    This code is based on
+        https://github.com/open-mmlab/mmpose/blob/master/mmpose/core/post_processing/post_transforms.py
+
+    Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased
+    Data Processing for Human Pose Estimation (CVPR 2020).
+
+    Args:
+        theta (float): Rotation angle in degrees.
+        size_input (np.ndarray): Size of input image [w, h].
+        size_dst (np.ndarray): Size of output image [w, h].
+        size_target (np.ndarray): Size of ROI in input plane [w, h].
+
+    Returns:
+        matrix (np.ndarray): float32 transformation matrix of shape (2, 3).
+    """
+    angle = np.deg2rad(theta)
+    cos_t = np.cos(angle)
+    sin_t = np.sin(angle)
+    scale_x = size_dst[0] / size_target[0]
+    scale_y = size_dst[1] / size_target[1]
+    half_in_w = 0.5 * size_input[0]
+    half_in_h = 0.5 * size_input[1]
+
+    matrix = np.zeros((2, 3), dtype=np.float32)
+    # First row: x component of the rotated, scaled and shifted point.
+    matrix[0, 0] = cos_t * scale_x
+    matrix[0, 1] = -sin_t * scale_x
+    matrix[0, 2] = scale_x * (
+        -half_in_w * cos_t + half_in_h * sin_t + 0.5 * size_target[0])
+    # Second row: y component.
+    matrix[1, 0] = sin_t * scale_y
+    matrix[1, 1] = cos_t * scale_y
+    matrix[1, 2] = scale_y * (
+        -half_in_w * sin_t - half_in_h * cos_t + 0.5 * size_target[1])
+    return matrix
+
+
+class TopDownEvalAffine(object):
+    """Warp an image to the training resolution with an affine transform.
+
+    Args:
+        trainsize (list): [w, h], the standard size used to train
+        use_udp (bool): whether to use Unbiased Data Processing.
+
+    Calling the instance with ``(image, im_info)`` returns the warped image
+    together with ``im_info``, which is handed back unmodified.
+    """
+
+    def __init__(self, trainsize, use_udp=False):
+        self.use_udp = use_udp
+        self.trainsize = trainsize
+
+    def __call__(self, image, im_info):
+        rotation = 0
+        # Reversed im_shape; presumably [h, w] -> [w, h] to match trainsize
+        # (confirm against the op that fills im_info).
+        reversed_shape = im_info['im_shape'][::-1]
+
+        # Fall back to values derived from the image shape when the caller
+        # did not provide an explicit center / scale.
+        if 'center' in im_info:
+            center = im_info['center']
+        else:
+            center = reversed_shape / 2.
+        if 'scale' in im_info:
+            scale = im_info['scale']
+        else:
+            scale = reversed_shape
+
+        if self.use_udp:
+            warp = get_warp_matrix(
+                rotation, center * 2.0,
+                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale)
+        else:
+            warp = get_affine_transform(center, scale, rotation,
+                                        self.trainsize)
+
+        # Both branches warp to the same (w, h) output with linear sampling.
+        out_size = (int(self.trainsize[0]), int(self.trainsize[1]))
+        warped = cv2.warpAffine(image, warp, out_size, flags=cv2.INTER_LINEAR)
+        return warped, im_info
+
+
 class Compose:
    def __init__(self, transforms):
        self.transforms = []

--- a/deploy/serving/python/web_service.py
+++ b/deploy/serving/python/web_service.py
@@ -23,31 +23,16 @@ import base64
 from PIL import Image
 import io
 from preprocess_ops import Compose
+from postprocess_ops import HRNetPostProcess

 from argparse import ArgumentParser, RawDescriptionHelpFormatter
 import yaml

 # Global dictionary
 SUPPORT_MODELS = {
-    'YOLO',
-    'RCNN',
-    'SSD',
-    'Face',
-    'FCOS',
-    'SOLOv2',
-    'TTFNet',
-    'S2ANet',
-    'JDE',
-    'FairMOT',
-    'DeepSORT',
-    'GFL',
-    'PicoDet',
-    'CenterNet',
-    'TOOD',
-    'RetinaNet',
-    'StrongBaseline',
-    'STGCN',
-    'YOLOX',
+    'YOLO', 'RCNN', 'SSD', 'Face', 'FCOS', 'SOLOv2', 'TTFNet', 'S2ANet', 'JDE',
+    'FairMOT', 'DeepSORT', 'GFL', 'PicoDet', 'CenterNet', 'TOOD', 'RetinaNet',
+    'StrongBaseline', 'STGCN', 'YOLOX', 'HRNet'
 }

 GLOBAL_VAR = {}
@@ -182,15 +167,10 @@ class DetectorOp(Op):

    def postprocess(self, input_dicts, fetch_dict, data_id, log_id):
        (_, input_dict), = input_dicts.items()
-        bboxes = fetch_dict["multiclass_nms3_0.tmp_0"]
-        bboxes_num = fetch_dict["multiclass_nms3_0.tmp_2"]
-        draw_threshold = GLOBAL_VAR['model_config'].draw_threshold
-        idx = 0
-        result = {}
-        for k, num in zip(input_dict.keys(), bboxes_num):
-            bbox = bboxes[idx:idx + num]
-            result[k] = self.parse_det_result(
-                bbox, draw_threshold, GLOBAL_VAR['model_config'].label_list)
+        if GLOBAL_VAR['model_config'].arch in ["HRNet"]:
+            result = self.parse_keypoint_result(input_dict, fetch_dict)
+        else:
+            result = self.parse_detection_result(input_dict, fetch_dict)
        return result, None, ""

    def collate_inputs(self, inputs):
@@ -203,13 +183,40 @@ class DetectorOp(Op):
            for k, v in collate_inputs.items() if k in GLOBAL_VAR['feed_vars']
        }

-    def parse_det_result(self, bbox, draw_threshold, label_list):
-        result = []
-        for line in bbox:
-            if line[0] > -1 and line[1] > draw_threshold:
-                result.append(f"{int(line[0])} {line[1]} "
-                              f"{line[2]} {line[3]} {line[4]} {line[5]}")
-        return result
+    def parse_detection_result(self, input_dict, fetch_dict):
+        """Split the batched detector output into per-image result strings.
+
+        Args:
+            input_dict (dict): request inputs; only the keys are used and they
+                become the keys of the returned dict, in iteration order.
+            fetch_dict (dict): model outputs keyed by fetch variable name.
+                ``GLOBAL_VAR['fetch_vars'][0]`` selects the rows of the whole
+                batch and ``GLOBAL_VAR['fetch_vars'][1]`` the number of rows
+                that belong to each image.
+
+        Returns:
+            dict: image key -> list of strings, one per kept row, built from
+            columns 0-5 of the row with column 0 cast to int (presumably
+            class id, score and box corners; confirm against the exported
+            model).
+        """
+        bboxes = fetch_dict[GLOBAL_VAR['fetch_vars'][0]]
+        bboxes_num = fetch_dict[GLOBAL_VAR['fetch_vars'][1]]
+        # Mask outputs are deliberately not read: nothing in the response
+        # uses them, so looking them up could only add a failure mode.
+        draw_threshold = GLOBAL_VAR['model_config'].draw_threshold
+        results = {}
+        idx = 0
+        for img_name, num in zip(input_dict.keys(), bboxes_num):
+            # Rows idx .. idx + num - 1 belong to the current image.
+            results[img_name] = [
+                f"{int(line[0])} {line[1]} "
+                f"{line[2]} {line[3]} {line[4]} {line[5]}"
+                for line in bboxes[idx:idx + num]
+                if line[0] > -1 and line[1] > draw_threshold
+            ]
+            idx += num
+        return results
+
+    def parse_keypoint_result(self, input_dict, fetch_dict):
+        """Turn the heatmap output into keypoints and scores.
+
+        Args:
+            input_dict (dict): request inputs; every value is a base64 encoded
+                image whose width and height are needed for post-processing.
+            fetch_dict (dict): model outputs keyed by fetch variable name.
+
+        Returns:
+            dict: ``{"keypoint": kpts, "scores": scores}`` as produced by
+            ``HRNetPostProcess``.
+        """
+        # NOTE(review): the heatmap name is hard-coded, whereas
+        # parse_detection_result resolves names via GLOBAL_VAR['fetch_vars'];
+        # confirm it matches every exported keypoint model.
+        heatmap = fetch_dict["conv2d_441.tmp_1"]
+        keypoint_postprocess = HRNetPostProcess()
+        im_shape = []
+        for data in input_dict.values():
+            data = base64.b64decode(data.encode('utf8'))
+            # Only the dimensions are needed, so the image is opened but not
+            # converted; this avoids decoding every pixel a second time.
+            with Image.open(io.BytesIO(data)) as img:
+                im_shape.append([img.width, img.height])
+        im_shape = np.array(im_shape)
+        # Center is the rounded image midpoint; scale is the image size
+        # divided by 200.
+        center = np.round(im_shape / 2.)
+        scale = im_shape / 200.
+        kpts, scores = keypoint_postprocess(heatmap, center, scale)
+        results = {"keypoint": kpts, "scores": scores}
+        return results


 class DetectorService(WebService):

--- a/test_tipc/configs/keypoint/tinypose_128x96_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+++ b/test_tipc/configs/keypoint/tinypose_128x96_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+===========================serving_infer_cpp_params===========================
+model_name:tinypose_128x96
+python:python3.7
+filename:null
+##
+--output_dir:./output_inference
+weights:https://paddledet.bj.bcebos.com/models/keypoint/tinypose_128x96.pdparams
+norm_export:tools/export_model.py -c configs/keypoint/tiny_pose/tinypose_128x96.yml --export_serving_model True -o
+quant_export:tools/export_model.py -c configs/keypoint/tiny_pose/tinypose_128x96.yml --slim_config _template_pact --export_serving_model True -o
+fpgm_export:tools/export_model.py -c configs/keypoint/tiny_pose/tinypose_128x96.yml --slim_config _template_fpgm --export_serving_model True -o
+distill_export:null
+export1:null
+export2:null
+kl_quant_export:tools/post_quant.py -c configs/keypoint/tiny_pose/tinypose_128x96.yml --slim_config configs/slim/post_quant/tinypose_128x96_ptq.yml --export_serving_model True -o
+##
+infer_mode:norm
+infer_quant:False
+--model:null
+--op:tinypose_128x96
+--port:9997
+--gpu_ids:null|0
+null:null
+http_client:deploy/serving/cpp/serving_client.py
+--serving_client:null
+--image_file:./demo/hrnet_demo.jpg
+null:null
\ No newline at end of file
--- a/test_tipc/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+++ b/test_tipc/configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+===========================serving_infer_cpp_params===========================
+model_name:mask_rcnn_r50_fpn_1x_coco
+python:python3.7
+filename:null
+##
+--output_dir:./output_inference
+weights:https://paddledet.bj.bcebos.com/models/mask_rcnn_r50_fpn_1x_coco.pdparams
+norm_export:tools/export_model.py -c configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml --export_serving_model True -o
+quant_export:tools/export_model.py -c configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml --slim_config _template_pact --export_serving_model True -o
+fpgm_export:tools/export_model.py -c configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml --slim_config _template_fpgm --export_serving_model True -o
+distill_export:null
+export1:null
+export2:null
+kl_quant_export:tools/post_quant.py -c configs/mask_rcnn/mask_rcnn_r50_fpn_1x_coco.yml --slim_config configs/slim/post_quant/mask_rcnn_r50_fpn_1x_coco_ptq.yml --export_serving_model True -o
+##
+infer_mode:norm
+infer_quant:False
+--model:null
+--op:mask_rcnn_r50_fpn_1x_coco
+--port:9997
+--gpu_ids:null|0
+null:null
+http_client:deploy/serving/cpp/serving_client.py
+--serving_client:null
+--image_file:./demo/000000014439.jpg
+null:null
\ No newline at end of file
--- a/test_tipc/configs/picodet/picodet_lcnet_1_5x_416_coco_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+++ b/test_tipc/configs/picodet/picodet_lcnet_1_5x_416_coco_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+===========================serving_infer_cpp_params===========================
+model_name:picodet_lcnet_1_5x_416_coco
+python:python3.7
+filename:null
+##
+--output_dir:./output_inference
+weights:https://paddledet.bj.bcebos.com/models/picodet_lcnet_1_5x_416_coco.pdparams
+norm_export:tools/export_model.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_5x_416_coco.yml --export_serving_model True -o
+quant_export:tools/export_model.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_5x_416_coco.yml --slim_config _template_pact --export_serving_model True -o
+fpgm_export:tools/export_model.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_5x_416_coco.yml --slim_config _template_fpgm --export_serving_model True -o
+distill_export:null
+export1:null
+export2:null
+kl_quant_export:tools/post_quant.py -c configs/picodet/legacy_model/more_config/picodet_lcnet_1_5x_416_coco.yml --slim_config _template_kl_quant --export_serving_model True -o
+##
+infer_mode:norm
+infer_quant:False
+--model:null
+--op:picodet_lcnet_1_5x_416_coco
+--port:9997
+--gpu_ids:null|0
+null:null
+http_client:deploy/serving/cpp/serving_client.py
+--serving_client:null
+--image_file:./demo/000000014439.jpg
+null:null
\ No newline at end of file
--- a/test_tipc/configs/ppyolo/ppyolo_mbv3_large_coco_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+++ b/test_tipc/configs/ppyolo/ppyolo_mbv3_large_coco_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+===========================serving_infer_cpp_params===========================
+model_name:ppyolo_mbv3_large_coco
+python:python3.7
+filename:null
+##
+--output_dir:./output_inference
+weights:https://paddledet.bj.bcebos.com/models/ppyolo_mbv3_large_coco.pdparams
+norm_export:tools/export_model.py -c configs/ppyolo/ppyolo_mbv3_large_coco.yml --export_serving_model True -o
+quant_export:tools/export_model.py -c configs/ppyolo/ppyolo_mbv3_large_coco.yml --slim_config configs/slim/quant/ppyolo_mbv3_large_qat.yml --export_serving_model True -o
+fpgm_export:tools/export_model.py -c configs/ppyolo/ppyolo_mbv3_large_coco.yml --slim_config configs/slim/prune/ppyolo_mbv3_large_prune_fpgm.yml --export_serving_model True -o
+distill_export:null
+export1:null
+export2:null
+kl_quant_export:tools/post_quant.py -c configs/ppyolo/ppyolo_mbv3_large_coco.yml --slim_config configs/slim/post_quant/ppyolo_mbv3_large_ptq.yml --export_serving_model True -o
+##
+infer_mode:norm
+infer_quant:False
+--model:null
+--op:ppyolo_mbv3_large_coco
+--port:9997
+--gpu_ids:null|0
+null:null
+http_client:deploy/serving/cpp/serving_client.py
+--serving_client:null
+--image_file:./demo/000000014439.jpg
+null:null
\ No newline at end of file
--- a/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+++ b/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+===========================serving_infer_cpp_params===========================
+model_name:ppyoloe_crn_s_300e_coco
+python:python3.7
+filename:null
+##
+--output_dir:./output_inference
+weights:https://paddledet.bj.bcebos.com/models/ppyoloe_crn_s_300e_coco.pdparams
+norm_export:tools/export_model.py -c configs/ppyoloe/ppyoloe_crn_s_300e_coco.yml --export_serving_model True -o
+quant_export:tools/export_model.py -c configs/ppyoloe/ppyoloe_crn_s_300e_coco.yml --slim_config _template_pact --export_serving_model True -o
+fpgm_export:tools/export_model.py -c configs/ppyoloe/ppyoloe_crn_s_300e_coco.yml --slim_config _template_fpgm --export_serving_model True -o
+distill_export:null
+export1:null
+export2:null
+kl_quant_export:tools/post_quant.py -c configs/ppyoloe/ppyoloe_crn_s_300e_coco.yml --slim_config configs/slim/post_quant/ppyoloe_crn_s_300e_coco_ptq.yml --export_serving_model True -o
+##
+infer_mode:norm
+infer_quant:False
+--model:null
+--op:ppyoloe_crn_s_300e_coco
+--port:9997
+--gpu_ids:null|0
+null:null
+http_client:deploy/serving/cpp/serving_client.py
+--serving_client:null
+--image_file:./demo/000000014439.jpg
+null:null
\ No newline at end of file
--- a/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_linux_gpu_fleet_normal_infer_python_linux_gpu_cpu.txt
+++ b/test_tipc/configs/ppyoloe/ppyoloe_crn_s_300e_coco_train_linux_gpu_fleet_normal_infer_python_linux_gpu_cpu.txt
@@ -38,7 +38,7 @@ kl_quant_export:tools/post_quant.py -c configs/ppyoloe/ppyoloe_crn_s_300e_coco.y
 infer_mode:norm
 infer_quant:False
 inference:./deploy/python/infer.py
--device:gpu|cpu
+--device:cpu
 --enable_mkldnn:False
 --cpu_threads:4
 --batch_size:1|2

--- a/test_tipc/configs/yolov3/yolov3_darknet53_270e_coco_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+++ b/test_tipc/configs/yolov3/yolov3_darknet53_270e_coco_model_linux_gpu_normal_normal_serving_cpp_linux_gpu_cpu.txt
+===========================serving_infer_cpp_params===========================
+model_name:yolov3_darknet53_270e_coco
+python:python3.7
+filename:null
+##
+--output_dir:./output_inference
+weights:https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams
+norm_export:tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml --export_serving_model True -o
+quant_export:tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml --slim_config configs/slim/quant/yolov3_darknet_qat.yml --export_serving_model True -o
+fpgm_export:tools/export_model.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml --slim_config configs/slim/prune/yolov3_darknet_prune_fpgm.yml --export_serving_model True -o
+distill_export:null
+export1:null
+export2:null
+kl_quant_export:tools/post_quant.py -c configs/yolov3/yolov3_darknet53_270e_coco.yml --slim_config configs/slim/post_quant/yolov3_darknet53_ptq.yml --export_serving_model True -o
+##
+infer_mode:norm
+infer_quant:False
+--model:null
+--op:yolov3_darknet53_270e_coco
+--port:9997
+--gpu_ids:null|0
+null:null
+http_client:deploy/serving/cpp/serving_client.py
+--serving_client:null
+--image_file:./demo/000000014439.jpg
+null:null
\ No newline at end of file
--- a/test_tipc/test_inference_cpp.sh
+++ b/test_tipc/test_inference_cpp.sh
@@ -188,26 +188,25 @@ Count=0
 IFS="|"
 infer_quant_flag=(${cpp_infer_is_quant_list})
 for infer_mode in ${cpp_infer_mode_list[*]}; do
-    # run export
-    case ${infer_mode} in
-        norm) run_export=${norm_export} ;;
-        quant) run_export=${pact_export} ;;
-        fpgm) run_export=${fpgm_export} ;;
-        distill) run_export=${distill_export} ;;
-        kl_quant) run_export=${kl_quant_export} ;;
-        *) echo "Undefined infer_mode!"; exit 1;
-    esac
-    if [ ${run_export} = "null" ]; then
-        continue
+    if [ ${infer_mode} != "null" ]; then
+        # run export
+        case ${infer_mode} in
+            norm) run_export=${norm_export} ;;
+            quant) run_export=${pact_export} ;;
+            fpgm) run_export=${fpgm_export} ;;
+            distill) run_export=${distill_export} ;;
+            kl_quant) run_export=${kl_quant_export} ;;
+            *) echo "Undefined infer_mode!"; exit 1;
+        esac
+        set_export_weight=$(func_set_params "${export_weight_key}" "${export_weight_value}")
+        set_save_export_dir=$(func_set_params "${save_export_key}" "${save_export_value}")
+        set_filename=$(func_set_params "${filename_key}" "${model_name}")
+        export_cmd="${python} ${run_export} ${set_export_weight} ${set_filename} ${set_save_export_dir} "
+        echo  $export_cmd
+        eval $export_cmd
+        status_export=$?
+        status_check $status_export "${export_cmd}" "${status_log}" "${model_name}"
    fi
-    set_export_weight=$(func_set_params "${export_weight_key}" "${export_weight_value}")
-    set_save_export_dir=$(func_set_params "${save_export_key}" "${save_export_value}")
-    set_filename=$(func_set_params "${filename_key}" "${model_name}")
-    export_cmd="${python} ${run_export} ${set_export_weight} ${set_filename} ${set_save_export_dir} "
-    echo  $export_cmd
-    eval $export_cmd
-    status_export=$?
-    status_check $status_export "${export_cmd}" "${status_log}" "${model_name}"

    #run inference
    save_export_model_dir="${save_export_value}/${model_name}"

--- a/test_tipc/test_serving_infer_cpp.sh
+++ b/test_tipc/test_serving_infer_cpp.sh
+#!/bin/bash
+# TIPC test for C++ serving inference: reads one config file (first script
+# argument) and fills the variables used by the rest of this script.
+# func_parser_key / func_parser_value / func_set_params / status_check come
+# from utils_func.sh; presumably the parsers return the text before / after
+# the ':' of a "key:value" line (confirm in utils_func.sh).
+source test_tipc/utils_func.sh
+
+# Path of the config file to run.
+FILENAME=$1
+
+# parse model_name
+# The file is split on newlines, so lines[N] is the N-th line of the config
+# (lines[0] is the "====...====" header line).
+dataline=$(cat ${FILENAME})
+IFS=$'\n'
+lines=(${dataline})
+model_name=$(func_parser_value "${lines[1]}")
+echo "ppdet serving_infer: ${model_name}"
+python=$(func_parser_value "${lines[2]}")
+filename_key=$(func_parser_key "${lines[3]}")
+filename_value=$(func_parser_value "${lines[3]}")
+
+# parse export params (lines[4] is a "##" separator and is skipped)
+save_export_key=$(func_parser_key "${lines[5]}")
+save_export_value=$(func_parser_value "${lines[5]}")
+export_weight_key=$(func_parser_key "${lines[6]}")
+export_weight_value=$(func_parser_value "${lines[6]}")
+norm_export=$(func_parser_value "${lines[7]}")
+pact_export=$(func_parser_value "${lines[8]}")
+fpgm_export=$(func_parser_value "${lines[9]}")
+distill_export=$(func_parser_value "${lines[10]}")
+export_key1=$(func_parser_key "${lines[11]}")
+export_value1=$(func_parser_value "${lines[11]}")
+export_key2=$(func_parser_key "${lines[12]}")
+export_value2=$(func_parser_value "${lines[12]}")
+kl_quant_export=$(func_parser_value "${lines[13]}")
+
+# parse serving params (lines[14] is a "##" separator and is skipped)
+# Both lists are later split on '|'.
+infer_mode_list=$(func_parser_value "${lines[15]}")
+infer_is_quant_list=$(func_parser_value "${lines[16]}")
+
+model_key=$(func_parser_key "${lines[17]}")
+op_key=$(func_parser_key "${lines[18]}")
+op_value=$(func_parser_value "${lines[18]}")
+port_key=$(func_parser_key "${lines[19]}")
+port_value=$(func_parser_value "${lines[19]}")
+gpu_ids_key=$(func_parser_key "${lines[20]}")
+gpu_ids_value=$(func_parser_value "${lines[20]}")
+web_service_key1=$(func_parser_key "${lines[21]}")
+web_service_value1=$(func_parser_value "${lines[21]}")
+http_client_py=$(func_parser_value "${lines[22]}")
+serving_client_key=$(func_parser_key "${lines[23]}")
+infer_image_key=$(func_parser_key "${lines[24]}")
+infer_image_value=$(func_parser_value "${lines[24]}")
+http_client_key1=$(func_parser_key "${lines[25]}")
+http_client_value1=$(func_parser_value "${lines[25]}")
+
+# All results of this script are appended to one status log.
+LOG_PATH="./test_tipc/output"
+mkdir -p ${LOG_PATH}
+status_log="${LOG_PATH}/results_serving_cpp.log"
+
+function func_serving_inference(){
+    # For every entry of gpu_ids_value: start the serving server in the
+    # background, send one request with the http client, record both results
+    # with status_check and stop the server again.
+    # Arguments:
+    #   $1  python interpreter used for the server and the client
+    #   $2  directory that receives the client log
+    #   $3  pre-formatted server model argument
+    #   $4  pre-formatted client model argument
+    #   $5  pre-formatted image file argument
+    # Also reads the op/port/gpu_ids/web_service/http_client variables parsed
+    # from the config file, plus status_log and model_name.
+    IFS='|'
+    _python=$1
+    _log_path=$2
+    _set_server_model_dir=$3
+    _set_client_model_dir=$4
+    _set_image_file=$5
+
+    set_op=$(func_set_params "${op_key}" "${op_value}")
+    set_port=$(func_set_params "${port_key}" "${port_value}")
+    set_web_service_params1=$(func_set_params "${web_service_key1}" "${web_service_value1}")
+    set_http_client_params1=$(func_set_params "${http_client_key1}" "${http_client_value1}")
+    # inference
+    for gpu_ids in ${gpu_ids_value[*]}; do
+        # "null" selects the CPU log name, anything else the GPU one. The
+        # operand is quoted so an empty entry cannot break the test.
+        if [ "${gpu_ids}" = "null" ];then
+            _save_log_path="${_log_path}/serving_infer_cpp_cpu_batchsize_1.log"
+        else
+            _save_log_path="${_log_path}/serving_infer_cpp_gpu_batchsize_1.log"
+        fi
+        set_gpu_ids=$(func_set_params "${gpu_ids_key}" "${gpu_ids}")
+        # run web service
+        # The command ends with '&', so the status recorded below only covers
+        # launching it, not whether the server came up.
+        web_service_cmd="${_python} -m paddle_serving_server.serve ${_set_server_model_dir} ${set_op} ${set_port} ${set_gpu_ids} ${set_web_service_params1} &"
+        eval $web_service_cmd
+        last_status=${PIPESTATUS[0]}
+        status_check $last_status "${web_service_cmd}" "${status_log}" "${model_name}"
+        # Give the server time to start before the client connects.
+        sleep 5s
+        # run http client
+        http_client_cmd="${_python} ${http_client_py} ${_set_client_model_dir} ${_set_image_file} ${set_http_client_params1} > ${_save_log_path} 2>&1 "
+        eval $http_client_cmd
+        last_status=${PIPESTATUS[0]}
+        status_check $last_status "${http_client_cmd}" "${status_log}" "${model_name}"
+        eval "cat ${_save_log_path}"
+        # Stop every process whose command line mentions the port. The grep
+        # itself is filtered out and kill is skipped when nothing matches.
+        # NOTE(review): matching on the bare port number is broad; confirm no
+        # unrelated process can contain it.
+        ps ux | grep -i ${port_value} | grep -v grep | awk '{print $2}' | xargs -r kill -s 9
+        sleep 2s
+    done
+}
+
+# build paddle_serving_server
+# NOTE(review): the exit status of the build script is not checked, so the
+# "build finished" banner is printed even if the build failed.
+bash deploy/serving/cpp/build_server.sh
+echo "################### build finished! ###################"
+
+# run serving infer
+# Both lists are split on '|'; Count indexes the quant flag that belongs to
+# the current infer mode.
+Count=0
+IFS="|"
+infer_quant_flag=(${infer_is_quant_list})
+for infer_mode in ${infer_mode_list[*]}; do
+    # A mode of "null" skips the export step and goes straight to inference.
+    # NOTE(review): ${infer_mode} is unquoted, so an empty entry makes the
+    # test itself fail with a syntax error.
+    if [ ${infer_mode} != "null" ]; then
+        # run export
+        # Pick the export command that was parsed for this mode.
+        # NOTE(review): a mode whose export entry is "null" (e.g.
+        # distill_export:null) is not filtered out before it is run.
+        case ${infer_mode} in
+            norm) run_export=${norm_export} ;;
+            quant) run_export=${pact_export} ;;
+            fpgm) run_export=${fpgm_export} ;;
+            distill) run_export=${distill_export} ;;
+            kl_quant) run_export=${kl_quant_export} ;;
+            *) echo "Undefined infer_mode!"; exit 1;
+        esac
+        set_export_weight=$(func_set_params "${export_weight_key}" "${export_weight_value}")
+        set_save_export_dir=$(func_set_params "${save_export_key}" "${save_export_value}")
+        set_filename=$(func_set_params "${filename_key}" "${model_name}")
+        export_cmd="${python} ${run_export} ${set_export_weight} ${set_filename} ${set_save_export_dir} "
+        echo  $export_cmd
+        eval $export_cmd
+        status_export=$?
+        status_check $status_export "${export_cmd}" "${status_log}" "${model_name}"
+    fi
+
+    #run inference
+    # Server and client models are read from the serving_server and
+    # serving_client sub-directories of <save_export_value>/<model_name>.
+    set_server_model_dir=$(func_set_params "${model_key}" "${save_export_value}/${model_name}/serving_server")
+    set_client_model_dir=$(func_set_params "${serving_client_key}" "${save_export_value}/${model_name}/serving_client")
+    set_infer_image_file=$(func_set_params "${infer_image_key}" "${infer_image_value}")
+    # NOTE(review): is_quant is assigned but not read anywhere in this script.
+    is_quant=${infer_quant_flag[Count]}
+    func_serving_inference "${python}" "${LOG_PATH}" "${set_server_model_dir}" "${set_client_model_dir}" ${set_infer_image_file}
+    Count=$(($Count + 1))
+done
+eval "unset CUDA_VISIBLE_DEVICES"
--- a/test_tipc/test_serving_infer_python.sh
+++ b/test_tipc/test_serving_infer_python.sh
@@ -94,26 +94,25 @@ Count=0
 IFS="|"
 infer_quant_flag=(${infer_is_quant_list})
 for infer_mode in ${infer_mode_list[*]}; do
-    # run export
-    case ${infer_mode} in
-        norm) run_export=${norm_export} ;;
-        quant) run_export=${pact_export} ;;
-        fpgm) run_export=${fpgm_export} ;;
-        distill) run_export=${distill_export} ;;
-        kl_quant) run_export=${kl_quant_export} ;;
-        *) echo "Undefined infer_mode!"; exit 1;
-    esac
-    if [ ${run_export} = "null" ]; then
-        continue
+    if [ ${infer_mode} != "null" ]; then
+        # run export
+        case ${infer_mode} in
+            norm) run_export=${norm_export} ;;
+            quant) run_export=${pact_export} ;;
+            fpgm) run_export=${fpgm_export} ;;
+            distill) run_export=${distill_export} ;;
+            kl_quant) run_export=${kl_quant_export} ;;
+            *) echo "Undefined infer_mode!"; exit 1;
+        esac
+        set_export_weight=$(func_set_params "${export_weight_key}" "${export_weight_value}")
+        set_save_export_dir=$(func_set_params "${save_export_key}" "${save_export_value}")
+        set_filename=$(func_set_params "${filename_key}" "${model_name}")
+        export_cmd="${python} ${run_export} ${set_export_weight} ${set_filename} ${set_save_export_dir} "
+        echo  $export_cmd
+        eval $export_cmd
+        status_export=$?
+        status_check $status_export "${export_cmd}" "${status_log}" "${model_name}"
    fi
-    set_export_weight=$(func_set_params "${export_weight_key}" "${export_weight_value}")
-    set_save_export_dir=$(func_set_params "${save_export_key}" "${save_export_value}")
-    set_filename=$(func_set_params "${filename_key}" "${model_name}")
-    export_cmd="${python} ${run_export} ${set_export_weight} ${set_filename} ${set_save_export_dir} "
-    echo  $export_cmd
-    eval $export_cmd
-    status_export=$?
-    status_check $status_export "${export_cmd}" "${status_log}" "${model_name}"

    #run inference
    set_export_model_dir=$(func_set_params "${model_dir_key}" "${save_export_value}/${model_name}")