PaddlePaddle / Serving
Commit 4de287c6
Authored May 14, 2022 by TeslaZhao
Parent commit: 48305205

Fix Paddle 2.3 compile error

Showing 9 changed files with 267 additions and 8 deletions (+267 / -8):
cmake/paddlepaddle.cmake                            +12  -0
core/configure/proto/server_configure.proto         +14  -0
core/general-server/op/general_remote_op.cpp        +126 -0
core/general-server/op/general_remote_op.h          +58  -0
core/predictor/common/constant.cpp                  +1   -1
core/predictor/framework/bsf-inl.h                  +1   -1
paddle_inference/paddle/include/paddle_engine.h     +10  -6
python/paddle_serving_server/serve.py               +25  -0
python/paddle_serving_server/server.py              +20  -0
cmake/paddlepaddle.cmake

@@ -171,11 +171,23 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib")
LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)

SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib")
LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib)

SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib")
LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib)

if (NOT WITH_MKLML)
  ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
  SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION
               ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
endif()

ADD_LIBRARY(paddle2onnx STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION
             ${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so)

ADD_LIBRARY(onnxruntime STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET onnxruntime PROPERTY IMPORTED_LOCATION
             ${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so.1.10.0)

ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION
             ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)

if (WITH_ASCEND_CL)
core/configure/proto/server_configure.proto

@@ -49,6 +49,20 @@ message EngineDesc {
optional bool gpu_multi_stream = 20;
optional bool use_ascend_cl = 21;

/*
 * "gpu_memory_mb": GPU memory to allocate, applied via config.EnableUseGpu()
 * "cpu_math_thread_num": number of CPU math threads, set via
 *   config.SetCpuMathLibraryNumThreads()
 * "trt_workspace_size": TensorRT workspace size, set via
 *   config.EnableTensorRtEngine(); 1 << 25 by default
 * "trt_use_static": if true, serialize the TensorRT optimization
 *   information to disk and load it back from disk.
 */
optional int32 gpu_memory_mb = 22 [default = 100];
optional int32 cpu_math_thread_num = 23 [default = 1];
optional int32 trt_workspace_size = 24 [default = 33554432];
optional bool trt_use_static = 25 [default = false];

/*
 * "runtime_thread_num": n == 0 means don't use Asynchronous task scheduling
 * mode.
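The comment block above ties each new EngineDesc field to a Paddle Inference Config call. A minimal sketch of that mapping, assuming the Python paddle.inference API rather than the C++ calls named in the comment (model files and GPU id are placeholders, not part of this commit):

from paddle.inference import Config, PrecisionType

# Hedged sketch: the four new EngineDesc fields and the Config calls they feed.
# Model files and device id are placeholders.
config = Config("model.pdmodel", "model.pdiparams")
config.enable_use_gpu(100, 0)               # gpu_memory_mb (proto default 100), GPU id 0
config.set_cpu_math_library_num_threads(1)  # cpu_math_thread_num (default 1)
config.enable_tensorrt_engine(
    workspace_size=1 << 25,                 # trt_workspace_size (default 33554432 bytes)
    max_batch_size=1,
    min_subgraph_size=3,
    precision_mode=PrecisionType.Float32,
    use_static=False,                       # trt_use_static: cache serialized TRT engines on disk
    use_calib_mode=False)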
core/general-server/op/general_remote_op.cpp (new file, mode 100644)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "core/general-server/op/general_remote_op.h"
#include <iostream>
#include <sstream>
#include "core/util/include/timer.h"

// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
// will support: FLOAT16
#define BRPC_MAX_BODY_SIZE 2 * 1024 * 1024 * 1024
const std::string LODABALANCE = "";

namespace baidu {
namespace paddle_serving {
namespace serving {

using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;

brpc::Channel BRPCStub::brpc_channels[MAX_MP_NUM];
brpc::ChannelOptions BRPCStub::options;
std::atomic<int> BRPCStub::inited(0);

int GeneralRemoteOp::inference() {
  LOG(INFO) << "Enter GeneralRemoteOp:inference()";
  int expected = 0;
  std::vector<std::string> op_address = address();
  if (BRPCStub::inited.compare_exchange_strong(expected, 1)) {
    BRPCStub::options.protocol = "baidu_std";
    BRPCStub::options.connection_type = "short";
    BRPCStub::options.timeout_ms = 80000 /*milliseconds*/;
    BRPCStub::options.max_retry = 100;
    brpc::fLU64::FLAGS_max_body_size = BRPC_MAX_BODY_SIZE;

    LOG(ERROR) << "address size: " << op_address.size();
    for (int i = 0; i < op_address.size(); ++i) {
      LOG(INFO) << i + 1 << " address is " << op_address[i].c_str();
      BRPCStub::brpc_channels[i].Init(
          op_address[i].c_str(), LODABALANCE.c_str(), &BRPCStub::options);
    }
    BRPCStub::inited++;
  }
  while (BRPCStub::inited < 2) {
  }

  Timer timeline;
  int64_t start = timeline.TimeStampUS();
  timeline.Start();
  VLOG(2) << "Going to run Remote inference";

  Request* req = (Request*)(get_request_message());
  Response* res = mutable_data<Response>();
  uint64_t log_id = req->log_id();

  brpc::Controller brpc_controllers[MAX_MP_NUM];
  brpc::CallId brpc_callids[MAX_MP_NUM];
  Response brpc_response_tmp;
  size_t i = 0;
  // Init BRPC controllers, callids and stubs
  for (i = 0; i < op_address.size(); ++i) {
    brpc_controllers[i].set_log_id(log_id);
    brpc_callids[i] = brpc_controllers[i].call_id();
  }
  for (i = 0; i < op_address.size(); ++i) {
    baidu::paddle_serving::predictor::general_model::GeneralModelService_Stub
        stub(&BRPCStub::brpc_channels[i]);
    LOG(INFO) << "Sended 1 request to Slave Sever " << i;
    if (0 == i) {
      stub.inference(&brpc_controllers[i], req, res, brpc::DoNothing());
      continue;
    }
    stub.inference(
        &brpc_controllers[i], req, &brpc_response_tmp, brpc::DoNothing());
  }
  LOG(INFO) << "All request are sended, waiting for all responses.";

  // Wait RPC done.
  for (i = 0; i < op_address.size(); ++i) {
    brpc::Join(brpc_callids[i]);
  }

  // Print RPC Results
  for (i = 0; i < op_address.size(); ++i) {
    LOG(INFO) << "brpc_controller_" << i
              << " status:" << brpc_controllers[i].Failed();
    if (!brpc_controllers[i].Failed()) {
      LOG(INFO) << "Received response from "
                << brpc_controllers[i].remote_side()
                << " Latency=" << brpc_controllers[i].latency_us() << "us";
    } else {
      LOG(ERROR) << brpc_controllers[i].ErrorText();
    }
  }
  LOG(INFO) << "All brpc remote stubs joined done.";

  res->set_log_id(log_id);
  res->set_profile_server(req->profile_server());
  int64_t end = timeline.TimeStampUS();
  res->add_profile_time(start);
  res->add_profile_time(end);

  return 0;
}

DEFINE_OP(GeneralRemoteOp);

}  // namespace serving
}  // namespace paddle_serving
}  // namespace baidu
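In short, GeneralRemoteOp broadcasts the incoming Request over baidu_std BRPC short connections to every endpoint returned by address(), fires the calls asynchronously with brpc::DoNothing() callbacks, waits for all of them via brpc::Join(), and keeps only the first server's Response as the op's output (replies from the other servers land in the throwaway brpc_response_tmp); the log id and profiling timestamps are then copied onto the response.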
core/general-server/op/general_remote_op.h (new file, mode 100644)
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once
#include <brpc/channel.h>
#include <butil/logging.h>
#include <butil/time.h>
#include <gflags/gflags.h>
#include <atomic>
#include <memory>
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/general_model_service.pb.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/predictor_sdk.h"

#define MAX_MP_NUM 16

namespace baidu {
namespace paddle_serving {
namespace serving {

using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;

class GeneralRemoteOp
    : public baidu::paddle_serving::predictor::OpWithChannel<
          baidu::paddle_serving::predictor::general_model::Response> {
 public:
  DECLARE_OP(GeneralRemoteOp);
  int inference();
};

class BRPCStub {
 public:
  static brpc::Channel brpc_channels[MAX_MP_NUM];
  static brpc::ChannelOptions options;
  static std::atomic<int> inited;
};

}  // namespace serving
}  // namespace paddle_serving
}  // namespace baidu
core/predictor/common/constant.cpp

@@ -20,7 +20,7 @@ namespace predictor {
 DEFINE_bool(use_parallel_infer_service, false, "");
 DEFINE_int32(el_log_level, 16, "");
-DEFINE_int32(idle_timeout_s, 16, "");
+DEFINE_int32(idle_timeout_s, 80, "");
 DEFINE_int32(port, 8010, "");
 DEFINE_string(workflow_path, "./conf", "");
 DEFINE_string(workflow_file, "workflow.prototxt", "");
core/predictor/framework/bsf-inl.h

@@ -341,7 +341,7 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
     LOG(INFO) << "Hit auto padding, merge " << padding_task_count
               << " tasks into 1 batch.";
   }
-  LOG(INFO) << "Number of tasks remaining in _task_queue is"
+  LOG(INFO) << "Number of tasks remaining in _task_queue is "
             << _task_queue.size();
   return true;
 }
paddle_inference/paddle/include/paddle_engine.h

@@ -241,10 +241,10 @@ class PaddleInferenceEngine : public EngineCore {
     }
     config.SwitchSpecifyInputNames(true);
-    config.SetCpuMathLibraryNumThreads(1);
+    config.SetCpuMathLibraryNumThreads(engine_conf.cpu_math_thread_num());
     if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
       // 2000MB GPU memory
-      config.EnableUseGpu(50, gpu_id);
+      config.EnableUseGpu(engine_conf.gpu_memory_mb(), gpu_id);
       if (engine_conf.has_gpu_multi_stream() && engine_conf.gpu_multi_stream()) {
         config.EnableGpuMultiStream();
@@ -267,17 +267,17 @@ class PaddleInferenceEngine : public EngineCore {
     if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
       config.SwitchIrOptim(true);
       if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
-        config.EnableUseGpu(50, gpu_id);
+        config.EnableUseGpu(engine_conf.gpu_memory_mb(), gpu_id);
         if (engine_conf.has_gpu_multi_stream() && engine_conf.gpu_multi_stream()) {
           config.EnableGpuMultiStream();
         }
       }
-      config.EnableTensorRtEngine(1 << 25,
+      config.EnableTensorRtEngine(engine_conf.trt_workspace_size(),
                                   max_batch,
                                   local_min_subgraph_size,
                                   precision_type,
-                                  false,
+                                  engine_conf.trt_use_static(),
                                   FLAGS_use_calib);
       std::map<std::string, std::vector<int>> min_input_shape;
       std::map<std::string, std::vector<int>> max_input_shape;
@@ -413,7 +413,11 @@ class PaddleInferenceEngine : public EngineCore {
             << ", use_ascend_cl: " << engine_conf.has_use_ascend_cl()
             << ", use_xpu: " << engine_conf.use_xpu()
             << ", enable_memory_optimization: "
-            << engine_conf.enable_memory_optimization();
+            << engine_conf.enable_memory_optimization()
+            << ", gpu_memory_mb: " << engine_conf.gpu_memory_mb()
+            << ", cpu_math_thread_num: " << engine_conf.cpu_math_thread_num()
+            << ", trt_workspace_size: " << engine_conf.trt_workspace_size()
+            << ", trt_use_static: " << engine_conf.trt_use_static();

     VLOG(2) << "create paddle predictor sucess, path: " << model_path;
     return 0;
python/paddle_serving_server/serve.py

@@ -280,6 +280,27 @@ def serve_args():
        default="",
        nargs="+",
        help="min_subgraph_size")
    parser.add_argument(
        "--gpu_memory_mb",
        type=int,
        default=50,
        help="Initially allocated GPU memory size, in MB")
    parser.add_argument(
        "--cpu_math_thread_num",
        type=int,
        default=1,
        help="Initial number of CPU math library threads")
    parser.add_argument(
        "--trt_workspace_size",
        type=int,
        default=33554432,
        help="Initial TensorRT workspace size in bytes (1 << 25 by default)")
    parser.add_argument(
        "--trt_use_static",
        default=False,
        action="store_true",
        help="Initialize TRT with static data")
    return parser.parse_args()
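For context, a hypothetical launch line exercising the new flags (the model directory, port, and values are placeholders, not taken from this commit) could be:

python3 -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --gpu_memory_mb 500 --cpu_math_thread_num 4 --trt_workspace_size 67108864 --trt_use_static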
@@ -396,10 +417,14 @@ def start_gpu_card_model(gpu_mode, port, args):  # pylint: disable=doc-string-missing
        server.set_dist_endpoints(args.dist_endpoints.split(","))
        server.set_dist_subgraph_index(args.dist_subgraph_index)
    server.set_min_subgraph_size(args.min_subgraph_size)
    server.set_gpu_memory_mb(args.gpu_memory_mb)
    server.set_cpu_math_thread_num(args.cpu_math_thread_num)

    if args.use_trt and device == "gpu":
        server.set_trt()
        server.set_ir_optimize(True)
        server.set_trt_workspace_size(args.trt_workspace_size)
        server.set_trt_use_static(args.trt_use_static)
        if is_ocr:
            info = set_ocr_dynamic_shape_info()
            server.set_trt_dynamic_shape_info(info)
python/paddle_serving_server/server.py

@@ -119,6 +119,10 @@ class Server(object):
        self.dist_master_serving = False
        self.min_subgraph_size = []
        self.trt_dynamic_shape_info = []
        self.gpu_memory_mb = 50
        self.cpu_math_thread_num = 1
        self.trt_workspace_size = 33554432  # 1 << 25
        self.trt_use_static = False

    def get_fetch_list(self, infer_node_idx=-1):
        fetch_names = [
@@ -289,6 +293,18 @@ class Server(object):
    def set_trt_dynamic_shape_info(self, info):
        self.trt_dynamic_shape_info = info

    def set_gpu_memory_mb(self, gpu_memory_mb):
        self.gpu_memory_mb = gpu_memory_mb

    def set_cpu_math_thread_num(self, cpu_math_thread_num):
        self.cpu_math_thread_num = cpu_math_thread_num

    def set_trt_workspace_size(self, trt_workspace_size):
        self.trt_workspace_size = trt_workspace_size

    def set_trt_use_static(self, trt_use_static):
        self.trt_use_static = trt_use_static

    def _prepare_engine(self, model_config_paths, device, use_encryption_model):
        self.device = device
        if self.model_toolkit_conf == None:
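A minimal usage sketch of the new setters (not part of the commit; the import path follows Paddle Serving's documented Python API, and all values are illustrative assumptions):

from paddle_serving_server import Server

# Hedged sketch: driving the new engine knobs programmatically instead of
# through the serve.py flags above. Values are illustrative, not defaults.
server = Server()
server.set_gpu_memory_mb(500)            # initial GPU memory pool, in MB
server.set_cpu_math_thread_num(4)        # CPU math library thread count
server.set_trt_workspace_size(1 << 26)   # TensorRT workspace size, in bytes
server.set_trt_use_static(True)          # reuse serialized TRT optimization info from disk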
@@ -342,6 +358,10 @@ class Server(object):
            engine.use_xpu = self.use_xpu
            engine.use_ascend_cl = self.use_ascend_cl
            engine.use_gpu = False
            engine.gpu_memory_mb = self.gpu_memory_mb
            engine.cpu_math_thread_num = self.cpu_math_thread_num
            engine.trt_workspace_size = self.trt_workspace_size
            engine.trt_use_static = self.trt_use_static

            # use distributed model.
            if self.dist_subgraph_index >= 0: