Commit 4de287c6 authored by TeslaZhao

Fix Paddle 2.3 compile error

Parent 48305205
@@ -171,11 +171,23 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib")
LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib")
LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib")
LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib)
if (NOT WITH_MKLML)
ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
endif()
ADD_LIBRARY(paddle2onnx STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so)
ADD_LIBRARY(onnxruntime STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET onnxruntime PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so.1.10.0)
ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so)
if (WITH_ASCEND_CL)
......
@@ -49,6 +49,20 @@ message EngineDesc {
optional bool gpu_multi_stream = 20;
optional bool use_ascend_cl = 21;
/*
 * "gpu_memory_mb": initial GPU memory pool size in MB, passed to
 * config.EnableUseGpu()
 * "cpu_math_thread_num": number of CPU math library threads, set via
 * config.SetCpuMathLibraryNumThreads()
 * "trt_workspace_size": TensorRT workspace size in bytes, passed to
 * config.EnableTensorRtEngine(); default is 1 << 25
 * "trt_use_static": if true, serialize the TensorRT optimization
 * information to disk and load it from there on later runs.
 */
optional int32 gpu_memory_mb = 22 [ default = 100 ];
optional int32 cpu_math_thread_num = 23 [ default = 1 ];
optional int32 trt_workspace_size = 24 [ default = 33554432 ];
optional bool trt_use_static = 25 [ default = false ];
/*
* "runtime_thread_num": n == 0 means don`t use Asynchronous task scheduling
* mode.
......
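For reference, a minimal sketch of how the four new fields above map onto the Paddle Inference Python API (the Python `Config` methods mirror the C++ `config.*` calls named in the comment; the model file names are hypothetical):

```python
import paddle.inference as paddle_infer

# Hypothetical model files; substitute your own exported inference model.
config = paddle_infer.Config("model.pdmodel", "model.pdiparams")

config.set_cpu_math_library_num_threads(1)  # cpu_math_thread_num
config.enable_use_gpu(100, 0)               # gpu_memory_mb, GPU device id
config.enable_tensorrt_engine(
    workspace_size=1 << 25,                 # trt_workspace_size
    max_batch_size=1,
    min_subgraph_size=3,
    precision_mode=paddle_infer.PrecisionType.Float32,
    use_static=False,                       # trt_use_static
    use_calib_mode=False)
```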
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_remote_op.h"
#include <iostream>
#include <sstream>
#include "core/util/include/timer.h"
// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8
// will support: FLOAT16
#define BRPC_MAX_BODY_SIZE (2ULL * 1024 * 1024 * 1024)
const std::string LOAD_BALANCE = "";
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::Timer;
using baidu::paddle_serving::predictor::general_model::Tensor;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
brpc::Channel BRPCStub::brpc_channels[MAX_MP_NUM];
brpc::ChannelOptions BRPCStub::options;
std::atomic<int> BRPCStub::inited(0);
int GeneralRemoteOp::inference() {
LOG(INFO) << "Enter GeneralRemoteOp:inference()";
int expected = 0;
std::vector<std::string> op_address = address();
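// Only the first thread to swing inited from 0 to 1 performs the one-time
// bRPC channel setup; other threads spin below until it completes.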
if (BRPCStub::inited.compare_exchange_strong(expected, 1)) {
BRPCStub::options.protocol = "baidu_std";
BRPCStub::options.connection_type = "short";
BRPCStub::options.timeout_ms = 80000 /*milliseconds*/;
BRPCStub::options.max_retry = 100;
brpc::fLU64::FLAGS_max_body_size = BRPC_MAX_BODY_SIZE;
LOG(ERROR) << "address size: " << op_address.size();
for (size_t i = 0; i < op_address.size(); ++i) {
LOG(INFO) << i + 1 << " address is " << op_address[i].c_str();
BRPCStub::brpc_channels[i].Init(
op_address[i].c_str(), LOAD_BALANCE.c_str(), &BRPCStub::options);
}
BRPCStub::inited++;
}
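// Busy-wait until the initializing thread has bumped inited to 2.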
while (BRPCStub::inited < 2) {
}
Timer timeline;
int64_t start = timeline.TimeStampUS();
timeline.Start();
VLOG(2) << "Going to run Remote inference";
Request* req = (Request*)(get_request_message());
Response* res = mutable_data<Response>();
uint64_t log_id = req->log_id();
brpc::Controller brpc_controllers[MAX_MP_NUM];
brpc::CallId brpc_callids[MAX_MP_NUM];
Response brpc_response_tmp;
size_t i = 0;
// Init BRPC controllers, callids and stubs
for (i = 0; i < op_address.size(); ++i) {
brpc_controllers[i].set_log_id(log_id);
brpc_callids[i] = brpc_controllers[i].call_id();
}
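// Fan the request out to every remote address; only the first reply is
// written into res, later replies land in a scratch response.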
for (i = 0; i < op_address.size(); ++i) {
baidu::paddle_serving::predictor::general_model::GeneralModelService_Stub
stub(&BRPCStub::brpc_channels[i]);
LOG(INFO) << "Sended 1 request to Slave Sever " << i;
if (0 == i) {
stub.inference(&brpc_controllers[i], req, res, brpc::DoNothing());
continue;
}
stub.inference(
&brpc_controllers[i], req, &brpc_response_tmp, brpc::DoNothing());
}
LOG(INFO) << "All request are sended, waiting for all responses.";
// Wait RPC done.
for (i = 0; i < op_address.size(); ++i) {
brpc::Join(brpc_callids[i]);
}
// Print RPC Results
for (i = 0; i < op_address.size(); ++i) {
LOG(INFO) << "brpc_controller_" << i
<< " status:" << brpc_controllers[i].Failed();
if (!brpc_controllers[i].Failed()) {
LOG(INFO) << "Received response from "
<< brpc_controllers[i].remote_side()
<< " Latency=" << brpc_controllers[i].latency_us() << "us";
} else {
LOG(ERROR) << brpc_controllers[i].ErrorText();
}
}
LOG(INFO) << "All brpc remote stubs joined done.";
res->set_log_id(log_id);
res->set_profile_server(req->profile_server());
int64_t end = timeline.TimeStampUS();
res->add_profile_time(start);
res->add_profile_time(end);
return 0;
}
DEFINE_OP(GeneralRemoteOp);
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <brpc/channel.h>
#include <butil/logging.h>
#include <butil/time.h>
#include <gflags/gflags.h>
#include <atomic>
#include <memory>
#include <string>
#include <vector>
#include "core/general-server/general_model_service.pb.h"
#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/general_model_service.pb.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/predictor_sdk.h"
#define MAX_MP_NUM 16
namespace baidu {
namespace paddle_serving {
namespace serving {
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;
class GeneralRemoteOp
: public baidu::paddle_serving::predictor::OpWithChannel<
baidu::paddle_serving::predictor::general_model::Response> {
public:
DECLARE_OP(GeneralRemoteOp);
int inference();
};
class BRPCStub {
public:
static brpc::Channel brpc_channels[MAX_MP_NUM];
static brpc::ChannelOptions options;
static std::atomic<int> inited;
};
} // namespace serving
} // namespace paddle_serving
} // namespace baidu
@@ -20,7 +20,7 @@ namespace predictor {
DEFINE_bool(use_parallel_infer_service, false, "");
DEFINE_int32(el_log_level, 16, "");
-DEFINE_int32(idle_timeout_s, 16, "");
+DEFINE_int32(idle_timeout_s, 80, "");
DEFINE_int32(port, 8010, "");
DEFINE_string(workflow_path, "./conf", "");
DEFINE_string(workflow_file, "workflow.prototxt", "");
......
@@ -341,7 +341,7 @@ bool TaskExecutor<TaskT>::move_task_to_batch(
LOG(INFO) << "Hit auto padding, merge " << padding_task_count
<< " tasks into 1 batch.";
}
LOG(INFO) << "Number of tasks remaining in _task_queue is"
LOG(INFO) << "Number of tasks remaining in _task_queue is "
<< _task_queue.size();
return true;
}
......
@@ -241,10 +241,10 @@ class PaddleInferenceEngine : public EngineCore {
}
config.SwitchSpecifyInputNames(true);
-    config.SetCpuMathLibraryNumThreads(1);
+    config.SetCpuMathLibraryNumThreads(engine_conf.cpu_math_thread_num());
if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) {
// 2000MB GPU memory
-      config.EnableUseGpu(50, gpu_id);
+      config.EnableUseGpu(engine_conf.gpu_memory_mb(), gpu_id);
if (engine_conf.has_gpu_multi_stream() &&
engine_conf.gpu_multi_stream()) {
config.EnableGpuMultiStream();
@@ -267,17 +267,17 @@ class PaddleInferenceEngine : public EngineCore {
if (engine_conf.has_use_trt() && engine_conf.use_trt()) {
config.SwitchIrOptim(true);
if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) {
-        config.EnableUseGpu(50, gpu_id);
+        config.EnableUseGpu(engine_conf.gpu_memory_mb(), gpu_id);
if (engine_conf.has_gpu_multi_stream() &&
engine_conf.gpu_multi_stream()) {
config.EnableGpuMultiStream();
}
}
-      config.EnableTensorRtEngine(1 << 25,
+      config.EnableTensorRtEngine(engine_conf.trt_workspace_size(),
max_batch,
local_min_subgraph_size,
precision_type,
-                                  false,
+                                  engine_conf.trt_use_static(),
FLAGS_use_calib);
std::map<std::string, std::vector<int>> min_input_shape;
std::map<std::string, std::vector<int>> max_input_shape;
@@ -413,7 +413,11 @@ class PaddleInferenceEngine : public EngineCore {
<< ", use_ascend_cl: " << engine_conf.has_use_ascend_cl()
<< ", use_xpu: " << engine_conf.use_xpu()
<< ", enable_memory_optimization: "
-            << engine_conf.enable_memory_optimization();
+            << engine_conf.enable_memory_optimization()
+            << ", gpu_memory_mb: " << engine_conf.gpu_memory_mb()
+            << ", cpu_math_thread_num: " << engine_conf.cpu_math_thread_num()
+            << ", trt_workspace_size: " << engine_conf.trt_workspace_size()
+            << ", trt_use_static: " << engine_conf.trt_use_static();
VLOG(2) << "create paddle predictor sucess, path: " << model_path;
return 0;
......
@@ -280,6 +280,27 @@ def serve_args():
default="",
nargs="+",
help="min_subgraph_size")
parser.add_argument(
"--gpu_memory_mb",
type=int,
default=50,
help="Initially allocate GPU storage size")
parser.add_argument(
"--cpu_math_thread_num",
type=int,
default=1,
help="Initialize the number of CPU computing threads")
parser.add_argument(
"--trt_workspace_size",
type=int,
default=33554432,
help="Initialize allocation 1 << 25 GPU storage size")
parser.add_argument(
"--trt_use_static",
default=False,
action="store_true",
help="Initialize TRT with static data")
return parser.parse_args()
@@ -396,10 +417,14 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi
server.set_dist_endpoints(args.dist_endpoints.split(","))
server.set_dist_subgraph_index(args.dist_subgraph_index)
server.set_min_subgraph_size(args.min_subgraph_size)
server.set_gpu_memory_mb(args.gpu_memory_mb)
server.set_cpu_math_thread_num(args.cpu_math_thread_num)
if args.use_trt and device == "gpu":
server.set_trt()
server.set_ir_optimize(True)
server.set_trt_workspace_size(args.trt_workspace_size)
server.set_trt_use_static(args.trt_use_static)
if is_ocr:
info = set_ocr_dynamic_shape_info()
server.set_trt_dynamic_shape_info(info)
......
@@ -119,6 +119,10 @@ class Server(object):
self.dist_master_serving = False
self.min_subgraph_size = []
self.trt_dynamic_shape_info = []
self.gpu_memory_mb = 50
self.cpu_math_thread_num = 1
self.trt_workspace_size = 33554432 # 1 << 25
self.trt_use_static = False
def get_fetch_list(self, infer_node_idx=-1):
fetch_names = [
@@ -289,6 +293,18 @@ class Server(object):
def set_trt_dynamic_shape_info(self, info):
self.trt_dynamic_shape_info = info
def set_gpu_memory_mb(self, gpu_memory_mb):
self.gpu_memory_mb = gpu_memory_mb
def set_cpu_math_thread_num(self, cpu_math_thread_num):
self.cpu_math_thread_num = cpu_math_thread_num
def set_trt_workspace_size(self, trt_workspace_size):
self.trt_workspace_size = trt_workspace_size
def set_trt_use_static(self, trt_use_static):
self.trt_use_static = trt_use_static
def _prepare_engine(self, model_config_paths, device, use_encryption_model):
self.device = device
if self.model_toolkit_conf == None:
@@ -342,6 +358,10 @@ class Server(object):
engine.use_xpu = self.use_xpu
engine.use_ascend_cl = self.use_ascend_cl
engine.use_gpu = False
engine.gpu_memory_mb = self.gpu_memory_mb
engine.cpu_math_thread_num = self.cpu_math_thread_num
engine.trt_workspace_size = self.trt_workspace_size
engine.trt_use_static = self.trt_use_static
# use distributed model.
if self.dist_subgraph_index >= 0:
......