Unverified commit a205bab9, authored by shaohua.zhang, committed by GitHub

Merge pull request #2 from PaddlePaddle/develop

update-11-13
......@@ -54,6 +54,7 @@ option(SERVER "Compile Paddle Serving Server" OFF)
option(APP "Compile Paddle Serving App package" OFF)
option(WITH_ELASTIC_CTR "Compile ELASITC-CTR solution" OFF)
option(PACK "Compile for whl" OFF)
option(WITH_TRT "Compile Paddle Serving with TRT" OFF)
set(WITH_MKLML ${WITH_MKL})
if (NOT DEFINED WITH_MKLDNN)
......
......@@ -45,9 +45,10 @@ nvidia-docker exec -it test bash
```
```shell
pip install paddle-serving-client
pip install paddle-serving-server # CPU
pip install paddle-serving-server-gpu # GPU
pip install paddle-serving-client==0.3.2
pip install paddle-serving-server==0.3.2 # CPU
pip install paddle-serving-server-gpu==0.3.2.post9 # GPU with CUDA9.0
pip install paddle-serving-server-gpu==0.3.2.post10 # GPU with CUDA10.0
```
You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source; add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the pip command) to speed up the download.
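For example, a mirror-accelerated install of the packages above might look like this (a sketch using the Tsinghua source mentioned above):
```shell
pip install paddle-serving-client==0.3.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
pip install paddle-serving-server==0.3.2 -i https://pypi.tuna.tsinghua.edu.cn/simple
```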
......@@ -127,6 +128,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `mem_optim_off` | - | - | Disable memory / graphic memory optimization |
| `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |
Here, we use `curl` to send an HTTP POST request to the service we just started. Users can also use any Python library to send HTTP POST requests, e.g., [requests](https://requests.readthedocs.io/en/master/).
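As a sketch, an equivalent hand-written request could look like the following; it assumes the web service was started with `--name uci` on port 9292 and that the uci_housing model feeds `x` (13 floats) and fetches `price`, so adjust the URL and payload to your own launch command:
```shell
curl -H "Content-Type: application/json" -X POST \
  -d '{"feed": [{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]}], "fetch": ["price"]}' \
  http://127.0.0.1:9292/uci/prediction
```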
</center>
......
......@@ -47,9 +47,10 @@ nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/se
nvidia-docker exec -it test bash
```
```shell
pip install paddle-serving-client
pip install paddle-serving-server # CPU
pip install paddle-serving-server-gpu # GPU
pip install paddle-serving-client==0.3.2
pip install paddle-serving-server==0.3.2 # CPU
pip install paddle-serving-server-gpu==0.3.2.post9 # GPU with CUDA9.0
pip install paddle-serving-server-gpu==0.3.2.post10 # GPU with CUDA10.0
```
You may need to use a domestic mirror source (for example the Tsinghua source; add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the pip command) to speed up the download.
......@@ -123,6 +124,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `mem_optim_off` | - | - | Disable memory optimization |
| `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |
Here we use the `curl` command to send an HTTP POST request to the service we just started. Users can also call a Python library to send HTTP POST requests; see the [requests](https://requests.readthedocs.io/en/master/) documentation.
</center>
......
......@@ -31,10 +31,14 @@ message( "WITH_GPU = ${WITH_GPU}")
# Paddle Version should be one of:
# latest: latest develop build
# version number like 1.5.2
SET(PADDLE_VERSION "1.7.2")
SET(PADDLE_VERSION "1.8.4")
if (WITH_GPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
if (WITH_TRT)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6")
else()
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
endif()
else()
if (WITH_AVX)
if (WITH_MKLML)
......@@ -50,21 +54,38 @@ endif()
SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/fluid_inference.tgz")
MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}")
if (WITH_GPU OR WITH_MKLML)
ExternalProject_Add(
"extern_paddle"
${EXTERNAL_PROJECT_LOG_ARGS}
URL "${PADDLE_LIB_PATH}"
PREFIX "${PADDLE_SOURCES_DIR}"
DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so
)
if (WITH_TRT)
ExternalProject_Add(
"extern_paddle"
${EXTERNAL_PROJECT_LOG_ARGS}
URL "${PADDLE_LIB_PATH}"
PREFIX "${PADDLE_SOURCES_DIR}"
DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party
)
else()
ExternalProject_Add(
"extern_paddle"
${EXTERNAL_PROJECT_LOG_ARGS}
URL "${PADDLE_LIB_PATH}"
PREFIX "${PADDLE_SOURCES_DIR}"
DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so
)
endif()
else()
ExternalProject_Add(
"extern_paddle"
......@@ -92,8 +113,16 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
if (WITH_TRT)
ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer.so)
ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer_plugin.so)
endif()
ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)
......@@ -101,4 +130,9 @@ SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/thir
LIST(APPEND external_project_dependencies paddle)
LIST(APPEND paddle_depend_libs
xxhash)
xxhash)
if(WITH_TRT)
LIST(APPEND paddle_depend_libs
nvinfer nvinfer_plugin)
endif()
......@@ -44,6 +44,7 @@ message EngineDesc {
optional bool static_optimization = 14;
optional bool force_update_static_cache = 15;
optional bool enable_ir_optimization = 16;
optional bool use_trt = 17;
};
// model_toolkit conf
......
......@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License
#execute_process(COMMAND go env -w GO111MODULE=off)
add_subdirectory(cube-server)
add_subdirectory(cube-api)
add_subdirectory(cube-builder)
add_subdirectory(cube-transfer)
add_subdirectory(cube-agent)
#add_subdirectory(cube-transfer)
#add_subdirectory(cube-agent)
......@@ -218,25 +218,15 @@ class PredictorClient {
int destroy_predictor();
int batch_predict(
const std::vector<std::vector<std::vector<float>>>& float_feed_batch,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int>>& float_shape,
const std::vector<std::vector<std::vector<int64_t>>>& int_feed_batch,
const std::vector<std::string>& int_feed_name,
const std::vector<std::vector<int>>& int_shape,
const std::vector<std::string>& fetch_name,
PredictorRes& predict_res_batch, // NOLINT
const int& pid,
const uint64_t log_id);
int numpy_predict(
const std::vector<std::vector<py::array_t<float>>>& float_feed_batch,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int>>& float_shape,
const std::vector<std::vector<int>>& float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>>& int_feed_batch,
const std::vector<std::string>& int_feed_name,
const std::vector<std::vector<int>>& int_shape,
const std::vector<std::vector<int>>& int_lod_slot_batch,
const std::vector<std::string>& fetch_name,
PredictorRes& predict_res_batch, // NOLINT
const int& pid,
......
......@@ -137,227 +137,15 @@ int PredictorClient::create_predictor() {
return 0;
}
int PredictorClient::batch_predict(
const std::vector<std::vector<std::vector<float>>> &float_feed_batch,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<std::vector<int64_t>>> &int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
predict_res_batch.clear();
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
int fetch_name_num = fetch_name.size();
_api.thrd_initialize();
std::string variant_tag;
_predictor = _api.fetch_predictor("general_model", &variant_tag);
predict_res_batch.set_variant_tag(variant_tag);
VLOG(2) << "fetch general model predictor done.";
VLOG(2) << "float feed name size: " << float_feed_name.size();
VLOG(2) << "int feed name size: " << int_feed_name.size();
VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
Request req;
req.set_log_id(log_id);
for (auto &name : fetch_name) {
req.add_fetch_var_names(name);
}
for (int bi = 0; bi < batch_size; bi++) {
VLOG(2) << "prepare batch " << bi;
std::vector<Tensor *> tensor_vec;
FeedInst *inst = req.add_insts();
std::vector<std::vector<float>> float_feed = float_feed_batch[bi];
std::vector<std::vector<int64_t>> int_feed = int_feed_batch[bi];
for (auto &name : float_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
for (auto &name : int_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name "
<< "prepared";
int vec_idx = 0;
VLOG(2) << "tensor_vec size " << tensor_vec.size() << " float shape "
<< float_shape.size();
for (auto &name : float_feed_name) {
int idx = _feed_name_to_idx[name];
Tensor *tensor = tensor_vec[idx];
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape[vec_idx].size();
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
tensor->set_elem_type(1);
for (uint32_t j = 0; j < float_feed[vec_idx].size(); ++j) {
tensor->add_float_data(float_feed[vec_idx][j]);
}
vec_idx++;
}
VLOG(2) << "batch [" << bi << "] "
<< "float feed value prepared";
vec_idx = 0;
for (auto &name : int_feed_name) {
int idx = _feed_name_to_idx[name];
Tensor *tensor = tensor_vec[idx];
if (_type[idx] == 0) {
VLOG(2) << "prepare int64 feed " << name << " shape size "
<< int_shape[vec_idx].size();
VLOG(3) << "feed var name " << name << " index " << vec_idx
<< "first data " << int_feed[vec_idx][0];
for (uint32_t j = 0; j < int_feed[vec_idx].size(); ++j) {
tensor->add_int64_data(int_feed[vec_idx][j]);
}
} else if (_type[idx] == 2) {
VLOG(2) << "prepare int32 feed " << name << " shape size "
<< int_shape[vec_idx].size();
VLOG(3) << "feed var name " << name << " index " << vec_idx
<< "first data " << int32_t(int_feed[vec_idx][0]);
for (uint32_t j = 0; j < int_feed[vec_idx].size(); ++j) {
tensor->add_int_data(int32_t(int_feed[vec_idx][j]));
}
}
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
vec_idx++;
}
VLOG(2) << "batch [" << bi << "] "
<< "int feed value prepared";
}
int64_t preprocess_end = timeline.TimeStampUS();
int64_t client_infer_start = timeline.TimeStampUS();
Response res;
int64_t client_infer_end = 0;
int64_t postprocess_start = 0;
int64_t postprocess_end = 0;
if (FLAGS_profile_client) {
if (FLAGS_profile_server) {
req.set_profile_server(true);
}
}
res.Clear();
if (_predictor->inference(&req, &res) != 0) {
LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
_api.thrd_clear();
return -1;
} else {
client_infer_end = timeline.TimeStampUS();
postprocess_start = client_infer_end;
VLOG(2) << "get model output num";
uint32_t model_num = res.outputs_size();
VLOG(2) << "model num: " << model_num;
for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
VLOG(2) << "process model output index: " << m_idx;
auto output = res.outputs(m_idx);
ModelRes model;
model.set_engine_name(output.engine_name());
int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
int shape_size = output.insts(0).tensor_array(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
model._shape_map[name].resize(shape_size);
for (int i = 0; i < shape_size; ++i) {
model._shape_map[name][i] =
output.insts(0).tensor_array(idx).shape(i);
}
int lod_size = output.insts(0).tensor_array(idx).lod_size();
if (lod_size > 0) {
model._lod_map[name].resize(lod_size);
for (int i = 0; i < lod_size; ++i) {
model._lod_map[name][i] = output.insts(0).tensor_array(idx).lod(i);
}
}
idx += 1;
}
idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int64";
int size = output.insts(0).tensor_array(idx).int64_data_size();
model._int64_value_map[name] = std::vector<int64_t>(
output.insts(0).tensor_array(idx).int64_data().begin(),
output.insts(0).tensor_array(idx).int64_data().begin() + size);
} else if (_fetch_name_to_type[name] == 1) {
VLOG(2) << "fetch var " << name << "type float";
int size = output.insts(0).tensor_array(idx).float_data_size();
model._float_value_map[name] = std::vector<float>(
output.insts(0).tensor_array(idx).float_data().begin(),
output.insts(0).tensor_array(idx).float_data().begin() + size);
} else if (_fetch_name_to_type[name] == 2) {
VLOG(2) << "fetch var " << name << "type int32";
int size = output.insts(0).tensor_array(idx).int_data_size();
model._int32_value_map[name] = std::vector<int32_t>(
output.insts(0).tensor_array(idx).int_data().begin(),
output.insts(0).tensor_array(idx).int_data().begin() + size);
}
idx += 1;
}
predict_res_batch.add_model_res(std::move(model));
}
postprocess_end = timeline.TimeStampUS();
}
if (FLAGS_profile_client) {
std::ostringstream oss;
oss << "PROFILE\t"
<< "pid:" << pid << "\t"
<< "prepro_0:" << preprocess_start << " "
<< "prepro_1:" << preprocess_end << " "
<< "client_infer_0:" << client_infer_start << " "
<< "client_infer_1:" << client_infer_end << " ";
if (FLAGS_profile_server) {
int op_num = res.profile_time_size() / 2;
for (int i = 0; i < op_num; ++i) {
oss << "op" << i << "_0:" << res.profile_time(i * 2) << " ";
oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
}
}
oss << "postpro_0:" << postprocess_start << " ";
oss << "postpro_1:" << postprocess_end;
fprintf(stderr, "%s\n", oss.str().c_str());
}
_api.thrd_clear();
return 0;
}
int PredictorClient::numpy_predict(
const std::vector<std::vector<py::array_t<float>>> &float_feed_batch,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>> &int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
......@@ -412,6 +200,9 @@ int PredictorClient::numpy_predict(
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(1);
const int float_shape_size = float_shape[vec_idx].size();
switch (float_shape_size) {
......@@ -470,6 +261,9 @@ int PredictorClient::numpy_predict(
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
if (_type[idx] == 0) {
......
......@@ -95,42 +95,18 @@ PYBIND11_MODULE(serving_client, m) {
[](PredictorClient &self) { self.create_predictor(); })
.def("destroy_predictor",
[](PredictorClient &self) { self.destroy_predictor(); })
.def("batch_predict",
[](PredictorClient &self,
const std::vector<std::vector<std::vector<float>>>
&float_feed_batch,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<std::vector<int64_t>>>
&int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
return self.batch_predict(float_feed_batch,
float_feed_name,
float_shape,
int_feed_batch,
int_feed_name,
int_shape,
fetch_name,
predict_res_batch,
pid,
log_id);
},
py::call_guard<py::gil_scoped_release>())
.def("numpy_predict",
[](PredictorClient &self,
const std::vector<std::vector<py::array_t<float>>>
&float_feed_batch,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>>
&int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
......@@ -138,9 +114,11 @@ PYBIND11_MODULE(serving_client, m) {
return self.numpy_predict(float_feed_batch,
float_feed_name,
float_shape,
float_lod_slot_batch,
int_feed_batch,
int_feed_name,
int_shape,
int_lod_slot_batch,
fetch_name,
predict_res_batch,
pid,
......
......@@ -9,7 +9,7 @@ endif()
target_include_directories(serving PUBLIC
${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
)
include_directories(${CUDNN_ROOT}/include/)
if(WITH_GPU)
target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine
-Wl,--no-whole-archive)
......@@ -29,7 +29,11 @@ if(WITH_GPU)
endif()
if(WITH_MKL OR WITH_GPU)
if (WITH_TRT)
target_link_libraries(serving -liomp5 -lmklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
else()
target_link_libraries(serving -liomp5 -lmklml_intel -lmkldnn -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
endif()
else()
target_link_libraries(serving openblas -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
endif()
......
......@@ -73,8 +73,6 @@ int GeneralReaderOp::inference() {
// read request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
uint64_t log_id = req->log_id();
int batch_size = req->insts_size();
int input_var_num = 0;
std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size;
......@@ -83,7 +81,6 @@ int GeneralReaderOp::inference() {
GeneralBlob *res = mutable_data<GeneralBlob>();
TensorVector *out = &res->tensor_vector;
res->SetBatchSize(batch_size);
res->SetLogId(log_id);
if (!res) {
......@@ -98,11 +95,11 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
......@@ -122,13 +119,11 @@ int GeneralReaderOp::inference() {
elem_type.resize(var_num);
elem_size.resize(var_num);
capacity.resize(var_num);
// prepare basic information for input
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor lod_tensor;
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has elem type: " << elem_type[i];
VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
......@@ -139,13 +134,24 @@ int GeneralReaderOp::inference() {
elem_size[i] = sizeof(int32_t);
lod_tensor.dtype = paddle::PaddleDType::INT32;
}
if (model_config->_is_lod_feed[i]) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
// implement lod tensor here
if (req->insts(0).tensor_array(i).lod_size() > 0) {
VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
lod_tensor.lod.resize(1);
for (int k = 0; k < req->insts(0).tensor_array(i).lod_size(); ++k) {
lod_tensor.lod[0].push_back(req->insts(0).tensor_array(i).lod(k));
}
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
<< "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor, capacity: " << capacity[i];
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
......@@ -160,51 +166,40 @@ int GeneralReaderOp::inference() {
lod_tensor.name = model_config->_feed_name[i];
out->push_back(lod_tensor);
}
// specify the memory needed for output tensor_vector
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
int tensor_size = 0;
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = 0;
if (tensor.int64_data_size() > 0) {
data_len = tensor.int64_data_size();
} else if (tensor.float_data_size() > 0) {
data_len = tensor.float_data_size();
} else if (tensor.int_data_size() > 0) {
data_len = tensor.int_data_size();
}
VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
<< "]: " << data_len;
tensor_size += data_len;
int cur_len = out->at(i).lod[0].back();
VLOG(2) << "(logid=" << log_id << ") current len: " << cur_len;
int sample_len = 0;
if (tensor.shape_size() == 1) {
sample_len = data_len;
} else {
sample_len = tensor.shape(0);
}
out->at(i).lod[0].push_back(cur_len + sample_len);
VLOG(2) << "(logid=" << log_id << ") new len: " << cur_len + sample_len;
}
out->at(i).data.Resize(tensor_size * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back()};
for (int j = 1; j < req->insts(0).tensor_array(i).shape_size(); ++j) {
out->at(i).shape.push_back(req->insts(0).tensor_array(i).shape(j));
const Tensor &tensor = req->insts(0).tensor_array(i);
int data_len = 0;
if (tensor.int64_data_size() > 0) {
data_len = tensor.int64_data_size();
} else if (tensor.float_data_size() > 0) {
data_len = tensor.float_data_size();
} else if (tensor.int_data_size() > 0) {
data_len = tensor.int_data_size();
}
if (out->at(i).shape.size() == 1) {
out->at(i).shape.push_back(1);
VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
<< "]: " << data_len;
tensor_size += data_len;
int cur_len = out->at(i).lod[0].back();
VLOG(2) << "(logid=" << log_id << ") current len: " << cur_len;
int sample_len = 0;
if (tensor.shape_size() == 1) {
sample_len = data_len;
} else {
sample_len = tensor.shape(0);
}
VLOG(2) << "(logid=" << log_id << ") new len: " << cur_len + sample_len;
out->at(i).data.Resize(tensor_size * elem_size[i]);
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
out->at(i).data.Resize(capacity[i] * elem_size[i]);
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
<< "] is tensor and capacity=" << capacity[i];
}
}
......@@ -215,58 +210,36 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << req->insts(0).tensor_array(i).int64_data(0);
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
int elem_num = req->insts(j).tensor_array(i).int64_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).int64_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
int elem_num = req->insts(0).tensor_array(i).int64_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(0).tensor_array(i).int64_data(k);
}
} else if (elem_type[i] == 1) {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << req->insts(0).tensor_array(i).float_data(0);
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
int elem_num = req->insts(j).tensor_array(i).float_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).float_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
int elem_num = req->insts(0).tensor_array(i).float_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(0).tensor_array(i).float_data(k);
}
} else if (elem_type[i] == 2) {
int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << req->insts(0).tensor_array(i).int_data(0);
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
int elem_num = req->insts(j).tensor_array(i).int_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
int elem_num = req->insts(0).tensor_array(i).int_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(0).tensor_array(i).int_data(k);
}
}
}
VLOG(2) << "(logid=" << log_id << ") output size: " << out->size();
timeline.Pause();
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
res->_batch_size = batch_size;
res->_batch_size = 1;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
......
......@@ -155,9 +155,11 @@ int GeneralResponseOp::inference() {
}
if (model_config->_is_lod_fetch[idx]) {
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_lod(
in->at(idx).lod[0][j]);
if (in->at(idx).lod.size() > 0) {
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_lod(
in->at(idx).lod[0][j]);
}
}
}
......
......@@ -13,7 +13,9 @@ set_source_files_properties(
PROPERTIES
COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure)
if (WITH_TRT)
add_definitions(-DWITH_TRT)
endif()
target_link_libraries(pdserving
brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
......
......@@ -38,6 +38,7 @@ class InferEngineCreationParams {
_enable_ir_optimization = false;
_static_optimization = false;
_force_update_static_cache = false;
_use_trt = false;
}
void set_path(const std::string& path) { _path = path; }
......@@ -50,12 +51,16 @@ class InferEngineCreationParams {
_enable_ir_optimization = enable_ir_optimization;
}
void set_use_trt(bool use_trt) { _use_trt = use_trt; }
bool enable_memory_optimization() const {
return _enable_memory_optimization;
}
bool enable_ir_optimization() const { return _enable_ir_optimization; }
bool use_trt() const { return _use_trt; }
void set_static_optimization(bool static_optimization = false) {
_static_optimization = static_optimization;
}
......@@ -86,6 +91,7 @@ class InferEngineCreationParams {
bool _enable_ir_optimization;
bool _static_optimization;
bool _force_update_static_cache;
bool _use_trt;
};
class InferEngine {
......@@ -172,6 +178,10 @@ class ReloadableInferEngine : public InferEngine {
force_update_static_cache);
}
if (conf.has_use_trt()) {
_infer_engine_params.set_use_trt(conf.use_trt());
}
if (!check_need_reload() || load(_infer_engine_params) != 0) {
LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
return -1;
......@@ -553,8 +563,12 @@ class CloneDBReloadableInferEngine
};
template <typename FluidFamilyCore>
#ifdef WITH_TRT
class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
#else
class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
public:
#endif
public: // NOLINT
FluidInferEngine() {}
~FluidInferEngine() {}
......
......@@ -51,8 +51,8 @@ class WeightedRandomRender : public EndpointRouterBase {
new (std::nothrow) Factory<WeightedRandomRender, EndpointRouterBase>();
if (factory == NULL) {
RAW_LOG(ERROR,
"Failed regist factory: WeightedRandomRender->EndpointRouterBase \
in macro!");
"Failed regist factory: WeightedRandomRender->EndpointRouterBase "
"in macro!");
return -1;
}
......@@ -63,8 +63,8 @@ class WeightedRandomRender : public EndpointRouterBase {
if (FactoryPool<EndpointRouterBase>::instance().register_factory(
"WeightedRandomRender", factory) != 0) {
RAW_LOG(INFO,
"Factory has been registed: \
WeightedRandomRender->EndpointRouterBase.");
"Factory has been registed: "
"WeightedRandomRender->EndpointRouterBase.");
}
return 0;
......
......@@ -75,10 +75,12 @@ export PATH=$PATH:$GOPATH/bin
## Get go packages
```shell
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
go get -u github.com/golang/protobuf/protoc-gen-go
go get -u google.golang.org/grpc
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0
```
......@@ -89,9 +91,9 @@ go get -u google.golang.org/grpc
``` shell
mkdir server-build-cpu && cd server-build-cpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DSERVER=ON ..
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DSERVER=ON ..
make -j10
```
......@@ -102,10 +104,28 @@ you can execute `make install` to put targets under directory `./output`, you ne
``` shell
mkdir server-build-gpu && cd server-build-gpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DSERVER=ON \
-DWITH_GPU=ON ..
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
-DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
-DSERVER=ON \
-DWITH_GPU=ON ..
make -j10
```
### Integrated TensorRT version paddle inference library
```
mkdir server-build-trt && cd server-build-trt
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
-DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
-DSERVER=ON \
-DWITH_GPU=ON \
-DWITH_TRT=ON ..
make -j10
```
......@@ -134,7 +154,10 @@ execute `make install` to put targets under directory `./output`
```bash
mkdir app-build && cd app-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DAPP=ON ..
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DAPP=ON ..
make
```
......@@ -165,7 +188,9 @@ Please use the example under `python/examples` to verify.
| WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF |
| WITH_MKL | Compile Paddle Serving with MKL support | OFF |
| WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
| CUDNN_ROOT | Define CuDNN library and header path | |
| CUDNN_LIBRARY | Define CuDNN library and header path | |
| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
| TENSORRT_ROOT | Define TensorRT PATH | |
| CLIENT | Compile Paddle Serving Client | OFF |
| SERVER | Compile Paddle Serving Server | OFF |
| APP | Compile Paddle Serving App package | OFF |
......@@ -180,7 +205,8 @@ To compile the Paddle Serving GPU version on bare metal, you need to install the
- CUDA
- CuDNN
- NCCL2
To compile the TensorRT version, you also need to install the TensorRT library.
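As a sketch, the variables referenced by the cmake commands above can be exported beforehand; the install locations below are placeholders, not required paths:
```shell
# Adjust these to wherever CUDA, CuDNN and TensorRT are installed on your machine
export CUDA_PATH=/usr/local/cuda-10.1
export CUDNN_LIBRARY=/usr/local/cuda-10.1/lib64/libcudnn.so
export TENSORRT_LIBRARY_PATH=/opt/TensorRT-6.0.1.5
```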
Note here:
......@@ -190,21 +216,12 @@ Note here:
For reference, the base library versions matched by each PaddlePaddle release are listed below:
| | CUDA | CuDNN | NCCL2 |
| :----: | :-----: | :----------------------: | :----: |
| CUDA 8 | 8.0.61 | CuDNN 7.1.2 for CUDA 8.0 | 2.1.4 |
| CUDA 9 | 9.0.176 | CuDNN 7.3.1 for CUDA 9.0 | 2.2.12 |
| | CUDA | CuDNN | TensorRT |
| :----: | :-----: | :----------------------: | :----: |
| post9 | 9.0 | CuDNN 7.3.1 for CUDA 9.0 | |
| post10 | 10.0 | CuDNN 7.5.1 for CUDA 10.0| |
| trt | 10.1 | CuDNN 7.5.1 for CUDA 10.1| 6.0.1.5 |
### How to make the compiler detect the CuDNN library
Download the corresponding CuDNN version from the NVIDIA developer website, decompress it, and add `-DCUDNN_ROOT` to the cmake command to specify the path of CuDNN.
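A minimal sketch of that configure step (the CuDNN location is a placeholder):
```shell
# Add the unpacked CuDNN location on top of the other -D options shown earlier
cmake -DCUDNN_ROOT=/path/to/cudnn ..
```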
### How to make the compiler detect the nccl library
After downloading the corresponding version of the nccl2 library from the NVIDIA developer official website and decompressing it, add the following environment variables (take nccl2.1.4 as an example):
```shell
export C_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$CPLUS_INCLUDE_PATH
export LD_LIBRARY_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/lib/:$LD_LIBRARY_PATH
```
......@@ -72,10 +72,12 @@ export PATH=$PATH:$GOPATH/bin
## Get Go packages
```shell
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
go get -u github.com/golang/protobuf/protoc-gen-go
go get -u google.golang.org/grpc
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0
```
......@@ -85,7 +87,10 @@ go get -u google.golang.org/grpc
``` shell
mkdir server-build-cpu && cd server-build-cpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DSERVER=ON ..
make -j10
```
......@@ -95,21 +100,44 @@ make -j10
``` shell
mkdir server-build-gpu && cd server-build-gpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
-DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
-DSERVER=ON \
-DWITH_GPU=ON ..
make -j10
```
Run `make install` to place the build outputs under the `./output` directory.
### Integrate the TensorRT version of the Paddle Inference Library
**Note:** After the build succeeds, you need to set the `SERVING_BIN` path; see the [Notes](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项) section below for details.
```
mkdir server-build-trt && cd server-build-trt
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
-DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
-DSERVER=ON \
-DWITH_GPU=ON \
-DWITH_TRT=ON ..
make -j10
```
Run `make install` to place the build outputs under the `./output` directory.
**Note:** After the build succeeds, you need to set the `SERVING_BIN` path; see the [Notes](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项) section below for details.
## Compile the Client
``` shell
mkdir client-build && cd client-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DCLIENT=ON ..
make -j10
```
......@@ -121,7 +149,11 @@ make -j10
```bash
mkdir app-build && cd app-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCMAKE_INSTALL_PREFIX=./output -DAPP=ON ..
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DCMAKE_INSTALL_PREFIX=./output \
-DAPP=ON ..
make
```
......@@ -152,7 +184,10 @@ make
| WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF |
| WITH_MKL | Compile Paddle Serving with MKL support | OFF |
| WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
| CUDNN_ROOT | Define CuDNN library and header path | |
| WITH_TRT | Compile Paddle Serving with TensorRT | OFF |
| CUDNN_LIBRARY | Define CuDNN library and header path | |
| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
| TENSORRT_ROOT | Define TensorRT PATH | |
| CLIENT | Compile Paddle Serving Client | OFF |
| SERVER | Compile Paddle Serving Server | OFF |
| APP | Compile Paddle Serving App package | OFF |
......@@ -167,7 +202,8 @@ Paddle Serving通过PaddlePaddle预测库支持在GPU上做预测。WITH_GPU选
- CUDA
- CuDNN
- NCCL2
To compile the TensorRT version, you also need to install the TensorRT library.
Note the following:
......@@ -176,21 +212,12 @@ Paddle Serving通过PaddlePaddle预测库支持在GPU上做预测。WITH_GPU选
For reference, the base library versions matched by each PaddlePaddle release are listed below:
| | CUDA | CuDNN | NCCL2 |
| :----: | :-----: | :----------------------: | :----: |
| CUDA 8 | 8.0.61 | CuDNN 7.1.2 for CUDA 8.0 | 2.1.4 |
| CUDA 9 | 9.0.176 | CuDNN 7.3.1 for CUDA 9.0 | 2.2.12 |
| | CUDA | CuDNN | TensorRT |
| :----: | :-----: | :----------------------: | :----: |
| post9 | 9.0 | CuDNN 7.3.1 for CUDA 9.0 | |
| post10 | 10.0 | CuDNN 7.5.1 for CUDA 10.0| |
| trt | 10.1 | CuDNN 7.5.1 for CUDA 10.1| 6.0.1.5 |
### How to make the Paddle Serving build system detect the CuDNN library
After downloading the corresponding CuDNN version from the NVIDIA developer website and decompressing it locally, add the `-DCUDNN_ROOT` parameter to the cmake command to specify the path of the CuDNN library.
### How to make the Paddle Serving build system detect the nccl library
After downloading the corresponding nccl2 library from the NVIDIA developer website and decompressing it, add the following environment variables (using nccl 2.1.4 as an example):
```shell
export C_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$CPLUS_INCLUDE_PATH
export LD_LIBRARY_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/lib/:$LD_LIBRARY_PATH
```
After downloading the corresponding CuDNN version from the NVIDIA developer website and decompressing it locally, add the `-DCUDNN_LIBRARY` parameter to the cmake command to specify the path of the CuDNN library.
# FAQ
- Q: How can I adjust the waiting time of the RPC service to avoid timeouts?
A: Use set_rpc_timeout_ms to set a longer waiting time, in milliseconds; the default is 20 seconds.
Example:
```
from paddle_serving_client import Client
client = Client()
client.load_client_config(sys.argv[1])
client.set_rpc_timeout_ms(100000)
client.connect(["127.0.0.1:9393"])
```
## Basics
- Q: How do I run predictions with a Paddle Serving I compiled myself?
A: Install the whl package you built with pip, and set the SERVING_BIN environment variable to the path of the compiled serving binary.
- Q: During GPU prediction I get InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
A: Add the directory containing the GPU driver's libcuda.so to the LD_LIBRARY_PATH environment variable.
- Q: During GPU prediction I get ExternalError: Cudnn error, CUDNN_STATUS_BAD_PARAM at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/operators/batch_norm_op.cu:198)
A: Add cudnn's lib64 directory to LD_LIBRARY_PATH. The post9 Paddle Serving packages installed from pypi use cudnn 7.3 and post10 uses cudnn 7.5. If you are using a Paddle Serving you built yourself, you can check the corresponding cudnn version in the log/serving.INFO log file.
## Compilation issues
- Q: During GPU prediction I get Error: Failed to find dynamic library: libcublas.so
A: Add cuda's lib64 directory to LD_LIBRARY_PATH. The post9 version of Paddle Serving uses cuda 9.0 and the post10 version uses cuda 10.0.
#### Q: How do I run predictions with a Paddle Serving I compiled myself?
**A:** Install the whl package you built with pip, and set the SERVING_BIN environment variable to the path of the compiled serving binary.
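A minimal sketch of those two steps, assuming the server was built in a `server-build-cpu` directory as in COMPILE.md; the wheel and binary paths are placeholders and depend on your actual build output:
```
# Paths below are placeholders -- locate the wheel and the serving binary in your own build tree
pip install ./server-build-cpu/python/dist/paddle_serving_server-*.whl
export SERVING_BIN=$(pwd)/server-build-cpu/core/general-server/serving
```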
## Deployment issues
#### Q: Running Serving in a GPU environment fails with GPU count is: 0.
```
terminate called after throwing an instance of 'paddle::platform::EnforceNotMet'
what():
--------------------------------------------
C++ Call Stacks (More useful to developers):
--------------------------------------------
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int)
1 paddle::platform::SetDeviceId(int)
2 paddle::AnalysisConfig::fraction_of_gpu_memory_for_pool() const
3 std::unique_ptr<paddle::PaddlePredictor, std::default_delete<paddle::PaddlePredictor> > paddle::CreatePaddlePredictor<paddle::AnalysisConfig, (paddle::PaddleEngineKind)2>(paddle::AnalysisConfig const&)
4 std::unique_ptr<paddle::PaddlePredictor, std::default_delete<paddle::PaddlePredictor> > paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(paddle::AnalysisConfig const&)
----------------------
Error Message Summary:
----------------------
InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
[Hint: Expected id < GetCUDADeviceCount(), but received id:0 >= GetCUDADeviceCount():0.] at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/platform/gpu_info.cc:211)
```
**A:** libcuda.so was not linked successfully. First locate libcuda.so on the machine and use ldd to check that the libnvidia version matches the one reported by nvidia-smi (e.g. libnvidia-fatbinaryloader.so.418.39 matching NVIDIA-SMI 418.39 Driver Version: 418.39), then export the path of libcuda.so (for example, if libcuda.so is under /usr/lib64/, run export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib64/).
#### Q: Encountering GPU not found, please check your environment or use cpu version by "pip install paddle_serving_server"
**A:** Check whether the machine has an NVIDIA GPU: ls /dev/ | grep nvidia
#### Q: Which image environments does Paddle Serving currently support?
**A:** Currently (0.4.0) only CentOS is supported; see the list [here](https://github.com/PaddlePaddle/Serving/blob/develop/doc/DOCKER_IMAGES.md)
## Prediction issues
#### Q: The first GPU prediction is especially slow; how do I adjust the RPC service waiting time to avoid timeouts?
**A:** The first GPU prediction needs to initialize. Use set_rpc_timeout_ms to set a longer waiting time, in milliseconds; the default is 20 seconds.
Example:
```
from paddle_serving_client import Client
client = Client()
client.load_client_config(sys.argv[1])
client.set_rpc_timeout_ms(100000)
client.connect(["127.0.0.1:9393"])
```
#### Q: During GPU prediction I get InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
**A:** Add the directory containing the GPU driver's libcuda.so to the LD_LIBRARY_PATH environment variable.
#### Q: During GPU prediction I get ExternalError: Cudnn error, CUDNN_STATUS_BAD_PARAM at (../batch_norm_op.cu:198)
**A:** Add cudnn's lib64 directory to LD_LIBRARY_PATH. The post9 Paddle Serving packages installed from pypi use cudnn 7.3 and post10 uses cudnn 7.5. If you are using a Paddle Serving you built yourself, you can check the corresponding cudnn version in the log/serving.INFO log file.
#### Q: During GPU prediction I get Error: Failed to find dynamic library: libcublas.so
**A:** Add cuda's lib64 directory to LD_LIBRARY_PATH. The post9 version of Paddle Serving uses cuda 9.0 and the post10 version uses cuda 10.0.
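For example (the CUDA install prefix is an assumption; use the one matching your post9/post10 package):
```
export LD_LIBRARY_PATH=/usr/local/cuda-10.0/lib64:$LD_LIBRARY_PATH
```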
#### Q: How do I find the variable names to fetch on the client side?
**A:** Check the configuration file serving_server_conf.prototxt to get the variable names you need.
#### Q: How do I use the multi-language client?
**A:** The multi-language client must be used together with the multi-language server. In the current version (0.4.0), the server side needs to replace Server with MultiLangServer (if started from the command line, just add the --use_multilang flag, as in the sketch below), and the Python client needs to replace Client with MultiLangClient and drop the load_client_config step. [Java client reference](https://github.com/PaddlePaddle/Serving/blob/develop/doc/JAVA_SDK_CN.md)
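For the command-line route, the launch differs only by the extra flag, e.g. with the fit_a_line model used elsewhere in this page:
```
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang
```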
#### Q: How do I use Paddle Serving on Windows?
**A:** The current version (0.4.0) can run the multi-language RPC client on Windows, or access the service over HTTP. If you use the multi-language RPC client, the multi-language server must run in a Linux environment (for example a local container or a remote Linux machine); if you use HTTP, an ordinary server must run in a Linux environment.
#### Q: libnvinfer.so: cannot open shared object file: No such file or directory)
**A:** Refer to this document to install TensorRT: https://blog.csdn.net/hesongzefairy/article/details/105343525
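If TensorRT is installed but the loader still cannot find it, a common workaround (not from the linked article; the install path below is an assumption) is to expose its lib directory:
```
export LD_LIBRARY_PATH=/opt/TensorRT-6.0.1.5/lib:$LD_LIBRARY_PATH
```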
## Log troubleshooting
#### Q: Where can I see the log messages produced during deployment and prediction?
**A:** The server-side logs have two parts: one is printed to standard output, and one is written to the log/serving.INFO file under the directory from which the service was started.
The client-side logs are printed directly to standard output.
Running 'export GLOG_v=3' before deploying the service produces more detailed log output.
#### Q: (With GLOG_v=2) The server-side logs all look normal, but the client never gets correct prediction results
**A:** The configuration files may be wrong; check them (whether is_load_tensor, fetch_type, etc. are correct).
#### Q: How do I pass a Logid to the Server?
**A:** Logid defaults to 0 (automatic Logid generation is planned later; the current version is 0.4.0). The client passes it by specifying the log_id parameter in the predict function.
## Performance optimization
......@@ -3,51 +3,59 @@
## CPU server
### Python 3
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.2-py3-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py3-none-any.whl
```
### Python 2
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.2-py2-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py2-none-any.whl
```
## GPU server
### Python 3
```
#cuda 9.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post9-py3-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py3-none-any.whl
#cuda 10.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post10-py3-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py3-none-any.whl
#cuda10.1 with TensorRT 6
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py3-none-any.whl
```
### Python 2
```
#cuda 9.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post9-py2-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py2-none-any.whl
#cuda 10.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post10-py2-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py2-none-any.whl
##cuda10.1 with TensorRT 6
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py2-none-any.whl
```
## Client
### Python 3.7
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp37-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp37-none-any.whl
```
### Python 3.6
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp36-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp36-none-any.whl
```
### Python 3.5
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp35-none-any.whl
```
### Python 2.7
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp27-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp27-none-any.whl
```
## App
### Python 3
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.2-py3-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py3-none-any.whl
```
### Python 2
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.2-py2-none-any.whl
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py2-none-any.whl
```
## Java Demo
### Install package
```
mvn compile
mvn install
cd examples
mvn compile
mvn install
```
### Start Server
Take the fit_a_line demo as an example
```
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #CPU
python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #GPU
```
### Client Predict
```
java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample fit_a_line
```
The Java examples also contain prediction clients for the Bert, Model_ensemble, asyn_predict, batch_predict, Cube_local, Cube_quant, and Yolov4 models.
## Java Example
### Install client dependencies
```
mvn compile
mvn install
cd examples
mvn compile
mvn install
```
### Start the server
Take the fit_a_line model as an example
```
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #CPU
python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #GPU
```
### Client prediction
```
java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample fit_a_line
```
The Java examples also include prediction clients for the bert, model_ensemble, asyn_predict, batch_predict, cube_local, cube_quant, and yolov4 models.
......@@ -23,7 +23,6 @@
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
//#include "predictor/framework/infer.h"
namespace baidu {
namespace paddle_serving {
......
......@@ -2,6 +2,7 @@ FILE(GLOB fluid_gpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
add_library(fluid_gpu_engine ${fluid_gpu_engine_srcs})
target_include_directories(fluid_gpu_engine PUBLIC
${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
add_dependencies(fluid_gpu_engine pdserving extern_paddle configure)
target_link_libraries(fluid_gpu_engine pdserving paddle_fluid iomp5 mklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
......
......@@ -190,7 +190,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
paddle::AnalysisConfig analysis_config;
analysis_config.SetModel(data_path);
analysis_config.EnableUseGpu(100, FLAGS_gpuid);
analysis_config.EnableUseGpu(1500, FLAGS_gpuid);
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
......@@ -198,12 +198,68 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
#if 0 // todo: support flexible shape
int min_seq_len = 1;
int max_seq_len = 512;
int opt_seq_len = 128;
int head_number = 12;
int batch = 50;
std::vector<int> min_in_shape = {batch, min_seq_len, 1};
std::vector<int> max_in_shape = {batch, max_seq_len, 1};
std::vector<int> opt_in_shape = {batch, opt_seq_len, 1};
std::string input1_name = "src_text_a_ids";
std::string input2_name = "pos_text_a_ids";
std::string input3_name = "sent_text_a_ids";
std::string input4_name = "stack_0.tmp_0";
std::map<std::string, std::vector<int>> min_input_shape = {
{input1_name, min_in_shape},
{input2_name, min_in_shape},
{input3_name, min_in_shape},
{input4_name, {batch, head_number, min_seq_len, min_seq_len}},
};
std::map<std::string, std::vector<int>> max_input_shape = {
{input1_name, max_in_shape},
{input2_name, max_in_shape},
{input3_name, max_in_shape},
{input4_name, {batch, head_number, max_seq_len, max_seq_len}},
};
std::map<std::string, std::vector<int>> opt_input_shape = {
{input1_name, opt_in_shape},
{input2_name, opt_in_shape},
{input3_name, opt_in_shape},
{input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
};
analysis_config.SetTRTDynamicShapeInfo(
min_input_shape, max_input_shape, opt_input_shape);
#endif
int max_batch = 32;
int min_subgraph_size = 3;
if (params.use_trt()) {
analysis_config.EnableTensorRtEngine(
1 << 20,
max_batch,
min_subgraph_size,
paddle::AnalysisConfig::Precision::kFloat32,
false,
false);
LOG(INFO) << "create TensorRT predictor";
} else {
analysis_config.SwitchIrOptim(false);
}
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
} else {
analysis_config.SwitchIrOptim(false);
}
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......
......@@ -80,6 +80,16 @@ if (SERVER)
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
elseif(WITH_TRT)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" trt
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
else()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
......
......@@ -18,16 +18,20 @@ import sys
from paddle_serving_client import Client
from paddle_serving_client.utils import benchmark_args
from paddle_serving_app.reader import ChineseBertReader
import numpy as np
args = benchmark_args()
reader = ChineseBertReader({"max_seq_len": 128})
fetch = ["pooled_output"]
endpoint_list = ["127.0.0.1:9292"]
endpoint_list = ['127.0.0.1:8861']
client = Client()
client.load_client_config(args.model)
client.connect(endpoint_list)
for line in sys.stdin:
feed_dict = reader.process(line)
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape((128, 1))
#print(feed_dict)
result = client.predict(feed=feed_dict, fetch=fetch)
print(result)
......@@ -13,10 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
import numpy as np
class BertService(WebService):
......@@ -27,18 +28,20 @@ class BertService(WebService):
})
def preprocess(self, feed=[], fetch=[]):
feed_res = [
self.reader.process(ins["words"].encode("utf-8")) for ins in feed
]
feed_res = []
for ins in feed:
feed_dict = self.reader.process(ins["words"].encode("utf-8"))
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape(
(1, len(feed_dict[key]), 1))
feed_res.append(feed_dict)
return feed_res, fetch
bert_service = BertService(name="bert")
bert_service.load()
bert_service.load_model_config(sys.argv[1])
gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
bert_service.set_gpus(gpu_ids)
bert_service.prepare_server(
workdir="workdir", port=int(sys.argv[2]), device="gpu")
workdir="workdir", port=int(sys.argv[2]), device="cpu")
bert_service.run_rpc_service()
bert_service.run_web_service()
......@@ -15,6 +15,7 @@
from paddle_serving_client import Client
from paddle_serving_app.reader import ChineseBertReader
import sys
import numpy as np
client = Client()
client.load_client_config("./bert_seq32_client/serving_client_conf.prototxt")
......@@ -28,12 +29,21 @@ expected_shape = {
"pooled_output": (4, 768)
}
batch_size = 4
feed_batch = []
feed_batch = {}
batch_len = 0
for line in sys.stdin:
feed = reader.process(line)
if batch_len == 0:
for key in feed.keys():
val_len = len(feed[key])
feed_batch[key] = np.array(feed[key]).reshape((1, val_len, 1))
continue
if len(feed_batch) < batch_size:
feed_batch.append(feed)
for key in feed.keys():
np.concatenate([
feed_batch[key], np.array(feed[key]).reshape((1, val_len, 1))
])
else:
fetch_map = client.predict(feed=feed_batch, fetch=fetch)
feed_batch = []
......
......@@ -16,6 +16,7 @@ from paddle_serving_client import Client
from paddle_serving_app.reader import *
import sys
import numpy as np
from paddle_serving_app.reader import BlazeFacePostprocess
preprocess = Sequential([
File2Image(),
......
......@@ -20,7 +20,7 @@ import os
import time
import criteo_reader as criteo
from paddle_serving_client.metric import auc
import numpy as np
import sys
py_version = sys.version_info[0]
......@@ -49,7 +49,8 @@ for ei in range(1000):
data = reader().__next__()
feed_dict = {}
for i in range(1, 27):
feed_dict["sparse_{}".format(i - 1)] = data[0][i]
feed_dict["sparse_{}".format(i - 1)] = np.array(data[0][i]).reshape(-1)
feed_dict["sparse_{}.lod".format(i - 1)] = [0, len(data[0][i])]
fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
end = time.time()
print(end - start)
......@@ -19,6 +19,7 @@ import os
import criteo as criteo
import time
from paddle_serving_client.metric import auc
import numpy as np
py_version = sys.version_info[0]
......@@ -41,10 +42,15 @@ for ei in range(10000):
else:
data = reader().__next__()
feed_dict = {}
feed_dict['dense_input'] = data[0][0]
feed_dict['dense_input'] = np.array(data[0][0]).astype("float32").reshape(
1, 13)
feed_dict['dense_input.lod'] = [0, 1]
for i in range(1, 27):
feed_dict["embedding_{}.tmp_0".format(i - 1)] = data[0][i]
fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
tmp_data = np.array(data[0][i]).astype(np.int64)
feed_dict["embedding_{}.tmp_0".format(i - 1)] = tmp_data.reshape(
(1, len(data[0][i])))
feed_dict["embedding_{}.tmp_0.lod".format(i - 1)] = [0, 1]
fetch_map = client.predict(feed=feed_dict, fetch=["prob"], batch=True)
prob_list.append(fetch_map['prob'][0][1])
label_list.append(data[0][-1][0])
......
......@@ -36,6 +36,7 @@ fetch_map = client.predict(
"im_info": np.array(list(im.shape[1:]) + [1.0]),
"im_shape": np.array(list(im.shape[1:]) + [1.0])
},
fetch=["multiclass_nms"])
fetch=["multiclass_nms"],
batch=False)
fetch_map["image"] = sys.argv[3]
postprocess(fetch_map)
......@@ -27,5 +27,10 @@ test_reader = paddle.batch(
batch_size=1)
for data in test_reader():
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
import numpy as np
new_data = np.zeros((1, 1, 13)).astype("float32")
new_data[0] = data[0][0]
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=True)
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
print(fetch_map)
......@@ -15,6 +15,7 @@
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
import paddle
import numpy as np
def single_func(idx, resource):
......@@ -26,6 +27,7 @@ def single_func(idx, resource):
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584,
0.6283, 0.4919, 0.1856, 0.0795, -0.0332
]
x = np.array(x)
for i in range(1000):
fetch_map = client.predict(feed={"x": x}, fetch=["price"])
if fetch_map is None:
......
......@@ -90,6 +90,7 @@ def single_func(idx, resource):
image = base64.b64encode(
open("./image_data/n01440764/" + file_list[i]).read())
else:
image_path = "./image_data/n01440764/" + file_list[i]
image = base64.b64encode(open(image_path, "rb").read()).decode(
"utf-8")
req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]})
......
......@@ -13,7 +13,8 @@
# limitations under the License.
import sys
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
if len(sys.argv) != 4:
print("python resnet50_web_service.py model device port")
......@@ -47,7 +48,7 @@ class ImageService(WebService):
if "image" not in ins:
raise ("feed data error!")
img = self.seq(ins["image"])
feed_batch.append({"image": img})
feed_batch.append({"image": img[np.newaxis, :]})
return feed_batch, fetch
def postprocess(self, feed=[], fetch=[], fetch_map={}):
......
......@@ -17,6 +17,7 @@ import os
import sys
import time
import requests
import numpy as np
from paddle_serving_app.reader import IMDBDataset
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
......@@ -47,11 +48,17 @@ def single_func(idx, resource):
for i in range(1000):
if args.batch_size >= 1:
feed_batch = []
feed = {"words": [], "words.lod": [0]}
for bi in range(args.batch_size):
word_ids, label = imdb_dataset.get_words_and_label(dataset[
bi])
feed_batch.append({"words": word_ids})
result = client.predict(feed=feed_batch, fetch=["prediction"])
feed["words.lod"].append(feed["words.lod"][-1] + len(
word_ids))
feed["words"].extend(word_ids)
feed["words"] = np.array(feed["words"]).reshape(
len(feed["words"]), 1)
result = client.predict(
feed=feed, fetch=["prediction"], batch=True)
if result is None:
raise ("predict failed.")
else:
......
......@@ -15,6 +15,7 @@
from paddle_serving_client import Client
from paddle_serving_app.reader import IMDBDataset
import sys
import numpy as np
client = Client()
client.load_client_config(sys.argv[1])
......@@ -28,7 +29,12 @@ imdb_dataset.load_resource(sys.argv[2])
for line in sys.stdin:
word_ids, label = imdb_dataset.get_words_and_label(line)
feed = {"words": word_ids}
word_len = len(word_ids)
feed = {
"words": np.array(word_ids).reshape(word_len, 1),
"words.lod": [0, word_len]
}
#print(feed)
fetch = ["prediction"]
fetch_map = client.predict(feed=feed, fetch=fetch)
fetch_map = client.predict(feed=feed, fetch=fetch, batch=True)
print("{} {}".format(fetch_map["prediction"][0], label[0]))
......@@ -16,6 +16,7 @@
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import IMDBDataset
import sys
import numpy as np
class IMDBService(WebService):
......@@ -26,10 +27,15 @@ class IMDBService(WebService):
self.dataset.load_resource(args["dict_file_path"])
def preprocess(self, feed={}, fetch=[]):
res_feed = [{
"words": self.dataset.get_words_only(ins["words"])
} for ins in feed]
return res_feed, fetch
feed_batch = []
words_lod = [0]
for ins in feed:
words = self.dataset.get_words_only(ins["words"])
words = np.array(words).reshape(len(words), 1)
words_lod.append(words_lod[-1] + len(words))
feed_batch.append(words)
feed = {"words": np.concatenate(feed_batch), "words.lod": words_lod}
return feed, fetch
imdb_service = IMDBService(name="imdb")
......
......@@ -19,6 +19,7 @@ from paddle_serving_app.reader import LACReader
import sys
import os
import io
import numpy as np
client = Client()
client.load_client_config(sys.argv[1])
......@@ -31,7 +32,17 @@ for line in sys.stdin:
feed_data = reader.process(line)
if len(feed_data) <= 0:
continue
fetch_map = client.predict(feed={"words": feed_data}, fetch=["crf_decode"])
print(feed_data)
#fetch_map = client.predict(feed={"words": np.array(feed_data).reshape(len(feed_data), 1), "words.lod": [0, len(feed_data)]}, fetch=["crf_decode"], batch=True)
fetch_map = client.predict(
feed={
"words": np.array(feed_data + feed_data).reshape(
len(feed_data) * 2, 1),
"words.lod": [0, len(feed_data), 2 * len(feed_data)]
},
fetch=["crf_decode"],
batch=True)
print(fetch_map)
begin = fetch_map['crf_decode.lod'][0]
end = fetch_map['crf_decode.lod'][1]
segs = reader.parse_result(line, fetch_map["crf_decode"][begin:end])
......
......@@ -34,9 +34,9 @@ python ocr_web_server.py gpu
```
python ocr_web_client.py
```
If you want a faster web service, please try Web Debugger Service
If you want a faster web service, please try Web LocalPredictor Service
## Web Debugger Service
## Web LocalPredictor Service
```
#choose one of cpu/gpu commands as following
#for cpu user
......@@ -45,7 +45,7 @@ python ocr_debugger_server.py cpu
python ocr_debugger_server.py gpu
```
## Web Debugger Client Prediction
## Web LocalPredictor Client Prediction
```
python ocr_web_client.py
```
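As a rough sketch of what `ocr_web_client.py` does, the request follows the same base64 pattern used by the other web clients in this diff; the endpoint path, port, image path, and the fetch name `res` are assumptions rather than content of the patch.
```python
# hypothetical client request; endpoint path, port, image path and fetch name are assumptions
import base64
import json
import requests

image = base64.b64encode(open("test_imgs/1.jpg", "rb").read()).decode("utf-8")
data = {"feed": [{"image": image}], "fetch": ["res"]}
r = requests.post(
    "http://127.0.0.1:9292/ocr/prediction",
    headers={"Content-Type": "application/json"},
    data=json.dumps(data))
print(r.json())
```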
......@@ -61,7 +61,7 @@ Dataset: RCTW 500 sample images
| engine | client read image(ms) | client-server trans time(ms) | server read image(ms) | det pre(ms) | det infer(ms) | det post(ms) | rec pre(ms) | rec infer(ms) | rec post(ms) | server-client trans time(ms) | server side time consumption(ms) | server side overhead(ms) | total time(ms) |
|------------------------------|----------------|----------------------------|------------------|--------------------|------------------|--------------------|--------------------|------------------|--------------------|--------------------------|--------------------|--------------|---------------|
| Serving web service | 8.69 | 13.41 | 109.97 | 2.82 | 87.76 | 4.29 | 3.98 | 78.51 | 3.66 | 4.12 | 181.02 | 136.49 | 317.51 |
| Serving Debugger web service | 8.73 | 16.42 | 115.27 | 2.93 | 20.63 | 3.97 | 4.48 | 13.84 | 3.60 | 6.91 | 49.45 | 147.33 | 196.78 |
| Serving LocalPredictor web service | 8.73 | 16.42 | 115.27 | 2.93 | 20.63 | 3.97 | 4.48 | 13.84 | 3.60 | 6.91 | 49.45 | 147.33 | 196.78 |
## Appendix: For Users who want to launch Det or Rec only
If you only want to detect text regions without recognizing them, or directly recognize words from already-cropped images, we also provide standalone Det and Rec servers (a hedged launch sketch follows below).
......
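A minimal sketch of launching the two standalone servers, assuming the model directories are named `ocr_det_model` and `ocr_rec_model` (as in the pipeline config elsewhere in this diff); the ports are placeholders, and GPU users would substitute `paddle_serving_server_gpu.serve`.
```shell
# hedged sketch: standalone detection / recognition RPC servers (ports are placeholders)
python -m paddle_serving_server.serve --model ocr_det_model --port 9293 &
python -m paddle_serving_server.serve --model ocr_rec_model --port 9292 &
```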
......@@ -34,8 +34,8 @@ python ocr_web_server.py gpu
python ocr_web_client.py
```
If you want a faster web service, please try the Debugger web service
## Start the Debugger Web Service
If you want a faster web service, please try the LocalPredictor web service
## Start the LocalPredictor Web Service
```
#choose one of the cpu/gpu commands below
#for cpu user
......@@ -60,7 +60,7 @@ GPU: Nvidia Tesla V100 (single card)
| engine | client read image(ms) | client-server trans time(ms) | server read image(ms) | det pre(ms) | det infer(ms) | det post(ms) | rec pre(ms) | rec infer(ms) | rec post(ms) | server-client trans time(ms) | server side time consumption(ms) | server side overhead(ms) | total time(ms) |
|------------------------------|----------------|----------------------------|------------------|--------------------|------------------|--------------------|--------------------|------------------|--------------------|--------------------------|--------------------|--------------|---------------|
| Serving web service | 8.69 | 13.41 | 109.97 | 2.82 | 87.76 | 4.29 | 3.98 | 78.51 | 3.66 | 4.12 | 181.02 | 136.49 | 317.51 |
| Serving Debugger web service | 8.73 | 16.42 | 115.27 | 2.93 | 20.63 | 3.97 | 4.48 | 13.84 | 3.60 | 6.91 | 49.45 | 147.33 | 196.78 |
| Serving LocalPredictor web service | 8.73 | 16.42 | 115.27 | 2.93 | 20.63 | 3.97 | 4.48 | 13.84 | 3.60 | 6.91 | 49.45 | 147.33 | 196.78 |
## Appendix: Launching Det or Rec as a Standalone Service
......
......@@ -26,7 +26,7 @@ if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
from paddle_serving_app.local_predict import Debugger
from paddle_serving_app.local_predict import LocalPredictor
import time
import re
import base64
......@@ -39,7 +39,7 @@ class OCRService(WebService):
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.det_client = Debugger()
self.det_client = LocalPredictor()
if sys.argv[1] == 'gpu':
self.det_client.load_model_config(
det_model_config, gpu=True, profile=False)
......
rpc_port: 18085
rpc_port: 18080
worker_num: 4
build_dag_each_worker: false
http_port: 9999
dag:
is_thread_op: false
client_type: brpc
is_thread_op: true
retry: 1
use_profile: false
tracer:
interval_s: 10
op:
bow:
concurrency: 2
remote_service_conf:
client_type: brpc
model_config: imdb_bow_model
devices: ""
rpc_port : 9393
cnn:
concurrency: 2
remote_service_conf:
client_type: brpc
model_config: imdb_cnn_model
devices: ""
rpc_port : 9292
......@@ -4,19 +4,20 @@ build_dag_each_worker: false
http_port: 9999
dag:
is_thread_op: false
client_type: brpc
retry: 1
use_profile: false
op:
det:
concurrency: 2
local_service_conf:
client_type: local_predictor
model_config: ocr_det_model
devices: "0"
devices: ""
rec:
concurrency: 1
timeout: -1
retry: 1
local_service_conf:
client_type: local_predictor
model_config: ocr_rec_model
devices: "0"
devices: ""
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_serving_server_gpu.web_service import WebService, Op
from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging
......@@ -52,7 +52,7 @@ class DetOp(Op):
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img}
return {"image": det_img[np.newaxis, :]}
def postprocess(self, input_dicts, fetch_dict):
det_out = fetch_dict["concat_1.tmp_0"]
......@@ -62,6 +62,7 @@ class DetOp(Op):
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
print("out dict", out_dict)
return out_dict
......@@ -85,11 +86,14 @@ class RecOp(Op):
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
_, w, h = self.ocr_reader.resize_norm_img(img_list[0],
max_wh_ratio).shape
imgs = np.zeros((len(img_list), 3, w, h)).astype('float32')
for id, img in enumerate(img_list):
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
imgs[id] = norm_img
feed = {"image": imgs.copy()}
return feed
def postprocess(self, input_dicts, fetch_dict):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
......@@ -108,5 +112,5 @@ class OcrService(WebService):
uci_service = OcrService(name="ocr")
uci_service.prepare_pipeline_config("config.yml")
uci_service.prepare_pipeline_config("brpc_config.yml")
uci_service.run_service()
......@@ -31,7 +31,8 @@ class UciOp(Op):
x_value = input_dict["x"]
if isinstance(x_value, (str, unicode)):
input_dict["x"] = np.array(
[float(x.strip()) for x in x_value.split(self.separator)])
[float(x.strip())
for x in x_value.split(self.separator)]).reshape(1, 13)
return input_dict
def postprocess(self, input_dicts, fetch_dict):
......
......@@ -14,10 +14,10 @@
from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
from paddle_serving_app.local_predict import Debugger
from paddle_serving_app.local_predict import LocalPredictor
import sys
debugger = Debugger()
debugger = LocalPredictor()
debugger.load_model_config(sys.argv[1], gpu=True)
seq = Sequential([
......
......@@ -18,7 +18,7 @@ from paddle_serving_client import Client
from paddle_serving_app.reader import LACReader, SentaReader
import os
import sys
import numpy as np
#senta_web_service.py
from paddle_serving_server.web_service import WebService
from paddle_serving_client import Client
......@@ -36,26 +36,42 @@ class SentaService(WebService):
#define preprocessing for the senta prediction service; call order: lac reader -> lac model inference -> result postprocessing -> senta reader
def preprocess(self, feed=[], fetch=[]):
feed_data = [{
"words": self.lac_reader.process(x["words"])
} for x in feed]
lac_result = self.lac_client.predict(
feed=feed_data, fetch=["crf_decode"])
feed_batch = []
words_lod = [0]
for ins in feed:
if "words" not in ins:
raise ("feed data error!")
feed_data = self.lac_reader.process(ins["words"])
words_lod.append(words_lod[-1] + len(feed_data))
feed_batch.append(np.array(feed_data).reshape(len(feed_data), 1))
words = np.concatenate(feed_batch, axis=0)
lac_result = self.lac_client.predict(
feed={"words": words,
"words.lod": words_lod},
fetch=["crf_decode"],
batch=True)
result_lod = lac_result["crf_decode.lod"]
feed_batch = []
words_lod = [0]
for i in range(len(feed)):
segs = self.lac_reader.parse_result(
feed[i]["words"],
lac_result["crf_decode"][result_lod[i]:result_lod[i + 1]])
feed_data = self.senta_reader.process(segs)
feed_batch.append({"words": feed_data})
return feed_batch, fetch
feed_batch.append(np.array(feed_data).reshape(len(feed_data), 1))
words_lod.append(words_lod[-1] + len(feed_data))
return {
"words": np.concatenate(feed_batch),
"words.lod": words_lod
}, fetch
senta_service = SentaService(name="senta")
senta_service.load_model_config("senta_bilstm_model")
senta_service.prepare_server(workdir="workdir")
senta_service.init_lac_client(
lac_port=9300, lac_client_config="lac_model/serving_server_conf.prototxt")
lac_port=9300,
lac_client_config="lac/lac_model/serving_server_conf.prototxt")
senta_service.run_rpc_service()
senta_service.run_web_service()
......@@ -35,6 +35,7 @@ fetch_map = client.predict(
"image": im,
"im_size": np.array(list(im.shape[1:])),
},
fetch=["save_infer_model/scale_0.tmp_0"])
fetch=["save_infer_model/scale_0.tmp_0"],
batch=False)
fetch_map["image"] = sys.argv[1]
postprocess(fetch_map)
......@@ -160,10 +160,10 @@ Therefore, a local prediction tool is built into the paddle_serving_app, which i
Taking [fit_a_line prediction service](../examples/fit_a_line) as an example, the following code can be used to run local prediction.
```python
from paddle_serving_app.local_predict import Debugger
from paddle_serving_app.local_predict import LocalPredictor
import numpy as np
debugger = Debugger()
debugger = LocalPredictor()
debugger.load_model_config("./uci_housing_model", gpu=False)
data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
-0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
......
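For completeness, the truncated snippet above would typically end with a predict call like the sketch below; the input shape and the feed/fetch names `x` / `price` follow the fit_a_line client shown earlier in this diff and are assumptions with respect to this file.
```python
# hedged continuation (not part of the original file): run one prediction
x = np.array(data).astype("float32").reshape(1, 13)  # batch of one 13-dim sample (shape assumed)
fetch_map = debugger.predict(feed={"x": x}, fetch=["price"], batch=True)
print(fetch_map["price"])
```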
......@@ -147,10 +147,10 @@ The server prediction op of the Paddle Serving framework uses Paddle's inference framework; when deploying
Taking the [fit_a_line prediction service](../examples/fit_a_line) as an example, local prediction can be run with the following code.
```python
from paddle_serving_app.local_predict import Debugger
from paddle_serving_app.local_predict import LocalPredictor
import numpy as np
debugger = Debugger()
debugger = LocalPredictor()
debugger.load_model_config("./uci_housing_model", gpu=False)
data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
-0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
......
......@@ -31,7 +31,7 @@ logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
class Debugger(object):
class LocalPredictor(object):
def __init__(self):
self.feed_names_ = []
self.fetch_names_ = []
......@@ -76,7 +76,7 @@ class Debugger(object):
config.switch_use_feed_fetch_ops(False)
self.predictor = create_paddle_predictor(config)
def predict(self, feed=None, fetch=None):
def predict(self, feed=None, fetch=None, batch=False, log_id=0):
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
......@@ -121,10 +121,19 @@ class Debugger(object):
name])
if self.feed_types_[name] == 0:
feed[name] = feed[name].astype("int64")
else:
elif self.feed_types_[name] == 1:
feed[name] = feed[name].astype("float32")
elif self.feed_types_[name] == 2:
feed[name] = feed[name].astype("int32")
else:
raise ValueError("local predictor receives wrong data type")
input_tensor = self.predictor.get_input_tensor(name)
input_tensor.copy_from_cpu(feed[name])
if "{}.lod".format(name) in feed:
input_tensor.set_lod([feed["{}.lod".format(name)]])
if batch == False:
input_tensor.copy_from_cpu(feed[name][np.newaxis, :])
else:
input_tensor.copy_from_cpu(feed[name])
output_tensors = []
output_names = self.predictor.get_output_names()
for output_name in output_names:
......@@ -139,5 +148,6 @@ class Debugger(object):
for i, name in enumerate(fetch):
fetch_map[name] = outputs[i]
if len(output_tensors[i].lod()) > 0:
fetch_map[name + ".lod"] = output_tensors[i].lod()[0]
fetch_map[name + ".lod"] = np.array(output_tensors[i].lod()[
0]).astype('int32')
return fetch_map
......@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from .chinese_bert_reader import ChineseBertReader
from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize
from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, Base64ToImage
from .image_reader import CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, ResizeByFactor
from .image_reader import RCNNPostprocess, SegPostprocess, PadStride
from .image_reader import RCNNPostprocess, SegPostprocess, PadStride, BlazeFacePostprocess
from .image_reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
from .lac_reader import LACReader
from .senta_reader import SentaReader
......
......@@ -317,7 +317,7 @@ class RCNNPostprocess(object):
self.clip_bbox([xmin, ymin, xmax, ymax])
w = xmax - xmin
h = ymax - ymin
im_shape = t['im_shape'][0][i].tolist()
im_shape = t['im_shape'].tolist()
im_height, im_width = int(im_shape[0]), int(im_shape[1])
xmin *= im_width
ymin *= im_height
......@@ -420,7 +420,7 @@ class RCNNPostprocess(object):
for key in image_with_bbox:
if key == "image":
continue
if ".lod" in key:
if ".lod" in key or "im_shape" in key:
continue
fetch_name = key
bbox_result = self._get_bbox_result(image_with_bbox, fetch_name,
......
......@@ -12,5 +12,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving App version string """
serving_app_version = "0.1.2"
serving_app_version = "0.0.0"
commit_id = ""
......@@ -233,7 +233,12 @@ class Client(object):
# key))
pass
def predict(self, feed=None, fetch=None, need_variant_tag=False, log_id=0):
def predict(self,
feed=None,
fetch=None,
batch=False,
need_variant_tag=False,
log_id=0):
self.profile_.record('py_prepro_0')
if feed is None or fetch is None:
......@@ -260,7 +265,10 @@ class Client(object):
int_feed_names = []
float_feed_names = []
int_shape = []
int_lod_slot_batch = []
float_lod_slot_batch = []
float_shape = []
fetch_names = []
counter = 0
batch_size = len(feed_batch)
......@@ -277,31 +285,56 @@ class Client(object):
for i, feed_i in enumerate(feed_batch):
int_slot = []
float_slot = []
int_lod_slot = []
float_lod_slot = []
for key in feed_i:
if key not in self.feed_names_:
if ".lod" not in key and key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if ".lod" in key:
continue
#if not isinstance(feed_i[key], np.ndarray):
self.shape_check(feed_i, key)
if self.feed_types_[key] in int_type:
if i == 0:
int_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
int_shape.append(list(feed_i[key].shape))
shape_lst.extend(list(feed_i[key].shape))
int_shape.append(shape_lst)
else:
int_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
int_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
int_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
int_slot.append(feed_i[key])
self.has_numpy_input = True
else:
int_slot.append(feed_i[key])
self.all_numpy_input = False
elif self.feed_types_[key] in float_type:
if i == 0:
float_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
float_shape.append(list(feed_i[key].shape))
shape_lst.extend(list(feed_i[key].shape))
float_shape.append(shape_lst)
else:
float_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
float_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
float_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
float_slot.append(feed_i[key])
self.has_numpy_input = True
......@@ -310,6 +343,8 @@ class Client(object):
self.all_numpy_input = False
int_slot_batch.append(int_slot)
float_slot_batch.append(float_slot)
int_lod_slot_batch.append(int_lod_slot)
float_lod_slot_batch.append(float_lod_slot)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
......@@ -317,14 +352,13 @@ class Client(object):
result_batch_handle = self.predictorres_constructor()
if self.all_numpy_input:
res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape, int_slot_batch,
int_feed_names, int_shape, fetch_names, result_batch_handle,
self.pid, log_id)
float_slot_batch, float_feed_names, float_shape,
float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape,
int_lod_slot_batch, fetch_names, result_batch_handle, self.pid,
log_id)
elif self.has_numpy_input == False:
res = self.client_handle_.batch_predict(
float_slot_batch, float_feed_names, float_shape, int_slot_batch,
int_feed_names, int_shape, fetch_names, result_batch_handle,
self.pid, log_id)
raise ValueError(
"Please make sure all of your inputs are numpy array")
else:
raise ValueError(
"Please make sure the inputs are all in list type or all in numpy.array type"
......@@ -354,8 +388,9 @@ class Client(object):
name))
result_map[name].shape = shape
if name in self.lod_tensor_set:
result_map["{}.lod".format(
name)] = result_batch_handle.get_lod(mi, name)
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == float32_type:
result_map[name] = result_batch_handle.get_float_by_name(
mi, name)
......@@ -367,9 +402,9 @@ class Client(object):
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
result_map["{}.lod".format(
name)] = result_batch_handle.get_lod(mi, name)
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
elif self.fetch_names_to_type_[name] == int32_type:
# result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int32_by_name(
......@@ -382,8 +417,9 @@ class Client(object):
shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape
if name in self.lod_tensor_set:
result_map["{}.lod".format(
name)] = result_batch_handle.get_lod(mi, name)
tmp_lod = result_batch_handle.get_lod(mi, name)
if np.size(tmp_lod) > 0:
result_map["{}.lod".format(name)] = tmp_lod
multi_result_map.append(result_map)
ret = None
if len(model_engine_names) == 1:
......
......@@ -74,7 +74,8 @@ def save_model(server_model_folder,
fetch_var = model_conf.FetchVar()
fetch_var.alias_name = key
fetch_var.name = fetch_var_dict[key].name
fetch_var.is_lod_tensor = fetch_var_dict[key].lod_level >= 1
#fetch_var.is_lod_tensor = fetch_var_dict[key].lod_level >= 1
fetch_var.is_lod_tensor = 1
if fetch_var_dict[key].dtype == core.VarDesc.VarType.INT64:
fetch_var.fetch_type = 0
if fetch_var_dict[key].dtype == core.VarDesc.VarType.FP32:
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.3.2"
serving_server_version = "0.3.2"
module_proto_version = "0.3.2"
serving_client_version = "0.0.0"
serving_server_version = "0.0.0"
module_proto_version = "0.0.0"
commit_id = ""
......@@ -584,7 +584,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
else:
raise Exception("error type.")
tensor.shape.extend(list(model_result[name].shape))
if name in self.lod_tensor_set_:
if "{}.lod".format(name) in model_result:
tensor.lod.extend(model_result["{}.lod".format(name)]
.tolist())
inst.tensor_array.append(tensor)
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.3.2"
serving_server_version = "0.3.2"
module_proto_version = "0.3.2"
serving_client_version = "0.0.0"
serving_server_version = "0.0.0"
module_proto_version = "0.0.0"
commit_id = ""
......@@ -118,12 +118,12 @@ class WebService(object):
del feed["fetch"]
if len(feed) == 0:
raise ValueError("empty input")
fetch_map = self.client.predict(feed=feed, fetch=fetch)
fetch_map = self.client.predict(feed=feed, fetch=fetch, batch=True)
result = self.postprocess(
feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
result = {"result": result}
except ValueError as err:
result = {"result": err}
result = {"result": str(err)}
return result
def run_rpc_service(self):
......@@ -171,8 +171,8 @@ class WebService(object):
self.app_instance = app_instance
def _launch_local_predictor(self):
from paddle_serving_app.local_predict import Debugger
self.client = Debugger()
from paddle_serving_app.local_predict import LocalPredictor
self.client = LocalPredictor()
self.client.load_model_config(
"{}".format(self.model_config), gpu=False, profile=False)
......
......@@ -73,6 +73,8 @@ def serve_args():
default=False,
action="store_true",
help="Use Multi-language-service")
parser.add_argument(
"--use_trt", default=False, action="store_true", help="Use TensorRT")
parser.add_argument(
"--product_name",
type=str,
......@@ -205,6 +207,7 @@ class Server(object):
self.cur_path = os.getcwd()
self.use_local_bin = False
self.gpuid = 0
self.use_trt = False
self.model_config_paths = None # for multi-model in a workflow
self.product_name = None
self.container_id = None
......@@ -271,6 +274,9 @@ class Server(object):
def set_gpuid(self, gpuid=0):
self.gpuid = gpuid
def set_trt(self):
self.use_trt = True
def _prepare_engine(self, model_config_paths, device):
if self.model_toolkit_conf == None:
self.model_toolkit_conf = server_sdk.ModelToolkitConf()
......@@ -290,6 +296,7 @@ class Server(object):
engine.enable_ir_optimization = self.ir_optimization
engine.static_optimization = False
engine.force_update_static_cache = False
engine.use_trt = self.use_trt
if device == "cpu":
engine.type = "FLUID_CPU_ANALYSIS_DIR"
......@@ -396,7 +403,10 @@ class Server(object):
for line in version_file.readlines():
if re.match("cuda_version", line):
cuda_version = line.split("\"")[1]
device_version = "serving-gpu-cuda" + cuda_version + "-"
if cuda_version != "trt":
device_version = "serving-gpu-cuda" + cuda_version + "-"
else:
device_version = "serving-gpu-" + cuda_version + "-"
folder_name = device_version + serving_server_version
tar_name = folder_name + ".tar.gz"
......@@ -645,7 +655,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
else:
raise Exception("error type.")
tensor.shape.extend(list(model_result[name].shape))
if name in self.lod_tensor_set_:
if "{}.lod".format(name) in model_result:
tensor.lod.extend(model_result["{}.lod".format(name)]
.tolist())
inst.tensor_array.append(tensor)
......
......@@ -64,6 +64,8 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.set_max_body_size(max_body_size)
if args.use_trt:
server.set_trt()
if args.product_name != None:
server.set_product_name(args.product_name)
......
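As a usage sketch for the new flag (not part of the patch), a TensorRT-enabled GPU server could be launched as below; the model directory and port are placeholders, and the trt build of `paddle-serving-server-gpu` is required.
```shell
# hedged example: enable TensorRT inference on the GPU server
python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9292 --use_trt
```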
......@@ -12,8 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
serving_client_version = "0.3.2"
serving_server_version = "0.3.2"
module_proto_version = "0.3.2"
serving_client_version = "0.0.0"
serving_server_version = "0.0.0"
module_proto_version = "0.0.0"
cuda_version = "9"
commit_id = ""
......@@ -178,7 +178,7 @@ class WebService(object):
feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
result = {"result": result}
except ValueError as err:
result = {"result": err}
result = {"result": str(err)}
return result
def run_rpc_service(self):
......@@ -232,8 +232,8 @@ class WebService(object):
self.app_instance = app_instance
def _launch_local_predictor(self, gpu):
from paddle_serving_app.local_predict import Debugger
self.client = Debugger()
from paddle_serving_app.local_predict import LocalPredictor
self.client = LocalPredictor()
self.client.load_model_config(
"{}".format(self.model_config), gpu=gpu, profile=False)
......
......@@ -15,5 +15,5 @@ from . import logger # this module must be the first to import
from .operator import Op, RequestOp, ResponseOp
from .pipeline_server import PipelineServer
from .pipeline_client import PipelineClient
from .local_rpc_service_handler import LocalRpcServiceHandler
from .local_service_handler import LocalServiceHandler
from .analyse import Analyst
......@@ -43,7 +43,6 @@ class DAGExecutor(object):
dag_conf = server_conf["dag"]
self._retry = dag_conf["retry"]
client_type = dag_conf["client_type"]
self._server_use_profile = dag_conf["use_profile"]
channel_size = dag_conf["channel_size"]
self._is_thread_op = dag_conf["is_thread_op"]
......@@ -61,8 +60,8 @@ class DAGExecutor(object):
self._is_thread_op, tracer_interval_s, server_worker_num)
self._dag = DAG(self.name, response_op, self._server_use_profile,
self._is_thread_op, client_type, channel_size,
build_dag_each_worker, self._tracer)
self._is_thread_op, channel_size, build_dag_each_worker,
self._tracer)
(in_channel, out_channel, pack_rpc_func,
unpack_rpc_func) = self._dag.build()
self._dag.start()
......@@ -324,13 +323,12 @@ class DAGExecutor(object):
class DAG(object):
def __init__(self, request_name, response_op, use_profile, is_thread_op,
client_type, channel_size, build_dag_each_worker, tracer):
channel_size, build_dag_each_worker, tracer):
self._request_name = request_name
self._response_op = response_op
self._use_profile = use_profile
self._is_thread_op = is_thread_op
self._channel_size = channel_size
self._client_type = client_type
self._build_dag_each_worker = build_dag_each_worker
self._tracer = tracer
if not self._is_thread_op:
......@@ -570,11 +568,9 @@ class DAG(object):
op.use_profiler(self._use_profile)
op.set_tracer(self._tracer)
if self._is_thread_op:
self._threads_or_proces.extend(
op.start_with_thread(self._client_type))
self._threads_or_proces.extend(op.start_with_thread())
else:
self._threads_or_proces.extend(
op.start_with_process(self._client_type))
self._threads_or_proces.extend(op.start_with_process())
_LOGGER.info("[DAG] start")
# not join yet
......@@ -582,7 +578,8 @@ class DAG(object):
def join(self):
for x in self._threads_or_proces:
x.join()
if x is not None:
x.join()
def stop(self):
for chl in self._channels:
......
......@@ -25,7 +25,7 @@ import (
"github.com/grpc-ecosystem/grpc-gateway/runtime"
"google.golang.org/grpc"
gw "./proto"
gw "serving-gateway/proto"
)
//export run_proxy_server
......
......@@ -27,7 +27,7 @@ _LOGGER = logging.getLogger(__name__)
_workdir_name_gen = util.NameGenerator("workdir_")
class LocalRpcServiceHandler(object):
class LocalServiceHandler(object):
def __init__(self,
model_config,
workdir="",
......
......@@ -38,7 +38,7 @@ from .channel import (ThreadChannel, ProcessChannel, ChannelDataEcode,
ChannelTimeoutError)
from .util import NameGenerator
from .profiler import UnsafeTimeProfiler as TimeProfiler
from . import local_rpc_service_handler
from . import local_service_handler
_LOGGER = logging.getLogger(__name__)
_op_name_gen = NameGenerator("Op")
......@@ -56,7 +56,7 @@ class Op(object):
retry=None,
batch_size=None,
auto_batching_timeout=None,
local_rpc_service_handler=None):
local_service_handler=None):
# In __init__, all the parameters are just saved and Op is not initialized
if name is None:
name = _op_name_gen.next()
......@@ -64,7 +64,7 @@ class Op(object):
self.concurrency = concurrency # amount of concurrency
self.set_input_ops(input_ops)
self._local_rpc_service_handler = local_rpc_service_handler
self._local_service_handler = local_service_handler
self._server_endpoints = server_endpoints
self._fetch_names = fetch_list
self._client_config = client_config
......@@ -123,49 +123,65 @@ class Op(object):
self.with_serving = True
self._server_endpoints = server_endpoints
else:
if self._local_rpc_service_handler is None:
if self._local_service_handler is None:
local_service_conf = conf.get("local_service_conf")
_LOGGER.info("local_service_conf: {}".format(
local_service_conf))
model_config = local_service_conf.get("model_config")
self.client_type = local_service_conf.get("client_type")
_LOGGER.info("model_config: {}".format(model_config))
if model_config is None:
self.with_serving = False
else:
# local rpc service
self.with_serving = True
service_handler = local_rpc_service_handler.LocalRpcServiceHandler(
model_config=model_config,
workdir=local_service_conf["workdir"],
thread_num=local_service_conf["thread_num"],
devices=local_service_conf["devices"],
mem_optim=local_service_conf["mem_optim"],
ir_optim=local_service_conf["ir_optim"])
service_handler.prepare_server() # get fetch_list
serivce_ports = service_handler.get_port_list()
self._server_endpoints = [
"127.0.0.1:{}".format(p) for p in serivce_ports
]
if self._client_config is None:
self._client_config = service_handler.get_client_config(
)
if self._fetch_names is None:
self._fetch_names = service_handler.get_fetch_list()
self._local_rpc_service_handler = service_handler
if self.client_type == "brpc" or self.client_type == "grpc":
service_handler = local_service_handler.LocalServiceHandler(
model_config=model_config,
workdir=local_service_conf["workdir"],
thread_num=local_service_conf["thread_num"],
devices=local_service_conf["devices"],
mem_optim=local_service_conf["mem_optim"],
ir_optim=local_service_conf["ir_optim"])
service_handler.prepare_server() # get fetch_list
serivce_ports = service_handler.get_port_list()
self._server_endpoints = [
"127.0.0.1:{}".format(p) for p in serivce_ports
]
if self._client_config is None:
self._client_config = service_handler.get_client_config(
)
if self._fetch_names is None:
self._fetch_names = service_handler.get_fetch_list(
)
elif self.client_type == "local_predictor":
service_handler = local_service_handler.LocalPredictorServiceHandler(
model_config=model_config,
workdir=local_service_conf["workdir"],
thread_num=local_service_conf["thread_num"],
devices=local_service_conf["devices"])
service_handler.prepare_server() # get fetch_list
self.local_predictor = service_handler.get_client()
if self._client_config is None:
self._client_config = service_handler.get_client_config(
)
if self._fetch_names is None:
self._fetch_names = service_handler.get_fetch_list(
)
self._local_service_handler = service_handler
else:
self.with_serving = True
self._local_rpc_service_handler.prepare_server(
self._local_service_handler.prepare_server(
) # get fetch_list
serivce_ports = self._local_rpc_service_handler.get_port_list(
)
serivce_ports = self._local_service_handler.get_port_list()
self._server_endpoints = [
"127.0.0.1:{}".format(p) for p in serivce_ports
]
if self._client_config is None:
self._client_config = self._local_rpc_service_handler.get_client_config(
self._client_config = self._local_service_handler.get_client_config(
)
if self._fetch_names is None:
self._fetch_names = self._local_rpc_service_handler.get_fetch_list(
self._fetch_names = self._local_service_handler.get_fetch_list(
)
else:
self.with_serving = True
......@@ -188,13 +204,13 @@ class Op(object):
self._batch_size, self._auto_batching_timeout)))
def launch_local_rpc_service(self):
if self._local_rpc_service_handler is None:
if self._local_service_handler is None:
_LOGGER.warning(
self._log("Failed to launch local rpc"
" service: local_rpc_service_handler is None."))
" service: local_service_handler is None."))
return
port = self._local_rpc_service_handler.get_port_list()
self._local_rpc_service_handler.start_server()
port = self._local_service_handler.get_port_list()
self._local_service_handler.start_server()
_LOGGER.info("Op({}) use local rpc service at port: {}"
.format(self.name, port))
......@@ -215,22 +231,25 @@ class Op(object):
def set_tracer(self, tracer):
self._tracer = tracer
def init_client(self, client_type, client_config, server_endpoints,
fetch_names):
def init_client(self, client_config, server_endpoints):
if self.with_serving == False:
_LOGGER.info("Op({}) has no client (and it also do not "
"run the process function)".format(self.name))
return None
if client_type == 'brpc':
if self.client_type == 'brpc':
client = Client()
client.load_client_config(client_config)
elif client_type == 'grpc':
elif self.client_type == 'grpc':
client = MultiLangClient()
elif self.client_type == 'local_predictor':
if self.local_predictor is None:
raise ValueError("local predictor not yet created")
client = self.local_predictor
else:
raise ValueError("Failed to init client: unknow client "
"type {}".format(client_type))
client.connect(server_endpoints)
self._fetch_names = fetch_names
"type {}".format(self.client_type))
if self.client_type != "local_predictor":
client.connect(server_endpoints)
return client
def get_input_ops(self):
......@@ -291,15 +310,25 @@ class Op(object):
(_, input_dict), = input_dicts.items()
return input_dict
def process(self, feed_batch, typical_logid):
def process(self, feed_batch, fetch_names, typical_logid):
err, err_info = ChannelData.check_batch_npdata(feed_batch)
if err != 0:
_LOGGER.critical(
self._log("Failed to run process: {}. Please override "
"preprocess func.".format(err_info)))
os._exit(-1)
call_result = self.client.predict(
feed=feed_batch, fetch=self._fetch_names, log_id=typical_logid)
if self.client_type == "local_predictor":
call_result = self.client.predict(
feed=feed_batch[0],
fetch=fetch_names,
batch=True,
log_id=typical_logid)
else:
call_result = self.client.predict(
feed=feed_batch,
fetch=fetch_names,
batch=True,
log_id=typical_logid)
if isinstance(self.client, MultiLangClient):
if call_result is None or call_result["serving_status_code"] != 0:
return None
......@@ -347,23 +376,22 @@ class Op(object):
for channel in channels:
channel.push(data, name)
def start_with_process(self, client_type):
def start_with_process(self):
trace_buffer = None
if self._tracer is not None:
trace_buffer = self._tracer.data_buffer()
proces = []
process = []
for concurrency_idx in range(self.concurrency):
p = multiprocessing.Process(
target=self._run,
args=(concurrency_idx, self._get_input_channel(),
self._get_output_channels(), client_type, False,
trace_buffer))
self._get_output_channels(), False, trace_buffer))
p.daemon = True
p.start()
proces.append(p)
return proces
process.append(p)
return process
def start_with_thread(self, client_type):
def start_with_thread(self):
trace_buffer = None
if self._tracer is not None:
trace_buffer = self._tracer.data_buffer()
......@@ -372,8 +400,7 @@ class Op(object):
t = threading.Thread(
target=self._run,
args=(concurrency_idx, self._get_input_channel(),
self._get_output_channels(), client_type, True,
trace_buffer))
self._get_output_channels(), True, trace_buffer))
# When a process exits, it attempts to terminate
# all of its daemonic child processes.
t.daemon = True
......@@ -652,7 +679,7 @@ class Op(object):
return parsed_data_dict, need_profile_dict, profile_dict
def _run(self, concurrency_idx, input_channel, output_channels, client_type,
def _run(self, concurrency_idx, input_channel, output_channels,
is_thread_op, trace_buffer):
op_info_prefix = "[{}|{}]".format(self.name, concurrency_idx)
tid = threading.current_thread().ident
......@@ -660,8 +687,7 @@ class Op(object):
# init op
profiler = None
try:
profiler = self._initialize(is_thread_op, client_type,
concurrency_idx)
profiler = self._initialize(is_thread_op, concurrency_idx)
except Exception as e:
_LOGGER.critical(
"{} Failed to init op: {}".format(op_info_prefix, e),
......@@ -801,16 +827,15 @@ class Op(object):
except Queue.Full:
break
def _initialize(self, is_thread_op, client_type, concurrency_idx):
def _initialize(self, is_thread_op, concurrency_idx):
if is_thread_op:
with self._for_init_op_lock:
if not self._succ_init_op:
# for the threaded version of Op, each thread cannot get its concurrency_idx
self.concurrency_idx = None
# init client
self.client = self.init_client(
client_type, self._client_config,
self._server_endpoints, self._fetch_names)
self.client = self.init_client(self._client_config,
self._server_endpoints)
# user defined
self.init_op()
self._succ_init_op = True
......@@ -818,9 +843,8 @@ class Op(object):
else:
self.concurrency_idx = concurrency_idx
# init client
self.client = self.init_client(client_type, self._client_config,
self._server_endpoints,
self._fetch_names)
self.client = self.init_client(self._client_config,
self._server_endpoints)
# user defined
self.init_op()
......
......@@ -28,17 +28,11 @@ import util
py_version = sys.version_info
def copy_lib():
if py_version[0] == 2:
lib_list = ['libpython2.7.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
elif py_version[1] == 6:
lib_list = ['libpython3.6m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
elif py_version[1] == 7:
lib_list = ['libpython3.7m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
os.popen('mkdir -p paddle_serving_client/lib')
lib_list = ['${OPENSSL_CRYPTO_LIBRARY}', '${OPENSSL_SSL_LIBRARY}',
'${PYTHON_LIBRARY}']
for lib in lib_list:
r = os.popen('whereis {}'.format(lib))
text = r.read()
os.popen('cp {} ./paddle_serving_client/lib'.format(text.strip().split(' ')[1]))
os.popen('cp {} ./paddle_serving_client/lib'.format(lib))
max_version, mid_version, min_version = util.python_version()
......@@ -53,9 +47,6 @@ REQUIRED_PACKAGES = [
'grpcio-tools >= 1.28.1'
]
if not util.find_package("paddlepaddle") and not util.find_package("paddlepaddle-gpu"):
REQUIRED_PACKAGES.append("paddlepaddle")
packages=['paddle_serving_client',
'paddle_serving_client.proto',
......
......@@ -29,7 +29,7 @@ util.gen_pipeline_code("paddle_serving_server")
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app'
'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app', 'func_timeout', 'pyyaml'
]
packages=['paddle_serving_server',
......
......@@ -19,17 +19,19 @@ from __future__ import print_function
from setuptools import setup, Distribution, Extension
from setuptools import find_packages
from setuptools import setup
from paddle_serving_server_gpu.version import serving_server_version
from paddle_serving_server_gpu.version import serving_server_version, cuda_version
import util
max_version, mid_version, min_version = util.python_version()
if cuda_version != "trt":
cuda_version = "post" + cuda_version
max_version, mid_version, min_version = util.python_version()
# gen pipeline proto code
util.gen_pipeline_code("paddle_serving_server_gpu")
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app'
'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app', 'func_timeout', 'pyyaml'
]
packages=['paddle_serving_server_gpu',
......@@ -56,7 +58,7 @@ package_data={'paddle_serving_server_gpu': ['pipeline/gateway/libproxy_server.so
setup(
name='paddle-serving-server-gpu',
version=serving_server_version.replace('-', '') + '.post@CUDA_VERSION_MAJOR@',
version=serving_server_version.replace('-', '') + "." + cuda_version,
description=
('Paddle Serving Package for saved model with PaddlePaddle'),
url='https://github.com/PaddlePaddle/Serving',
......
......@@ -44,8 +44,8 @@ def gen_pipeline_code(package_name):
ret = os.system(
"cd {}/pipeline/gateway/proto/ && "
"../../../../../third_party/install/protobuf/bin/protoc -I. "
"-I$GOPATH/src "
"-I$GOPATH/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis "
"-I$GOPATH/pkg/mod "
"-I$GOPATH/pkg/mod/github.com/grpc-ecosystem/grpc-gateway\@v1.15.2/third_party/googleapis "
"--go_out=plugins=grpc:. "
"gateway.proto".format(package_name))
if ret != 0:
......@@ -54,14 +54,18 @@ def gen_pipeline_code(package_name):
ret = os.system(
"cd {}/pipeline/gateway/proto/ && "
"../../../../../third_party/install/protobuf/bin/protoc -I. "
"-I$GOPATH/src "
"-I$GOPATH/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis "
"-I$GOPATH/pkg/mod "
"-I$GOPATH/pkg/mod/github.com/grpc-ecosystem/grpc-gateway\@v1.15.2/third_party/googleapis "
"--grpc-gateway_out=logtostderr=true:. "
"gateway.proto".format(package_name))
if ret != 0:
exit(1)
# pipeline grpc-gateway shared-lib
ret = os.system("cd {}/pipeline/gateway/ && go mod init serving-gateway".
format(package_name))
ret = os.system("cd {}/pipeline/gateway/ && go mod vendor && go mod tidy".
format(package_name))
ret = os.system(
"cd {}/pipeline/gateway && "
"go build -buildmode=c-shared -o libproxy_server.so proxy_server.go".
......
......@@ -41,6 +41,12 @@ RUN yum -y install wget && \
echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \
wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
tar zxf protobuf-all-3.11.2.tar.gz && \
cd protobuf-3.11.2 && \
./configure && make -j4 && make install && \
make clean && \
cd .. && rm -rf protobuf-* &&\
yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all && \
echo "export LANG=en_US.utf8" >> /root/.bashrc && \
......
......@@ -41,6 +41,12 @@ RUN yum -y install wget && \
echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \
wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
tar zxf protobuf-all-3.11.2.tar.gz && \
cd protobuf-3.11.2 && \
./configure && make -j4 && make install && \
make clean && \
cd .. && rm -rf protobuf-* && \
yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all && \
localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
......
......@@ -34,6 +34,13 @@ RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2
&& cd .. \
&& rm -rf patchelf-0.10*
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
tar zxf protobuf-all-3.11.2.tar.gz && \
cd protobuf-3.11.2 && \
./configure && make -j4 && make install && \
make clean && \
cd .. && rm -rf protobuf-*
RUN yum install -y python3 python3-devel
RUN yum -y update >/dev/null \
......
......@@ -5,7 +5,14 @@ RUN yum -y install wget >/dev/null \
&& yum -y install git openssl-devel curl-devel bzip2-devel python-devel \
&& yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false \
&& yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
&& yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
&& yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
tar zxf protobuf-all-3.11.2.tar.gz && \
cd protobuf-3.11.2 && \
./configure && make -j4 && make install && \
make clean && \
cd .. && rm -rf protobuf-*
RUN wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
&& tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
......@@ -32,3 +39,5 @@ RUN yum install -y python3 python3-devel \
RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
&& echo "export LANG=en_US.utf8" >> /root/.bashrc \
&& echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
......@@ -6,6 +6,13 @@ RUN yum -y install wget >/dev/null \
&& yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
&& yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
tar zxf protobuf-all-3.11.2.tar.gz && \
cd protobuf-3.11.2 && \
./configure && make -j4 && make install && \
make clean && \
cd .. && rm -rf protobuf-*
RUN wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
&& tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
&& mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \
......
......@@ -18,14 +18,20 @@ function init() {
export PYTHONROOT=/usr
cd Serving
export SERVING_WORKDIR=$PWD
$PYTHONROOT/bin/python -m pip install -r python/requirements.txt
$PYTHONROOT/bin/python -m pip install paddlepaddle
export GOPATH=$HOME/go
export PATH=$PATH:$GOPATH/bin
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
go get -u github.com/golang/protobuf/protoc-gen-go
go get -u google.golang.org/grpc
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0
}
function check_cmd() {
......@@ -605,7 +611,7 @@ function python_test_grpc_impl() {
# test load server config and client config in Server side
cd criteo_ctr_with_cube # pwd: /Serving/python/examples/grpc_impl_example/criteo_ctr_with_cube
<<COMMENT #comment for compile bug, todo fix conflict between grpc-gateway and cube-agent
check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz > /dev/null"
check_cmd "tar xf ctr_cube_unittest.tar.gz"
check_cmd "mv models/ctr_client_conf ./"
......@@ -626,9 +632,11 @@ function python_test_grpc_impl() {
echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.67"
exit 1
fi
COMMENT
echo "grpc impl test success"
kill_server_process
ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
#ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
cd .. # pwd: /Serving/python/examples/grpc_impl_example
;;
......@@ -665,6 +673,7 @@ function python_test_grpc_impl() {
cd .. # pwd: /Serving/python/examples/grpc_impl_example
# test load server config and client config in Server side
<<COMMENT #comment for compile bug, todo fix conflict between grpc-gateway and cube-agent
cd criteo_ctr_with_cube # pwd: /Serving/python/examples/grpc_impl_example/criteo_ctr_with_cube
check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz"
......@@ -689,10 +698,11 @@ function python_test_grpc_impl() {
echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.67"
exit 1
fi
COMMENT
echo "grpc impl test success"
kill_server_process
ps -ef | grep "test_server_gpu" | grep -v serving_build | grep -v grep | awk '{print $2}' | xargs kill
ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
#ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
cd .. # pwd: /Serving/python/examples/grpc_impl_example
;;
*)
......@@ -829,8 +839,8 @@ EOF
kill_process_by_port 18080
# test: process servicer & thread op
pip uninstall grpcio -y
pip install grpcio --no-binary=grpcio
#pip uninstall grpcio -y
#pip install grpcio --no-binary=grpcio
cat << EOF > config.yml
rpc_port: 18080
worker_num: 4
......@@ -944,7 +954,7 @@ function python_run_test() {
local TYPE=$1 # pwd: /Serving
cd python/examples # pwd: /Serving/python/examples
python_test_fit_a_line $TYPE # pwd: /Serving/python/examples
python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
#python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
python_test_bert $TYPE # pwd: /Serving/python/examples
python_test_imdb $TYPE # pwd: /Serving/python/examples
python_test_lac $TYPE # pwd: /Serving/python/examples
......@@ -953,7 +963,7 @@ function python_run_test() {
python_test_yolov4 $TYPE # pwd: /Serving/python/examples
python_test_grpc_impl $TYPE # pwd: /Serving/python/examples
python_test_resnet50 $TYPE # pwd: /Serving/python/examples
python_test_pipeline $TYPE # pwd: /Serving/python/examples
#python_test_pipeline $TYPE # pwd: /Serving/python/examples
echo "test python $TYPE part finished as expected."
cd ../.. # pwd: /Serving
}
......@@ -1098,7 +1108,7 @@ function main() {
build_app $TYPE # pwd: /Serving
java_run_test $TYPE # pwd: /Serving
python_run_test $TYPE # pwd: /Serving
monitor_test $TYPE # pwd: /Serving
#monitor_test $TYPE # pwd: /Serving
echo "serving $TYPE part finished as expected."
}
......