Commit bcc0c749, authored by BohaoWu

Merge remote-tracking branch 'upstream/develop' into develop

@@ -45,9 +45,10 @@ nvidia-docker exec -it test bash
 ```
 ```shell
-pip install paddle-serving-client
-pip install paddle-serving-server # CPU
-pip install paddle-serving-server-gpu # GPU
+pip install paddle-serving-client==0.3.2
+pip install paddle-serving-server==0.3.2 # CPU
+pip install paddle-serving-server-gpu==0.3.2.post9 # GPU with CUDA9.0
+pip install paddle-serving-server-gpu==0.3.2.post10 # GPU with CUDA10.0
 ```
 You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source; add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the pip command) to speed up the download.
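For example, a minimal sketch of an install that goes through the Tsinghua mirror (assuming the pip inside the container above; only the `-i` flag is added to the commands shown in this change):

```shell
# Point pip at the Tsinghua index to speed up downloads from inside China.
MIRROR=https://pypi.tuna.tsinghua.edu.cn/simple
pip install -i $MIRROR paddle-serving-client==0.3.2
pip install -i $MIRROR paddle-serving-server==0.3.2             # CPU
pip install -i $MIRROR paddle-serving-server-gpu==0.3.2.post10  # GPU with CUDA10.0
```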
@@ -172,6 +173,11 @@ Here, `client.predict` function has two arguments. `feed` is a `python dict` wit
 - [An End-to-end tutorial from training to inference service deployment](doc/TRAIN_TO_SERVICE.md)
 - [Write Bert-as-Service in 10 minutes](doc/BERT_10_MINS.md)
+### Tutorials at AIStudio
+- [Introduction to PaddleServing](https://aistudio.baidu.com/aistudio/projectdetail/605819)
+- [Image Segmentation on Paddle Serving](https://aistudio.baidu.com/aistudio/projectdetail/457715)
+- [Sentiment Analysis](https://aistudio.baidu.com/aistudio/projectdetail/509014)
 ### Developers
 - [How to config Serving native operators on server side?](doc/SERVER_DAG.md)
 - [How to develop a new Serving operator?](doc/NEW_OPERATOR.md)
......
@@ -47,9 +47,10 @@ nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/se
 nvidia-docker exec -it test bash
 ```
 ```shell
-pip install paddle-serving-client
-pip install paddle-serving-server # CPU
-pip install paddle-serving-server-gpu # GPU
+pip install paddle-serving-client==0.3.2
+pip install paddle-serving-server==0.3.2 # CPU
+pip install paddle-serving-server-gpu==0.3.2.post9 # GPU with CUDA9.0
+pip install paddle-serving-server-gpu==0.3.2.post10 # GPU with CUDA10.0
 ```
 You may need to use a domestic mirror source (for example the Tsinghua mirror; add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the pip command) to speed up the download.
@@ -169,6 +170,11 @@ print(fetch_map)
 - [An end-to-end tutorial from training to inference service deployment](doc/TRAIN_TO_SERVICE_CN.md)
 - [Build Bert-As-Service in 10 minutes](doc/BERT_10_MINS_CN.md)
+### Tutorials at AIStudio
+- [PaddleServing exercise](https://aistudio.baidu.com/aistudio/projectdetail/605819)
+- [Image segmentation with PaddleServing](https://aistudio.baidu.com/aistudio/projectdetail/457715)
+- [Sentiment analysis with PaddleServing](https://aistudio.baidu.com/aistudio/projectdetail/509014)
 ### Developer tutorials
 - [How to configure the computation graph on the server side?](doc/SERVER_DAG_CN.md)
 - [How to develop a new General Op?](doc/NEW_OPERATOR_CN.md)
......
@@ -31,7 +31,7 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "1.7.2")
+SET(PADDLE_VERSION "1.8.4")
 if (WITH_GPU)
 SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
......
@@ -14,6 +14,8 @@
 syntax = "proto2";
+package baidu.paddle_serving.multi_lang;
 option java_multiple_files = true;
 option java_package = "io.paddle.serving.grpc";
 option java_outer_classname = "ServingProto";
@@ -37,6 +39,7 @@ message InferenceRequest {
 repeated string feed_var_names = 2;
 repeated string fetch_var_names = 3;
 required bool is_python = 4 [ default = false ];
+required uint64 log_id = 5 [ default = 0 ];
 };
 message InferenceResponse {
......
@@ -58,6 +58,8 @@ message ResourceConf {
 optional string cube_config_path = 5;
 optional string cube_config_file = 6;
 optional int32 cube_quant_bits = 7; // set 0 if no quant.
+optional string auth_product_name = 8;
+optional string auth_container_id = 9;
 };
 // DAG node depency info
......
@@ -22,7 +22,8 @@
 #ifdef BCLOUD
 #include "baidu/rpc/channel.h"
 #include "baidu/rpc/parallel_channel.h"
-#include "rapidjson/document.h"
+#include "rapidjson_1.0/document.h"
+#include "rapidjson_1.0/rapidjson.h"
 #else
 #include "brpc/channel.h"
 #include "brpc/parallel_channel.h"
......
@@ -227,7 +227,8 @@ class PredictorClient {
 const std::vector<std::vector<int>>& int_shape,
 const std::vector<std::string>& fetch_name,
 PredictorRes& predict_res_batch, // NOLINT
-const int& pid);
+const int& pid,
+const uint64_t log_id);
 int numpy_predict(
 const std::vector<std::vector<py::array_t<float>>>& float_feed_batch,
@@ -238,7 +239,8 @@ class PredictorClient {
 const std::vector<std::vector<int>>& int_shape,
 const std::vector<std::string>& fetch_name,
 PredictorRes& predict_res_batch, // NOLINT
-const int& pid);
+const int& pid,
+const uint64_t log_id);
 private:
 PredictorApi _api;
......
@@ -39,7 +39,9 @@ using configure::GeneralModelConfig;
 void PredictorClient::init_gflags(std::vector<std::string> argv) {
 std::call_once(gflags_init_flag, [&]() {
+#ifndef BCLOUD
 FLAGS_logtostderr = true;
+#endif
 argv.insert(argv.begin(), "dummy");
 int argc = argv.size();
 char **arr = new char *[argv.size()];
@@ -144,7 +146,8 @@ int PredictorClient::batch_predict(
 const std::vector<std::vector<int>> &int_shape,
 const std::vector<std::string> &fetch_name,
 PredictorRes &predict_res_batch,
-const int &pid) {
+const int &pid,
+const uint64_t log_id) {
 int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
 predict_res_batch.clear();
@@ -162,6 +165,7 @@ int PredictorClient::batch_predict(
 VLOG(2) << "int feed name size: " << int_feed_name.size();
 VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
 Request req;
+req.set_log_id(log_id);
 for (auto &name : fetch_name) {
 req.add_fetch_var_names(name);
 }
@@ -356,7 +360,8 @@ int PredictorClient::numpy_predict(
 const std::vector<std::vector<int>> &int_shape,
 const std::vector<std::string> &fetch_name,
 PredictorRes &predict_res_batch,
-const int &pid) {
+const int &pid,
+const uint64_t log_id) {
 int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
 VLOG(2) << "batch size: " << batch_size;
 predict_res_batch.clear();
@@ -374,6 +379,7 @@ int PredictorClient::numpy_predict(
 VLOG(2) << "int feed name size: " << int_feed_name.size();
 VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
 Request req;
+req.set_log_id(log_id);
 for (auto &name : fetch_name) {
 req.add_fetch_var_names(name);
 }
......
@@ -107,7 +107,8 @@ PYBIND11_MODULE(serving_client, m) {
 const std::vector<std::vector<int>> &int_shape,
 const std::vector<std::string> &fetch_name,
 PredictorRes &predict_res_batch,
-const int &pid) {
+const int &pid,
+const uint64_t log_id) {
 return self.batch_predict(float_feed_batch,
 float_feed_name,
 float_shape,
@@ -116,7 +117,8 @@ PYBIND11_MODULE(serving_client, m) {
 int_shape,
 fetch_name,
 predict_res_batch,
-pid);
+pid,
+log_id);
 },
 py::call_guard<py::gil_scoped_release>())
 .def("numpy_predict",
@@ -131,7 +133,8 @@ PYBIND11_MODULE(serving_client, m) {
 const std::vector<std::vector<int>> &int_shape,
 const std::vector<std::string> &fetch_name,
 PredictorRes &predict_res_batch,
-const int &pid) {
+const int &pid,
+const uint64_t log_id) {
 return self.numpy_predict(float_feed_batch,
 float_feed_name,
 float_shape,
@@ -140,7 +143,8 @@ PYBIND11_MODULE(serving_client, m) {
 int_shape,
 fetch_name,
 predict_res_batch,
-pid);
+pid,
+log_id);
 },
 py::call_guard<py::gil_scoped_release>());
 }
......
@@ -45,36 +45,41 @@ int GeneralCopyOp::inference() {
 const std::string pre_name = pre_node_names[0];
 const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
-VLOG(2) << "precedent name: " << pre_name;
+uint64_t log_id = input_blob->GetLogId();
+VLOG(2) << "(logid=" << log_id << ") precedent name: " << pre_name;
 const TensorVector *in = &input_blob->tensor_vector;
-VLOG(2) << "input size: " << in->size();
+VLOG(2) << "(logid=" << log_id << ") input size: " << in->size();
 int batch_size = input_blob->GetBatchSize();
 int input_var_num = 0;
 GeneralBlob *res = mutable_data<GeneralBlob>();
+res->SetLogId(log_id);
 TensorVector *out = &res->tensor_vector;
-VLOG(2) << "input batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
 res->SetBatchSize(batch_size);
 if (!res) {
-LOG(ERROR) << "Failed get op tls reader object output";
+LOG(ERROR) << "(logid=" << log_id
+           << ") Failed get op tls reader object output";
 }
 Timer timeline;
 int64_t start = timeline.TimeStampUS();
-VLOG(2) << "Going to init lod tensor";
+VLOG(2) << "(logid=" << log_id << ") Going to init lod tensor";
 for (int i = 0; i < in->size(); ++i) {
 paddle::PaddleTensor lod_tensor;
 CopyLod(&in->at(i), &lod_tensor);
 lod_tensor.dtype = in->at(i).dtype;
 lod_tensor.name = in->at(i).name;
-VLOG(2) << "lod tensor [" << i << "].name = " << lod_tensor.name;
+VLOG(2) << "(logid=" << log_id << ") lod tensor [" << i
+        << "].name = " << lod_tensor.name;
 out->push_back(lod_tensor);
 }
-VLOG(2) << "pack done.";
+VLOG(2) << "(logid=" << log_id << ") pack done.";
 for (int i = 0; i < out->size(); ++i) {
 int64_t *src_ptr = static_cast<int64_t *>(in->at(i).data.data());
@@ -86,7 +91,7 @@ int GeneralCopyOp::inference() {
 }
 }
-VLOG(2) << "output done.";
+VLOG(2) << "(logid=" << log_id << ") output done.";
 timeline.Pause();
 int64_t end = timeline.TimeStampUS();
@@ -94,7 +99,7 @@ int GeneralCopyOp::inference() {
 AddBlobInfo(res, start);
 AddBlobInfo(res, end);
-VLOG(2) << "read data from client success";
+VLOG(2) << "(logid=" << log_id << ") read data from client success";
 return 0;
 }
......
@@ -13,20 +13,12 @@
 // limitations under the License.
 #pragma once
-#include <vector>
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h"  // NOLINT
-#endif
 #include <string>
+#include <vector>
 #include "core/general-server/general_model_service.pb.h"
 #include "core/general-server/op/general_infer_helper.h"
 #include "core/predictor/framework/resource.h"
+#include "paddle_inference_api.h"  // NOLINT
 namespace baidu {
 namespace paddle_serving {
......
@@ -50,18 +50,20 @@ int GeneralDistKVInferOp::inference() {
 const std::string pre_name = pre_node_names[0];
 const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
-VLOG(2) << "Get precedent op name: " << pre_name;
+uint64_t log_id = input_blob->GetLogId();
+VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
 GeneralBlob *output_blob = mutable_data<GeneralBlob>();
 if (!input_blob) {
-LOG(ERROR) << "Failed mutable depended argument, op:" << pre_name;
+LOG(ERROR) << "(logid=" << log_id
+           << ") Failed mutable depended argument, op:" << pre_name;
 return -1;
 }
 const TensorVector *in = &input_blob->tensor_vector;
 TensorVector *out = &output_blob->tensor_vector;
 int batch_size = input_blob->GetBatchSize();
-VLOG(2) << "input batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
 std::vector<uint64_t> keys;
 std::vector<rec::mcube::CubeValue> values;
 int sparse_count = 0;
@@ -96,13 +98,14 @@ int GeneralDistKVInferOp::inference() {
 rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
 std::vector<std::string> table_names = cube->get_table_names();
 if (table_names.size() == 0) {
-LOG(ERROR) << "cube init error or cube config not given.";
+LOG(ERROR) << "(logid=" << log_id
+           << ") cube init error or cube config not given.";
 return -1;
 }
 int ret = cube->seek(table_names[0], keys, &values);
 int64_t cube_end = timeline.TimeStampUS();
 if (values.size() != keys.size() || values[0].buff.size() == 0) {
-LOG(ERROR) << "cube value return null";
+LOG(ERROR) << "(logid=" << log_id << ") cube value return null";
 }
 size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float);
 TensorVector sparse_out;
@@ -153,14 +156,16 @@ int GeneralDistKVInferOp::inference() {
 infer_in.insert(infer_in.end(), sparse_out.begin(), sparse_out.end());
 output_blob->SetBatchSize(batch_size);
+output_blob->SetLogId(log_id);
-VLOG(2) << "infer batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
 int64_t start = timeline.TimeStampUS();
 if (InferManager::instance().infer(
 engine_name().c_str(), &infer_in, out, batch_size)) {
-LOG(ERROR) << "Failed do infer in fluid model: " << engine_name();
+LOG(ERROR) << "(logid=" << log_id
+           << ") Failed do infer in fluid model: " << engine_name();
 return -1;
 }
......
@@ -15,17 +15,9 @@
 #pragma once
 #include <string>
 #include <vector>
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h"  // NOLINT
-#endif
 #include "core/general-server/general_model_service.pb.h"
 #include "core/general-server/op/general_infer_helper.h"
+#include "paddle_inference_api.h"  // NOLINT
 namespace baidu {
 namespace paddle_serving {
......
@@ -59,10 +59,13 @@ int GeneralDistKVQuantInferOp::inference() {
 return -1;
 }
+uint64_t log_id = input_blob->GetLogId();
+output_blob->SetLogId(log_id);
 const TensorVector *in = &input_blob->tensor_vector;
 TensorVector *out = &output_blob->tensor_vector;
 int batch_size = input_blob->GetBatchSize();
-VLOG(2) << "input batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
 std::vector<uint64_t> keys;
 std::vector<rec::mcube::CubeValue> values;
 int sparse_count = 0;
@@ -94,13 +97,14 @@ int GeneralDistKVQuantInferOp::inference() {
 rec::mcube::CubeAPI *cube = rec::mcube::CubeAPI::instance();
 std::vector<std::string> table_names = cube->get_table_names();
 if (table_names.size() == 0) {
-LOG(ERROR) << "cube init error or cube config not given.";
+LOG(ERROR) << "(logid=" << log_id
+           << ") cube init error or cube config not given.";
 return -1;
 }
 int ret = cube->seek(table_names[0], keys, &values);
 if (values.size() != keys.size() || values[0].buff.size() == 0) {
-LOG(ERROR) << "cube value return null";
+LOG(ERROR) << "(logid=" << log_id << ") cube value return null";
 }
 TensorVector sparse_out;
@@ -182,7 +186,7 @@ int GeneralDistKVQuantInferOp::inference() {
 output_blob->SetBatchSize(batch_size);
-VLOG(2) << "infer batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
 Timer timeline;
 int64_t start = timeline.TimeStampUS();
@@ -190,7 +194,8 @@ int GeneralDistKVQuantInferOp::inference() {
 if (InferManager::instance().infer(
 engine_name().c_str(), &infer_in, out, batch_size)) {
-LOG(ERROR) << "Failed do infer in fluid model: " << engine_name();
+LOG(ERROR) << "(logid=" << log_id
+           << ") Failed do infer in fluid model: " << engine_name();
 return -1;
 }
......
@@ -15,17 +15,9 @@
 #pragma once
 #include <string>
 #include <vector>
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h"  // NOLINT
-#endif
 #include "core/general-server/general_model_service.pb.h"
 #include "core/general-server/op/general_infer_helper.h"
+#include "paddle_inference_api.h"  // NOLINT
 namespace baidu {
 namespace paddle_serving {
......
@@ -15,17 +15,9 @@
 #pragma once
 #include <string.h>
+#include <string>
 #include <vector>
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
 #include "paddle_inference_api.h"  // NOLINT
-#endif
-#include <string>
 namespace baidu {
 namespace paddle_serving {
@@ -35,6 +27,7 @@ struct GeneralBlob {
 std::vector<paddle::PaddleTensor> tensor_vector;
 int64_t time_stamp[20];
 int p_size = 0;
+uint64_t _log_id = -1;  // for logging
 int _batch_size;
@@ -46,9 +39,11 @@ struct GeneralBlob {
 tensor_vector.clear();
 }
-int SetBatchSize(int batch_size) { _batch_size = batch_size; }
+void SetBatchSize(int batch_size) { _batch_size = batch_size; }
+void SetLogId(uint64_t log_id) { _log_id = log_id; }
 int GetBatchSize() const { return _batch_size; }
+uint64_t GetLogId() const { return _log_id; }
 std::string ShortDebugString() const { return "Not implemented!"; }
 };
......
@@ -47,22 +47,26 @@ int GeneralInferOp::inference() {
 const std::string pre_name = pre_node_names[0];
 const GeneralBlob *input_blob = get_depend_argument<GeneralBlob>(pre_name);
-VLOG(2) << "Get precedent op name: " << pre_name;
+uint64_t log_id = input_blob->GetLogId();
+VLOG(2) << "(logid=" << log_id << ") Get precedent op name: " << pre_name;
 GeneralBlob *output_blob = mutable_data<GeneralBlob>();
+output_blob->SetLogId(log_id);
 if (!input_blob) {
-LOG(ERROR) << "Failed mutable depended argument, op:" << pre_name;
+LOG(ERROR) << "(logid=" << log_id
+           << ") Failed mutable depended argument, op:" << pre_name;
 return -1;
 }
 const TensorVector *in = &input_blob->tensor_vector;
 TensorVector *out = &output_blob->tensor_vector;
-int batch_size = input_blob->GetBatchSize();
-VLOG(2) << "input batch size: " << batch_size;
-output_blob->SetBatchSize(batch_size);
-VLOG(2) << "infer batch size: " << batch_size;
+int batch_size = input_blob->_batch_size;
+VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
+output_blob->_batch_size = batch_size;
+VLOG(2) << "(logid=" << log_id << ") infer batch size: " << batch_size;
 Timer timeline;
 int64_t start = timeline.TimeStampUS();
@@ -70,7 +74,8 @@ int GeneralInferOp::inference() {
 if (InferManager::instance().infer(
 engine_name().c_str(), in, out, batch_size)) {
-LOG(ERROR) << "Failed do infer in fluid model: " << engine_name().c_str();
+LOG(ERROR) << "(logid=" << log_id
+           << ") Failed do infer in fluid model: " << engine_name().c_str();
 return -1;
 }
......
@@ -15,17 +15,9 @@
 #pragma once
 #include <string>
 #include <vector>
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h"  // NOLINT
-#endif
 #include "core/general-server/general_model_service.pb.h"
 #include "core/general-server/op/general_infer_helper.h"
+#include "paddle_inference_api.h"  // NOLINT
 namespace baidu {
 namespace paddle_serving {
......
@@ -37,9 +37,9 @@ int conf_check(const Request *req,
 const std::shared_ptr<PaddleGeneralModelConfig> &model_config) {
 int var_num = req->insts(0).tensor_array_size();
 if (var_num != model_config->_feed_type.size()) {
-VLOG(2) << "var num: " << var_num;
-VLOG(2) << "model config var num: " << model_config->_feed_type.size();
-LOG(ERROR) << "feed var number not match.";
+LOG(ERROR) << "feed var number not match: model config["
+           << model_config->_feed_type.size() << "] vs. actual[" << var_num
+           << "]";
 return -1;
 }
@@ -72,6 +72,7 @@ int conf_check(const Request *req,
 int GeneralReaderOp::inference() {
 // reade request from client
 const Request *req = dynamic_cast<const Request *>(get_request_message());
+uint64_t log_id = req->log_id();
 int batch_size = req->insts_size();
 int input_var_num = 0;
@@ -83,25 +84,29 @@ int GeneralReaderOp::inference() {
 TensorVector *out = &res->tensor_vector;
 res->SetBatchSize(batch_size);
+res->SetLogId(log_id);
 if (!res) {
-LOG(ERROR) << "Failed get op tls reader object output";
+LOG(ERROR) << "(logid=" << log_id
+           << ") Failed get op tls reader object output";
 }
 Timer timeline;
 int64_t start = timeline.TimeStampUS();
 int var_num = req->insts(0).tensor_array_size();
-VLOG(2) << "var num: " << var_num;
+VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
-VLOG(2) << "start to call load general model_conf op";
+VLOG(2) << "(logid=" << log_id
+        << ") start to call load general model_conf op";
 baidu::paddle_serving::predictor::Resource &resource =
 baidu::paddle_serving::predictor::Resource::instance();
-VLOG(2) << "get resource pointer done.";
+VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
 std::shared_ptr<PaddleGeneralModelConfig> model_config =
 resource.get_general_model_config();
-VLOG(2) << "print general model config done.";
+VLOG(2) << "(logid=" << log_id << ") print general model config done.";
 // TODO(guru4elephant): how to do conditional check?
 /*
@@ -122,7 +127,8 @@ int GeneralReaderOp::inference() {
 for (int i = 0; i < var_num; ++i) {
 paddle::PaddleTensor lod_tensor;
 elem_type[i] = req->insts(0).tensor_array(i).elem_type();
-VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
+VLOG(2) << "(logid=" << log_id << ") var[" << i
+        << "] has elem type: " << elem_type[i];
 if (elem_type[i] == 0) {  // int64
 elem_size[i] = sizeof(int64_t);
 lod_tensor.dtype = paddle::PaddleDType::INT64;
@@ -137,17 +143,19 @@ int GeneralReaderOp::inference() {
 if (model_config->_is_lod_feed[i]) {
 lod_tensor.lod.resize(1);
 lod_tensor.lod[0].push_back(0);
-VLOG(2) << "var[" << i << "] is lod_tensor";
+VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
 } else {
 lod_tensor.shape.push_back(batch_size);
 capacity[i] = 1;
 for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
 int dim = req->insts(0).tensor_array(i).shape(k);
-VLOG(2) << "shape for var[" << i << "]: " << dim;
+VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
+        << "]: " << dim;
 capacity[i] *= dim;
 lod_tensor.shape.push_back(dim);
 }
-VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
+VLOG(2) << "(logid=" << log_id << ") var[" << i
+        << "] is tensor, capacity: " << capacity[i];
 }
 lod_tensor.name = model_config->_feed_name[i];
 out->push_back(lod_tensor);
@@ -167,11 +175,12 @@ int GeneralReaderOp::inference() {
 } else if (tensor.int_data_size() > 0) {
 data_len = tensor.int_data_size();
 }
-VLOG(2) << "tensor size for var[" << i << "]: " << data_len;
+VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
+        << "]: " << data_len;
 tensor_size += data_len;
 int cur_len = out->at(i).lod[0].back();
-VLOG(2) << "current len: " << cur_len;
+VLOG(2) << "(logid=" << log_id << ") current len: " << cur_len;
 int sample_len = 0;
 if (tensor.shape_size() == 1) {
@@ -180,7 +189,7 @@ int GeneralReaderOp::inference() {
 sample_len = tensor.shape(0);
 }
 out->at(i).lod[0].push_back(cur_len + sample_len);
-VLOG(2) << "new len: " << cur_len + sample_len;
+VLOG(2) << "(logid=" << log_id << ") new len: " << cur_len + sample_len;
 }
 out->at(i).data.Resize(tensor_size * elem_size[i]);
 out->at(i).shape = {out->at(i).lod[0].back()};
@@ -190,11 +199,11 @@ int GeneralReaderOp::inference() {
 if (out->at(i).shape.size() == 1) {
 out->at(i).shape.push_back(1);
 }
-VLOG(2) << "var[" << i
+VLOG(2) << "(logid=" << log_id << ") var[" << i
         << "] is lod_tensor and len=" << out->at(i).lod[0].back();
 } else {
 out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
-VLOG(2) << "var[" << i
+VLOG(2) << "(logid=" << log_id << ") var[" << i
         << "] is tensor and capacity=" << batch_size * capacity[i];
 }
 }
@@ -203,8 +212,8 @@ int GeneralReaderOp::inference() {
 for (int i = 0; i < var_num; ++i) {
 if (elem_type[i] == 0) {
 int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
-VLOG(2) << "first element data in var[" << i << "] is "
-        << req->insts(0).tensor_array(i).int64_data(0);
+VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
+        << "] is " << req->insts(0).tensor_array(i).int64_data(0);
 int offset = 0;
 for (int j = 0; j < batch_size; ++j) {
 int elem_num = req->insts(j).tensor_array(i).int64_data_size();
@@ -219,8 +228,8 @@ int GeneralReaderOp::inference() {
 }
 } else if (elem_type[i] == 1) {
 float *dst_ptr = static_cast<float *>(out->at(i).data.data());
-VLOG(2) << "first element data in var[" << i << "] is "
-        << req->insts(0).tensor_array(i).float_data(0);
+VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
+        << "] is " << req->insts(0).tensor_array(i).float_data(0);
 int offset = 0;
 for (int j = 0; j < batch_size; ++j) {
 int elem_num = req->insts(j).tensor_array(i).float_data_size();
@@ -235,8 +244,8 @@ int GeneralReaderOp::inference() {
 }
 } else if (elem_type[i] == 2) {
 int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
-VLOG(2) << "first element data in var[" << i << "] is "
-        << req->insts(0).tensor_array(i).int_data(0);
+VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
+        << "] is " << req->insts(0).tensor_array(i).int_data(0);
 int offset = 0;
 for (int j = 0; j < batch_size; ++j) {
 int elem_num = req->insts(j).tensor_array(i).int_data_size();
@@ -252,15 +261,16 @@ int GeneralReaderOp::inference() {
 }
 }
-VLOG(2) << "output size: " << out->size();
+VLOG(2) << "(logid=" << log_id << ") output size: " << out->size();
 timeline.Pause();
 int64_t end = timeline.TimeStampUS();
 res->p_size = 0;
+res->_batch_size = batch_size;
 AddBlobInfo(res, start);
 AddBlobInfo(res, end);
-VLOG(2) << "read data from client success";
+VLOG(2) << "(logid=" << log_id << ") read data from client success";
 return 0;
 }
 DEFINE_OP(GeneralReaderOp);
......
@@ -13,21 +13,13 @@
 // limitations under the License.
 #pragma once
-#include <vector>
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h"  // NOLINT
-#endif
 #include <string>
+#include <vector>
 #include "core/general-server/general_model_service.pb.h"
 #include "core/general-server/load_general_model_service.pb.h"
 #include "core/general-server/op/general_infer_helper.h"
 #include "core/predictor/framework/resource.h"
+#include "paddle_inference_api.h"  // NOLINT
 namespace baidu {
 namespace paddle_serving {
......
@@ -42,6 +42,9 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 int GeneralResponseOp::inference() {
 const std::vector<std::string> pre_node_names = pre_names();
 VLOG(2) << "pre node names size: " << pre_node_names.size();
+const GeneralBlob *input_blob;
+uint64_t log_id =
+    get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
 const Request *req = dynamic_cast<const Request *>(get_request_message());
 // response inst with only fetch_var_names
@@ -52,15 +55,17 @@ int GeneralResponseOp::inference() {
 // timeline.Start();
 int64_t start = timeline.TimeStampUS();
-VLOG(2) << "start to call load general model_conf op";
+VLOG(2) << "(logid=" << log_id
+        << ") start to call load general model_conf op";
 baidu::paddle_serving::predictor::Resource &resource =
 baidu::paddle_serving::predictor::Resource::instance();
-VLOG(2) << "get resource pointer done.";
+VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
 std::shared_ptr<PaddleGeneralModelConfig> model_config =
 resource.get_general_model_config();
-VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
+VLOG(2) << "(logid=" << log_id
+        << ") max body size : " << brpc::fLU64::FLAGS_max_body_size;
 std::vector<int> fetch_index;
 fetch_index.resize(req->fetch_var_names_size());
@@ -69,16 +74,16 @@ int GeneralResponseOp::inference() {
 model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
 }
-const GeneralBlob *input_blob;
 for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
 const std::string &pre_name = pre_node_names[pi];
-VLOG(2) << "pre names[" << pi << "]: " << pre_name << " ("
-        << pre_node_names.size() << ")";
+VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
+        << " (" << pre_node_names.size() << ")";
 input_blob = get_depend_argument<GeneralBlob>(pre_name);
 // fprintf(stderr, "input(%s) blob address %x\n", pre_names.c_str(),
 // input_blob);
 if (!input_blob) {
-LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
+LOG(ERROR) << "(logid=" << log_id
+           << ") Failed mutable depended argument, op: " << pre_name;
 return -1;
 }
@@ -92,17 +97,19 @@ int GeneralResponseOp::inference() {
 for (auto &idx : fetch_index) {
 Tensor *tensor = fetch_inst->add_tensor_array();
 if (model_config->_is_lod_fetch[idx]) {
-VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
-        << " is lod_tensor";
+VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
+        << model_config->_fetch_name[idx] << " is lod_tensor";
 for (int k = 0; k < in->at(idx).shape.size(); ++k) {
-VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
+VLOG(2) << "(logid=" << log_id << ") shape[" << k
+        << "]: " << in->at(idx).shape[k];
 tensor->add_shape(in->at(idx).shape[k]);
 }
 } else {
-VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
-        << " is tensor";
+VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] "
+        << model_config->_fetch_name[idx] << " is tensor";
 for (int k = 0; k < in->at(idx).shape.size(); ++k) {
-VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
+VLOG(2) << "(logid=" << log_id << ") shape[" << k
+        << "]: " << in->at(idx).shape[k];
 tensor->add_shape(in->at(idx).shape[k]);
 }
 }
@@ -119,8 +126,8 @@ int GeneralResponseOp::inference() {
 auto dtype = in->at(idx).dtype;
 if (dtype == paddle::PaddleDType::INT64) {
-VLOG(2) << "Prepare int64 var [" << model_config->_fetch_name[idx]
-        << "].";
+VLOG(2) << "(logid=" << log_id << ") Prepare int64 var ["
+        << model_config->_fetch_name[idx] << "].";
 int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
 // from
 // https://stackoverflow.com/questions/15499641/copy-a-stdvector-to-a-repeated-field-from-protobuf-with-memcpy
@@ -130,16 +137,16 @@ int GeneralResponseOp::inference() {
 fetch_p->mutable_tensor_array(var_idx)->mutable_int64_data()->Swap(
 &tmp_data);
 } else if (dtype == paddle::PaddleDType::FLOAT32) {
-VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx]
-        << "].";
+VLOG(2) << "(logid=" << log_id << ") Prepare float var ["
+        << model_config->_fetch_name[idx] << "].";
 float *data_ptr = static_cast<float *>(in->at(idx).data.data());
 google::protobuf::RepeatedField<float> tmp_data(data_ptr,
 data_ptr + cap);
 fetch_p->mutable_tensor_array(var_idx)->mutable_float_data()->Swap(
 &tmp_data);
 } else if (dtype == paddle::PaddleDType::INT32) {
-VLOG(2) << "Prepare int32 var [" << model_config->_fetch_name[idx]
-        << "].";
+VLOG(2) << "(logid=" << log_id << ") Prepare int32 var ["
+        << model_config->_fetch_name[idx] << "].";
 int32_t *data_ptr = static_cast<int32_t *>(in->at(idx).data.data());
 google::protobuf::RepeatedField<int32_t> tmp_data(data_ptr,
 data_ptr + cap);
@@ -154,7 +161,8 @@ int GeneralResponseOp::inference() {
 }
 }
-VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready";
+VLOG(2) << "(logid=" << log_id << ") fetch var ["
+        << model_config->_fetch_name[idx] << "] ready";
 var_idx++;
 }
 }
@@ -167,7 +175,8 @@ int GeneralResponseOp::inference() {
 // a more elegant way.
 for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
 input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
-VLOG(2) << "p size for input blob: " << input_blob->p_size;
+VLOG(2) << "(logid=" << log_id
+        << ") p size for input blob: " << input_blob->p_size;
 int profile_time_idx = -1;
 if (pi == 0) {
 profile_time_idx = 0;
......
@@ -15,16 +15,8 @@
 #pragma once
 #include <string>
 #include <vector>
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h"  // NOLINT
-#endif
 #include "core/general-server/general_model_service.pb.h"
+#include "paddle_inference_api.h"  // NOLINT
 namespace baidu {
 namespace paddle_serving {
......
@@ -35,6 +35,7 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
 int GeneralTextReaderOp::inference() {
 // reade request from client
 const Request *req = dynamic_cast<const Request *>(get_request_message());
+uint64_t log_id = req->log_id();
 int batch_size = req->insts_size();
 int input_var_num = 0;
@@ -44,16 +45,18 @@ int GeneralTextReaderOp::inference() {
 std::vector<int64_t> capacity;
 GeneralBlob *res = mutable_data<GeneralBlob>();
-TensorVector *out = &res->tensor_vector;
-res->SetBatchSize(batch_size);
 if (!res) {
-LOG(ERROR) << "Failed get op tls reader object output";
+LOG(ERROR) << "(logid=" << log_id
+           << ") Failed get op tls reader object output";
 }
+TensorVector *out = &res->tensor_vector;
+res->SetBatchSize(batch_size);
+res->SetLogId(log_id);
 if (batch_size <= 0) {
-LOG(ERROR) << "Batch size < 0";
+LOG(ERROR) << "(logid=" << log_id << ") Batch size < 0";
 return -1;
 }
@@ -61,17 +64,18 @@ int GeneralTextReaderOp::inference() {
 int64_t start = timeline.TimeStampUS();
 int var_num = req->insts(0).tensor_array_size();
-VLOG(2) << "var num: " << var_num;
+VLOG(2) << "(logid=" << log_id << ") var num: " << var_num;
-VLOG(2) << "start to call load general model_conf op";
+VLOG(2) << "(logid=" << log_id
+        << ") start to call load general model_conf op";
 baidu::paddle_serving::predictor::Resource &resource =
 baidu::paddle_serving::predictor::Resource::instance();
-VLOG(2) << "get resource pointer done.";
+VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
 std::shared_ptr<PaddleGeneralModelConfig> model_config =
 resource.get_general_model_config();
-VLOG(2) << "print general model config done.";
+VLOG(2) << "(logid=" << log_id << ") print general model config done.";
 elem_type.resize(var_num);
 elem_size.resize(var_num);
@@ -79,7 +83,8 @@ int GeneralTextReaderOp::inference() {
 for (int i = 0; i < var_num; ++i) {
 paddle::PaddleTensor lod_tensor;
 elem_type[i] = req->insts(0).tensor_array(i).elem_type();
-VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
+VLOG(2) << "(logid=" << log_id << ") var[" << i
+        << "] has elem type: " << elem_type[i];
 if (elem_type[i] == 0) {  // int64
 elem_size[i] = sizeof(int64_t);
 lod_tensor.dtype = paddle::PaddleDType::INT64;
@@ -91,17 +96,19 @@ int GeneralTextReaderOp::inference() {
 if (req->insts(0).tensor_array(i).shape(0) == -1) {
 lod_tensor.lod.resize(1);
 lod_tensor.lod[0].push_back(0);
-VLOG(2) << "var[" << i << "] is lod_tensor";
+VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
 } else {
 lod_tensor.shape.push_back(batch_size);
 capacity[i] = 1;
 for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
 int dim = req->insts(0).tensor_array(i).shape(k);
-VLOG(2) << "shape for var[" << i << "]: " << dim;
+VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
+        << "]: " << dim;
 capacity[i] *= dim;
 lod_tensor.shape.push_back(dim);
 }
-VLOG(2) << "var[" << i << "] is tensor, capacity: " << capacity[i];
+VLOG(2) << "(logid=" << log_id << ") var[" << i
+        << "] is tensor, capacity: " << capacity[i];
 }
 lod_tensor.name = model_config->_feed_name[i];
 out->push_back(lod_tensor);
@@ -117,11 +124,11 @@ int GeneralTextReaderOp::inference() {
 }
 out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
 out->at(i).shape = {out->at(i).lod[0].back(), 1};
-VLOG(2) << "var[" << i
+VLOG(2) << "(logid=" << log_id << ") var[" << i
         << "] is lod_tensor and len=" << out->at(i).lod[0].back();
 } else {
 out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
-VLOG(2) << "var[" << i
+VLOG(2) << "(logid=" << log_id << ") var[" << i
         << "] is tensor and capacity=" << batch_size * capacity[i];
 }
 }
@@ -163,7 +170,7 @@ int GeneralTextReaderOp::inference() {
 AddBlobInfo(res, start);
 AddBlobInfo(res, end);
-VLOG(2) << "read data from client success";
+VLOG(2) << "(logid=" << log_id << ") read data from client success";
 return 0;
 }
 DEFINE_OP(GeneralTextReaderOp);
......
@@ -13,21 +13,13 @@
 // limitations under the License.
 #pragma once
-#include <vector>
-#ifdef BCLOUD
-#ifdef WITH_GPU
-#include "paddle/paddle_inference_api.h"
-#else
-#include "paddle/fluid/inference/api/paddle_inference_api.h"
-#endif
-#else
-#include "paddle_inference_api.h"  // NOLINT
-#endif
 #include <string>
+#include <vector>
 #include "core/general-server/general_model_service.pb.h"
 #include "core/general-server/load_general_model_service.pb.h"
 #include "core/general-server/op/general_infer_helper.h"
 #include "core/predictor/framework/resource.h"
+#include "paddle_inference_api.h"  // NOLINT
 namespace baidu {
 namespace paddle_serving {
......
@@ -40,6 +40,9 @@ int GeneralTextResponseOp::inference() {
 VLOG(2) << "Going to run inference";
 const std::vector<std::string> pre_node_names = pre_names();
 VLOG(2) << "pre node names size: " << pre_node_names.size();
+const GeneralBlob *input_blob;
+uint64_t log_id =
+    get_depend_argument<GeneralBlob>(pre_node_names[0])->GetLogId();
 const Request *req = dynamic_cast<const Request *>(get_request_message());
 // response inst with only fetch_var_names
@@ -48,11 +51,12 @@ int GeneralTextResponseOp::inference() {
 Timer timeline;
 int64_t start = timeline.TimeStampUS();
-VLOG(2) << "start to call load general model_conf op";
+VLOG(2) << "(logid=" << log_id
+        << ") start to call load general model_conf op";
 baidu::paddle_serving::predictor::Resource &resource =
 baidu::paddle_serving::predictor::Resource::instance();
-VLOG(2) << "get resource pointer done.";
+VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
 std::shared_ptr<PaddleGeneralModelConfig> model_config =
 resource.get_general_model_config();
@@ -63,20 +67,20 @@ int GeneralTextResponseOp::inference() {
 model_config->_fetch_alias_name_to_index[req->fetch_var_names(i)];
 }
-const GeneralBlob *input_blob;
 for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
 const std::string &pre_name = pre_node_names[pi];
-VLOG(2) << "pre names[" << pi << "]: " << pre_name << " ("
-        << pre_node_names.size() << ")";
+VLOG(2) << "(logid=" << log_id << ") pre names[" << pi << "]: " << pre_name
+        << " (" << pre_node_names.size() << ")";
 input_blob = get_depend_argument<GeneralBlob>(pre_name);
 if (!input_blob) {
-LOG(ERROR) << "Failed mutable depended argument, op: " << pre_name;
+LOG(ERROR) << "(logid=" << log_id
+           << ") Failed mutable depended argument, op: " << pre_name;
 return -1;
 }
 const TensorVector *in = &input_blob->tensor_vector;
 int batch_size = input_blob->GetBatchSize();
-VLOG(2) << "input batch size: " << batch_size;
+VLOG(2) << "(logid=" << log_id << ") input batch size: " << batch_size;
 ModelOutput *output = res->add_outputs();
 output->set_engine_name(
...@@ -88,12 +92,13 @@ int GeneralTextResponseOp::inference() { ...@@ -88,12 +92,13 @@ int GeneralTextResponseOp::inference() {
// currently only response float tensor or lod_tensor // currently only response float tensor or lod_tensor
tensor->set_elem_type(1); tensor->set_elem_type(1);
if (model_config->_is_lod_fetch[idx]) { if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "out[" << idx << " is lod_tensor"; VLOG(2) << "(logid=" << log_id << ") out[" << idx << " is lod_tensor";
tensor->add_shape(-1); tensor->add_shape(-1);
} else { } else {
VLOG(2) << "out[" << idx << "] is tensor"; VLOG(2) << "(logid=" << log_id << ") out[" << idx << "] is tensor";
for (int k = 1; k < in->at(idx).shape.size(); ++k) { for (int k = 1; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "shape[" << k - 1 << "]: " << in->at(idx).shape[k]; VLOG(2) << "(logid=" << log_id << ") shape[" << k - 1
<< "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]); tensor->add_shape(in->at(idx).shape[k]);
} }
} }
...@@ -137,7 +142,8 @@ int GeneralTextResponseOp::inference() { ...@@ -137,7 +142,8 @@ int GeneralTextResponseOp::inference() {
// a more elegant way. // a more elegant way.
for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) { for (uint32_t pi = 0; pi < pre_node_names.size(); ++pi) {
input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]); input_blob = get_depend_argument<GeneralBlob>(pre_node_names[pi]);
VLOG(2) << "p size for input blob: " << input_blob->p_size; VLOG(2) << "(logid=" << log_id
<< ") p size for input blob: " << input_blob->p_size;
int profile_time_idx = -1; int profile_time_idx = -1;
if (pi == 0) { if (pi == 0) {
profile_time_idx = 0; profile_time_idx = 0;
......
...@@ -15,17 +15,9 @@ ...@@ -15,17 +15,9 @@
#pragma once #pragma once
#include <string> #include <string>
#include <vector> #include <vector>
#ifdef BCLOUD
#ifdef WITH_GPU
#include "paddle/paddle_inference_api.h"
#else
#include "paddle/fluid/inference/api/paddle_inference_api.h"
#endif
#else
#include "paddle_inference_api.h" // NOLINT
#endif
#include "core/general-server/general_model_service.pb.h" #include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h" #include "core/general-server/op/general_infer_helper.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
......
...@@ -37,6 +37,7 @@ message Request { ...@@ -37,6 +37,7 @@ message Request {
repeated FeedInst insts = 1; repeated FeedInst insts = 1;
repeated string fetch_var_names = 2; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ]; optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
}; };
message Response { message Response {
......
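The new `log_id` is a plain scalar field, so the protobuf-generated C++ API exposes the usual `set_log_id()` / `log_id()` accessors; that getter is what the server-side code in this diff reads. A minimal, hypothetical caller-side sketch (the namespace is assumed from the proto package, and the surrounding client wiring is not part of this change):

```cpp
// Hypothetical caller-side sketch: attach a per-request trace id via the new
// log_id field. Feed/fetch setup is elided because this commit does not touch it.
#include <cstdint>
#include "core/general-server/general_model_service.pb.h"

using baidu::paddle_serving::predictor::general_model::Request;

void tag_request(Request* req, uint64_t trace_id) {
  // Generated setter for `required uint64 log_id = 4`; the server reads it back
  // with request->log_id() and prefixes its log lines with "(logid=...)".
  req->set_log_id(trace_id);
}
```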
...@@ -21,6 +21,7 @@ option cc_generic_services = true; ...@@ -21,6 +21,7 @@ option cc_generic_services = true;
message RequestAndResponse { message RequestAndResponse {
required int32 a = 1; required int32 a = 1;
required float b = 2; required float b = 2;
required uint64 log_id = 3 [ default = 0 ];
}; };
service LoadGeneralModelService { service LoadGeneralModelService {
......
...@@ -280,25 +280,29 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -280,25 +280,29 @@ class PdsCodeGenerator : public CodeGenerator {
" baidu::rpc::ClosureGuard done_guard(done);\n" " baidu::rpc::ClosureGuard done_guard(done);\n"
" baidu::rpc::Controller* cntl = \n" " baidu::rpc::Controller* cntl = \n"
" static_cast<baidu::rpc::Controller*>(cntl_base);\n" " static_cast<baidu::rpc::Controller*>(cntl_base);\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
" " " "
"::baidu::paddle_serving::predictor::InferServiceManager::instance(" "::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n" ").item(\"$service$\");\n"
" if (svr == NULL) {\n" " if (svr == NULL) {\n"
" LOG(ERROR) << \"Not found service: $service$\";\n" " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n" " cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n" " return ;\n"
" }\n" " }\n"
" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
"\"\]\";\n" "<< cntl->remote_side() << \"\]\";\n"
" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
"\"\]\";\n" "<< cntl->local_side() << \"\]\";\n"
" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT "<< \"$name$\" << \"\]\";\n"
" int err_code = svr->inference(request, response);\n" " int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n" " if (err_code != 0) {\n"
" LOG(WARNING)\n" " LOG(WARNING)\n"
" << \"Failed call inferservice[$name$], name[$service$]\"\n" " << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n" " << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference " " cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n" "failed!\");\n"
...@@ -306,7 +310,8 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -306,7 +310,8 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n" " gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n" " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n" " // flush notice log\n"
" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n", // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n", // NOLINT
"name", "name",
class_name, class_name,
"service", "service",
...@@ -317,26 +322,31 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -317,26 +322,31 @@ class PdsCodeGenerator : public CodeGenerator {
" baidu::rpc::ClosureGuard done_guard(done);\n" " baidu::rpc::ClosureGuard done_guard(done);\n"
" baidu::rpc::Controller* cntl = \n" " baidu::rpc::Controller* cntl = \n"
" static_cast<baidu::rpc::Controller*>(cntl_base);\n" " static_cast<baidu::rpc::Controller*>(cntl_base);\n"
" uint64_t log_id = equest->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
" " " "
"::baidu::paddle_serving::predictor::InferServiceManager::instance(" "::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n" ").item(\"$service$\");\n"
" if (svr == NULL) {\n" " if (svr == NULL) {\n"
" LOG(ERROR) << \"Not found service: $service$\";\n" " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n" " cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n" " return ;\n"
" }\n" " }\n"
" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
"\"\]\";\n" "<< cntl->remote_side() << \"\]\";\n"
" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
"\"\]\";\n" "<< cntl->local_side() << \"\]\";\n"
" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT "<< \"$name$\" << \"\]\";\n"
" butil::IOBufBuilder debug_os;\n" " butil::IOBufBuilder debug_os;\n"
" int err_code = svr->inference(request, response, &debug_os);\n" " int err_code = svr->inference(request, response, log_id, "
"&debug_os);\n"
" if (err_code != 0) {\n" " if (err_code != 0) {\n"
" LOG(WARNING)\n" " LOG(WARNING)\n"
" << \"Failed call inferservice[$name$], name[$service$]\"\n" " << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n" " << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference " " cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n" "failed!\");\n"
...@@ -345,9 +355,11 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -345,9 +355,11 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n" " gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n" " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n" " // flush notice log\n"
" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n" // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n"
" LOG(INFO)\n" " LOG(INFO)\n"
" << \"TC=[\" << (end - start) << \"] Received debug " " << \"(logid=\" << log_id << \") TC=[\" << (end - start) << "
"\"] Received debug "
"request[log_id=\" << cntl->log_id()\n" "request[log_id=\" << cntl->log_id()\n"
" << \"] from \" << cntl->remote_side()\n" " << \"] from \" << cntl->remote_side()\n"
" << \" to \" << cntl->local_side();\n", " << \" to \" << cntl->local_side();\n",
...@@ -1011,25 +1023,31 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1011,25 +1023,31 @@ class PdsCodeGenerator : public CodeGenerator {
" brpc::ClosureGuard done_guard(done);\n" " brpc::ClosureGuard done_guard(done);\n"
" brpc::Controller* cntl = \n" " brpc::Controller* cntl = \n"
" static_cast<brpc::Controller*>(cntl_base);\n" " static_cast<brpc::Controller*>(cntl_base);\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
" " " "
"::baidu::paddle_serving::predictor::InferServiceManager::instance(" "::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n" ").item(\"$service$\");\n"
" if (svr == NULL) {\n" " if (svr == NULL) {\n"
" LOG(ERROR) << \"Not found service: $service$\";\n" " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n" " cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n" " return ;\n"
" }\n" " }\n"
" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") "
"remote_side=\[\" << cntl->remote_side() << " // NOLINT
"\"\]\";\n" "\"\]\";\n"
" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") "
"local_side=\[\" << cntl->local_side() << " // NOLINT
"\"\]\";\n" "\"\]\";\n"
" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") "
" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT "service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT
" int err_code = svr->inference(request, response);\n" " int err_code = svr->inference(request, response, log_id);\n"
" if (err_code != 0) {\n" " if (err_code != 0) {\n"
" LOG(WARNING)\n" " LOG(WARNING)\n"
" << \"Failed call inferservice[$name$], name[$service$]\"\n" " << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n" " << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference " " cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n" "failed!\");\n"
...@@ -1037,7 +1055,8 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1037,7 +1055,8 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n" " gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n" " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n" " // flush notice log\n"
" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n", // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n", // NOLINT
"name", "name",
class_name, class_name,
"service", "service",
...@@ -1048,26 +1067,31 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1048,26 +1067,31 @@ class PdsCodeGenerator : public CodeGenerator {
" brpc::ClosureGuard done_guard(done);\n" " brpc::ClosureGuard done_guard(done);\n"
" brpc::Controller* cntl = \n" " brpc::Controller* cntl = \n"
" static_cast<brpc::Controller*>(cntl_base);\n" " static_cast<brpc::Controller*>(cntl_base);\n"
" uint64_t log_id = request->log_id();\n"
" cntl->set_log_id(log_id);\n"
" ::baidu::paddle_serving::predictor::InferService* svr = \n" " ::baidu::paddle_serving::predictor::InferService* svr = \n"
" " " "
"::baidu::paddle_serving::predictor::InferServiceManager::instance(" "::baidu::paddle_serving::predictor::InferServiceManager::instance("
").item(\"$service$\");\n" ").item(\"$service$\");\n"
" if (svr == NULL) {\n" " if (svr == NULL) {\n"
" LOG(ERROR) << \"Not found service: $service$\";\n" " LOG(ERROR) << \"(logid=\" << log_id << \") Not found service: "
"$service$\";\n"
" cntl->SetFailed(404, \"Not found service: $service$\");\n" " cntl->SetFailed(404, \"Not found service: $service$\");\n"
" return ;\n" " return ;\n"
" }\n" " }\n"
" LOG(INFO) << \" remote_side=\[\" << cntl->remote_side() << " // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") remote_side=\[\" " // NOLINT
"\"\]\";\n" " << cntl->remote_side() << \"\]\";\n"
" LOG(INFO) << \" local_side=\[\" << cntl->local_side() << " // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") local_side=\[\" " // NOLINT
"\"\]\";\n" "<< cntl->local_side() << \"\]\";\n"
" LOG(INFO) << \" service_name=\[\" << \"$name$\" << \"\]\";\n" // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") service_name=\[\" " // NOLINT
" LOG(INFO) << \" log_id=\[\" << cntl->log_id() << \"\]\";\n" // NOLINT "<< \"$name$\" << \"\]\";\n"
" butil::IOBufBuilder debug_os;\n" " butil::IOBufBuilder debug_os;\n"
" int err_code = svr->inference(request, response, &debug_os);\n" " int err_code = svr->inference(request, response, log_id, "
"&debug_os);\n"
" if (err_code != 0) {\n" " if (err_code != 0) {\n"
" LOG(WARNING)\n" " LOG(WARNING)\n"
" << \"Failed call inferservice[$name$], name[$service$]\"\n" " << \"(logid=\" << log_id << \") Failed call "
"inferservice[$name$], name[$service$]\"\n"
" << \", error_code: \" << err_code;\n" " << \", error_code: \" << err_code;\n"
" cntl->SetFailed(err_code, \"InferService inference " " cntl->SetFailed(err_code, \"InferService inference "
"failed!\");\n" "failed!\");\n"
...@@ -1076,9 +1100,11 @@ class PdsCodeGenerator : public CodeGenerator { ...@@ -1076,9 +1100,11 @@ class PdsCodeGenerator : public CodeGenerator {
" gettimeofday(&tv, NULL);\n" " gettimeofday(&tv, NULL);\n"
" long end = tv.tv_sec * 1000000 + tv.tv_usec;\n" " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"
" // flush notice log\n" " // flush notice log\n"
" LOG(INFO) << \" tc=\[\" << (end - start) << \"\]\";\n" // NOLINT " LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - " // NOLINT
"start) << \"\]\";\n" // NOLINT
" LOG(INFO)\n" " LOG(INFO)\n"
" << \"TC=[\" << (end - start) << \"] Received debug " " << \"(logid=\" << log_id << \") TC=[\" << (end - start) << "
"\"] Received debug "
"request[log_id=\" << cntl->log_id()\n" "request[log_id=\" << cntl->log_id()\n"
" << \"] from \" << cntl->remote_side()\n" " << \"] from \" << cntl->remote_side()\n"
" << \" to \" << cntl->local_side();\n", " << \" to \" << cntl->local_side();\n",
......
...@@ -6,7 +6,7 @@ include(framework/CMakeLists.txt) ...@@ -6,7 +6,7 @@ include(framework/CMakeLists.txt)
include(tools/CMakeLists.txt) include(tools/CMakeLists.txt)
include(src/CMakeLists.txt) include(src/CMakeLists.txt)
add_definitions(-D__STDC_FORMAT_MACROS)
add_library(pdserving ${pdserving_srcs}) add_library(pdserving ${pdserving_srcs})
set_source_files_properties( set_source_files_properties(
${pdserving_srcs} ${pdserving_srcs}
......
...@@ -50,7 +50,7 @@ ...@@ -50,7 +50,7 @@
#include "butil/time.h" #include "butil/time.h"
#endif #endif
#include "glog/raw_logging.h" #define ERROR_STRING_LEN 10240
#include "core/configure/general_model_config.pb.h" #include "core/configure/general_model_config.pb.h"
#include "core/configure/include/configure_parser.h" #include "core/configure/include/configure_parser.h"
......
...@@ -72,9 +72,10 @@ class Channel { ...@@ -72,9 +72,10 @@ class Channel {
const std::string& op() { return _op; } const std::string& op() { return _op; }
int share_to_bus(Bus* bus) { int share_to_bus(Bus* bus, const uint64_t log_id) {
if (bus->regist(_op, this) != 0) { if (bus->regist(_op, this) != 0) {
LOG(ERROR) << "Failed regist channel[" << _op << "] to bus!"; LOG(ERROR) << "(logid=" << log_id << ") Failed regist channel[" << _op
<< "] to bus!";
return -1; return -1;
} }
......
...@@ -155,13 +155,11 @@ int Dag::init(const configure::Workflow& conf, const std::string& name) { ...@@ -155,13 +155,11 @@ int Dag::init(const configure::Workflow& conf, const std::string& name) {
} }
if (FLAGS_el_log_level == 16) { if (FLAGS_el_log_level == 16) {
LOG(INFO) << "DAG: " << _dag_name; LOG(INFO) << "DAG: " << _dag_name << ", Op Num: " << _index_nodes.size();
LOG(INFO) << ", Op Num: " << _index_nodes.size();
for (uint32_t nid = 0; nid < _index_nodes.size(); nid++) { for (uint32_t nid = 0; nid < _index_nodes.size(); nid++) {
DagNode* node = _index_nodes[nid]; DagNode* node = _index_nodes[nid];
LOG(INFO) << ", OP-" << node->id << "-" << node->name << "-" LOG(INFO) << "OP-" << node->id << "-" << node->name << "-" << node->type
<< node->type; << " depends: " << node->depends.size();
LOG(INFO) << " depends: " << node->depends.size();
boost::unordered_map<std::string, EdgeMode>::iterator it; boost::unordered_map<std::string, EdgeMode>::iterator it;
for (it = node->depends.begin(); it != node->depends.end(); it++) { for (it = node->depends.begin(); it != node->depends.end(); it++) {
...@@ -214,8 +212,8 @@ int Dag::topo_sort() { ...@@ -214,8 +212,8 @@ int Dag::topo_sort() {
} }
} }
for (int i = 0; i < in_degree.size(); ++i) { for (int i = 0; i < in_degree.size(); ++i) {
LOG(INFO) << "(" << _index_nodes[i]->name << ") in_degree[" << i VLOG(2) << "(" << _index_nodes[i]->name << ") in_degree[" << i
<< "]: " << in_degree[i]; << "]: " << in_degree[i];
} }
int sorted_num = 0; int sorted_num = 0;
DagStage* stage = new (std::nothrow) DagStage(); DagStage* stage = new (std::nothrow) DagStage();
......
...@@ -26,7 +26,9 @@ namespace baidu { ...@@ -26,7 +26,9 @@ namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace predictor { namespace predictor {
int DagView::init(Dag* dag, const std::string& service_name) { int DagView::init(Dag* dag,
const std::string& service_name,
const uint64_t log_id) {
_name = dag->name(); _name = dag->name();
_full_name = service_name + NAME_DELIMITER + dag->name(); _full_name = service_name + NAME_DELIMITER + dag->name();
_bus = butil::get_object<Bus>(); _bus = butil::get_object<Bus>();
...@@ -36,17 +38,20 @@ int DagView::init(Dag* dag, const std::string& service_name) { ...@@ -36,17 +38,20 @@ int DagView::init(Dag* dag, const std::string& service_name) {
for (uint32_t si = 0; si < stage_size; si++) { for (uint32_t si = 0; si < stage_size; si++) {
const DagStage* stage = dag->stage_by_index(si); const DagStage* stage = dag->stage_by_index(si);
if (stage == NULL) { if (stage == NULL) {
LOG(ERROR) << "Failed get stage by index:" << si; LOG(ERROR) << "(logid=" << log_id << ") Failed get stage by index:" << si;
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
ViewStage* vstage = butil::get_object<ViewStage>(); ViewStage* vstage = butil::get_object<ViewStage>();
if (vstage == NULL) { if (vstage == NULL) {
LOG(ERROR) << "Failed get vstage from object pool" LOG(ERROR) << "(logid=" << log_id
<< ") Failed get vstage from object pool"
<< "at:" << si; << "at:" << si;
return ERR_MEM_ALLOC_FAILURE; return ERR_MEM_ALLOC_FAILURE;
} }
VLOG(2) << "stage[" << si << "] name: " << stage->full_name; VLOG(2) << "(logid=" << log_id << ") stage[" << si
VLOG(2) << "stage[" << si << "] node size: " << stage->nodes.size(); << "] name: " << stage->full_name;
VLOG(2) << "(logid=" << log_id << ") stage[" << si
<< "] node size: " << stage->nodes.size();
vstage->full_name = service_name + NAME_DELIMITER + stage->full_name; vstage->full_name = service_name + NAME_DELIMITER + stage->full_name;
uint32_t node_size = stage->nodes.size(); uint32_t node_size = stage->nodes.size();
// create tls view node // create tls view node
...@@ -54,31 +59,39 @@ int DagView::init(Dag* dag, const std::string& service_name) { ...@@ -54,31 +59,39 @@ int DagView::init(Dag* dag, const std::string& service_name) {
DagNode* node = stage->nodes[ni]; DagNode* node = stage->nodes[ni];
ViewNode* vnode = butil::get_object<ViewNode>(); ViewNode* vnode = butil::get_object<ViewNode>();
if (vnode == NULL) { if (vnode == NULL) {
LOG(ERROR) << "Failed get vnode at:" << ni; LOG(ERROR) << "(logid=" << log_id << ") Failed get vnode at:" << ni;
return ERR_MEM_ALLOC_FAILURE; return ERR_MEM_ALLOC_FAILURE;
} }
// factory type // factory type
Op* op = OpRepository::instance().get_op(node->type); Op* op = OpRepository::instance().get_op(node->type);
if (op == NULL) { if (op == NULL) {
LOG(ERROR) << "Failed get op with type:" << node->type; LOG(ERROR) << "(logid=" << log_id
<< ") Failed get op with type:" << node->type;
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
// initialize a TLS op object // initialize a TLS op object
VLOG(2) << "dag view initialized: \n" VLOG(2) << "(logid=" << log_id << ") dag view initialized: \n"
<< "node id: " << node->id << "\n" << "node id: " << node->id << "\n"
<< "node name: " << node->name << "\n" << "node name: " << node->name << "\n"
<< "node type: " << node->type; << "node type: " << node->type;
if (op->init(_bus, dag, node->id, node->name, node->type, node->conf) != if (op->init(_bus,
0) { dag,
LOG(WARNING) << "Failed init op, type:" << node->type; node->id,
node->name,
node->type,
node->conf,
log_id) != 0) {
LOG(WARNING) << "(logid=" << log_id
<< ") Failed init op, type:" << node->type;
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
op->set_full_name(service_name + NAME_DELIMITER + node->full_name); op->set_full_name(service_name + NAME_DELIMITER + node->full_name);
// Set the name of the Op as the key of the matching engine. // Set the name of the Op as the key of the matching engine.
VLOG(2) << "op->set_engine_name(" << node->name.c_str() << ")"; VLOG(2) << "(logid=" << log_id << ") op->set_engine_name("
<< node->name.c_str() << ")";
op->set_engine_name(node->name); op->set_engine_name(node->name);
vnode->conf = node; vnode->conf = node;
...@@ -88,7 +101,7 @@ int DagView::init(Dag* dag, const std::string& service_name) { ...@@ -88,7 +101,7 @@ int DagView::init(Dag* dag, const std::string& service_name) {
it != vnode->conf->depends.end(); it != vnode->conf->depends.end();
++it) { ++it) {
std::string pre_node_name = it->first; std::string pre_node_name = it->first;
VLOG(2) << "add op pre name: \n" VLOG(2) << "(logid=" << log_id << ") add op pre name: \n"
<< "current op name: " << vnode->op->op_name() << "current op name: " << vnode->op->op_name()
<< ", previous op name: " << pre_node_name; << ", previous op name: " << pre_node_name;
vnode->op->add_pre_node_name(pre_node_name); vnode->op->add_pre_node_name(pre_node_name);
...@@ -102,7 +115,7 @@ int DagView::init(Dag* dag, const std::string& service_name) { ...@@ -102,7 +115,7 @@ int DagView::init(Dag* dag, const std::string& service_name) {
//<< " previous op name: " //<< " previous op name: "
//<< _view[si - 1]->nodes.back()->op->op_name(); //<< _view[si - 1]->nodes.back()->op->op_name();
// vstage->nodes.back()->op->set_pre_node_name( // vstage->nodes.back()->op->set_pre_node_name(
//_view[si - 1]->nodes.back()->op->op_name()); // _view[si - 1]->nodes.back()->op->op_name());
/*}*/ /*}*/
_view.push_back(vstage); _view.push_back(vstage);
} }
...@@ -133,14 +146,15 @@ int DagView::deinit() { ...@@ -133,14 +146,15 @@ int DagView::deinit() {
return ERR_OK; return ERR_OK;
} }
int DagView::execute(butil::IOBufBuilder* debug_os) { int DagView::execute(const uint64_t log_id, butil::IOBufBuilder* debug_os) {
uint32_t stage_size = _view.size(); uint32_t stage_size = _view.size();
for (uint32_t si = 0; si < stage_size; si++) { for (uint32_t si = 0; si < stage_size; si++) {
TRACEPRINTF("start to execute stage[%u]", si); TRACEPRINTF("(logid=%" PRIu64 ") start to execute stage[%u]", log_id, si);
int errcode = execute_one_stage(_view[si], debug_os); int errcode = execute_one_stage(_view[si], log_id, debug_os);
TRACEPRINTF("finish to execute stage[%u]", si); TRACEPRINTF("(logid=%" PRIu64 ") finish to execute stage[%u]", log_id, si);
if (errcode < 0) { if (errcode < 0) {
LOG(ERROR) << "failed execute stage[" << _view[si]->debug(); LOG(ERROR) << "(logid=" << log_id << ") Failed execute stage["
<< _view[si]->debug();
return errcode; return errcode;
} }
} }
...@@ -151,29 +165,34 @@ int DagView::execute(butil::IOBufBuilder* debug_os) { ...@@ -151,29 +165,34 @@ int DagView::execute(butil::IOBufBuilder* debug_os) {
// You can derive a subclass to implement this func. // You can derive a subclass to implement this func.
// ParallelDagView may be the one you want. // ParallelDagView may be the one you want.
int DagView::execute_one_stage(ViewStage* vstage, int DagView::execute_one_stage(ViewStage* vstage,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
butil::Timer stage_time(butil::Timer::STARTED); butil::Timer stage_time(butil::Timer::STARTED);
uint32_t node_size = vstage->nodes.size(); uint32_t node_size = vstage->nodes.size();
VLOG(2) << "vstage->nodes.size(): " << node_size; VLOG(2) << "(logid=" << log_id << ") vstage->nodes.size(): " << node_size;
for (uint32_t ni = 0; ni < node_size; ni++) { for (uint32_t ni = 0; ni < node_size; ni++) {
ViewNode* vnode = vstage->nodes[ni]; ViewNode* vnode = vstage->nodes[ni];
DagNode* conf = vnode->conf; DagNode* conf = vnode->conf;
Op* op = vnode->op; Op* op = vnode->op;
TRACEPRINTF("start to execute op[%s]", op->name()); TRACEPRINTF(
int errcode = op->process(debug_os != NULL); "(logid=%" PRIu64 ") start to execute op[%s]", log_id, op->name());
TRACEPRINTF("finish to execute op[%s]", op->name()); int errcode = op->process(log_id, debug_os != NULL);
TRACEPRINTF(
"(logid=%" PRIu64 ") finish to execute op[%s]", log_id, op->name());
if (errcode < 0) { if (errcode < 0) {
LOG(ERROR) << "Execute failed, Op:" << op->debug_string(); LOG(ERROR) << "(logid=" << log_id
<< ") Execute failed, Op:" << op->debug_string();
return errcode; return errcode;
} }
if (errcode > 0) { if (errcode > 0) {
LOG(INFO) << "Execute ignore, Op:" << op->debug_string(); LOG(INFO) << "(logid=" << log_id
<< ") Execute ignore, Op:" << op->debug_string();
continue; continue;
} }
if (debug_os) { if (debug_os) {
(*debug_os) << "{\"op_name\": \"" << op->name() (*debug_os) << "(logid=" << log_id << ") {\"op_name\": \"" << op->name()
<< "\", \"debug_str:\": \"" << op->debug_string() << "\", \"debug_str:\": \"" << op->debug_string()
<< "\", \"time_info\": \"" << op->time_info() << "\"}"; << "\", \"time_info\": \"" << op->time_info() << "\"}";
} }
...@@ -186,34 +205,34 @@ int DagView::execute_one_stage(ViewStage* vstage, ...@@ -186,34 +205,34 @@ int DagView::execute_one_stage(ViewStage* vstage,
return ERR_OK; return ERR_OK;
} }
int DagView::set_request_channel(Channel& request) { int DagView::set_request_channel(Channel& request, const uint64_t log_id) {
// Each workflow should get the very beginning // Each workflow should get the very beginning
// request (channel), and commit it to bus, for // request (channel), and commit it to bus, for
// the first stage ops consuming. // the first stage ops consuming.
request.share_to_bus(_bus); request.share_to_bus(_bus, log_id);
return ERR_OK; return ERR_OK;
} }
const Channel* DagView::get_response_channel() const { const Channel* DagView::get_response_channel(const uint64_t log_id) const {
// Caller obtains response channel from bus, and // Caller obtains response channel from bus, and
// writes it to rpc response(protbuf/json) // writes it to rpc response(protbuf/json)
if (_view.size() < 1) { if (_view.size() < 1) {
LOG(ERROR) << "invalid empty view stage!"; LOG(ERROR) << "(logid=" << log_id << ") invalid empty view stage!";
return NULL; return NULL;
} }
ViewStage* last_stage = _view[_view.size() - 1]; ViewStage* last_stage = _view[_view.size() - 1];
if (last_stage->nodes.size() != 1 || last_stage->nodes[0] == NULL) { if (last_stage->nodes.size() != 1 || last_stage->nodes[0] == NULL) {
LOG(ERROR) << "Invalid last stage, size[" << last_stage->nodes.size() LOG(ERROR) << "(logid=" << log_id << ") Invalid last stage, size["
<< "] != 1"; << last_stage->nodes.size() << "] != 1";
return NULL; return NULL;
} }
Op* last_op = last_stage->nodes[0]->op; Op* last_op = last_stage->nodes[0]->op;
if (last_op == NULL) { if (last_op == NULL) {
LOG(ERROR) << "Last op is NULL"; LOG(ERROR) << "(logid=" << log_id << ") Last op is NULL";
return NULL; return NULL;
} }
return last_op->mutable_channel(); return last_op->mutable_channel();
......
...@@ -47,21 +47,22 @@ class DagView { ...@@ -47,21 +47,22 @@ class DagView {
~DagView() {} ~DagView() {}
int init(Dag* dag, const std::string& service_name); int init(Dag* dag, const std::string& service_name, const uint64_t log_id);
int deinit(); int deinit();
int execute(butil::IOBufBuilder* debug_os); int execute(const uint64_t log_id, butil::IOBufBuilder* debug_os);
// The default execution strategy is in sequencing // The default execution strategy is in sequencing
// You can derive a subclass to implement this func. // You can derive a subclass to implement this func.
// ParallelDagView may be the one you want. // ParallelDagView may be the one you want.
virtual int execute_one_stage(ViewStage* vstage, virtual int execute_one_stage(ViewStage* vstage,
const uint64_t log_id,
butil::IOBufBuilder* debug_os); butil::IOBufBuilder* debug_os);
int set_request_channel(Channel& request); // NOLINT int set_request_channel(Channel& request, const uint64_t log_id); // NOLINT
const Channel* get_response_channel() const; const Channel* get_response_channel(const uint64_t log_id) const;
const std::string& name() const { return _name; } const std::string& name() const { return _name; }
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
#include <string> #include <string>
#include <utility> #include <utility>
#include "core/predictor/common/inner_common.h" #include "core/predictor/common/inner_common.h"
#include "glog/raw_logging.h"
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace predictor { namespace predictor {
...@@ -28,7 +28,12 @@ namespace predictor { ...@@ -28,7 +28,12 @@ namespace predictor {
FactoryDerive<D, B>* factory = new (std::nothrow) FactoryDerive<D, B>(); \ FactoryDerive<D, B>* factory = new (std::nothrow) FactoryDerive<D, B>(); \
if (factory == NULL || \ if (factory == NULL || \
FactoryPool<B>::instance().register_factory(tag, factory) != 0) { \ FactoryPool<B>::instance().register_factory(tag, factory) != 0) { \
RAW_LOG_FATAL("Failed regist factory: %s in macro!", #D); \ char err_str[ERROR_STRING_LEN]; \
snprintf(err_str, \
ERROR_STRING_LEN - 1, \
"Failed regist factory: %s in macro!", \
#D); \
RAW_LOG(FATAL, err_str); \
return -1; \ return -1; \
} \ } \
return 0; \ return 0; \
...@@ -54,7 +59,13 @@ namespace predictor { ...@@ -54,7 +59,13 @@ namespace predictor {
if (factory == NULL || \ if (factory == NULL || \
::baidu::paddle_serving::predictor::FactoryPool<B>::instance() \ ::baidu::paddle_serving::predictor::FactoryPool<B>::instance() \
.register_factory(#D, factory) != 0) { \ .register_factory(#D, factory) != 0) { \
RAW_LOG_FATAL("Failed regist factory: %s->%s in macro!", #D, #B); \ char err_str[ERROR_STRING_LEN]; \
snprintf(err_str, \
ERROR_STRING_LEN - 1, \
"Failed regist factory: %s->%s in macro!", \
#D, \
#B); \
RAW_LOG(FATAL, err_str); \
return; \ return; \
} \ } \
return; \ return; \
...@@ -66,15 +77,26 @@ namespace predictor { ...@@ -66,15 +77,26 @@ namespace predictor {
::baidu::paddle_serving::predictor::FactoryDerive<D, B>* factory = new ( \ ::baidu::paddle_serving::predictor::FactoryDerive<D, B>* factory = new ( \
::std::nothrow)::baidu::paddle_serving::predictor::FactoryDerive<D, \ ::std::nothrow)::baidu::paddle_serving::predictor::FactoryDerive<D, \
B>(); \ B>(); \
char err_str[ERROR_STRING_LEN]; \
if (factory == NULL || \ if (factory == NULL || \
::baidu::paddle_serving::predictor::FactoryPool<B>::instance() \ ::baidu::paddle_serving::predictor::FactoryPool<B>::instance() \
.register_factory(N, factory) != 0) { \ .register_factory(N, factory) != 0) { \
RAW_LOG_FATAL( \ snprintf(err_str, \
"Failed regist factory: %s->%s, tag: %s in macro!", #D, #B, N); \ ERROR_STRING_LEN - 1, \
"Failed regist factory: %s->%s, tag: %s in macro!", \
#D, \
#B, \
N); \
RAW_LOG(FATAL, err_str); \
return; \ return; \
} \ } \
RAW_LOG_WARNING( \ snprintf(err_str, \
"Succ regist factory: %s->%s, tag: %s in macro!", #D, #B, N); \ ERROR_STRING_LEN - 1, \
"Succ regist factory: %s->%s, tag: %s in macro!", \
#D, \
#B, \
N); \
RAW_LOG(WARNING, err_str); \
return; \ return; \
} }
...@@ -102,24 +124,35 @@ class FactoryPool { ...@@ -102,24 +124,35 @@ class FactoryPool {
} }
int register_factory(const std::string& tag, FactoryBase<B>* factory) { int register_factory(const std::string& tag, FactoryBase<B>* factory) {
char err_str[ERROR_STRING_LEN];
typename std::map<std::string, FactoryBase<B>*>::iterator it = typename std::map<std::string, FactoryBase<B>*>::iterator it =
_pool.find(tag); _pool.find(tag);
if (it != _pool.end()) { if (it != _pool.end()) {
RAW_LOG_FATAL("Insert duplicate with tag: %s", tag.c_str()); snprintf(err_str,
ERROR_STRING_LEN - 1,
"Insert duplicate with tag: %s",
tag.c_str());
RAW_LOG(FATAL, err_str);
return -1; return -1;
} }
std::pair<typename std::map<std::string, FactoryBase<B>*>::iterator, bool> std::pair<typename std::map<std::string, FactoryBase<B>*>::iterator, bool>
r = _pool.insert(std::make_pair(tag, factory)); r = _pool.insert(std::make_pair(tag, factory));
if (!r.second) { if (!r.second) {
RAW_LOG_FATAL("Failed insert new factory with: %s", tag.c_str()); snprintf(err_str,
ERROR_STRING_LEN - 1,
"Failed insert new factory with: %s",
tag.c_str());
RAW_LOG(FATAL, err_str);
return -1; return -1;
} }
RAW_LOG_INFO("Succ insert one factory, tag: %s, base type %s", snprintf(err_str,
tag.c_str(), ERROR_STRING_LEN - 1,
typeid(B).name()); "Succ insert one factory, tag: %s, base type %s",
tag.c_str(),
typeid(B).name());
RAW_LOG(INFO, err_str);
return 0; return 0;
} }
...@@ -127,9 +160,13 @@ class FactoryPool { ...@@ -127,9 +160,13 @@ class FactoryPool {
typename std::map<std::string, FactoryBase<B>*>::iterator it = typename std::map<std::string, FactoryBase<B>*>::iterator it =
_pool.find(tag); _pool.find(tag);
if (it == _pool.end() || it->second == NULL) { if (it == _pool.end() || it->second == NULL) {
RAW_LOG_FATAL("Not found factory pool, tag: %s, pool size %u", char err_str[ERROR_STRING_LEN];
tag.c_str(), snprintf(err_str,
_pool.size()); ERROR_STRING_LEN - 1,
"Not found factory pool, tag: %s, pool size %u",
tag.c_str(),
_pool.size());
RAW_LOG(FATAL, err_str);
return NULL; return NULL;
} }
......
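Every macro in this hunk gets the same replacement, which the backslash continuations make hard to read: format the message into a fixed-size stack buffer first, then hand the buffer to `RAW_LOG(level, ...)` instead of the removed `RAW_LOG_*` helpers. A standalone sketch of that pattern, with RAW_LOG stubbed by fprintf purely so the snippet compiles on its own (in the tree it is the raw-logging macro):

```cpp
#include <stdio.h>

#define ERROR_STRING_LEN 10240
// Stand-in for the real raw-logging macro, only so this sketch is self-contained.
#define RAW_LOG(level, msg) fprintf(stderr, "[" #level "] %s\n", (msg))

int register_example(bool duplicate, const char* tag) {
  char err_str[ERROR_STRING_LEN];
  if (duplicate) {
    snprintf(err_str, ERROR_STRING_LEN - 1, "Insert duplicate with tag: %s", tag);
    RAW_LOG(FATAL, err_str);
    return -1;
  }
  snprintf(err_str, ERROR_STRING_LEN - 1, "Succ insert one factory, tag: %s", tag);
  RAW_LOG(INFO, err_str);
  return 0;
}
```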
...@@ -603,6 +603,7 @@ class VersionedInferEngine : public InferEngine { ...@@ -603,6 +603,7 @@ class VersionedInferEngine : public InferEngine {
LOG(ERROR) << "Failed generate engine with type:" << engine_type; LOG(ERROR) << "Failed generate engine with type:" << engine_type;
return -1; return -1;
} }
#ifndef BCLOUD
VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr; VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr;
int tmp = FLAGS_logtostderr; int tmp = FLAGS_logtostderr;
if (engine->proc_initialize(conf, version) != 0) { if (engine->proc_initialize(conf, version) != 0) {
...@@ -611,6 +612,12 @@ class VersionedInferEngine : public InferEngine { ...@@ -611,6 +612,12 @@ class VersionedInferEngine : public InferEngine {
} }
VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr; VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr;
FLAGS_logtostderr = tmp; FLAGS_logtostderr = tmp;
#else
if (engine->proc_initialize(conf, version) != 0) {
LOG(ERROR) << "Failed initialize engine, type:" << engine_type;
return -1;
}
#endif
auto r = _versions.insert(std::make_pair(engine->version(), engine)); auto r = _versions.insert(std::make_pair(engine->version(), engine));
if (!r.second) { if (!r.second) {
LOG(ERROR) << "Failed insert item: " << engine->version() LOG(ERROR) << "Failed insert item: " << engine->version()
......
...@@ -62,7 +62,10 @@ class OpRepository { ...@@ -62,7 +62,10 @@ class OpRepository {
template <typename OP_TYPE> template <typename OP_TYPE>
void regist_op(std::string op_type) { void regist_op(std::string op_type) {
_repository[op_type] = &OpFactory<OP_TYPE>::instance(); _repository[op_type] = &OpFactory<OP_TYPE>::instance();
RAW_LOG_INFO("Succ regist op: %s", op_type.c_str()); char err_str[ERROR_STRING_LEN];
snprintf(
err_str, ERROR_STRING_LEN - 1, "Succ regist op: %s", op_type.c_str());
RAW_LOG(INFO, err_str);
} }
Op* get_op(std::string op_type); Op* get_op(std::string op_type);
......
...@@ -17,6 +17,9 @@ ...@@ -17,6 +17,9 @@
#include <string> #include <string>
#include "core/predictor/common/inner_common.h" #include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/kv_manager.h" #include "core/predictor/framework/kv_manager.h"
#ifdef BCLOUD
#include "aipe_sec_client.h" // NOLINT
#endif
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
namespace predictor { namespace predictor {
...@@ -109,6 +112,42 @@ int Resource::initialize(const std::string& path, const std::string& file) { ...@@ -109,6 +112,42 @@ int Resource::initialize(const std::string& path, const std::string& file) {
} }
LOG(WARNING) << "Successfully proc initialized mempool wrapper"; LOG(WARNING) << "Successfully proc initialized mempool wrapper";
#ifdef WITH_AUTH
std::string product_name_str = resource_conf.auth_product_name();
std::string container_id_str = resource_conf.auth_container_id();
char* product_name = new char[product_name_str.size() + 1];
snprintf(product_name,
product_name_str.size() + 1,
"%s",
product_name_str.c_str());
char* container_id = new char[container_id_str.size() + 1];
snprintf(container_id,
container_id_str.size() + 1,
"%s",
container_id_str.c_str());
aipe_auth_request request;
request.product_name = product_name;
request.container_id = container_id;
request.request_ts = (int64_t)time(NULL);
LOG(INFO) << "\nEasypack info"
<< "\nproduct name: " << request.product_name
<< "\ncontainer_id: " << request.container_id
<< "\nrequest time stamp: " << request.request_ts;
aipe_auth_response response;
response = check_auth(request);
if (response.result == 0) {
LOG(INFO) << "Authentication succeed.";
} else {
LOG(ERROR) << "Authentication failed. Error code: " << response.result;
return -1;
}
#endif
if (FLAGS_enable_model_toolkit) { if (FLAGS_enable_model_toolkit) {
int err = 0; int err = 0;
std::string model_toolkit_path = resource_conf.model_toolkit_path(); std::string model_toolkit_path = resource_conf.model_toolkit_path();
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include <butil/time.h> // butil::Timer #include <butil/time.h> // butil::Timer
#endif #endif
#include <inttypes.h>
#include <list> #include <list>
#include <string> #include <string>
#include <vector> #include <vector>
...@@ -135,50 +136,63 @@ const std::string& InferService::name() const { return _infer_service_format; } ...@@ -135,50 +136,63 @@ const std::string& InferService::name() const { return _infer_service_format; }
// Execute each workflow serially // Execute each workflow serially
int InferService::inference(const google::protobuf::Message* request, int InferService::inference(const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
TRACEPRINTF("start to inference"); TRACEPRINTF("(logid=%" PRIu64 ") start to inference", log_id);
// when function call begins, framework will reset // when function call begins, framework will reset
// thread local variables&resources automatically. // thread local variables&resources automatically.
if (Resource::instance().thread_clear() != 0) { if (Resource::instance().thread_clear() != 0) {
LOG(ERROR) << "Failed thread clear whole resource"; LOG(ERROR) << "(logid=" << log_id << ") Failed thread clear whole resource";
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
TRACEPRINTF("finish to thread clear"); TRACEPRINTF("(logid=%" PRIu64 ") finish to thread clear", log_id);
if (_enable_map_request_to_workflow) { if (_enable_map_request_to_workflow) {
LOG(INFO) << "enable map request == True"; VLOG(2) << "(logid=" << log_id << ") enable map request == True";
std::vector<Workflow*>* workflows = _map_request_to_workflow(request); std::vector<Workflow*>* workflows =
_map_request_to_workflow(request, log_id);
if (!workflows || workflows->size() == 0) { if (!workflows || workflows->size() == 0) {
LOG(ERROR) << "Failed to map request to workflow"; LOG(ERROR) << "(logid=" << log_id
<< ") Failed to map request to workflow";
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
size_t fsize = workflows->size(); size_t fsize = workflows->size();
for (size_t fi = 0; fi < fsize; ++fi) { for (size_t fi = 0; fi < fsize; ++fi) {
Workflow* workflow = (*workflows)[fi]; Workflow* workflow = (*workflows)[fi];
if (workflow == NULL) { if (workflow == NULL) {
LOG(ERROR) << "Failed to get valid workflow at: " << fi; LOG(ERROR) << "(logid=" << log_id
<< ") Failed to get valid workflow at: " << fi;
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
TRACEPRINTF("start to execute workflow[%s]", workflow->name().c_str()); TRACEPRINTF("(logid=%" PRIu64 ") start to execute workflow[%s]",
int errcode = _execute_workflow(workflow, request, response, debug_os); log_id,
TRACEPRINTF("finish to execute workflow[%s]", workflow->name().c_str()); workflow->name().c_str());
int errcode =
_execute_workflow(workflow, request, response, log_id, debug_os);
TRACEPRINTF("(logid=%" PRIu64 ") finish to execute workflow[%s]",
log_id,
workflow->name().c_str());
if (errcode < 0) { if (errcode < 0) {
LOG(ERROR) << "Failed execute workflow[" << workflow->name() LOG(ERROR) << "(logid=" << log_id << ") Failed execute workflow["
<< "] in:" << name(); << workflow->name() << "] in:" << name();
return errcode; return errcode;
} }
} }
} else { } else {
LOG(INFO) << "enable map request == False"; VLOG(2) << "(logid=" << log_id << ") enable map request == False";
TRACEPRINTF("start to execute one workflow"); TRACEPRINTF("(logid=%" PRIu64 ") start to execute one workflow", log_id);
size_t fsize = _flows.size(); size_t fsize = _flows.size();
for (size_t fi = 0; fi < fsize; ++fi) { for (size_t fi = 0; fi < fsize; ++fi) {
TRACEPRINTF("start to execute one workflow-%lu", fi); TRACEPRINTF(
int errcode = execute_one_workflow(fi, request, response, debug_os); "(logid=%" PRIu64 ") start to execute one workflow-%lu", log_id, fi);
TRACEPRINTF("finish to execute one workflow-%lu", fi); int errcode =
execute_one_workflow(fi, request, response, log_id, debug_os);
TRACEPRINTF(
"(logid=%" PRIu64 ") finish to execute one workflow-%lu", log_id, fi);
if (errcode < 0) { if (errcode < 0) {
LOG(ERROR) << "Failed execute 0-th workflow in:" << name(); LOG(ERROR) << "(logid=" << log_id
<< ") Failed execute 0-th workflow in:" << name();
return errcode; return errcode;
} }
} }
...@@ -188,26 +202,30 @@ int InferService::inference(const google::protobuf::Message* request, ...@@ -188,26 +202,30 @@ int InferService::inference(const google::protobuf::Message* request,
int InferService::debug(const google::protobuf::Message* request, int InferService::debug(const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
return inference(request, response, debug_os); return inference(request, response, log_id, debug_os);
} }
int InferService::execute_one_workflow(uint32_t index, int InferService::execute_one_workflow(uint32_t index,
const google::protobuf::Message* request, const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
if (index >= _flows.size()) { if (index >= _flows.size()) {
LOG(ERROR) << "Faield execute workflow, index: " << index LOG(ERROR) << "(logid=" << log_id
<< ") Faield execute workflow, index: " << index
<< " >= max:" << _flows.size(); << " >= max:" << _flows.size();
return ERR_OVERFLOW_FAILURE; return ERR_OVERFLOW_FAILURE;
} }
Workflow* workflow = _flows[index]; Workflow* workflow = _flows[index];
return _execute_workflow(workflow, request, response, debug_os); return _execute_workflow(workflow, request, response, log_id, debug_os);
} }
int InferService::_execute_workflow(Workflow* workflow, int InferService::_execute_workflow(Workflow* workflow,
const google::protobuf::Message* request, const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
butil::Timer workflow_time(butil::Timer::STARTED); butil::Timer workflow_time(butil::Timer::STARTED);
// create and submit beginer channel // create and submit beginer channel
...@@ -215,54 +233,62 @@ int InferService::_execute_workflow(Workflow* workflow, ...@@ -215,54 +233,62 @@ int InferService::_execute_workflow(Workflow* workflow,
req_channel.init(0, START_OP_NAME); req_channel.init(0, START_OP_NAME);
req_channel = request; req_channel = request;
DagView* dv = workflow->fetch_dag_view(full_name()); DagView* dv = workflow->fetch_dag_view(full_name(), log_id);
dv->set_request_channel(req_channel); dv->set_request_channel(req_channel, log_id);
// call actual inference interface // call actual inference interface
int errcode = dv->execute(debug_os); int errcode = dv->execute(log_id, debug_os);
if (errcode < 0) { if (errcode < 0) {
LOG(ERROR) << "Failed execute dag for workflow:" << workflow->name(); LOG(ERROR) << "(logid=" << log_id
<< ") Failed execute dag for workflow:" << workflow->name();
return errcode; return errcode;
} }
TRACEPRINTF("finish to dv execute"); TRACEPRINTF("(logid=%" PRIu64 ") finish to dv execute", log_id);
// create ender channel and copy // create ender channel and copy
const Channel* res_channel = dv->get_response_channel(); const Channel* res_channel = dv->get_response_channel(log_id);
if (res_channel == NULL) {
LOG(ERROR) << "(logid=" << log_id << ") Failed get response channel";
return ERR_INTERNAL_FAILURE;
}
if (!_merger || !_merger->merge(res_channel->message(), response)) { if (!_merger || !_merger->merge(res_channel->message(), response)) {
LOG(ERROR) << "Failed merge channel res to response"; LOG(ERROR) << "(logid=" << log_id
<< ") Failed merge channel res to response";
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
TRACEPRINTF("finish to copy from"); TRACEPRINTF("(logid=%" PRIu64 ") finish to copy from", log_id);
workflow_time.stop(); workflow_time.stop();
LOG(INFO) << "workflow total time: " << workflow_time.u_elapsed(); LOG(INFO) << "(logid=" << log_id
<< ") workflow total time: " << workflow_time.u_elapsed();
PredictorMetric::GetInstance()->update_latency_metric( PredictorMetric::GetInstance()->update_latency_metric(
WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed()); WORKFLOW_METRIC_PREFIX + dv->full_name(), workflow_time.u_elapsed());
// return tls data to object pool // return tls data to object pool
workflow->return_dag_view(dv); workflow->return_dag_view(dv);
TRACEPRINTF("finish to return dag view"); TRACEPRINTF("(logid=%" PRIu64 ") finish to return dag view", log_id);
return ERR_OK; return ERR_OK;
} }
std::vector<Workflow*>* InferService::_map_request_to_workflow( std::vector<Workflow*>* InferService::_map_request_to_workflow(
const google::protobuf::Message* request) { const google::protobuf::Message* request, const uint64_t log_id) {
const google::protobuf::Descriptor* desc = request->GetDescriptor(); const google::protobuf::Descriptor* desc = request->GetDescriptor();
const google::protobuf::FieldDescriptor* field = const google::protobuf::FieldDescriptor* field =
desc->FindFieldByName(_request_field_key); desc->FindFieldByName(_request_field_key);
if (field == NULL) { if (field == NULL) {
LOG(ERROR) << "No field[" << _request_field_key << "] in [" LOG(ERROR) << "(logid=" << log_id << ") No field[" << _request_field_key
<< desc->full_name() << "]."; << "] in [" << desc->full_name() << "].";
return NULL; return NULL;
} }
if (field->is_repeated()) { if (field->is_repeated()) {
LOG(ERROR) << "field[" << desc->full_name() << "." << _request_field_key LOG(ERROR) << "(logid=" << log_id << ") field[" << desc->full_name() << "."
<< "] is repeated."; << _request_field_key << "] is repeated.";
return NULL; return NULL;
} }
if (field->cpp_type() != google::protobuf::FieldDescriptor::CPPTYPE_STRING) { if (field->cpp_type() != google::protobuf::FieldDescriptor::CPPTYPE_STRING) {
LOG(ERROR) << "field[" << desc->full_name() << "." << _request_field_key LOG(ERROR) << "(logid=" << log_id << ") field[" << desc->full_name() << "."
<< "] should be string"; << _request_field_key << "] should be string";
return NULL; return NULL;
} }
const std::string& field_value = const std::string& field_value =
...@@ -270,7 +296,7 @@ std::vector<Workflow*>* InferService::_map_request_to_workflow( ...@@ -270,7 +296,7 @@ std::vector<Workflow*>* InferService::_map_request_to_workflow(
std::vector<Workflow*>* p_workflow = std::vector<Workflow*>* p_workflow =
_request_to_workflow_map.seek(field_value); _request_to_workflow_map.seek(field_value);
if (p_workflow == NULL) { if (p_workflow == NULL) {
LOG(ERROR) << "cannot find key[" << field_value LOG(ERROR) << "(logid=" << log_id << ") cannot find key[" << field_value
<< "] in _request_to_workflow_map"; << "] in _request_to_workflow_map";
return NULL; return NULL;
} }
......
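One prerequisite worth noting: the TRACEPRINTF calls above format `log_id` with `PRIu64`, which is presumably why `<inttypes.h>` joins the includes in the header hunk further up. A self-contained illustration of that format idiom:

```cpp
// Minimal illustration of the PRIu64 idiom used by the TRACEPRINTF calls:
// it expands to the correct printf length specifier for uint64_t on any platform.
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int main() {
  uint64_t log_id = 42;
  printf("(logid=%" PRIu64 ") start to inference\n", log_id);
  return 0;
}
```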
...@@ -52,25 +52,29 @@ class InferService { ...@@ -52,25 +52,29 @@ class InferService {
// Execute each workflow serially // Execute each workflow serially
virtual int inference(const google::protobuf::Message* request, virtual int inference(const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os = NULL); butil::IOBufBuilder* debug_os = NULL);
int debug(const google::protobuf::Message* request, int debug(const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os); butil::IOBufBuilder* debug_os);
int execute_one_workflow(uint32_t index, int execute_one_workflow(uint32_t index,
const google::protobuf::Message* request, const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os); butil::IOBufBuilder* debug_os);
private: private:
int _execute_workflow(Workflow* workflow, int _execute_workflow(Workflow* workflow,
const google::protobuf::Message* request, const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os); butil::IOBufBuilder* debug_os);
std::vector<Workflow*>* _map_request_to_workflow( std::vector<Workflow*>* _map_request_to_workflow(
const google::protobuf::Message* request); const google::protobuf::Message* request, const uint64_t log_id);
private: private:
std::vector<Workflow*> _flows; std::vector<Workflow*> _flows;
...@@ -88,6 +92,7 @@ class ParallelInferService : public InferService { ...@@ -88,6 +92,7 @@ class ParallelInferService : public InferService {
// Execute workflows in parallel // Execute workflows in parallel
int inference(const google::protobuf::Message* request, int inference(const google::protobuf::Message* request,
google::protobuf::Message* response, google::protobuf::Message* response,
const uint64_t log_id,
butil::IOBufBuilder* debug_os) { butil::IOBufBuilder* debug_os) {
return 0; return 0;
} }
......
...@@ -23,17 +23,24 @@ namespace predictor { ...@@ -23,17 +23,24 @@ namespace predictor {
#define REGIST_FORMAT_SERVICE(svr_name, svr) \ #define REGIST_FORMAT_SERVICE(svr_name, svr) \
do { \ do { \
char err_str[ERROR_STRING_LEN]; \
int ret = \ int ret = \
::baidu::paddle_serving::predictor::FormatServiceManager::instance() \ ::baidu::paddle_serving::predictor::FormatServiceManager::instance() \
.regist_service(svr_name, svr); \ .regist_service(svr_name, svr); \
if (ret != 0) { \ if (ret != 0) { \
RAW_LOG_ERROR("Failed regist service[%s][%s]", \ snprintf(err_str, \
svr_name.c_str(), \ ERROR_STRING_LEN - 1, \
typeid(svr).name()); \ "Failed regist service[%s][%s]", \
svr_name.c_str(), \
typeid(svr).name()); \
RAW_LOG(ERROR, err_str); \
} else { \ } else { \
RAW_LOG_INFO("Success regist service[%s][%s]", \ snprintf(err_str, \
svr_name.c_str(), \ ERROR_STRING_LEN - 1, \
typeid(svr).name()); \ "Success regist service[%s][%s]", \
svr_name.c_str(), \
typeid(svr).name()); \
RAW_LOG(INFO, err_str); \
} \ } \
} while (0) } while (0)
...@@ -42,31 +49,46 @@ class FormatServiceManager { ...@@ -42,31 +49,46 @@ class FormatServiceManager {
typedef google::protobuf::Service Service; typedef google::protobuf::Service Service;
int regist_service(const std::string& svr_name, Service* svr) { int regist_service(const std::string& svr_name, Service* svr) {
char err_str[ERROR_STRING_LEN];
if (_service_map.find(svr_name) != _service_map.end()) { if (_service_map.find(svr_name) != _service_map.end()) {
RAW_LOG_ERROR("Service[%s][%s] already exist!", snprintf(err_str,
svr_name.c_str(), ERROR_STRING_LEN - 1,
typeid(svr).name()); "Service[%s][%s] already exist!",
svr_name.c_str(),
typeid(svr).name());
RAW_LOG(ERROR, err_str);
return -1; return -1;
} }
std::pair<boost::unordered_map<std::string, Service*>::iterator, bool> ret; std::pair<boost::unordered_map<std::string, Service*>::iterator, bool> ret;
ret = _service_map.insert(std::make_pair(svr_name, svr)); ret = _service_map.insert(std::make_pair(svr_name, svr));
if (ret.second == false) { if (ret.second == false) {
RAW_LOG_ERROR("Service[%s][%s] insert failed!", snprintf(err_str,
svr_name.c_str(), ERROR_STRING_LEN - 1,
typeid(svr).name()); "Service[%s][%s] insert failed!",
svr_name.c_str(),
typeid(svr).name());
RAW_LOG(ERROR, err_str);
return -1; return -1;
} }
RAW_LOG_INFO("Service[%s] insert successfully!", svr_name.c_str()); snprintf(err_str,
ERROR_STRING_LEN - 1,
"Service[%s] insert successfully!",
svr_name.c_str());
RAW_LOG(INFO, err_str);
return 0; return 0;
} }
Service* get_service(const std::string& svr_name) { Service* get_service(const std::string& svr_name) {
char err_str[ERROR_STRING_LEN];
boost::unordered_map<std::string, Service*>::iterator res; boost::unordered_map<std::string, Service*>::iterator res;
if ((res = _service_map.find(svr_name)) == _service_map.end()) { if ((res = _service_map.find(svr_name)) == _service_map.end()) {
RAW_LOG_WARNING("Service[%s] not found in service manager!", snprintf(err_str,
svr_name.c_str()); ERROR_STRING_LEN - 1,
"Service[%s] not found in service manager!",
svr_name.c_str());
RAW_LOG(WARNING, err_str);
return NULL; return NULL;
} }
return (*res).second; return (*res).second;
......
...@@ -32,21 +32,22 @@ int Workflow::init(const configure::Workflow& conf) { ...@@ -32,21 +32,22 @@ int Workflow::init(const configure::Workflow& conf) {
return 0; return 0;
} }
DagView* Workflow::fetch_dag_view(const std::string& service_name) { DagView* Workflow::fetch_dag_view(const std::string& service_name,
const uint64_t log_id) {
DagView* view = NULL; DagView* view = NULL;
if (_type == "Sequence") { if (_type == "Sequence") {
view = butil::get_object<DagView>(); view = butil::get_object<DagView>();
} else if (_type == "Parallel") { } else if (_type == "Parallel") {
view = butil::get_object<ParallelDagView>(); view = butil::get_object<ParallelDagView>();
} else { } else {
LOG(ERROR) << "Unknown dag type:" << _type << "!"; LOG(ERROR) << "(logid=" << log_id << ") Unknown dag type:" << _type << "!";
return NULL; return NULL;
} }
if (view == NULL) { if (view == NULL) {
LOG(ERROR) << "create dag view from pool failed!"; LOG(ERROR) << "(logid=" << log_id << ") create dag view from pool failed!";
return NULL; return NULL;
} }
view->init(&_dag, service_name); view->init(&_dag, service_name, log_id);
return view; return view;
} }
......
...@@ -36,7 +36,8 @@ class Workflow { ...@@ -36,7 +36,8 @@ class Workflow {
// different apps. // different apps.
int init(const configure::Workflow& conf); int init(const configure::Workflow& conf);
DagView* fetch_dag_view(const std::string& service_name); DagView* fetch_dag_view(const std::string& service_name,
const uint64_t log_id);
int deinit() { return 0; } int deinit() { return 0; }
......
...@@ -35,7 +35,8 @@ int Op::init(Bus* bus, ...@@ -35,7 +35,8 @@ int Op::init(Bus* bus,
uint32_t id, uint32_t id,
const std::string& name, const std::string& name,
const std::string& type, const std::string& type,
void* conf) { void* conf,
const uint64_t log_id) {
_bus = bus; _bus = bus;
_dag = dag; _dag = dag;
_id = id; _id = id;
...@@ -45,7 +46,8 @@ int Op::init(Bus* bus, ...@@ -45,7 +46,8 @@ int Op::init(Bus* bus,
_timer = butil::get_object<TimerFlow>(); _timer = butil::get_object<TimerFlow>();
if (!_timer) { if (!_timer) {
LOG(ERROR) << "Invalid timerflow in op:" << this->name(); LOG(ERROR) << "(logid=" << log_id
<< ") Invalid timerflow in op:" << this->name();
return -1; return -1;
} }
...@@ -55,7 +57,8 @@ int Op::init(Bus* bus, ...@@ -55,7 +57,8 @@ int Op::init(Bus* bus,
Channel* channel = mutable_channel(); Channel* channel = mutable_channel();
if (channel == NULL) { if (channel == NULL) {
LOG(ERROR) << "Failed mutable channel in op: " << this->id() << ", " LOG(ERROR) << "(logid=" << log_id
<< ") Failed mutable channel in op: " << this->id() << ", "
<< this->name() << "!"; << this->name() << "!";
return -1; return -1;
} }
...@@ -96,18 +99,20 @@ int Op::check_time(const char* tag) { ...@@ -96,18 +99,20 @@ int Op::check_time(const char* tag) {
return 0; return 0;
} }
int Op::process(bool debug) { int Op::process(const uint64_t log_id, bool debug) {
butil::Timer op_time(butil::Timer::STARTED); butil::Timer op_time(butil::Timer::STARTED);
if (debug && _timer) { if (debug && _timer) {
_timer->start(); _timer->start();
} }
if (!_has_init) { if (!_has_init) {
LOG(ERROR) << "Make sure op has been init before inference"; LOG(ERROR) << "(logid=" << log_id
<< ") Make sure op has been init before inference";
return ERR_INTERNAL_FAILURE; return ERR_INTERNAL_FAILURE;
} }
if (_has_calc) { if (_has_calc) {
LOG(INFO) << "Op: " << _name << " already processed before"; LOG(INFO) << "(logid=" << log_id << ") Op: " << _name
<< " already processed before";
return ERR_OK; return ERR_OK;
} }
...@@ -143,7 +148,7 @@ int Op::process(bool debug) { ...@@ -143,7 +148,7 @@ int Op::process(bool debug) {
// 3. share output to bus // 3. share output to bus
Channel* channel = mutable_channel(); Channel* channel = mutable_channel();
channel->share_to_bus(_bus); channel->share_to_bus(_bus, log_id);
// 4. mark has calculated // 4. mark has calculated
_has_calc = true; _has_calc = true;
...@@ -156,7 +161,8 @@ int Op::process(bool debug) { ...@@ -156,7 +161,8 @@ int Op::process(bool debug) {
op_time.stop(); op_time.stop();
PredictorMetric::GetInstance()->update_latency_metric( PredictorMetric::GetInstance()->update_latency_metric(
OP_METRIC_PREFIX + full_name(), op_time.u_elapsed()); OP_METRIC_PREFIX + full_name(), op_time.u_elapsed());
LOG(INFO) << " " << name() << "_time=[" << op_time.u_elapsed() << "]"; LOG(INFO) << "(logid=" << log_id << ") " << name() << "_time=["
<< op_time.u_elapsed() << "]";
return ERR_OK; return ERR_OK;
} }
......
...@@ -113,13 +113,14 @@ class Op { ...@@ -113,13 +113,14 @@ class Op {
uint32_t id, uint32_t id,
const std::string& name, const std::string& name,
const std::string& type, const std::string& type,
void* conf); void* conf,
const uint64_t log_id);
int deinit(); int deinit();
int check_time(const char* tag); int check_time(const char* tag);
int process(bool debug); int process(const uint64_t log_id, bool debug);
std::string time_info(); std::string time_info();
......
...@@ -202,8 +202,6 @@ int main(int argc, char** argv) { ...@@ -202,8 +202,6 @@ int main(int argc, char** argv) {
} }
VLOG(2) << "Succ call pthread worker start function"; VLOG(2) << "Succ call pthread worker start function";
#ifndef BCLOUD
if (Resource::instance().general_model_initialize(FLAGS_resource_path, if (Resource::instance().general_model_initialize(FLAGS_resource_path,
FLAGS_resource_file) != 0) { FLAGS_resource_file) != 0) {
LOG(ERROR) << "Failed to initialize general model conf: " LOG(ERROR) << "Failed to initialize general model conf: "
...@@ -213,6 +211,7 @@ int main(int argc, char** argv) { ...@@ -213,6 +211,7 @@ int main(int argc, char** argv) {
VLOG(2) << "Succ initialize general model"; VLOG(2) << "Succ initialize general model";
#ifndef BCLOUD
// FATAL messages are output to stderr // FATAL messages are output to stderr
FLAGS_stderrthreshold = 3; FLAGS_stderrthreshold = 3;
#endif #endif
......
...@@ -50,9 +50,9 @@ class WeightedRandomRender : public EndpointRouterBase { ...@@ -50,9 +50,9 @@ class WeightedRandomRender : public EndpointRouterBase {
Factory<WeightedRandomRender, EndpointRouterBase>* factory = Factory<WeightedRandomRender, EndpointRouterBase>* factory =
new (std::nothrow) Factory<WeightedRandomRender, EndpointRouterBase>(); new (std::nothrow) Factory<WeightedRandomRender, EndpointRouterBase>();
if (factory == NULL) { if (factory == NULL) {
RAW_LOG_ERROR( RAW_LOG(ERROR,
"Failed regist factory: WeightedRandomRender->EndpointRouterBase in " "Failed regist factory: WeightedRandomRender->EndpointRouterBase \
"macro!"); in macro!");
return -1; return -1;
} }
...@@ -62,9 +62,9 @@ class WeightedRandomRender : public EndpointRouterBase { ...@@ -62,9 +62,9 @@ class WeightedRandomRender : public EndpointRouterBase {
// together. // together.
if (FactoryPool<EndpointRouterBase>::instance().register_factory( if (FactoryPool<EndpointRouterBase>::instance().register_factory(
"WeightedRandomRender", factory) != 0) { "WeightedRandomRender", factory) != 0) {
RAW_LOG_INFO( RAW_LOG(INFO,
"Factory has been registed: " "Factory has been registed: \
"WeightedRandomRender->EndpointRouterBase."); WeightedRandomRender->EndpointRouterBase.");
} }
return 0; return 0;
......
...@@ -18,7 +18,6 @@ ...@@ -18,7 +18,6 @@
#include <utility> #include <utility>
#include "core/sdk-cpp/include/common.h" #include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/stub_impl.h" #include "core/sdk-cpp/include/stub_impl.h"
#include "glog/raw_logging.h"
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
...@@ -28,12 +27,20 @@ namespace sdk_cpp { ...@@ -28,12 +27,20 @@ namespace sdk_cpp {
namespace brpc = baidu::rpc; namespace brpc = baidu::rpc;
#endif #endif
#define ERROR_STRING_LEN 10240
#define INLINE_REGIST_OBJECT(D, B, E) \ #define INLINE_REGIST_OBJECT(D, B, E) \
do { \ do { \
Factory<D, B>* factory = new (std::nothrow) Factory<D, B>(); \ Factory<D, B>* factory = new (std::nothrow) Factory<D, B>(); \
if (factory == NULL || \ if (factory == NULL || \
FactoryPool<B>::instance().register_factory(#D, factory) != 0) { \ FactoryPool<B>::instance().register_factory(#D, factory) != 0) { \
RAW_LOG_ERROR("Failed regist factory: %s->%s in macro!", #D, #B); \ char err_str[ERROR_STRING_LEN]; \
snprintf(err_str, \
ERROR_STRING_LEN - 1, \
"Failed regist factory: %s->%s in macro!", \
#D, \
#B); \
RAW_LOG(ERROR, err_str); \
return E; \ return E; \
} \ } \
} while (0) } while (0)
...@@ -43,7 +50,12 @@ namespace brpc = baidu::rpc; ...@@ -43,7 +50,12 @@ namespace brpc = baidu::rpc;
Factory<D, B>* factory = new (std::nothrow) Factory<D, B>(); \ Factory<D, B>* factory = new (std::nothrow) Factory<D, B>(); \
if (factory == NULL || \ if (factory == NULL || \
FactoryPool<B>::instance().register_factory(tag, factory) != 0) { \ FactoryPool<B>::instance().register_factory(tag, factory) != 0) { \
RAW_LOG_ERROR("Failed regist factory: %s in macro!", #D); \ char err_str[ERROR_STRING_LEN]; \
snprintf(err_str, \
ERROR_STRING_LEN - 1, \
"Failed regist factory: %s in macro!", \
#D); \
RAW_LOG(ERROR, err_str); \
return -1; \ return -1; \
} \ } \
return 0; \ return 0; \
...@@ -66,7 +78,13 @@ namespace brpc = baidu::rpc; ...@@ -66,7 +78,13 @@ namespace brpc = baidu::rpc;
if (factory == NULL || \ if (factory == NULL || \
::baidu::paddle_serving::sdk_cpp::FactoryPool<B>::instance() \ ::baidu::paddle_serving::sdk_cpp::FactoryPool<B>::instance() \
.register_factory(#D, factory) != 0) { \ .register_factory(#D, factory) != 0) { \
RAW_LOG_ERROR("Failed regist factory: %s->%s in macro!", #D, #B); \ char err_str[ERROR_STRING_LEN]; \
snprintf(err_str, \
ERROR_STRING_LEN - 1, \
"Failed regist factory: %s->%s in macro!", \
#D, \
#B); \
RAW_LOG(ERROR, err_str); \
return; \ return; \
} \ } \
return; \ return; \
...@@ -80,8 +98,14 @@ namespace brpc = baidu::rpc; ...@@ -80,8 +98,14 @@ namespace brpc = baidu::rpc;
if (factory == NULL || \ if (factory == NULL || \
::baidu::paddle_serving::sdk_cpp::FactoryPool<B>::instance() \ ::baidu::paddle_serving::sdk_cpp::FactoryPool<B>::instance() \
.register_factory(T, factory) != 0) { \ .register_factory(T, factory) != 0) { \
RAW_LOG_ERROR( \ char err_str[ERROR_STRING_LEN]; \
"Failed regist factory: %s->%s, tag %s in macro!", #D, #B, T); \ snprintf(err_str, \
ERROR_STRING_LEN - 1, \
"Failed regist factory: %s->%s, tag %s in macro!", \
#D, \
#B, \
T); \
RAW_LOG(ERROR, err_str); \
return; \ return; \
} \ } \
return; \ return; \
...@@ -108,8 +132,13 @@ namespace brpc = baidu::rpc; ...@@ -108,8 +132,13 @@ namespace brpc = baidu::rpc;
::baidu::paddle_serving::sdk_cpp::FactoryPool< \ ::baidu::paddle_serving::sdk_cpp::FactoryPool< \
::baidu::paddle_serving::sdk_cpp::Stub>::instance() \ ::baidu::paddle_serving::sdk_cpp::Stub>::instance() \
.register_factory(T, factory) != 0) { \ .register_factory(T, factory) != 0) { \
RAW_LOG_ERROR( \ char err_str[ERROR_STRING_LEN]; \
"Failed regist factory: %s->Stub, tag: %s in macro!", #D, T); \ snprintf(err_str, \
ERROR_STRING_LEN - 1, \
"Failed regist factory: %s->Stub, tag: %s in macro!", \
#D, \
T); \
RAW_LOG(ERROR, err_str); \
return; \ return; \
} \ } \
return; \ return; \
...@@ -146,14 +175,24 @@ class FactoryPool { ...@@ -146,14 +175,24 @@ class FactoryPool {
typename std::map<std::string, FactoryBase<B>*>::iterator it = typename std::map<std::string, FactoryBase<B>*>::iterator it =
_pool.find(tag); _pool.find(tag);
if (it != _pool.end()) { if (it != _pool.end()) {
RAW_LOG_ERROR("Insert duplicate with tag: %s", tag.c_str()); char err_str[ERROR_STRING_LEN];
snprintf(err_str,
ERROR_STRING_LEN - 1,
"Insert duplicate with tag: %s",
tag.c_str());
RAW_LOG(ERROR, err_str);
return -1; return -1;
} }
std::pair<typename std::map<std::string, FactoryBase<B>*>::iterator, bool> std::pair<typename std::map<std::string, FactoryBase<B>*>::iterator, bool>
r = _pool.insert(std::make_pair(tag, factory)); r = _pool.insert(std::make_pair(tag, factory));
if (!r.second) { if (!r.second) {
RAW_LOG_ERROR("Failed insert new factory with: %s", tag.c_str()); char err_str[ERROR_STRING_LEN];
snprintf(err_str,
ERROR_STRING_LEN - 1,
"Failed insert new factory with: %s",
tag.c_str());
RAW_LOG(ERROR, err_str);
return -1; return -1;
} }
...@@ -164,9 +203,13 @@ class FactoryPool { ...@@ -164,9 +203,13 @@ class FactoryPool {
typename std::map<std::string, FactoryBase<B>*>::iterator it = typename std::map<std::string, FactoryBase<B>*>::iterator it =
_pool.find(tag); _pool.find(tag);
if (it == _pool.end() || it->second == NULL) { if (it == _pool.end() || it->second == NULL) {
RAW_LOG_ERROR("Not found factory pool, tag: %s, pool size: %u", char err_str[ERROR_STRING_LEN];
tag.c_str(), snprintf(err_str,
_pool.size()); ERROR_STRING_LEN - 1,
"Not found factory pool, tag: %s, pool size: %u",
tag.c_str(),
_pool.size());
RAW_LOG(ERROR, err_str);
return NULL; return NULL;
} }
......
...@@ -37,6 +37,7 @@ message Request { ...@@ -37,6 +37,7 @@ message Request {
repeated FeedInst insts = 1; repeated FeedInst insts = 1;
repeated string fetch_var_names = 2; repeated string fetch_var_names = 2;
optional bool profile_server = 3 [ default = false ]; optional bool profile_server = 3 [ default = false ];
required uint64 log_id = 4 [ default = 0 ];
}; };
message Response { message Response {
......
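The new `log_id` field lets a caller tag each inference request with a trace id that can be correlated with the `(logid=...)` prefixes this commit adds to the server-side workflow and Op logs. Below is a minimal client-side sketch; the `log_id` keyword of `predict` is assumed from the pipeline Op implementation later in this commit, and the config path, feed and fetch names are placeholders.
```python
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client_conf.prototxt")  # placeholder config path
client.connect(["127.0.0.1:9292"])

# log_id travels in the Request proto; server logs can then be grepped
# for "(logid=12345)" to follow this single request across Ops.
fetch_map = client.predict(feed={"x": [0.0] * 13}, fetch=["price"], log_id=12345)
print(fetch_map)
```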
...@@ -4,26 +4,25 @@ ...@@ -4,26 +4,25 @@
## Compilation environment requirements ## Compilation environment requirements
| module | version | | module | version |
| :--------------------------: | :----------------------------------------------------------: | | :--------------------------: | :-------------------------------: |
| OS | CentOS 7 | | OS | CentOS 7 |
| gcc | 4.8.5 and later | | gcc | 4.8.5 and later |
| gcc-c++ | 4.8.5 and later | | gcc-c++ | 4.8.5 and later |
| git | 3.82 and later | | git | 3.82 and later |
| cmake | 3.2.0 and later | | cmake | 3.2.0 and later |
| Python | 2.7.2 and later / 3.6 and later | | Python | 2.7.2 and later / 3.6 and later |
| Go | 1.9.2 and later | | Go | 1.9.2 and later |
| git | 2.17.1 and later | | git | 2.17.1 and later |
| glibc-static | 2.17 | | glibc-static | 2.17 |
| openssl-devel | 1.0.2k | | openssl-devel | 1.0.2k |
| bzip2-devel | 1.0.6 and later | | bzip2-devel | 1.0.6 and later |
| python-devel / python3-devel | 2.7.5 and later / 3.6.8 and later | | python-devel / python3-devel | 2.7.5 and later / 3.6.8 and later |
| sqlite-devel | 3.7.17 and later | | sqlite-devel | 3.7.17 and later |
| patchelf | 0.9 and later | | patchelf | 0.9 and later |
| libXext | 1.3.3 | | libXext | 1.3.3 |
| libSM | 1.2.2 | | libSM | 1.2.2 |
| libXrender | 0.9.10 | | libXrender | 0.9.10 |
| python-whl | numpy>=1.12, <=1.16.4<br/>google>=2.0.3<br/>protobuf>=3.12.2<br/>grpcio-tools>=1.28.1<br/>grpcio>=1.28.1<br/>func-timeout>=4.3.5<br/>pyyaml>=1.3.0<br/>sentencepiece==0.1.92<br>flask>=1.1.2<br>ujson>=2.0.3 |
It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you, see [this document](DOCKER_IMAGES.md). It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you, see [this document](DOCKER_IMAGES.md).
...@@ -62,6 +61,25 @@ pip install -r python/requirements.txt ...@@ -62,6 +61,25 @@ pip install -r python/requirements.txt
If Python3 is used, replace `pip` with `pip3`. If Python3 is used, replace `pip` with `pip3`.
## GOPATH Setting
## Compile Arguments
The default GOPATH is `$HOME/go`, which you can set to other values.
```shell
export GOPATH=$HOME/go
export PATH=$PATH:$GOPATH/bin
```
## Get go packages
```shell
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
go get -u github.com/golang/protobuf/protoc-gen-go
go get -u google.golang.org/grpc
```
## Compile Server ## Compile Server
...@@ -70,7 +88,10 @@ If Python3 is used, replace `pip` with `pip3`. ...@@ -70,7 +88,10 @@ If Python3 is used, replace `pip` with `pip3`.
``` shell ``` shell
mkdir server-build-cpu && cd server-build-cpu mkdir server-build-cpu && cd server-build-cpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON .. cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DSERVER=ON ..
make -j10 make -j10
``` ```
...@@ -80,7 +101,11 @@ you can execute `make install` to put targets under directory `./output`, you ne ...@@ -80,7 +101,11 @@ you can execute `make install` to put targets under directory `./output`, you ne
``` shell ``` shell
mkdir server-build-gpu && cd server-build-gpu mkdir server-build-gpu && cd server-build-gpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON .. cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DSERVER=ON \
-DWITH_GPU=ON ..
make -j10 make -j10
``` ```
...@@ -94,7 +119,10 @@ execute `make install` to put targets under directory `./output` ...@@ -94,7 +119,10 @@ execute `make install` to put targets under directory `./output`
``` shell ``` shell
mkdir client-build && cd client-build mkdir client-build && cd client-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON .. cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DCLIENT=ON ..
make -j10 make -j10
``` ```
...@@ -114,7 +142,7 @@ make ...@@ -114,7 +142,7 @@ make
## Install wheel package ## Install wheel package
Regardless of the client, server or App part, after compiling, install the whl package under `python/dist/`. Regardless of the client, server or App part, after compiling, install the whl package found under `python/dist/` in the corresponding build directory (`server-build-cpu`, `server-build-gpu`, `client-build`, `app-build`).
...@@ -124,6 +152,12 @@ When running the python server, it will check the `SERVING_BIN` environment vari ...@@ -124,6 +152,12 @@ When running the python server, it will check the `SERVING_BIN` environment vari
## Verify
Please use the example under `python/examples` to verify.
## CMake Option Description ## CMake Option Description
| Compile Options | Description | Default | | Compile Options | Description | Default |
......
...@@ -4,26 +4,25 @@ ...@@ -4,26 +4,25 @@
## 编译环境设置 ## 编译环境设置
| 组件 | 版本要求 | | 组件 | 版本要求 |
| :--------------------------: | :----------------------------------------------------------: | | :--------------------------: | :-------------------------------: |
| OS | CentOS 7 | | OS | CentOS 7 |
| gcc | 4.8.5 and later | | gcc | 4.8.5 and later |
| gcc-c++ | 4.8.5 and later | | gcc-c++ | 4.8.5 and later |
| git | 3.82 and later | | git | 3.82 and later |
| cmake | 3.2.0 and later | | cmake | 3.2.0 and later |
| Python | 2.7.2 and later / 3.6 and later | | Python | 2.7.2 and later / 3.6 and later |
| Go | 1.9.2 and later | | Go | 1.9.2 and later |
| git | 2.17.1 and later | | git | 2.17.1 and later |
| glibc-static | 2.17 | | glibc-static | 2.17 |
| openssl-devel | 1.0.2k | | openssl-devel | 1.0.2k |
| bzip2-devel | 1.0.6 and later | | bzip2-devel | 1.0.6 and later |
| python-devel / python3-devel | 2.7.5 and later / 3.6.8 and later | | python-devel / python3-devel | 2.7.5 and later / 3.6.8 and later |
| sqlite-devel | 3.7.17 and later | | sqlite-devel | 3.7.17 and later |
| patchelf | 0.9 | | patchelf | 0.9 |
| libXext | 1.3.3 | | libXext | 1.3.3 |
| libSM | 1.2.2 | | libSM | 1.2.2 |
| libXrender | 0.9.10 | | libXrender | 0.9.10 |
| python-whl | numpy>=1.12, <=1.16.4<br/>google>=2.0.3<br/>protobuf>=3.12.2<br/>grpcio-tools>=1.28.1<br/>grpcio>=1.28.1<br/>func-timeout>=4.3.5<br/>pyyaml>=1.3.0<br/>sentencepiece==0.1.92<br/>flask>=1.1.2<br/>ujson>=2.0.3 |
推荐使用Docker编译,我们已经为您准备好了Paddle Serving编译环境,详见[该文档](DOCKER_IMAGES_CN.md) 推荐使用Docker编译,我们已经为您准备好了Paddle Serving编译环境,详见[该文档](DOCKER_IMAGES_CN.md)
...@@ -62,6 +61,22 @@ pip install -r python/requirements.txt ...@@ -62,6 +61,22 @@ pip install -r python/requirements.txt
如果使用 Python3,请以 `pip3` 替换 `pip` 如果使用 Python3,请以 `pip3` 替换 `pip`
## GOPATH 设置
默认 GOPATH 设置为 `$HOME/go`,您也可以设置为其他值。
```shell
export GOPATH=$HOME/go
export PATH=$PATH:$GOPATH/bin
```
## 获取 Go packages
```shell
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
go get -u github.com/golang/protobuf/protoc-gen-go
go get -u google.golang.org/grpc
```
## 编译Server部分 ## 编译Server部分
...@@ -114,7 +129,7 @@ make ...@@ -114,7 +129,7 @@ make
## 安装wheel包 ## 安装wheel包
无论是Client端,Server端还是App部分,编译完成后,安装`python/dist/`的whl包即可。 无论是Client端,Server端还是App部分,编译完成后,安装编译过程临时目录(`server-build-cpu``server-build-gpu``client-build``app-build`)下的`python/dist/`的whl包即可。
...@@ -124,6 +139,12 @@ make ...@@ -124,6 +139,12 @@ make
## 如何验证
请使用 `python/examples` 下的例子进行验证。
## CMake选项说明 ## CMake选项说明
| 编译选项 | 说明 | 默认 | | 编译选项 | 说明 | 默认 |
......
# FAQ # FAQ
- Q如何调整RPC服务的等待时间,避免超时? - Q: 如何调整RPC服务的等待时间,避免超时?
A使用set_rpc_timeout_ms设置更长的等待时间,单位为毫秒,默认时间为20秒。 A: 使用set_rpc_timeout_ms设置更长的等待时间,单位为毫秒,默认时间为20秒。
示例: 示例:
``` ```
...@@ -15,4 +15,25 @@ ...@@ -15,4 +15,25 @@
``` ```
- Q: 如何使用自己编译的Paddle Serving进行预测? - Q: 如何使用自己编译的Paddle Serving进行预测?
A:通过pip命令安装自己编译出的whl包,并设置SERVING_BIN环境变量为编译出的serving二进制文件路径。
A: 通过pip命令安装自己编译出的whl包,并设置SERVING_BIN环境变量为编译出的serving二进制文件路径。
- Q: 执行GPU预测时遇到InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
A: 将显卡驱动对应的libcuda.so的目录添加到LD_LIBRARY_PATH环境变量中
- Q: 执行GPU预测时遇到ExternalError: Cudnn error, CUDNN_STATUS_BAD_PARAM at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/operators/batch_norm_op.cu:198)
A: 将cudnn的lib64路径添加到LD_LIBRARY_PATH,安装自pypi的Paddle Serving中post9版使用的是cudnn 7.3,post10使用的是cudnn 7.5。如果是使用自己编译的Paddle Serving,可以在log/serving.INFO日志文件中查看对应的cudnn版本。
- Q: 执行GPU预测时遇到Error: Failed to find dynamic library: libcublas.so
A: 将cuda的lib64路径添加到LD_LIBRARY_PATH, post9版本的Paddle Serving使用的是cuda 9.0,post10版本使用的cuda 10.0。
- Q: 部署和预测中的日志信息在哪里查看?
- A: server端的日志分为两部分,一部分打印到标准输出,一部分打印到启动服务时的目录下的log/serving.INFO文件中。
client端的日志直接打印到标准输出。
通过在部署服务之前 'export GLOG_v=3'可以输出更为详细的日志信息。
...@@ -2,6 +2,20 @@ ...@@ -2,6 +2,20 @@
([简体中文](./INFERENCE_TO_SERVING_CN.md)|English) ([简体中文](./INFERENCE_TO_SERVING_CN.md)|English)
We should know the following terms before converting an inference model to a Serving model.
**inference_model_dir**: the directory of the Paddle inference model
**serving_server_dir**: the directory of the server side configuration
**serving_client_dir**: the directory of the client side configuration
**model_filename**: the model description file, whose default name is `__model__`; if your model uses a different name, set `model_filename` explicitly
**params_filename**: by default, `save_inference_model` saves every Variable as a separate file. If the parameters of your inference model are combined into one file, set `params_filename` explicitly
## Example ## Example
``` python ``` python
...@@ -12,3 +26,11 @@ serving_server_dir = "serving_server_dir" ...@@ -12,3 +26,11 @@ serving_server_dir = "serving_server_dir"
feed_var_names, fetch_var_names = inference_model_to_serving( feed_var_names, fetch_var_names = inference_model_to_serving(
inference_model_dir, serving_client_dir, serving_server_dir) inference_model_dir, serving_client_dir, serving_server_dir)
``` ```
If your model description and parameters were each saved as a single named file (for example `model` and `params`), pass those names explicitly as follows.
```
feed_var_names, fetch_var_names = inference_model_to_serving(
inference_model_dir, serving_client_dir, serving_server_dir,
model_filename="model", params_filename="params")
```
...@@ -4,6 +4,19 @@ ...@@ -4,6 +4,19 @@
## 示例 ## 示例
在下列代码中,我们需要知道以下信息。
**模型文件夹**:这个文件夹就是Paddle的inference_model所在的文件夹
**serving_server_dir**: 这个文件夹是inference_model转换成Serving模型后,服务端配置的保存路径
**serving_client_dir**: 这个文件夹是inference_model转换成Serving模型后,客户端配置的保存路径
**模型描述文件**: 模型描述文件也就是`model_filename`默认值为`__model__`,是一个pb2文本文件,如果是别的文件名需要显式指定
**模型参数文件**: 在`save_inference_model`阶段,默认方式是每一个Variable保存一个二进制文件,如果是这种情况就不需要做指定。如果所有参数用压缩成一个文件的形式保存,则需要显式指定`params_filename`
``` python ``` python
from paddle_serving_client.io import inference_model_to_serving from paddle_serving_client.io import inference_model_to_serving
inference_model_dir = "your_inference_model" inference_model_dir = "your_inference_model"
...@@ -12,3 +25,9 @@ serving_server_dir = "serving_server_dir" ...@@ -12,3 +25,9 @@ serving_server_dir = "serving_server_dir"
feed_var_names, fetch_var_names = inference_model_to_serving( feed_var_names, fetch_var_names = inference_model_to_serving(
inference_model_dir, serving_client_dir, serving_server_dir) inference_model_dir, serving_client_dir, serving_server_dir)
``` ```
如果模型中有模型描述文件`model_filename` 和 模型参数文件`params_filename`,那么请用
```
feed_var_names, fetch_var_names = inference_model_to_serving(
inference_model_dir, serving_client_dir, serving_server_dir,
model_filename="model", params_filename="params")
```
...@@ -3,51 +3,51 @@ ...@@ -3,51 +3,51 @@
## CPU server ## CPU server
### Python 3 ### Python 3
``` ```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.2-py3-none-any.whl https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py3-none-any.whl
``` ```
### Python 2 ### Python 2
``` ```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.2-py2-none-any.whl https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py2-none-any.whl
``` ```
## GPU server ## GPU server
### Python 3 ### Python 3
``` ```
#cuda 9.0 #cuda 9.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post9-py3-none-any.whl https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py3-none-any.whl
#cuda 10.0 #cuda 10.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post10-py3-none-any.whl https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py3-none-any.whl
``` ```
### Python 2 ### Python 2
``` ```
#cuda 9.0 #cuda 9.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post9-py2-none-any.whl https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py2-none-any.whl
#cuda 10.0 #cuda 10.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post10-py2-none-any.whl https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py2-none-any.whl
``` ```
## Client ## Client
### Python 3.7 ### Python 3.7
``` ```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp37-none-any.whl https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp37-none-any.whl
``` ```
### Python 3.6 ### Python 3.6
``` ```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp36-none-any.whl https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp36-none-any.whl
``` ```
### Python 2.7 ### Python 2.7
``` ```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp27-none-any.whl https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp27-none-any.whl
``` ```
## App ## App
### Python 3 ### Python 3
``` ```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.2-py3-none-any.whl https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py3-none-any.whl
``` ```
### Python 2 ### Python 2
``` ```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.2-py2-none-any.whl https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py2-none-any.whl
``` ```
# How to develop a new Web service? # How to develop a new Web service?
([简体中文](NEW_WEB_SERVICE_CN.md)|English) ([简体中文](NEW_WEB_SERVICE_CN.md)|English)
This document will take the image classification service based on the Imagenet data set as an example to introduce how to develop a new web service. The complete code can be visited at [here](../python/examples/imagenet/resnet50_web_service.py). This document will take Uci service as an example to introduce how to develop a new Web Service. You can check out the complete code [here](../python/examples/pipeline/simple_web_service/web_service.py).
## WebService base class ## Op base class
In some services, a single model may not meet business needs, requiring multiple models to be concatenated or parallel to complete the entire service. We call a single model operation Op and provide a simple set of interfaces to implement the complex logic of Op concatenation or parallelism.
Paddle Serving implements the [WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L23) base class. You need to override its `preprocess` and `postprocess` method. The default implementation is as follows: Data is passed between Ops as dictionaries; an Op can be started as threads or processes, and options such as the number of concurrent instances of each Op are configurable.
Typically, you need to inherit the Op base class and override its `init_op`, `preprocess` and `postprocess` methods, which are implemented by default as follows:
```python ```python
class WebService(object): class Op(object):
def init_op(self):
def preprocess(self, feed={}, fetch=[]): pass
return feed, fetch def preprocess(self, input_dicts):
def postprocess(self, feed={}, fetch=[], fetch_map=None): # multiple previous Op
return fetch_map if len(input_dicts) != 1:
_LOGGER.critical(
"Failed to run preprocess: this Op has multiple previous "
"inputs. Please override this func.")
os._exit(-1)
(_, input_dict), = input_dicts.items()
return input_dict
def postprocess(self, input_dicts, fetch_dict):
return fetch_dict
``` ```
### init_op
This method is used to load user-defined resources such as dictionaries. A separator is loaded in the [UciOp](../python/examples/pipeline/simple_web_service/web_service.py).
**Note**: If Op is launched in threaded mode, different threads of the same Op execute `init_op` only once and share `init_op` loaded resources when Op is multi-concurrent.
### preprocess ### preprocess
The preprocess method has two input parameters, `feed` and `fetch`. For an HTTP request `request`: This method is used to preprocess the data before model prediction. It has an `input_dicts` parameter, `input_dicts` is a dictionary, key is the `name` of the previous Op, and value is the data transferred from the corresponding previous op (the data is also in dictionary format).
- The value of `feed` is the feed part `request.json["feed"]` in the request data The `preprocess` method needs to process the data into a ndarray dictionary (key is the feed variable name, and value is the corresponding ndarray value). Op will take the return value as the input of the model prediction and pass the output to the `postprocess` method.
- The value of `fetch` is the fetch part `request.json["fetch"]` in the request data
The return values are the feed and fetch values used in the prediction. **Note**: if Op does not have a model configuration file, the return value of `preprocess` will be directly passed to `postprocess`.
### postprocess ### postprocess
The postprocess method has three input parameters, `feed`, `fetch` and `fetch_map`: This method is used for data post-processing after model prediction. It has two parameters, `input_dicts` and `fetch_dict`.
Where the `input_dicts` parameter is consistent with the parameter in `preprocess` method, and `fetch_dict` is the output of the model prediction (key is the name of the fetch variable, and value is the corresponding ndarray value). Op will take the return value of `postprocess` as the input of subsequent Op `preprocess`.
- The value of `feed` is the feed part `request.json["feed"]` in the request data **Note**: if Op does not have a model configuration file, `fetch_dict` will be the return value of `preprocess`.
- The value of `fetch` is the fetch part `request.json["fetch"]` in the request data
- The value of `fetch_map` is the model output value.
The return value will be processed as `{"reslut": fetch_map}` as the return of the HTTP request.
## Develop ImageService class
Here is the op of the UCI example:
```python
class UciOp(Op):
def init_op(self):
self.separator = ","
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
x_value = input_dict["x"]
if isinstance(x_value, (str, unicode)):
input_dict["x"] = np.array(
[float(x.strip()) for x in x_value.split(self.separator)])
return input_dict
def postprocess(self, input_dicts, fetch_dict):
fetch_dict["price"] = str(fetch_dict["price"][0][0])
return fetch_dict
```
## WebService base class
Paddle Serving implements the [WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L23) base class. You need to override its `get_pipeline_response` method to define the topological relationship between Ops. The default implementation is as follows:
```python ```python
class ImageService(WebService): class WebService(object):
def get_pipeline_response(self, read_op):
def preprocess(self, feed={}, fetch=[]): return None
reader = ImageReader() ```
feed_batch = []
for ins in feed: Where `read_op` serves as the entry point of the topology map of the whole service (that is, the first Op defined by the user takes `read_op` as its predecessor).
if "image" not in ins:
raise ("feed data error!") For single Op service (single model), take Uci service as an example (there is only one Uci prediction model in the whole service):
sample = base64.b64decode(ins["image"])
img = reader.process_image(sample) ```python
feed_batch.append({"image": img}) class UciService(WebService):
return feed_batch, fetch def get_pipeline_response(self, read_op):
uci_op = UciOp(name="uci", input_ops=[read_op])
return uci_op
```
For multiple Op services (multiple models), take Ocr service as an example (the whole service is completed in series by Det model and Rec model):
```python
class OcrService(WebService):
def get_pipeline_response(self, read_op):
det_op = DetOp(name="det", input_ops=[read_op])
rec_op = RecOp(name="rec", input_ops=[det_op])
return rec_op
```
WebService objects need to load a yaml configuration file through the `prepare_pipeline_config` to configure each Op and the entire service. The simplest configuration file is as follows (Uci example):
```yaml
http_port: 18080
op:
uci:
local_service_conf:
model_config: uci_housing_model # path
```
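Once the Op and the WebService subclass are defined, starting the service only requires loading this configuration. A minimal launch sketch, assuming the snippet above is saved as `config.yml` and reusing the `UciService` class defined earlier (method names follow the simple_web_service example):
```python
uci_service = UciService(name="uci")
uci_service.prepare_pipeline_config("config.yml")
uci_service.run_service()
```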
All field names of yaml file are as follows:
```yaml
rpc_port: 18080 # gRPC port
build_dag_each_worker: false # Whether to use process server or not. The default is false
worker_num: 1 # gRPC thread pool size (the number of processes in the process version servicer). The default is 1
http_port: 0 # HTTP service port. Do not start HTTP service when the value is less or equals 0. The default value is 0.
dag:
is_thread_op: true # Whether to use the thread version of OP. The default is true
client_type: brpc # Use brpc or grpc client. The default is brpc
retry: 1 # The number of times DAG executor retries after failure. The default value is 1, that is, no retrying
use_profile: false # Whether to print the log on the server side. The default is false
tracer:
interval_s: -1 # Monitoring time interval of Tracer (in seconds). Do not start monitoring when the value is less than 1. The default value is -1
op:
<op_name>: # op name, corresponding to the one defined in the program
concurrency: 1 # op concurrency number, the default is 1
timeout: -1 # predict timeout in milliseconds. The default value is -1, that is, no timeout
retry: 1 # timeout retransmissions. The default value is 1, that is, do not try again
batch_size: 1 # If this field is set, Op will merge multiple request outputs into a single batch
auto_batching_timeout: -1 # auto-batching timeout in milliseconds. The default value is -1, that is, no timeout
local_service_conf:
model_config: # the path of the corresponding model file. There is no default value(None). If this item is not configured, the model file will not be loaded.
workdir: "" # working directory of corresponding model
thread_num: 2 # the corresponding model is started with thread_num threads
devices: "" # on which device does the model launched. You can specify the GPU card number(such as "0,1,2"), which is CPU by default
mem_optim: true # mem optimization option, the default is true
ir_optim: false # ir optimization option, the default is false
``` ```
For the above `ImageService`, only the `preprocess` method is rewritten to process the image data in Base64 format into the data format required by prediction. All fields of Op can be defined when Op is created in the program (which will override yaml fields).
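For instance, a field configured in yaml can be overridden when the Op is constructed inside `get_pipeline_response`. A small sketch reusing the `UciOp` from this document; the keyword argument names are assumed to match the yaml fields listed above:
```python
class UciService(WebService):
    def get_pipeline_response(self, read_op):
        # concurrency/timeout given here take precedence over op.uci.* in the yaml
        uci_op = UciOp(name="uci",
                       input_ops=[read_op],
                       concurrency=4,
                       timeout=3000)  # per-request timeout in ms
        return uci_op
```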
# 如何开发一个新的Web Service? # 如何开发一个新的Web Service?
(简体中文|[English](NEW_WEB_SERVICE.md)) (简体中文|[English](NEW_WEB_SERVICE.md))
本文档将以Imagenet图像分类服务为例,来介绍如何开发一个新的Web Service。您可以在[这里](../python/examples/imagenet/resnet50_web_service.py)查阅完整的代码。 本文档将以 Uci 房价预测服务为例,来介绍如何开发一个新的Web Service。您可以在[这里](../python/examples/pipeline/simple_web_service/web_service.py)查阅完整的代码。
## Op 基类
在一些服务中,单个模型可能无法满足需求,需要多个模型串联或并联来完成整个服务。我们将单个模型操作称为 Op,并提供了一套简单的接口来实现 Op 串联或并联的复杂逻辑。
## WebService基类 Op 间数据是以字典形式进行传递的,Op 可以以线程或进程方式启动,同时可以对 Op 的并发数等进行配置。
Paddle Serving实现了[WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L23)基类,您需要重写它的`preprocess`方法和`postprocess`方法,默认实现如下: 通常情况下,您需要继承 Op 基类,重写它的 `init_op``preprocess``postprocess` 方法,默认实现如下:
```python ```python
class WebService(object): class Op(object):
def init_op(self):
def preprocess(self, feed={}, fetch=[]): pass
return feed, fetch def preprocess(self, input_dicts):
def postprocess(self, feed={}, fetch=[], fetch_map=None): # multiple previous Op
return fetch_map if len(input_dicts) != 1:
_LOGGER.critical(
"Failed to run preprocess: this Op has multiple previous "
"inputs. Please override this func.")
os._exit(-1)
(_, input_dict), = input_dicts.items()
return input_dict
def postprocess(self, input_dicts, fetch_dict):
return fetch_dict
``` ```
### preprocess方法 ### init_op 方法
该方法用于加载用户自定义资源(如字典等),在 [UciOp](../python/examples/pipeline/simple_web_service/web_service.py) 中加载了一个分隔符。
**注意**:如果 Op 是以线程模式加载的,那么在 Op 多并发时,同种 Op 的不同线程只执行一次 `init_op`,且共用 `init_op` 加载的资源。
### preprocess 方法
该方法用于模型预测前对数据的预处理,它有一个 `input_dicts` 参数,`input_dicts` 是一个字典,key 为前继 Op 的 `name`,value 为对应前继 Op 传递过来的数据(数据同样是字典格式)。
`preprocess` 方法需要将数据处理成 ndarray 字典(key 为 feed 变量名,value 为对应的 ndarray 值),Op 会将该返回值作为模型预测的输入,并将输出传递给 `postprocess` 方法。
preprocess方法有两个输入参数,`feed``fetch`。对于一个HTTP请求`request` **注意**:如果 Op 没有配置模型,则 `preprocess` 的返回值会直接传递给 `postprocess`
- `feed`的值为请求数据中的feed部分`request.json["feed"]` ### postprocess 方法
- `fetch`的值为请求数据中的fetch部分`request.json["fetch"]`
返回值分别是预测过程中用到的feed和fetch值 该方法用于模型预测后对数据的后处理,它有两个参数,`input_dicts``fetch_dict`
### postprocess方法 其中,`input_dicts``preprocess` 的参数相同,`fetch_dict` 为模型预测的输出(key 为 fetch 变量名,value 为对应的 ndarray 值)。Op 会将 `postprocess` 的返回值作为后继 Op `preprocess` 的输入。
postprocess方法有三个输入参数,`feed``fetch``fetch_map` **注意**:如果 Op 没有配置模型,则 `fetch_dict` 将为 `preprocess` 的返回值。
- `feed`的值为请求数据中的feed部分`request.json["feed"]`
- `fetch`的值为请求数据中的fetch部分`request.json["fetch"]`
- `fetch_map`的值为fetch到的模型输出值
返回值将会被处理成`{"reslut": fetch_map}`作为HTTP请求的返回。
## 开发ImageService类 下面是 Uci 例子的 Op:
```python ```python
class ImageService(WebService): class UciOp(Op):
def init_op(self):
def preprocess(self, feed={}, fetch=[]): self.separator = ","
reader = ImageReader()
feed_batch = [] def preprocess(self, input_dicts):
for ins in feed: (_, input_dict), = input_dicts.items()
if "image" not in ins: x_value = input_dict["x"]
raise ("feed data error!") if isinstance(x_value, (str, unicode)):
sample = base64.b64decode(ins["image"]) input_dict["x"] = np.array(
img = reader.process_image(sample) [float(x.strip()) for x in x_value.split(self.separator)])
feed_batch.append({"image": img}) return input_dict
return feed_batch, fetch
def postprocess(self, input_dicts, fetch_dict):
fetch_dict["price"] = str(fetch_dict["price"][0][0])
return fetch_dict
```
## WebService 基类
Paddle Serving 实现了 [WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L28) 基类,您需要重写它的 `get_pipeline_response` 方法来定义 Op 间的拓扑关系,并返回作为 Response 的 Op,默认实现如下:
```python
class WebService(object):
def get_pipeline_response(self, read_op):
return None
```
其中,`read_op` 作为整个服务拓扑图的入口(即用户自定义的第一个 Op 的前继为 `read_op`)。
对于单 Op 服务(单模型),以 Uci 服务为例(整个服务中只有一个 Uci 房价预测模型):
```python
class UciService(WebService):
def get_pipeline_response(self, read_op):
uci_op = UciOp(name="uci", input_ops=[read_op])
return uci_op
```
对于多 Op 服务(多模型),以 Ocr 服务为例(整个服务由 Det 模型和 Rec 模型串联完成):
```python
class OcrService(WebService):
def get_pipeline_response(self, read_op):
det_op = DetOp(name="det", input_ops=[read_op])
rec_op = RecOp(name="rec", input_ops=[det_op])
return rec_op
```
WebService 对象需要通过 `prepare_pipeline_config` 加载一个 yaml 配置文件,用来对各个 Op 以及整个服务进行配置,最简单的配置文件如下(Uci 例子):
```yaml
http_port: 18080
op:
uci:
local_service_conf:
model_config: uci_housing_model # 路径
```
yaml 文件的所有字段名详见下面:
```yaml
rpc_port: 18080 # gRPC端口号
build_dag_each_worker: false # 是否使用进程版 Servicer,默认为 false
worker_num: 1 # gRPC线程池大小(进程版 Servicer 中为进程数),默认为 1
http_port: 0 # HTTP 服务的端口号,若该值小于或等于 0 则不开启 HTTP 服务,默认为 0
dag:
is_thread_op: true # 是否使用线程版Op,默认为 true
client_type: brpc # 使用 brpc 或 grpc client,默认为 brpc
retry: 1 # DAG Executor 在失败后重试次数,默认为 1,即不重试
use_profile: false # 是否在 Server 端打印日志,默认为 false
tracer:
interval_s: -1 # Tracer 监控的时间间隔,单位为秒。当该值小于 1 时不启动监控,默认为 -1
op:
<op_name>: # op 名,与程序中定义的相对应
concurrency: 1 # op 并发数,默认为 1
timeout: -1 # 预测超时时间,单位为毫秒。默认为 -1 即不超时
retry: 1 # 超时重发次数。默认为 1 即不重试
batch_size: 1 # auto-batching 中的 batch_size,若设置该字段则 Op 会将多个请求输出合并为一个 batch
auto_batching_timeout: -1 # auto-batching 超时时间,单位为毫秒。默认为 -1 即不超时
local_service_conf:
model_config: # 对应模型文件的路径,无默认值(None)。若不配置该项则不会加载模型文件。
workdir: "" # 对应模型的工作目录
thread_num: 2 # 对应模型用几个线程启动
devices: "" # 模型启动在哪个设备上,可以指定 gpu 卡号(如 "0,1,2"),默认为 cpu
mem_optim: true # mem 优化选项,默认为 true
ir_optim: false # ir 优化选项,默认为 false
``` ```
对于上述的`ImageService`,只重写了前处理方法,将base64格式的图片数据处理成模型预测需要的数据格式 其中,Op 的所有字段均可以在程序中创建 Op 时定义(会覆盖 yaml 的字段)
...@@ -33,6 +33,7 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s ...@@ -33,6 +33,7 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s
- The default function of a single OP is to access a single Paddle Serving Service based on the input Channel data and put the result into the output Channel. - The default function of a single OP is to access a single Paddle Serving Service based on the input Channel data and put the result into the output Channel.
- OP supports user customization, including preprocess, process, postprocess functions that can be inherited and implemented by the user. - OP supports user customization, including preprocess, process, postprocess functions that can be inherited and implemented by the user.
- OP can set the number of concurrencies to increase the number of concurrencies processed. - OP can set the number of concurrencies to increase the number of concurrencies processed.
- OP can obtain data from multiple different RPC requests for Auto-Batching.
- OP can be started by a thread or process. - OP can be started by a thread or process.
### Channel Design ### Channel Design
...@@ -46,6 +47,7 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s ...@@ -46,6 +47,7 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s
</center> </center>
### Extreme Case Consideration ### Extreme Case Consideration
- Request timeout - Request timeout
...@@ -59,9 +61,9 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s ...@@ -59,9 +61,9 @@ The graph execution engine consists of OPs and Channels, and the connected OPs s
- Whether input buffers and output buffers in Channel will increase indefinitely - Whether input buffers and output buffers in Channel will increase indefinitely
- It will not increase indefinitely. The input to the entire graph execution engine is placed inside a Channel's internal queue, directly acting as a traffic control buffer queue for the entire service. - It will not increase indefinitely. The input to the entire graph execution engine is placed inside a Channel's internal queue, directly acting as a traffic control buffer queue for the entire service.
- For input buffer, adjust the number of concurrencies of OP1 and OP2 according to the amount of computation, so that the number of input buffers from each input OP is relatively balanced. - For input buffer, adjust the number of concurrencies of OP1 and OP2 according to the amount of computation, so that the number of input buffers from each input OP is relatively balanced. (The length of the input buffer depends on the speed at which each item in the internal queue is ready)
- For output buffer, you can use a similar process as input buffer, which adjusts the concurrency of OP3 and OP4 to control the buffer length of output buffer. - For output buffer, you can use a similar process as input buffer, which adjusts the concurrency of OP3 and OP4 to control the buffer length of output buffer. (The length of the output buffer depends on the speed at which downstream OPs obtain data from the output buffer)
- Note: The length of the input buffer depends on the speed at which each item in the internal queue is ready, and the length of the output buffer depends on the speed at which downstream OPs obtain data from the output buffer. - The amount of data in the Channel will not exceed `worker_num` of gRPC, that is, it will not exceed the thread pool size.
## Detailed Design ## Detailed Design
...@@ -79,31 +81,36 @@ def __init__(name=None, ...@@ -79,31 +81,36 @@ def __init__(name=None,
client_config=None, client_config=None,
concurrency=1, concurrency=1,
timeout=-1, timeout=-1,
retry=1) retry=1,
batch_size=1,
auto_batching_timeout=None)
``` ```
The meaning of each parameter is as follows: The meaning of each parameter is as follows:
| Parameter | Meaning | | Parameter | Meaning |
| :--------------: | :----------------------------------------------------------: | | :-------------------: | :----------------------------------------------------------: |
| name | (str) String used to identify the OP type, which must be globally unique. | | name | (str) String used to identify the OP type, which must be globally unique. |
| input_ops | (list) A list of all previous OPs of the current Op. | | input_ops | (list) A list of all previous OPs of the current Op. |
| server_endpoints | (list) List of endpoints for remote Paddle Serving Service. If this parameter is not set, the OP will not access the remote Paddle Serving Service, that is, the process operation will not be performed. | | server_endpoints | (list) List of endpoints for remote Paddle Serving Service. If this parameter is not set, the OP will not access the remote Paddle Serving Service, that is, the process operation will not be performed. |
| fetch_list | (list) List of fetch variable names for remote Paddle Serving Service. | | fetch_list | (list) List of fetch variable names for remote Paddle Serving Service. |
| client_config | (str) The path of the client configuration file corresponding to the Paddle Serving Service. | | client_config | (str) The path of the client configuration file corresponding to the Paddle Serving Service. |
| concurrency | (int) The number of concurrent OPs. | | concurrency | (int) The number of concurrent OPs. |
| timeout | (int) The timeout time of the process operation, in seconds. If the value is less than zero, no timeout is considered. | | timeout | (int) The timeout time of the process operation, in ms. If the value is less than zero, no timeout is considered. |
| retry | (int) Timeout number of retries. When the value is 1, no retries are made. | | retry | (int) Timeout number of retries. When the value is 1, no retries are made. |
| batch_size | (int) The expected batch_size of Auto-Batching, since building batches may time out, the actual batch_size may be less than the set value. |
| auto_batching_timeout | (float) Timeout for building batches of Auto-Batching (the unit is ms). |
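To make the table concrete, the sketch below constructs an Op that calls a remote Paddle Serving Service with Auto-Batching enabled. The endpoint, fetch name and client config path are placeholders, and the import path and `RequestOp` defaults are assumptions based on the examples in this repository:
```python
from paddle_serving_server.pipeline import Op, RequestOp  # assumed import path

read_op = RequestOp()  # DAG entry Op, constructor defaults assumed
cnn_op = Op(name="cnn",
            input_ops=[read_op],
            server_endpoints=["127.0.0.1:9292"],
            fetch_list=["prediction"],
            client_config="serving_client_conf.prototxt",
            concurrency=2,
            timeout=-1,                   # no per-predict timeout
            retry=1,
            batch_size=8,                 # preferred Auto-Batching batch size
            auto_batching_timeout=200.0)  # stop waiting for a full batch after 200 ms
```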
#### 2. General OP Secondary Development Interface #### 2. General OP Secondary Development Interface
| Interface or Variable | Explain | | Interface or Variable | Explain |
| :--------------------------------------------: | :----------------------------------------------------------: | | :----------------------------------------------: | :----------------------------------------------------------: |
| def preprocess(self, input_dicts) | Process the data obtained from the channel, and the processed data will be used as the input of the **process** function. | | def preprocess(self, input_dicts) | Process the data obtained from the channel, and the processed data will be used as the input of the **process** function. (This function handles a **sample**) |
| def process(self, feed_dict) | The RPC prediction process is based on the Paddle Serving Client, and the processed data will be used as the input of the **postprocess** function. | | def process(self, feed_dict_list, typical_logid) | The RPC prediction process is based on the Paddle Serving Client, and the processed data will be used as the input of the **postprocess** function. (This function handles a **batch**) |
| def postprocess(self, input_dicts, fetch_dict) | After processing the prediction results, the processed data will be put into the subsequent Channel to be obtained by the subsequent OP. | | def postprocess(self, input_dicts, fetch_dict) | After processing the prediction results, the processed data will be put into the subsequent Channel to be obtained by the subsequent OP. (This function handles a **sample**) |
| def init_op(self) | Used to load resources (such as word dictionary). | | def init_op(self) | Used to load resources (such as word dictionary). |
| self.concurrency_idx | Concurrency index of current thread / process (different kinds of OP are calculated separately). | | self.concurrency_idx | Concurrency index of current process(not thread) (different kinds of OP are calculated separately). |
In a running cycle, OP will execute three operations: preprocess, process, and postprocess (when the `server_endpoints` parameter is not set, the process operation is not executed). Users can rewrite these three functions. The default implementation is as follows: In a running cycle, OP will execute three operations: preprocess, process, and postprocess (when the `server_endpoints` parameter is not set, the process operation is not executed). Users can rewrite these three functions. The default implementation is as follows:
...@@ -117,24 +124,28 @@ def preprocess(self, input_dicts): ...@@ -117,24 +124,28 @@ def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items() (_, input_dict), = input_dicts.items()
return input_dict return input_dict
def process(self, feed_dict): def process(self, feed_dict_list, typical_logid):
err, err_info = ChannelData.check_npdata(feed_dict) err, err_info = ChannelData.check_batch_npdata(feed_dict_list)
if err != 0: if err != 0:
raise NotImplementedError( raise NotImplementedError(
"{} Please override preprocess func.".format(err_info)) "{} Please override preprocess func.".format(err_info))
call_result = self.client.predict( call_result = self.client.predict(
feed=feed_dict, fetch=self._fetch_names) feed=feed_dict_list, fetch=self._fetch_names, log_id=typical_logid)
if isinstance(self.client, MultiLangClient):
if call_result is None or call_result["serving_status_code"] != 0:
return None
call_result.pop("serving_status_code")
return call_result return call_result
def postprocess(self, input_dicts, fetch_dict): def postprocess(self, input_dicts, fetch_dict):
return fetch_dict return fetch_dict
``` ```
The parameter of **preprocess** is the data `input_dicts` in the previous Channel. This variable is a dictionary with the name of the previous OP as key and the output of the corresponding OP as value. The parameter of **preprocess** is the data `input_dicts` in the previous Channel. This variable (as a **sample**) is a dictionary with the name of the previous OP as key and the output of the corresponding OP as value.
The parameter of **process** is the input variable `fetch_dict` (the return value of the preprocess function) of the Paddle Serving Client prediction interface. This variable is a dictionary with feed_name as the key and the data in the ndarray format as the value. The parameter of **process** is the input variable `feed_dict_list` (a list of the return values of the preprocess function) of the Paddle Serving Client prediction interface. This variable (as a **batch**) is a list of dictionaries with feed_name as the key and the data in the ndarray format as the value. `typical_logid` is used as the logid that is passed through to PaddleServingService.
The parameters of **postprocess** are `input_dicts` and `fetch_dict`. `input_dicts` is consistent with the parameter of preprocess, and `fetch_dict` is the return value of the process function (if process is not executed, this value is the return value of preprocess). The parameters of **postprocess** are `input_dicts` and `fetch_dict`. `input_dicts` is consistent with the parameter of preprocess, and `fetch_dict` (as a **sample**) is a sample of the return batch of the process function (if process is not executed, this value is the return value of preprocess).
Users can also rewrite the **init_op** function to load some custom resources (such as word dictionary). The default implementation is as follows: Users can also rewrite the **init_op** function to load some custom resources (such as word dictionary). The default implementation is as follows:
...@@ -143,7 +154,7 @@ def init_op(self): ...@@ -143,7 +154,7 @@ def init_op(self):
pass pass
``` ```
It should be noted that in the threaded version of OP, each OP will only call this function once, so the loaded resources must be thread safe. It should be **noted** that in the threaded version of OP, each OP will only call this function once, so the loaded resources must be thread safe.
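A typical `init_op` therefore only builds read-only structures, for example a vocabulary loaded from disk; the file name below is illustrative:
```python
class ExampleOp(Op):
    def init_op(self):
        # Executed once per Op; in thread mode every concurrent thread of this Op
        # shares self.vocab afterwards, so it must not be mutated during serving.
        self.vocab = {}
        with open("vocab.txt") as f:  # hypothetical resource file
            for idx, line in enumerate(f):
                self.vocab[line.strip()] = idx
```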
#### 3. RequestOp Definition #### 3. RequestOp Definition
...@@ -240,14 +251,17 @@ server.run_server() ...@@ -240,14 +251,17 @@ server.run_server()
Where `response_op` is the responseop mentioned above, PipelineServer will initialize Channels according to the topology relationship of each OP and build the calculation graph. `config_yml_path` is the configuration file of PipelineServer. The example file is as follows: Where `response_op` is the responseop mentioned above, PipelineServer will initialize Channels according to the topology relationship of each OP and build the calculation graph. `config_yml_path` is the configuration file of PipelineServer. The example file is as follows:
```yaml ```yaml
port: 18080 # gRPC port rpc_port: 18080 # gRPC port
worker_num: 1 # gRPC thread pool size (the number of processes in the process version servicer). The default is 1 worker_num: 1 # gRPC thread pool size (the number of processes in the process version servicer). The default is 1
build_dag_each_worker: false # Whether to use process server or not. The default is false build_dag_each_worker: false # Whether to use process server or not. The default is false
http_port: 0 # HTTP service port. Do not start HTTP service when the value is less or equals 0. The default value is 0.
dag: dag:
is_thread_op: true # Whether to use the thread version of OP. The default is true is_thread_op: true # Whether to use the thread version of OP. The default is true
client_type: brpc # Use brpc or grpc client. The default is brpc client_type: brpc # Use brpc or grpc client. The default is brpc
retry: 1 # The number of times DAG executor retries after failure. The default value is 1, that is, no retrying retry: 1 # The number of times DAG executor retries after failure. The default value is 1, that is, no retrying
use_profile: false # Whether to print the log on the server side. The default is false use_profile: false # Whether to print the log on the server side. The default is false
tracer:
interval_s: 600 # Monitoring time interval of Tracer (in seconds). Do not start monitoring when the value is less than 1. The default value is -1
``` ```
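For orientation, the sketch below shows how such a configuration file is typically consumed when starting the server. It assumes the YAML above has been saved as `config.yml` and that `response_op` has been built as described in the previous section.
```python
# Minimal sketch, assuming response_op is the ResponseOp of your DAG
# and the YAML above is stored in config.yml.
from paddle_serving_server.pipeline import PipelineServer

server = PipelineServer()
server.set_response_op(response_op)  # Channels/DAG are derived from this OP's topology
server.prepare_server("config.yml")  # reads rpc_port, worker_num, dag, tracer ...
server.run_server()
```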
...@@ -272,6 +286,8 @@ python -m paddle_serving_server.serve --model imdb_cnn_model --port 9292 &> cnn. ...@@ -272,6 +286,8 @@ python -m paddle_serving_server.serve --model imdb_cnn_model --port 9292 &> cnn.
python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 &> bow.log & python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 &> bow.log &
``` ```
PipelineServing also supports local automatic startup of PaddleServingService. Please refer to the example `python/examples/pipeline/ocr`.
### Start PipelineServer ### Start PipelineServer
Run the following code Run the following code
...@@ -282,14 +298,8 @@ from paddle_serving_server.pipeline import PipelineServer ...@@ -282,14 +298,8 @@ from paddle_serving_server.pipeline import PipelineServer
from paddle_serving_server.pipeline.proto import pipeline_service_pb2 from paddle_serving_server.pipeline.proto import pipeline_service_pb2
from paddle_serving_server.pipeline.channel import ChannelDataEcode from paddle_serving_server.pipeline.channel import ChannelDataEcode
import numpy as np import numpy as np
import logging
from paddle_serving_app.reader import IMDBDataset from paddle_serving_app.reader import IMDBDataset
logging.basicConfig(level=logging.DEBUG)
_LOGGER = logging.getLogger()
class ImdbRequestOp(RequestOp): class ImdbRequestOp(RequestOp):
def init_op(self): def init_op(self):
self.imdb_dataset = IMDBDataset() self.imdb_dataset = IMDBDataset()
...@@ -377,7 +387,7 @@ for f in futures: ...@@ -377,7 +387,7 @@ for f in futures:
## How to optimize through the timeline tool ## How to optimize with the timeline tool
In order to better optimize the performance, PipelineServing provides a timeline tool to monitor the time of each stage of the whole service. In order to better optimize the performance, PipelineServing provides a timeline tool to monitor the time of each stage of the whole service.
...@@ -390,15 +400,23 @@ dag: ...@@ -390,15 +400,23 @@ dag:
use_profile: true use_profile: true
``` ```
After the function is enabled, the server will print the corresponding log information to the standard output in the process of prediction. In order to show the time consumption of each stage more intuitively, scripts are provided for further analysis and processing of log files. After the function is enabled, the server will print the corresponding log information to the standard output in the process of prediction. In order to show the time consumption of each stage more intuitively, the Analyst module is provided for further analysis and processing of log files.
The output of the server is first saved to a file. Taking profile as an example, the script converts the time monitoring information in the log into JSON format and saves it to the trace file. The trace file can be visualized through the tracing function of Chrome browser. The output of the server is first saved to a file. Taking `profile.txt` as an example, the following code converts the time monitoring information in the log into JSON format and saves it to the `trace` file. The `trace` file can be visualized through the tracing function of the Chrome browser.
```shell ```python
python timeline_trace.py profile trace from paddle_serving_server.pipeline import Analyst
import json
import sys
if __name__ == "__main__":
log_filename = "profile.txt"
trace_filename = "trace"
analyst = Analyst(log_filename)
analyst.save_trace(trace_filename)
``` ```
Specific operation: open the Chrome browser, enter `chrome://tracing/` in the address bar to jump to the tracing page, click the load button, open the saved trace file, and then visualize the time information of each stage of the prediction service. Specific operation: open the Chrome browser, enter `chrome://tracing/` in the address bar to jump to the tracing page, click the load button, open the saved `trace` file, and then visualize the time information of each stage of the prediction service.
### Output profile information on client side ### Output profile information on client side
......
...@@ -6,6 +6,7 @@ Paddle Serving 通常用于单模型的一键部署,但端到端的深度学 ...@@ -6,6 +6,7 @@ Paddle Serving 通常用于单模型的一键部署,但端到端的深度学
Paddle Serving 提供了用户友好的多模型组合服务编程框架,Pipeline Serving,旨在降低编程门槛,提高资源使用率(尤其是GPU设备),提升整体的预估效率。 Paddle Serving 提供了用户友好的多模型组合服务编程框架,Pipeline Serving,旨在降低编程门槛,提高资源使用率(尤其是GPU设备),提升整体的预估效率。
## 整体架构设计 ## 整体架构设计
Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示。 Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示。
...@@ -30,9 +31,10 @@ Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示 ...@@ -30,9 +31,10 @@ Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示
### OP的设计 ### OP的设计
- 单个OP默认的功能是根据输入的 Channel 数据,访问一个 Paddle Serving 的单模型服务,并将结果存在输出的 Channel - 单个 OP 默认的功能是根据输入的 Channel 数据,访问一个 Paddle Serving 的单模型服务,并将结果存在输出的 Channel
- 单个 OP 可以支持用户自定义,包括 preprocess,process,postprocess 三个函数都可以由用户继承和实现 - 单个 OP 可以支持用户自定义,包括 preprocess,process,postprocess 三个函数都可以由用户继承和实现
- 单个 OP 可以控制并发数,从而增加处理并发数 - 单个 OP 可以控制并发数,从而增加处理并发数
- 单个 OP 可以获取多个不同 RPC 请求的数据,以实现 Auto-Batching
- OP 可以由线程或进程启动 - OP 可以由线程或进程启动
### Channel的设计 ### Channel的设计
...@@ -59,11 +61,9 @@ Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示 ...@@ -59,11 +61,9 @@ Server端基于 gRPC 和图执行引擎构建,两者的关系如下图所示
- Channel 设计中的 input buffer 和 output buffer 是否会无限增加 - Channel 设计中的 input buffer 和 output buffer 是否会无限增加
- 不会。整个图执行引擎的输入会放到一个 Channel 的 internal queue 里面,直接作为整个服务的流量控制缓冲队列 - 不会。整个图执行引擎的输入会放到一个 Channel 的 internal queue 里面,直接作为整个服务的流量控制缓冲队列
- 对于 input buffer,根据计算量的情况调整 OP1 和 OP2 的并发数,使得 input buffer 来自各个输入 OP 的数量相对平衡 - 对于 input buffer,根据计算量的情况调整 OP1 和 OP2 的并发数,使得 input buffer 来自各个输入 OP 的数量相对平衡(input buffer 的长度取决于 internal queue 中每个 item 完全 ready 的速度)
- 对于 output buffer,可以采用和 input buffer 类似的处理方法,即调整 OP3 和 OP4 的并发数,使得 output buffer 的缓冲长度得到控制 - 对于 output buffer,可以采用和 input buffer 类似的处理方法,即调整 OP3 和 OP4 的并发数,使得 output buffer 的缓冲长度得到控制(output buffer 的长度取决于下游 OP 从 output buffer 获取数据的速度)
- 注:input buffer 的长度取决于 internal queue 中每个 item 完全 ready 的速度,output buffer 的长度取决于下游 OP 从 output buffer 获取数据的速度 - 同时 Channel 中数据量不会超过 gRPC 的 `worker_num`,即线程池大小
## 详细设计
### 用户接口设计 ### 用户接口设计
...@@ -79,31 +79,36 @@ def __init__(name=None, ...@@ -79,31 +79,36 @@ def __init__(name=None,
client_config=None, client_config=None,
concurrency=1, concurrency=1,
timeout=-1, timeout=-1,
retry=1) retry=1,
batch_size=1,
auto_batching_timeout=None)
``` ```
各参数含义如下 各参数含义如下
| 参数名 | 含义 | | 参数名 | 含义 |
| :--------------: | :----------------------------------------------------------: | | :-------------------: | :----------------------------------------------------------: |
| name | (str)用于标识 OP 类型的字符串,该字段必须全局唯一。 | | name | (str)用于标识 OP 类型的字符串,该字段必须全局唯一。 |
| input_ops | (list)当前 OP 的所有前继 OP 的列表。 | | input_ops | (list)当前 OP 的所有前继 OP 的列表。 |
| server_endpoints | (list)远程 Paddle Serving Service 的 endpoints 列表。如果不设置该参数,则不访问远程 Paddle Serving Service,即 不会执行 process 操作。 | | server_endpoints | (list)远程 Paddle Serving Service 的 endpoints 列表。如果不设置该参数,则不访问远程 Paddle Serving Service,即 不会执行 process 操作。 |
| fetch_list | (list)远程 Paddle Serving Service 的 fetch 列表。 | | fetch_list | (list)远程 Paddle Serving Service 的 fetch 列表。 |
| client_config | (str)Paddle Serving Service 对应的 Client 端配置文件路径。 | | client_config | (str)Paddle Serving Service 对应的 Client 端配置文件路径。 |
| concurrency | (int)OP 的并发数。 | | concurrency | (int)OP 的并发数。 |
| timeout | (int)process 操作的超时时间,单位为秒。若该值小于零,则视作不超时。 | | timeout | (int)process 操作的超时时间,单位为毫秒。若该值小于零,则视作不超时。 |
| retry | (int)超时重试次数。当该值为 1 时,不进行重试。 | | retry | (int)超时重试次数。当该值为 1 时,不进行重试。 |
| batch_size | (int)进行 Auto-Batching 的期望 batch_size 大小,由于构建 batch 可能超时,实际 batch_size 可能小于设定值。 |
| auto_batching_timeout | (float)进行 Auto-Batching 构建 batch 的超时时间,单位为毫秒。 |
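下面给出一个仅作示意的 Op 构造示例,展示 `batch_size` 与 `auto_batching_timeout` 等参数的用法;其中端点、配置文件路径、OP 名称等均为假设值,并非官方示例的一部分:
```python
# 仅作示意:端点、路径与名称均为假设值
from paddle_serving_server.pipeline import Op, RequestOp

read_op = RequestOp()  # 实际使用中通常继承 RequestOp 并重写请求解包逻辑

imdb_op = Op(
    name="imdb",                                # 全局唯一的 OP 名称
    input_ops=[read_op],                        # 前继 OP 列表
    server_endpoints=["127.0.0.1:9292"],        # 远程 Paddle Serving Service
    fetch_list=["prediction"],
    client_config="imdb_client_conf/serving_client_conf.prototxt",
    concurrency=1,
    timeout=-1,                                 # 单位为毫秒,小于零表示不超时
    retry=1,
    batch_size=2,                               # Auto-Batching 的期望 batch 大小
    auto_batching_timeout=2000)                 # 构建 batch 的超时时间(毫秒)
```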
#### 2. 普通 OP二次开发接口 #### 2. 普通 OP二次开发接口
| 变量或接口 | 说明 | | 变量或接口 | 说明 |
| :--------------------------------------------: | :----------------------------------------------------------: | | :----------------------------------------------: | :----------------------------------------------------------: |
| def preprocess(self, input_dicts) | 对从 Channel 中获取的数据进行处理,处理完的数据将作为 **process** 函数的输入。 | | def preprocess(self, input_dicts) | 对从 Channel 中获取的数据进行处理,处理完的数据将作为 **process** 函数的输入。(该函数对一个 **sample** 进行处理) |
| def process(self, feed_dict) | 基于 Paddle Serving Client 进行 RPC 预测,处理完的数据将作为 **postprocess** 函数的输入。 | | def process(self, feed_dict_list, typical_logid) | 基于 Paddle Serving Client 进行 RPC 预测,处理完的数据将作为 **postprocess** 函数的输入。(该函数对一个 **batch** 进行处理) |
| def postprocess(self, input_dicts, fetch_dict) | 处理预测结果,处理完的数据将被放入后继 Channel 中,以被后继 OP 获取。 | | def postprocess(self, input_dicts, fetch_dict) | 处理预测结果,处理完的数据将被放入后继 Channel 中,以被后继 OP 获取。(该函数对一个 **sample** 进行处理) |
| def init_op(self) | 用于加载资源(如字典等)。 | | def init_op(self) | 用于加载资源(如字典等)。 |
| self.concurrency_idx | 当前线程(进程)的并发数索引(不同种类的 OP 单独计算)。 | | self.concurrency_idx | 当前进程(非线程)的并发数索引(不同种类的 OP 单独计算)。 |
OP 在一个运行周期中会依次执行 preprocess,process,postprocess 三个操作(当不设置 `server_endpoints` 参数时,不执行 process 操作),用户可以对这三个函数进行重写,默认实现如下: OP 在一个运行周期中会依次执行 preprocess,process,postprocess 三个操作(当不设置 `server_endpoints` 参数时,不执行 process 操作),用户可以对这三个函数进行重写,默认实现如下:
...@@ -117,25 +122,28 @@ def preprocess(self, input_dicts): ...@@ -117,25 +122,28 @@ def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items() (_, input_dict), = input_dicts.items()
return input_dict return input_dict
def process(self, feed_dict): def process(self, feed_dict_list, typical_logid):
err, err_info = ChannelData.check_npdata(feed_dict) err, err_info = ChannelData.check_batch_npdata(feed_dict_list)
if err != 0: if err != 0:
raise NotImplementedError( raise NotImplementedError(
"{} Please override preprocess func.".format(err_info)) "{} Please override preprocess func.".format(err_info))
call_result = self.client.predict( call_result = self.client.predict(
feed=feed_dict, fetch=self._fetch_names) feed=feed_dict_list, fetch=self._fetch_names, log_id=typical_logid)
if isinstance(self.client, MultiLangClient):
if call_result is None or call_result["serving_status_code"] != 0:
return None
call_result.pop("serving_status_code")
return call_result return call_result
def postprocess(self, input_dicts, fetch_dict): def postprocess(self, input_dicts, fetch_dict):
return fetch_dict return fetch_dict
``` ```
**preprocess** 的参数是前继 Channel 中的数据 `input_dicts`,该变量(作为一个 **sample**)是一个以前继 OP 的 name 为 Key,对应 OP 的输出为 Value 的字典。
**preprocess** 的参数是前继 Channel 中的数据 `input_dicts`,该变量是一个以前继 OP 的 name 为 Key,对应 OP 的输出为 Value 的字典。 **process** 的参数是 Paddle Serving Client 预测接口的输入变量 `feed_dict_list`(preprocess 函数返回值组成的列表),该变量(作为一个 **batch**)是一个列表,列表中的元素为以 feed_name 为 Key,对应 ndarray 格式的数据为 Value 的字典。`typical_logid` 作为向 PaddleServingService 穿透的 logid。
**process** 的参数是 Paddle Serving Client 预测接口的输入变量 `fetch_dict`(preprocess 函数的返回值),该变量是一个以 feed_name 为 Key,对应 ndarray 格式的数据为 Value 的字典。 **postprocess** 的参数是 `input_dicts``fetch_dict``input_dicts` 与 preprocess 的参数一致,`fetch_dict` (作为一个 **sample**)是 process 函数的返回 batch 中的一个 sample(如果没有执行 process ,则该值为 preprocess 的返回值)。
**postprocess** 的参数是 `input_dicts``fetch_dict``input_dicts` 与 preprocess 的参数一致,`fetch_dict` 是 process 函数的返回值(如果没有执行 process ,则该值为 preprocess 的返回值)。
用户还可以对 **init_op** 函数进行重写,以加载自定义的一些资源(比如字典等),默认实现如下: 用户还可以对 **init_op** 函数进行重写,以加载自定义的一些资源(比如字典等),默认实现如下:
...@@ -144,7 +152,7 @@ def init_op(self): ...@@ -144,7 +152,7 @@ def init_op(self):
pass pass
``` ```
需要注意的是,在线程版 OP 中,每个 OP 只会调用一次该函数,故加载的资源必须要求是线程安全的。 需要**注意**的是,在线程版 OP 中,每个 OP 只会调用一次该函数,故加载的资源必须要求是线程安全的。
#### 3. RequestOp 定义 #### 3. RequestOp 定义
...@@ -241,14 +249,17 @@ server.run_server() ...@@ -241,14 +249,17 @@ server.run_server()
其中,`response_op` 为上面提到的 ResponseOp,PipelineServer 将会根据各个 OP 的拓扑关系初始化 Channel 并构建计算图。`config_yml_path` 为 PipelineServer 的配置文件,示例文件如下: 其中,`response_op` 为上面提到的 ResponseOp,PipelineServer 将会根据各个 OP 的拓扑关系初始化 Channel 并构建计算图。`config_yml_path` 为 PipelineServer 的配置文件,示例文件如下:
```yaml ```yaml
port: 18080 # gRPC端口号 rpc_port: 18080 # gRPC端口号
worker_num: 1 # gRPC线程池大小(进程版 Servicer 中为进程数),默认为 1 worker_num: 1 # gRPC线程池大小(进程版 Servicer 中为进程数),默认为 1
build_dag_each_worker: false # 是否使用进程版 Servicer,默认为 false build_dag_each_worker: false # 是否使用进程版 Servicer,默认为 false
http_port: 0 # HTTP 服务的端口号,若该值小于或等于 0 则不开启 HTTP 服务,默认为 0
dag: dag:
is_thread_op: true # 是否使用线程版Op,默认为 true is_thread_op: true # 是否使用线程版Op,默认为 true
client_type: brpc # 使用 brpc 或 grpc client,默认为 brpc client_type: brpc # 使用 brpc 或 grpc client,默认为 brpc
retry: 1 # DAG Executor 在失败后重试次数,默认为 1,即不重试 retry: 1 # DAG Executor 在失败后重试次数,默认为 1,即不重试
use_profile: false # 是否在 Server 端打印日志,默认为 false use_profile: false # 是否在 Server 端打印日志,默认为 false
tracer:
interval_s: 600 # Tracer 监控的时间间隔,单位为秒。当该值小于 1 时不启动监控,默认为 -1
``` ```
...@@ -273,6 +284,8 @@ python -m paddle_serving_server.serve --model imdb_cnn_model --port 9292 &> cnn. ...@@ -273,6 +284,8 @@ python -m paddle_serving_server.serve --model imdb_cnn_model --port 9292 &> cnn.
python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 &> bow.log & python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 &> bow.log &
``` ```
PipelineServing 也支持本地自动启动 PaddleServingService,请参考 `python/examples/pipeline/ocr` 下的例子。
### 启动 PipelineServer ### 启动 PipelineServer
运行下面代码 运行下面代码
...@@ -283,14 +296,8 @@ from paddle_serving_server.pipeline import PipelineServer ...@@ -283,14 +296,8 @@ from paddle_serving_server.pipeline import PipelineServer
from paddle_serving_server.pipeline.proto import pipeline_service_pb2 from paddle_serving_server.pipeline.proto import pipeline_service_pb2
from paddle_serving_server.pipeline.channel import ChannelDataEcode from paddle_serving_server.pipeline.channel import ChannelDataEcode
import numpy as np import numpy as np
import logging
from paddle_serving_app.reader import IMDBDataset from paddle_serving_app.reader import IMDBDataset
logging.basicConfig(level=logging.DEBUG)
_LOGGER = logging.getLogger()
class ImdbRequestOp(RequestOp): class ImdbRequestOp(RequestOp):
def init_op(self): def init_op(self):
self.imdb_dataset = IMDBDataset() self.imdb_dataset = IMDBDataset()
...@@ -311,7 +318,6 @@ class CombineOp(Op): ...@@ -311,7 +318,6 @@ class CombineOp(Op):
def preprocess(self, input_data): def preprocess(self, input_data):
combined_prediction = 0 combined_prediction = 0
for op_name, data in input_data.items(): for op_name, data in input_data.items():
_LOGGER.info("{}: {}".format(op_name, data["prediction"]))
combined_prediction += data["prediction"] combined_prediction += data["prediction"]
data = {"prediction": combined_prediction / 2} data = {"prediction": combined_prediction / 2}
return data return data
...@@ -391,15 +397,23 @@ dag: ...@@ -391,15 +397,23 @@ dag:
use_profile: true use_profile: true
``` ```
开启该功能后,Server 端在预测的过程中会将对应的日志信息打印到标准输出,为了更直观地展现各阶段的耗时,提供脚本对日志文件做进一步的分析处理。 开启该功能后,Server 端在预测的过程中会将对应的日志信息打印到标准输出,为了更直观地展现各阶段的耗时,提供 Analyst 模块对日志文件做进一步的分析处理。
使用时先将 Server 的输出保存到文件,以 profile 为例,脚本将日志中的时间打点信息转换成 json 格式保存到trace 文件,trace 文件可以通过 chrome 浏览器的 tracing 功能进行可视化。 使用时先将 Server 的输出保存到文件,以 `profile.txt` 为例,下面的代码将日志中的时间打点信息转换成 json 格式保存到 `trace` 文件,`trace` 文件可以通过 chrome 浏览器的 tracing 功能进行可视化。
```shell ```python
python timeline_trace.py profile trace from paddle_serving_server.pipeline import Analyst
import json
import sys
if __name__ == "__main__":
log_filename = "profile.txt"
trace_filename = "trace"
analyst = Analyst(log_filename)
analyst.save_trace(trace_filename)
``` ```
具体操作:打开 chrome 浏览器,在地址栏输入 chrome://tracing/ ,跳转至 tracing 页面,点击 load 按钮,打开保存的 trace 文件,即可将预测服务的各阶段时间信息可视化。 具体操作:打开 chrome 浏览器,在地址栏输入 `chrome://tracing/` ,跳转至 tracing 页面,点击 load 按钮,打开保存的 `trace` 文件,即可将预测服务的各阶段时间信息可视化。
### 在 Client 端输出 Profile 信息 ### 在 Client 端输出 Profile 信息
......
...@@ -38,12 +38,15 @@ If you have saved model files using Paddle's `save_inference_model` API, you can ...@@ -38,12 +38,15 @@ If you have saved model files using Paddle's `save_inference_model` API, you can
import paddle_serving_client.io as serving_io import paddle_serving_client.io as serving_io
serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None ) serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None )
``` ```
dirname (str) - Path of saved model files. Program file and parameter files are saved in this directory. Or you can use a built-in Python module called `paddle_serving_client.convert` to do the conversion.
```python
serving_server (str, optional) - The path of model files and configuration files for server. Default: "serving_server". python -m paddle_serving_client.convert --dirname ./your_inference_model_dir
```
serving_client (str, optional) - The path of configuration files for client. Default: "serving_client". The arguments are the same as those of the `inference_model_to_serving` API.
| Argument | Type | Default | Description |
model_filename (str, optional) - The name of file to load the inference program. If it is None, the default filename `__model__` will be used. Default: None. |--------------|------|-----------|--------------------------------|
| `dirname` | str | - | Path of saved model files. Program file and parameter files are saved in this directory. |
params_filename (str, optional) - The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None. | `serving_server` | str | `"serving_server"` | The path of model files and configuration files for server. |
| `serving_client` | str | `"serving_client"` | The path of configuration files for client. |
| `model_filename` | str | None | The name of file to load the inference program. If it is None, the default filename `__model__` will be used. |
| `params_filename` | str | None | The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. |
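As a hedged illustration of the Python API (the directory names below are placeholders for your own model), a typical conversion call looks like this:
```python
# Minimal sketch; "./inference_model" is a placeholder for a directory
# produced by Paddle's save_inference_model API.
import paddle_serving_client.io as serving_io

serving_io.inference_model_to_serving(
    dirname="./inference_model",
    serving_server="serving_server",   # output dir for server-side files
    serving_client="serving_client",   # output dir for client-side config
    model_filename=None,               # None -> use the default "__model__" file
    params_filename=None)              # None -> parameters stored in separate files
```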
...@@ -39,12 +39,15 @@ for line in sys.stdin: ...@@ -39,12 +39,15 @@ for line in sys.stdin:
import paddle_serving_client.io as serving_io import paddle_serving_client.io as serving_io
serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None) serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None)
``` ```
dirname (str) – 需要转换的模型文件存储路径,Program结构文件和参数文件均保存在此目录。 或者你可以使用Paddle Serving提供的名为`paddle_serving_client.convert`的内置模块进行转换。
```python
serving_server (str, 可选) - 转换后的模型文件和配置文件的存储路径。默认值为serving_server。 python -m paddle_serving_client.convert --dirname ./your_inference_model_dir
```
serving_client (str, 可选) - 转换后的客户端配置文件存储路径。默认值为serving_client。 模块参数与`inference_model_to_serving`接口参数相同。
| 参数 | 类型 | 默认值 | 描述 |
model_filename (str,可选) – 存储需要转换的模型Inference Program结构的文件名称。如果设置为None,则使用 `__model__` 作为默认的文件名。默认值为None。 |--------------|------|-----------|--------------------------------|
| `dirname` | str | - | 需要转换的模型文件存储路径,Program结构文件和参数文件均保存在此目录。|
params_filename (str,可选) – 存储需要转换的模型所有参数的文件名称。当且仅当所有模型参数被保存在一个单独的二进制文件中,它才需要被指定。如果模型参数是存储在各自分离的文件中,设置它的值为None。默认值为None。 | `serving_server` | str | `"serving_server"` | 转换后的模型文件和配置文件的存储路径。默认值为serving_server |
| `serving_client` | str | `"serving_client"` | 转换后的客户端配置文件存储路径。默认值为serving_client |
| `model_filename` | str | None | 存储需要转换的模型Inference Program结构的文件名称。如果设置为None,则使用 `__model__` 作为默认的文件名 |
| `params_filename` | str | None | 存储需要转换的模型所有参数的文件名称。当且仅当所有模型参数被保存在一个单独的二进制文件中,它才需要被指定。如果模型参数是存储在各自分离的文件中,设置它的值为None |
# How to develop a new Web service?
([简体中文](NEW_WEB_SERVICE_CN.md)|English)
This document will take the image classification service based on the Imagenet dataset as an example to introduce how to develop a new web service. The complete code can be found [here](../python/examples/imagenet/resnet50_web_service.py).
## WebService base class
Paddle Serving implements the [WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L23) base class. You need to override its `preprocess` and `postprocess` method. The default implementation is as follows:
```python
class WebService(object):
    def preprocess(self, feed={}, fetch=[]):
        return feed, fetch

    def postprocess(self, feed={}, fetch=[], fetch_map=None):
        return fetch_map
```
### preprocess
The preprocess method has two input parameters, `feed` and `fetch`. For an HTTP request `request`:
- The value of `feed` is the feed part `request.json["feed"]` in the request data
- The value of `fetch` is the fetch part `request.json["fetch"]` in the request data
The return values are the feed and fetch values used in the prediction.
### postprocess
The postprocess method has three input parameters, `feed`, `fetch` and `fetch_map`:
- The value of `feed` is the feed part `request.json["feed"]` in the request data
- The value of `fetch` is the fetch part `request.json["fetch"]` in the request data
- The value of `fetch_map` is the model output value.
The return value will be wrapped as `{"result": fetch_map}` and used as the response of the HTTP request.
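For orientation, a request against such a web service could be issued as sketched below; the service name `image`, the port, the image file and the feed/fetch keys are assumptions for illustration only, and the response is wrapped as described above.
```python
# Hypothetical client call: URL, service name and keys are assumptions.
import base64
import json
import requests

with open("daisy.jpg", "rb") as f:  # placeholder image file
    image = base64.b64encode(f.read()).decode("utf-8")

data = {"feed": [{"image": image}], "fetch": ["score"]}
r = requests.post(
    "http://127.0.0.1:9292/image/prediction",
    data=json.dumps(data),
    headers={"Content-Type": "application/json"})
print(r.json())  # e.g. {"result": {...}} as described above
```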
## Develop ImageService class
```python
class ImageService(WebService):
    def preprocess(self, feed={}, fetch=[]):
        reader = ImageReader()
        feed_batch = []
        for ins in feed:
            if "image" not in ins:
                raise ValueError("feed data error!")
            sample = base64.b64decode(ins["image"])
            img = reader.process_image(sample)
            feed_batch.append({"image": img})
        return feed_batch, fetch
```
For the above `ImageService`, only the `preprocess` method is overridden, converting the Base64-encoded image data into the data format required for prediction.
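The example above only defines the service class. A minimal launch sketch is given below; it assumes the `ImageService` class and its dependencies have been imported, and the model directory, port and device are placeholders.
```python
# Hypothetical launch code: model directory, port and device are placeholders.
image_service = ImageService(name="image")
image_service.load_model_config("serving_server_model")
image_service.prepare_server(workdir="workdir", port=9292, device="cpu")
image_service.run_rpc_service()  # start the backend RPC service
image_service.run_web_service()  # start the HTTP web service
```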
# 如何开发一个新的Web Service?
(简体中文|[English](NEW_WEB_SERVICE.md))
本文档将以Imagenet图像分类服务为例,来介绍如何开发一个新的Web Service。您可以在[这里](../python/examples/imagenet/resnet50_web_service.py)查阅完整的代码。
## WebService基类
Paddle Serving实现了[WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L23)基类,您需要重写它的`preprocess`方法和`postprocess`方法,默认实现如下:
```python
class WebService(object):
    def preprocess(self, feed={}, fetch=[]):
        return feed, fetch

    def postprocess(self, feed={}, fetch=[], fetch_map=None):
        return fetch_map
```
### preprocess方法
preprocess方法有两个输入参数,`feed``fetch`。对于一个HTTP请求`request`
- `feed`的值为请求数据中的feed部分`request.json["feed"]`
- `fetch`的值为请求数据中的fetch部分`request.json["fetch"]`
返回值分别是预测过程中用到的feed和fetch值。
### postprocess方法
postprocess方法有三个输入参数,`feed``fetch``fetch_map`
- `feed`的值为请求数据中的feed部分`request.json["feed"]`
- `fetch`的值为请求数据中的fetch部分`request.json["fetch"]`
- `fetch_map`的值为fetch到的模型输出值
返回值将会被处理成`{"result": fetch_map}`作为HTTP请求的返回。
## 开发ImageService类
```python
class ImageService(WebService):
    def preprocess(self, feed={}, fetch=[]):
        reader = ImageReader()
        feed_batch = []
        for ins in feed:
            if "image" not in ins:
                raise ValueError("feed data error!")
            sample = base64.b64decode(ins["image"])
            img = reader.process_image(sample)
            feed_batch.append({"image": img})
        return feed_batch, fetch
```
对于上述的`ImageService`,只重写了前处理方法,将base64格式的图片数据处理成模型预测需要的数据格式。
doc/pipeline_serving-image1.png (image updated: 96.0 KB → 107.7 KB)
...@@ -192,14 +192,16 @@ public class Client { ...@@ -192,14 +192,16 @@ public class Client {
private InferenceRequest _packInferenceRequest( private InferenceRequest _packInferenceRequest(
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) throws IllegalArgumentException { Iterable<String> fetch,
long log_id) throws IllegalArgumentException {
List<String> feed_var_names = new ArrayList<String>(); List<String> feed_var_names = new ArrayList<String>();
feed_var_names.addAll(feed_batch.get(0).keySet()); feed_var_names.addAll(feed_batch.get(0).keySet());
InferenceRequest.Builder req_builder = InferenceRequest.newBuilder() InferenceRequest.Builder req_builder = InferenceRequest.newBuilder()
.addAllFeedVarNames(feed_var_names) .addAllFeedVarNames(feed_var_names)
.addAllFetchVarNames(fetch) .addAllFetchVarNames(fetch)
.setIsPython(false); .setIsPython(false)
.setLogId(log_id);
for (HashMap<String, INDArray> feed_data: feed_batch) { for (HashMap<String, INDArray> feed_data: feed_batch) {
FeedInst.Builder inst_builder = FeedInst.newBuilder(); FeedInst.Builder inst_builder = FeedInst.newBuilder();
for (String name: feed_var_names) { for (String name: feed_var_names) {
...@@ -332,76 +334,151 @@ public class Client { ...@@ -332,76 +334,151 @@ public class Client {
public Map<String, INDArray> predict( public Map<String, INDArray> predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch) { Iterable<String> fetch) {
return predict(feed, fetch, false); return predict(feed, fetch, false, 0);
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return predict(feed, fetch, false, log_id);
} }
public Map<String, HashMap<String, INDArray>> ensemble_predict( public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch) { Iterable<String> fetch) {
return ensemble_predict(feed, fetch, false); return ensemble_predict(feed, fetch, false, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return ensemble_predict(feed, fetch, false, log_id);
} }
public PredictFuture asyn_predict( public PredictFuture asyn_predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch) { Iterable<String> fetch) {
return asyn_predict(feed, fetch, false); return asyn_predict(feed, fetch, false, 0);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
long log_id) {
return asyn_predict(feed, fetch, false, log_id);
} }
public Map<String, INDArray> predict( public Map<String, INDArray> predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
return predict(feed, fetch, need_variant_tag, 0);
}
public Map<String, INDArray> predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>(); = new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed); feed_batch.add(feed);
return predict(feed_batch, fetch, need_variant_tag); return predict(feed_batch, fetch, need_variant_tag, log_id);
} }
public Map<String, HashMap<String, INDArray>> ensemble_predict( public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
return ensemble_predict(feed, fetch, need_variant_tag, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>(); = new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed); feed_batch.add(feed);
return ensemble_predict(feed_batch, fetch, need_variant_tag); return ensemble_predict(feed_batch, fetch, need_variant_tag, log_id);
} }
public PredictFuture asyn_predict( public PredictFuture asyn_predict(
HashMap<String, INDArray> feed, HashMap<String, INDArray> feed,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
return asyn_predict(feed, fetch, need_variant_tag, 0);
}
public PredictFuture asyn_predict(
HashMap<String, INDArray> feed,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
List<HashMap<String, INDArray>> feed_batch List<HashMap<String, INDArray>> feed_batch
= new ArrayList<HashMap<String, INDArray>>(); = new ArrayList<HashMap<String, INDArray>>();
feed_batch.add(feed); feed_batch.add(feed);
return asyn_predict(feed_batch, fetch, need_variant_tag); return asyn_predict(feed_batch, fetch, need_variant_tag, log_id);
} }
public Map<String, INDArray> predict( public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) { Iterable<String> fetch) {
return predict(feed_batch, fetch, false); return predict(feed_batch, fetch, false, 0);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return predict(feed_batch, fetch, false, log_id);
} }
public Map<String, HashMap<String, INDArray>> ensemble_predict( public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) { Iterable<String> fetch) {
return ensemble_predict(feed_batch, fetch, false); return ensemble_predict(feed_batch, fetch, false, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return ensemble_predict(feed_batch, fetch, false, log_id);
} }
public PredictFuture asyn_predict( public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch) { Iterable<String> fetch) {
return asyn_predict(feed_batch, fetch, false); return asyn_predict(feed_batch, fetch, false, 0);
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
long log_id) {
return asyn_predict(feed_batch, fetch, false, log_id);
} }
public Map<String, INDArray> predict( public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
return predict(feed_batch, fetch, need_variant_tag, 0);
}
public Map<String, INDArray> predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
try { try {
profiler_.record("java_prepro_0"); profiler_.record("java_prepro_0");
InferenceRequest req = _packInferenceRequest(feed_batch, fetch); InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
profiler_.record("java_prepro_1"); profiler_.record("java_prepro_1");
profiler_.record("java_client_infer_0"); profiler_.record("java_client_infer_0");
...@@ -415,7 +492,7 @@ public class Client { ...@@ -415,7 +492,7 @@ public class Client {
= new ArrayList<Map.Entry<String, HashMap<String, INDArray>>>( = new ArrayList<Map.Entry<String, HashMap<String, INDArray>>>(
ensemble_result.entrySet()); ensemble_result.entrySet());
if (list.size() != 1) { if (list.size() != 1) {
System.out.format("predict failed: please use ensemble_predict impl.\n"); System.out.format("Failed to predict: please use ensemble_predict impl.\n");
return null; return null;
} }
profiler_.record("java_postpro_1"); profiler_.record("java_postpro_1");
...@@ -423,7 +500,7 @@ public class Client { ...@@ -423,7 +500,7 @@ public class Client {
return list.get(0).getValue(); return list.get(0).getValue();
} catch (StatusRuntimeException e) { } catch (StatusRuntimeException e) {
System.out.format("predict failed: %s\n", e.toString()); System.out.format("Failed to predict: %s\n", e.toString());
return null; return null;
} }
} }
...@@ -432,9 +509,18 @@ public class Client { ...@@ -432,9 +509,18 @@ public class Client {
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
return ensemble_predict(feed_batch, fetch, need_variant_tag, 0);
}
public Map<String, HashMap<String, INDArray>> ensemble_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
try { try {
profiler_.record("java_prepro_0"); profiler_.record("java_prepro_0");
InferenceRequest req = _packInferenceRequest(feed_batch, fetch); InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
profiler_.record("java_prepro_1"); profiler_.record("java_prepro_1");
profiler_.record("java_client_infer_0"); profiler_.record("java_client_infer_0");
...@@ -449,7 +535,7 @@ public class Client { ...@@ -449,7 +535,7 @@ public class Client {
return ensemble_result; return ensemble_result;
} catch (StatusRuntimeException e) { } catch (StatusRuntimeException e) {
System.out.format("predict failed: %s\n", e.toString()); System.out.format("Failed to predict: %s\n", e.toString());
return null; return null;
} }
} }
...@@ -458,7 +544,16 @@ public class Client { ...@@ -458,7 +544,16 @@ public class Client {
List<HashMap<String, INDArray>> feed_batch, List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch, Iterable<String> fetch,
Boolean need_variant_tag) { Boolean need_variant_tag) {
InferenceRequest req = _packInferenceRequest(feed_batch, fetch); return asyn_predict(feed_batch, fetch, need_variant_tag, 0);
}
public PredictFuture asyn_predict(
List<HashMap<String, INDArray>> feed_batch,
Iterable<String> fetch,
Boolean need_variant_tag,
long log_id) {
InferenceRequest req = _packInferenceRequest(
feed_batch, fetch, log_id);
ListenableFuture<InferenceResponse> future = futureStub_.inference(req); ListenableFuture<InferenceResponse> future = futureStub_.inference(req);
PredictFuture predict_future = new PredictFuture(future, PredictFuture predict_future = new PredictFuture(future,
(InferenceResponse resp) -> { (InferenceResponse resp) -> {
......
...@@ -14,6 +14,8 @@ ...@@ -14,6 +14,8 @@
syntax = "proto2"; syntax = "proto2";
package baidu.paddle_serving.multi_lang;
option java_multiple_files = true; option java_multiple_files = true;
option java_package = "io.paddle.serving.grpc"; option java_package = "io.paddle.serving.grpc";
option java_outer_classname = "ServingProto"; option java_outer_classname = "ServingProto";
...@@ -37,6 +39,7 @@ message InferenceRequest { ...@@ -37,6 +39,7 @@ message InferenceRequest {
repeated string feed_var_names = 2; repeated string feed_var_names = 2;
repeated string fetch_var_names = 3; repeated string fetch_var_names = 3;
required bool is_python = 4 [ default = false ]; required bool is_python = 4 [ default = false ];
required uint64 log_id = 5 [ default = 0 ];
}; };
message InferenceResponse { message InferenceResponse {
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "core/configure/inferencer_configure.pb.h" #include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/framework/infer.h" #include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT #include "paddle_inference_api.h" // NOLINT
//#include "predictor/framework/infer.h"
namespace baidu { namespace baidu {
namespace paddle_serving { namespace paddle_serving {
......
if (CLIENT) if (CLIENT)
file(INSTALL pipeline DESTINATION paddle_serving_client) file(INSTALL pipeline DESTINATION paddle_serving_client)
execute_process(COMMAND ${PYTHON_EXECUTABLE} run_codegen.py
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/paddle_serving_client/pipeline/proto)
file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py) file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py)
set(PY_FILES ${SERVING_CLIENT_PY_FILES}) set(PY_FILES ${SERVING_CLIENT_PY_FILES})
SET(PACKAGE_NAME "serving_client") SET(PACKAGE_NAME "serving_client")
...@@ -11,13 +9,9 @@ endif() ...@@ -11,13 +9,9 @@ endif()
if (SERVER) if (SERVER)
if (NOT WITH_GPU) if (NOT WITH_GPU)
file(INSTALL pipeline DESTINATION paddle_serving_server) file(INSTALL pipeline DESTINATION paddle_serving_server)
execute_process(COMMAND ${PYTHON_EXECUTABLE} run_codegen.py
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/paddle_serving_server/pipeline/proto)
file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py) file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
else() else()
file(INSTALL pipeline DESTINATION paddle_serving_server_gpu) file(INSTALL pipeline DESTINATION paddle_serving_server_gpu)
execute_process(COMMAND ${PYTHON_EXECUTABLE} run_codegen.py
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/paddle_serving_server_gpu/pipeline/proto)
file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server_gpu/*.py) file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server_gpu/*.py)
endif() endif()
set(PY_FILES ${SERVING_SERVER_PY_FILES}) set(PY_FILES ${SERVING_SERVER_PY_FILES})
...@@ -25,6 +19,8 @@ if (SERVER) ...@@ -25,6 +19,8 @@ if (SERVER)
set(SETUP_LOG_FILE "setup.py.server.log") set(SETUP_LOG_FILE "setup.py.server.log")
endif() endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/util.py
${CMAKE_CURRENT_BINARY_DIR}/util.py)
if (CLIENT) if (CLIENT)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in
${CMAKE_CURRENT_BINARY_DIR}/setup.py) ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
...@@ -47,6 +43,9 @@ if (SERVER) ...@@ -47,6 +43,9 @@ if (SERVER)
endif() endif()
endif() endif()
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/gen_version.py
${CMAKE_CURRENT_BINARY_DIR}/gen_version.py)
set (SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/*.so) set (SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/*.so)
message("python env: " ${py_env}) message("python env: " ${py_env})
...@@ -54,6 +53,7 @@ if (APP) ...@@ -54,6 +53,7 @@ if (APP)
add_custom_command( add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/ COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "app"
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES}) DEPENDS ${SERVING_APP_CORE} general_model_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
...@@ -65,6 +65,7 @@ add_custom_command( ...@@ -65,6 +65,7 @@ add_custom_command(
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/ COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so COMMAND ${CMAKE_COMMAND} -E copy ${SERVING_CLIENT_CORE} ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/serving_client.so
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} python_tag.py COMMAND env ${py_env} ${PYTHON_EXECUTABLE} python_tag.py
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "client"
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES}) DEPENDS ${SERVING_CLIENT_CORE} sdk_configure_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp) add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
...@@ -75,6 +76,7 @@ if (SERVER) ...@@ -75,6 +76,7 @@ if (SERVER)
add_custom_command( add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/ COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py "server"
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
...@@ -83,7 +85,8 @@ if (SERVER) ...@@ -83,7 +85,8 @@ if (SERVER)
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/ ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} paddle_serving_server_gpu/gen_cuda_version.py ${CUDA_VERSION_MAJOR} COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" ${CUDA_VERSION_MAJOR}
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
## Get Model ## Get Model
``` ```
python -m paddle_serving_app.package --get_model blazeface python -m paddle_serving_app.package --get_model blazeface
tar -xzvf blazeface.tar.gz tar -xf blazeface.tar.gz
``` ```
## RPC Service ## RPC Service
......
...@@ -16,6 +16,7 @@ from paddle_serving_client import Client ...@@ -16,6 +16,7 @@ from paddle_serving_client import Client
from paddle_serving_app.reader import * from paddle_serving_app.reader import *
import sys import sys
import numpy as np import numpy as np
from paddle_serving_app.reader import BlazeFacePostprocess
preprocess = Sequential([ preprocess = Sequential([
File2Image(), File2Image(),
......
...@@ -90,6 +90,7 @@ def single_func(idx, resource): ...@@ -90,6 +90,7 @@ def single_func(idx, resource):
image = base64.b64encode( image = base64.b64encode(
open("./image_data/n01440764/" + file_list[i]).read()) open("./image_data/n01440764/" + file_list[i]).read())
else: else:
image_path = "./image_data/n01440764/" + file_list[i]
image = base64.b64encode(open(image_path, "rb").read()).decode( image = base64.b64encode(open(image_path, "rb").read()).decode(
"utf-8") "utf-8")
req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]}) req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]})
......
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# limitations under the License. # limitations under the License.
import sys import sys
from paddle_serving_client import Client from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
if len(sys.argv) != 4: if len(sys.argv) != 4:
print("python resnet50_web_service.py model device port") print("python resnet50_web_service.py model device port")
......
...@@ -21,8 +21,13 @@ tar xf test_imgs.tar ...@@ -21,8 +21,13 @@ tar xf test_imgs.tar
### Start Service ### Start Service
``` ```
#choose one of cpu/gpu commands as following
#for cpu user
python -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu
#for gpu user
python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0 python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0
python ocr_web_server.py python ocr_web_server.py gpu
``` ```
### Client Prediction ### Client Prediction
...@@ -33,7 +38,11 @@ If you want a faster web service, please try Web Debugger Service ...@@ -33,7 +38,11 @@ If you want a faster web service, please try Web Debugger Service
## Web Debugger Service ## Web Debugger Service
``` ```
python ocr_debugger_server.py #choose one of cpu/gpu commands as following
#for cpu user
python ocr_debugger_server.py cpu
#for gpu user
python ocr_debugger_server.py gpu
``` ```
## Web Debugger Client Prediction ## Web Debugger Client Prediction
...@@ -54,15 +63,17 @@ Dataset: RCTW 500 sample images ...@@ -54,15 +63,17 @@ Dataset: RCTW 500 sample images
| Serving web service | 8.69 | 13.41 | 109.97 | 2.82 | 87.76 | 4.29 | 3.98 | 78.51 | 3.66 | 4.12 | 181.02 | 136.49 | 317.51 | | Serving web service | 8.69 | 13.41 | 109.97 | 2.82 | 87.76 | 4.29 | 3.98 | 78.51 | 3.66 | 4.12 | 181.02 | 136.49 | 317.51 |
| Serving Debugger web service | 8.73 | 16.42 | 115.27 | 2.93 | 20.63 | 3.97 | 4.48 | 13.84 | 3.60 | 6.91 | 49.45 | 147.33 | 196.78 | | Serving Debugger web service | 8.73 | 16.42 | 115.27 | 2.93 | 20.63 | 3.97 | 4.48 | 13.84 | 3.60 | 6.91 | 49.45 | 147.33 | 196.78 |
## Appendix: Det or Rec only ## Appendix: For Users who want to launch Det or Rec only
If you only want to detect text without recognizing it, or only recognize words from already-cropped images, we also provide standalone Det and Rec servers. If you only want to detect text without recognizing it, or only recognize words from already-cropped images, we also provide standalone Det and Rec servers.
### Det Server ### Det Server
``` ```
python det_web_server.py python det_web_server.py cpu #for cpu user
python det_web_server.py gpu #for gpu user
#or #or
python det_debugger_server.py python det_debugger_server.py cpu #for cpu user
python det_debugger_server.py gpu #for gpu user
``` ```
### Det Client ### Det Client
...@@ -75,9 +86,11 @@ python ocr_web_client.py ...@@ -75,9 +86,11 @@ python ocr_web_client.py
### Rec Server ### Rec Server
``` ```
python rec_web_server.py python rec_web_server.py cpu #for cpu user
python rec_web_server.py gpu #for gpu user
#or #or
python rec_debugger_server.py python rec_debugger_server.py cpu #for cpu user
python rec_debugger_server.py gpu #for gpu user
``` ```
### Rec Client ### Rec Client
......
...@@ -15,19 +15,18 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/ocr/test_imgs.t ...@@ -15,19 +15,18 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/ocr/test_imgs.t
tar xf test_imgs.tar tar xf test_imgs.tar
``` ```
### 客户端预测
```
python ocr_rpc_client.py
```
## Web Service服务 ## Web Service服务
### 启动服务 ### 启动服务
``` ```
#根据CPU/GPU设备选择一种启动方式
#for cpu user
python -m paddle_serving_server.serve --model ocr_det_model --port 9293
python ocr_web_server.py cpu
#for gpu user
python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0 python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 9293 --gpu_id 0
python ocr_web_server.py python ocr_web_server.py gpu
``` ```
### 启动客户端 ### 启动客户端
...@@ -38,7 +37,11 @@ python ocr_web_client.py ...@@ -38,7 +37,11 @@ python ocr_web_client.py
如果用户需要更快的执行速度,请尝试Debugger版Web服务 如果用户需要更快的执行速度,请尝试Debugger版Web服务
## 启动Debugger版Web服务 ## 启动Debugger版Web服务
``` ```
python ocr_debugger_server.py #根据CPU/GPU设备选择一种启动方式
#for cpu user
python ocr_debugger_server.py cpu
#for gpu user
python ocr_debugger_server.py gpu
``` ```
## 启动客户端 ## 启动客户端
...@@ -66,9 +69,11 @@ GPU: Nvidia Tesla V100单卡 ...@@ -66,9 +69,11 @@ GPU: Nvidia Tesla V100单卡
### 启动检测服务 ### 启动检测服务
``` ```
python det_web_server.py python det_web_server.py cpu #for cpu user
python det_web_server.py gpu #for gpu user
#or #or
python det_debugger_server.py python det_debugger_server.py cpu #for cpu user
python det_debugger_server.py gpu #for gpu user
``` ```
### 检测服务客户端 ### 检测服务客户端
...@@ -81,9 +86,11 @@ python ocr_web_client.py ...@@ -81,9 +86,11 @@ python ocr_web_client.py
### 启动识别服务 ### 启动识别服务
``` ```
python rec_web_server.py python rec_web_server.py cpu #for cpu user
python rec_web_server.py gpu #for gpu user
#or #or
python rec_debugger_server.py python rec_debugger_server.py cpu #for cpu user
python rec_debugger_server.py gpu #for gpu user
``` ```
### 识别服务客户端 ### 识别服务客户端
......
...@@ -21,7 +21,10 @@ from paddle_serving_client import Client ...@@ -21,7 +21,10 @@ from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, ResizeByFactor from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes
from paddle_serving_server_gpu.web_service import WebService if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time import time
import re import re
import base64 import base64
...@@ -64,8 +67,12 @@ class OCRService(WebService): ...@@ -64,8 +67,12 @@ class OCRService(WebService):
ocr_service = OCRService(name="ocr") ocr_service = OCRService(name="ocr")
ocr_service.load_model_config("ocr_det_model") ocr_service.load_model_config("ocr_det_model")
ocr_service.set_gpus("0") if sys.argv[1] == 'gpu':
ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) ocr_service.set_gpus("0")
ocr_service.prepare_server(
workdir="workdir", port=9292, device="gpu", gpuid=0)
elif sys.argv[1] == 'cpu':
ocr_service.prepare_server(workdir="workdir", port=9292)
ocr_service.init_det() ocr_service.init_det()
ocr_service.run_debugger_service() ocr_service.run_debugger_service()
ocr_service.run_web_service() ocr_service.run_web_service()
...@@ -21,7 +21,10 @@ from paddle_serving_client import Client ...@@ -21,7 +21,10 @@ from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, ResizeByFactor from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes
from paddle_serving_server_gpu.web_service import WebService if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time import time
import re import re
import base64 import base64
...@@ -65,8 +68,12 @@ class OCRService(WebService): ...@@ -65,8 +68,12 @@ class OCRService(WebService):
ocr_service = OCRService(name="ocr") ocr_service = OCRService(name="ocr")
ocr_service.load_model_config("ocr_det_model") ocr_service.load_model_config("ocr_det_model")
ocr_service.set_gpus("0") if sys.argv[1] == 'gpu':
ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) ocr_service.set_gpus("0")
ocr_service.prepare_server(
workdir="workdir", port=9292, device="gpu", gpuid=0)
elif sys.argv[1] == 'cpu':
ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
ocr_service.init_det() ocr_service.init_det()
ocr_service.run_rpc_service() ocr_service.run_rpc_service()
ocr_service.run_web_service() ocr_service.run_web_service()
...@@ -22,7 +22,10 @@ from paddle_serving_client import Client ...@@ -22,7 +22,10 @@ from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
from paddle_serving_server_gpu.web_service import WebService if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
from paddle_serving_app.local_predict import Debugger from paddle_serving_app.local_predict import Debugger
import time import time
import re import re
...@@ -37,8 +40,12 @@ class OCRService(WebService): ...@@ -37,8 +40,12 @@ class OCRService(WebService):
(2, 0, 1)) (2, 0, 1))
]) ])
self.det_client = Debugger() self.det_client = Debugger()
self.det_client.load_model_config( if sys.argv[1] == 'gpu':
det_model_config, gpu=True, profile=False) self.det_client.load_model_config(
det_model_config, gpu=True, profile=False)
elif sys.argv[1] == 'cpu':
self.det_client.load_model_config(
det_model_config, gpu=False, profile=False)
self.ocr_reader = OCRReader() self.ocr_reader = OCRReader()
def preprocess(self, feed=[], fetch=[]): def preprocess(self, feed=[], fetch=[]):
...@@ -99,5 +106,8 @@ ocr_service = OCRService(name="ocr") ...@@ -99,5 +106,8 @@ ocr_service = OCRService(name="ocr")
ocr_service.load_model_config("ocr_rec_model") ocr_service.load_model_config("ocr_rec_model")
ocr_service.prepare_server(workdir="workdir", port=9292) ocr_service.prepare_server(workdir="workdir", port=9292)
ocr_service.init_det_debugger(det_model_config="ocr_det_model") ocr_service.init_det_debugger(det_model_config="ocr_det_model")
ocr_service.run_debugger_service(gpu=True) if sys.argv[1] == 'gpu':
ocr_service.run_debugger_service(gpu=True)
elif sys.argv[1] == 'cpu':
ocr_service.run_debugger_service()
ocr_service.run_web_service() ocr_service.run_web_service()
...@@ -22,7 +22,10 @@ from paddle_serving_client import Client ...@@ -22,7 +22,10 @@ from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
from paddle_serving_server_gpu.web_service import WebService if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time import time
import re import re
import base64 import base64
...@@ -90,8 +93,12 @@ class OCRService(WebService): ...@@ -90,8 +93,12 @@ class OCRService(WebService):
ocr_service = OCRService(name="ocr") ocr_service = OCRService(name="ocr")
ocr_service.load_model_config("ocr_rec_model") ocr_service.load_model_config("ocr_rec_model")
ocr_service.set_gpus("0") if sys.argv[1] == 'gpu':
ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) ocr_service.set_gpus("0")
ocr_service.prepare_server(
workdir="workdir", port=9292, device="gpu", gpuid=0)
elif sys.argv[1] == 'cpu':
ocr_service.prepare_server(workdir="workdir", port=9292)
ocr_service.init_det_client( ocr_service.init_det_client(
det_port=9293, det_port=9293,
det_client_config="ocr_det_client/serving_client_conf.prototxt") det_client_config="ocr_det_client/serving_client_conf.prototxt")
......
...@@ -22,7 +22,10 @@ from paddle_serving_client import Client ...@@ -22,7 +22,10 @@ from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor from paddle_serving_app.reader import Sequential, URL2Image, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
from paddle_serving_server_gpu.web_service import WebService if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
import time import time
import re import re
import base64 import base64
...@@ -64,8 +67,12 @@ class OCRService(WebService): ...@@ -64,8 +67,12 @@ class OCRService(WebService):
ocr_service = OCRService(name="ocr") ocr_service = OCRService(name="ocr")
ocr_service.load_model_config("ocr_rec_model") ocr_service.load_model_config("ocr_rec_model")
ocr_service.set_gpus("0")
ocr_service.init_rec() ocr_service.init_rec()
ocr_service.prepare_server(workdir="workdir", port=9292, device="gpu", gpuid=0) if sys.argv[1] == 'gpu':
ocr_service.set_gpus("0")
ocr_service.prepare_server(
workdir="workdir", port=9292, device="gpu", gpuid=0)
elif sys.argv[1] == 'cpu':
ocr_service.prepare_server(workdir="workdir", port=9292, device="cpu")
ocr_service.run_rpc_service() ocr_service.run_rpc_service()
ocr_service.run_web_service() ocr_service.run_web_service()
# IMDB model ensemble example
## Get Model
```
sh get_data.sh
```
## Start Service
```
python -m paddle_serving_server_gpu.serve --model imdb_cnn_model --port 9292 &> cnn.log &
python -m paddle_serving_server_gpu.serve --model imdb_bow_model --port 9393 &> bow.log &
python test_pipeline_server.py &>pipeline.log &
```
## Start Client
```
python test_pipeline_client.py
```
## HTTP Test
```
curl -X POST -k http://localhost:9999/prediction -d '{"key": ["words"], "value": ["i am very sad | 0"]}'
```
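The same request can also be issued from Python. The sketch below assumes the pipeline server started above is listening on HTTP port 9999, as in the curl command.
```
# Python equivalent of the curl request above (illustrative sketch).
import json
import requests

data = {"key": ["words"], "value": ["i am very sad | 0"]}
resp = requests.post("http://127.0.0.1:9999/prediction", data=json.dumps(data))
print(resp.json())
```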
port: 18080 rpc_port: 18085
worker_num: 1 worker_num: 4
build_dag_each_worker: false build_dag_each_worker: false
http_port: 9999
dag: dag:
is_thread_op: true is_thread_op: false
client_type: brpc client_type: brpc
retry: 1 retry: 1
use_profile: false use_profile: false
tracer:
interval_s: 10
...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from paddle_serving_client.pipeline import PipelineClient from paddle_serving_server.pipeline import PipelineClient
import numpy as np import numpy as np
client = PipelineClient() client = PipelineClient()
...@@ -20,12 +20,16 @@ client.connect(['127.0.0.1:18080']) ...@@ -20,12 +20,16 @@ client.connect(['127.0.0.1:18080'])
words = 'i am very sad | 0' words = 'i am very sad | 0'
futures = [] futures = []
for i in range(100): for i in range(4):
futures.append( futures.append(
client.predict( client.predict(
feed_dict={"words": words}, fetch=["prediction"], asyn=True)) feed_dict={"words": words},
fetch=["prediction"],
asyn=True,
profile=False))
for f in futures: for f in futures:
res = f.result() res = f.result()
if res["ecode"] != 0: if res["ecode"] != 0:
print("predict failed: {}".format(res)) print("predict failed: {}".format(res))
print(res)
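For comparison, a synchronous call can be made by simply dropping `asyn=True`. This is a minimal sketch that reuses the same endpoint and feed/fetch names as the script above.
```
# Synchronous variant of the asynchronous loop above (sketch).
from paddle_serving_server.pipeline import PipelineClient

client = PipelineClient()
client.connect(['127.0.0.1:18080'])
res = client.predict(feed_dict={"words": "i am very sad | 0"}, fetch=["prediction"])
print(res)
```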
...@@ -12,18 +12,21 @@ ...@@ -12,18 +12,21 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server.pipeline import PipelineServer from paddle_serving_server.pipeline import PipelineServer
from paddle_serving_server.pipeline.proto import pipeline_service_pb2 from paddle_serving_server.pipeline.proto import pipeline_service_pb2
from paddle_serving_server.pipeline.channel import ChannelDataEcode from paddle_serving_server.pipeline.channel import ChannelDataEcode
import numpy as np import numpy as np
import logging
from paddle_serving_app.reader import IMDBDataset from paddle_serving_app.reader import IMDBDataset
import logging
logging.basicConfig(level=logging.DEBUG)
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger()
user_handler = logging.StreamHandler()
user_handler.setLevel(logging.INFO)
user_handler.setFormatter(
logging.Formatter(
"%(levelname)s %(asctime)s [%(filename)s:%(lineno)d] %(message)s"))
_LOGGER.addHandler(user_handler)
class ImdbRequestOp(RequestOp): class ImdbRequestOp(RequestOp):
...@@ -76,7 +79,9 @@ bow_op = Op(name="bow", ...@@ -76,7 +79,9 @@ bow_op = Op(name="bow",
client_config="imdb_bow_client_conf/serving_client_conf.prototxt", client_config="imdb_bow_client_conf/serving_client_conf.prototxt",
concurrency=1, concurrency=1,
timeout=-1, timeout=-1,
retry=1) retry=1,
batch_size=3,
auto_batching_timeout=1000)
cnn_op = Op(name="cnn", cnn_op = Op(name="cnn",
input_ops=[read_op], input_ops=[read_op],
server_endpoints=["127.0.0.1:9292"], server_endpoints=["127.0.0.1:9292"],
...@@ -84,13 +89,17 @@ cnn_op = Op(name="cnn", ...@@ -84,13 +89,17 @@ cnn_op = Op(name="cnn",
client_config="imdb_cnn_client_conf/serving_client_conf.prototxt", client_config="imdb_cnn_client_conf/serving_client_conf.prototxt",
concurrency=1, concurrency=1,
timeout=-1, timeout=-1,
retry=1) retry=1,
batch_size=1,
auto_batching_timeout=None)
combine_op = CombineOp( combine_op = CombineOp(
name="combine", name="combine",
input_ops=[bow_op, cnn_op], input_ops=[bow_op, cnn_op],
concurrency=5, concurrency=1,
timeout=-1, timeout=-1,
retry=1) retry=1,
batch_size=2,
auto_batching_timeout=None)
# fetch output of bow_op # fetch output of bow_op
# response_op = ImdbResponseOp(input_ops=[bow_op]) # response_op = ImdbResponseOp(input_ops=[bow_op])
......
# OCR Pipeline WebService
(English|[简体中文](./README_CN.md))
This document takes OCR as an example to show how to use Pipeline WebService to launch a service that chains multiple models together.
## Get Model
```
python -m paddle_serving_app.package --get_model ocr_rec
tar -xzvf ocr_rec.tar.gz
python -m paddle_serving_app.package --get_model ocr_det
tar -xzvf ocr_det.tar.gz
```
## Get Dataset (Optional)
```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/ocr/test_imgs.tar
tar xf test_imgs.tar
```
## Start Service
```
python web_service.py &>log.txt &
```
## Test
```
python pipeline_http_client.py
```
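`pipeline_http_client.py` essentially posts one base64-encoded image per request. A condensed sketch, assuming the test images sit under `imgs/` as in the bundled client:
```
# Condensed sketch of pipeline_http_client.py: post one base64-encoded image.
import base64
import json
import os
import requests

test_img_dir = "imgs/"  # image directory shipped with the example
img_file = os.listdir(test_img_dir)[0]
with open(os.path.join(test_img_dir, img_file), 'rb') as f:
    image = base64.b64encode(f.read()).decode('utf8')
data = {"key": ["image"], "value": [image]}
resp = requests.post("http://127.0.0.1:9999/ocr/prediction", data=json.dumps(data))
print(resp.json())
```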
<!--
## More (PipelineServing)
You can choose one of the following versions to start the service.
### Remote Service Version
```
python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 12000 --gpu_id 0 &> det.log &
python -m paddle_serving_server_gpu.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python remote_service_pipeline_server.py &>pipeline.log &
```
### Local Service Version
```
python local_service_pipeline_server.py &>pipeline.log &
```
### Hybrid Service Version
```
python -m paddle_serving_server_gpu.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python hybrid_service_pipeline_server.py &>pipeline.log &
```
## Client Prediction
### RPC
```
python pipeline_rpc_client.py
```
### HTTP
```
python pipeline_http_client.py
```
-->
# OCR Pipeline WebService
([English](./README.md)|简体中文)
This document takes OCR as an example to show how to use Pipeline WebService to launch a service that chains multiple models together.
## Get Model
```
python -m paddle_serving_app.package --get_model ocr_rec
tar -xzvf ocr_rec.tar.gz
python -m paddle_serving_app.package --get_model ocr_det
tar -xzvf ocr_det.tar.gz
```
## Get Dataset (Optional)
```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/ocr/test_imgs.tar
tar xf test_imgs.tar
```
## Start WebService
```
python web_service.py &>log.txt &
```
## Test
```
python pipeline_http_client.py
```
<!--
## More (PipelineServing)
You can choose any of the following versions to start the service.
### Remote Service Version
```
python -m paddle_serving_server.serve --model ocr_det_model --port 12000 --gpu_id 0 &> det.log &
python -m paddle_serving_server.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python remote_service_pipeline_server.py &>pipeline.log &
```
### Local Service Version
```
python local_service_pipeline_server.py &>pipeline.log &
```
### Hybrid Service Version
```
python -m paddle_serving_server_gpu.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python hybrid_service_pipeline_server.py &>pipeline.log &
```
## Client Prediction
### RPC
```
python pipeline_rpc_client.py
```
### HTTP
```
python pipeline_http_client.py
```
-->
rpc_port: 18080
worker_num: 4
build_dag_each_worker: false
http_port: 9999
dag:
is_thread_op: false
client_type: brpc
retry: 1
use_profile: false
op:
det:
concurrency: 2
local_service_conf:
model_config: ocr_det_model
devices: "0"
rec:
concurrency: 1
timeout: -1
retry: 1
local_service_conf:
model_config: ocr_rec_model
devices: "0"
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server_gpu.pipeline import PipelineServer
from paddle_serving_server_gpu.pipeline.proto import pipeline_service_pb2
from paddle_serving_server_gpu.pipeline.channel import ChannelDataEcode
from paddle_serving_server_gpu.pipeline import LocalRpcServiceHandler
import numpy as np
import cv2
import time
import base64
import json
from paddle_serving_app.reader import OCRReader
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
import time
import re
import base64
import logging
_LOGGER = logging.getLogger()
class DetOp(Op):
def init_op(self):
self.det_preprocess = Sequential([
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img}
def postprocess(self, input_dicts, fetch_dict):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
return out_dict
class RecOp(Op):
def init_op(self):
self.ocr_reader = OCRReader()
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
dt_boxes = self.sorted_boxes(dt_boxes)
feed_list = []
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = self.get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
def postprocess(self, input_dicts, fetch_dict):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
read_op = RequestOp()
det_op = DetOp(
name="det",
input_ops=[read_op],
local_rpc_service_handler=LocalRpcServiceHandler(
model_config="ocr_det_model",
workdir="det_workdir", # defalut: "workdir"
thread_num=2, # defalut: 2
devices="0", # gpu0. defalut: "" (cpu)
mem_optim=True, # defalut: True
ir_optim=False, # defalut: False
available_port_generator=None), # defalut: None
concurrency=1)
rec_op = RecOp(
name="rec",
input_ops=[det_op],
server_endpoints=["127.0.0.1:12001"],
fetch_list=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"],
client_config="ocr_rec_client/serving_client_conf.prototxt",
concurrency=1)
response_op = ResponseOp(input_ops=[rec_op])
server = PipelineServer("ocr")
server.set_response_op(response_op)
server.prepare_server('config.yml')
server.run_server()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server_gpu.pipeline import PipelineServer
from paddle_serving_server_gpu.pipeline.proto import pipeline_service_pb2
from paddle_serving_server_gpu.pipeline.channel import ChannelDataEcode
from paddle_serving_server_gpu.pipeline import LocalRpcServiceHandler
import numpy as np
import cv2
import time
import base64
import json
from paddle_serving_app.reader import OCRReader
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
import time
import re
import base64
import logging
_LOGGER = logging.getLogger()
class DetOp(Op):
def init_op(self):
self.det_preprocess = Sequential([
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img}
def postprocess(self, input_dicts, fetch_dict):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
return out_dict
class RecOp(Op):
def init_op(self):
self.ocr_reader = OCRReader()
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
dt_boxes = self.sorted_boxes(dt_boxes)
feed_list = []
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = self.get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
def postprocess(self, input_dicts, fetch_dict):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
read_op = RequestOp()
det_op = DetOp(
name="det",
input_ops=[read_op],
local_rpc_service_handler=LocalRpcServiceHandler(
model_config="ocr_det_model",
workdir="det_workdir", # defalut: "workdir"
thread_num=2, # defalut: 2
devices="0", # gpu0. defalut: "" (cpu)
mem_optim=True, # defalut: True
ir_optim=False, # defalut: False
available_port_generator=None), # defalut: None
concurrency=1)
rec_op = RecOp(
name="rec",
input_ops=[det_op],
local_rpc_service_handler=LocalRpcServiceHandler(
model_config="ocr_rec_model"),
concurrency=1)
response_op = ResponseOp(input_ops=[rec_op])
server = PipelineServer("ocr")
server.set_response_op(response_op)
server.prepare_server('config.yml')
server.run_server()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server_gpu.pipeline import PipelineClient
import numpy as np
import requests
import json
import cv2
import base64
import os
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
url = "http://127.0.0.1:9999/ocr/prediction"
test_img_dir = "imgs/"
for img_file in os.listdir(test_img_dir):
with open(os.path.join(test_img_dir, img_file), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
for i in range(4):
data = {"key": ["image"], "value": [image]}
r = requests.post(url=url, data=json.dumps(data))
print(r.json())
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server_gpu.pipeline import PipelineClient
import numpy as np
import requests
import json
import cv2
import base64
import os
client = PipelineClient()
client.connect(['127.0.0.1:18080'])
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
test_img_dir = "imgs/"
for img_file in os.listdir(test_img_dir):
with open(os.path.join(test_img_dir, img_file), 'rb') as file:
image_data = file.read()
image = cv2_to_base64(image_data)
for i in range(4):
ret = client.predict(feed_dict={"image": image}, fetch=["res"])
print(ret)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server_gpu.pipeline import PipelineServer
from paddle_serving_server_gpu.pipeline.proto import pipeline_service_pb2
from paddle_serving_server_gpu.pipeline.channel import ChannelDataEcode
import numpy as np
import cv2
import time
import base64
import json
from paddle_serving_app.reader import OCRReader
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
import time
import re
import base64
import logging
_LOGGER = logging.getLogger()
class DetOp(Op):
def init_op(self):
self.det_preprocess = Sequential([
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img}
def postprocess(self, input_dicts, fetch_dict):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
return out_dict
class RecOp(Op):
def init_op(self):
self.ocr_reader = OCRReader()
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
dt_boxes = self.sorted_boxes(dt_boxes)
feed_list = []
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = self.get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
def postprocess(self, input_dicts, fetch_dict):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
read_op = RequestOp()
det_op = DetOp(
name="det",
input_ops=[read_op],
server_endpoints=["127.0.0.1:12000"],
fetch_list=["concat_1.tmp_0"],
client_config="ocr_det_client/serving_client_conf.prototxt",
concurrency=1)
rec_op = RecOp(
name="rec",
input_ops=[det_op],
server_endpoints=["127.0.0.1:12001"],
fetch_list=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"],
client_config="ocr_rec_client/serving_client_conf.prototxt",
concurrency=1)
response_op = ResponseOp(input_ops=[rec_op])
server = PipelineServer("ocr")
server.set_response_op(response_op)
server.prepare_server('config.yml')
server.run_server()
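The pipeline server scripts above differ mainly in how each Op reaches its model: an Op built with `server_endpoints`/`client_config` forwards requests to a serving instance that is already running, while an Op built with `local_rpc_service_handler` lets the pipeline server launch the model itself. A condensed sketch of the two constructions, reusing the arguments that appear above; in the actual scripts the Ops are subclasses (`DetOp`, `RecOp`) that add pre/post-processing, but the wiring is the same.
```
# Sketch: the two ways an Op is wired to a model in the scripts above.
from paddle_serving_server_gpu.pipeline import Op, LocalRpcServiceHandler

# 1) Remote: forward to a serving instance already listening on port 12001.
rec_remote = Op(name="rec",
                input_ops=[],  # fill in with the upstream op, e.g. [det_op]
                server_endpoints=["127.0.0.1:12001"],
                fetch_list=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"],
                client_config="ocr_rec_client/serving_client_conf.prototxt",
                concurrency=1)

# 2) Local: let the pipeline server start the model from its config directory.
rec_local = Op(name="rec",
               input_ops=[],
               local_rpc_service_handler=LocalRpcServiceHandler(
                   model_config="ocr_rec_model"),
               concurrency=1)
```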
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_serving_server_gpu.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging
import numpy as np
import cv2
import base64
from paddle_serving_app.reader import OCRReader
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
_LOGGER = logging.getLogger()
class DetOp(Op):
def init_op(self):
self.det_preprocess = Sequential([
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img}
def postprocess(self, input_dicts, fetch_dict):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
return out_dict
class RecOp(Op):
def init_op(self):
self.ocr_reader = OCRReader()
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
dt_boxes = self.sorted_boxes(dt_boxes)
feed_list = []
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = self.get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
def postprocess(self, input_dicts, fetch_dict):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
class OcrService(WebService):
def get_pipeline_response(self, read_op):
det_op = DetOp(name="det", input_ops=[read_op])
rec_op = RecOp(name="rec", input_ops=[det_op])
return rec_op
uci_service = OcrService(name="ocr")
uci_service.prepare_pipeline_config("config.yml")
uci_service.run_service()
# Simple Pipeline WebService
This document takes the UCI housing service as an example to introduce how to use Pipeline WebService.
## Get model
```
sh get_data.sh
```
## Start server
```
python web_service.py &>log.txt &
```
## HTTP Test
```
curl -X POST -k http://localhost:18080/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
```
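The same request from Python, as a rough sketch:
```
# Python equivalent of the curl request above (sketch).
import json
import requests

x = ("0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, "
     "-0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332")
data = {"key": ["x"], "value": [x]}
resp = requests.post("http://localhost:18080/uci/prediction", data=json.dumps(data))
print(resp.json())
```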
# Simple Pipeline WebService
This document takes the UCI housing service as an example to introduce how to use Pipeline WebService.
## Get Model
```
sh get_data.sh
```
## Start Server
```
python web_service.py &>log.txt &
```
## Test
```
curl -X POST -k http://localhost:18080/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
```
worker_num: 4
http_port: 18080
dag:
is_thread_op: false
op:
uci:
local_service_conf:
model_config: uci_housing_model
devices: "" # "0,1"
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
tar -xzf uci_housing.tar.gz
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_serving_server_gpu.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging
import numpy as np
_LOGGER = logging.getLogger()
class UciOp(Op):
def init_op(self):
self.separator = ","
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
_LOGGER.info(input_dict)
x_value = input_dict["x"]
if isinstance(x_value, (str, unicode)):
input_dict["x"] = np.array(
[float(x.strip()) for x in x_value.split(self.separator)])
return input_dict
def postprocess(self, input_dicts, fetch_dict):
# _LOGGER.info(fetch_dict)
fetch_dict["price"] = str(fetch_dict["price"][0][0])
return fetch_dict
class UciService(WebService):
def get_pipeline_response(self, read_op):
uci_op = UciOp(name="uci", input_ops=[read_op])
return uci_op
uci_service = UciService(name="uci")
uci_service.prepare_pipeline_config("config.yml")
uci_service.run_service()
...@@ -30,7 +30,6 @@ client.load_client_config("yolov4_client/serving_client_conf.prototxt") ...@@ -30,7 +30,6 @@ client.load_client_config("yolov4_client/serving_client_conf.prototxt")
client.connect(['127.0.0.1:9393']) client.connect(['127.0.0.1:9393'])
im = preprocess(sys.argv[1]) im = preprocess(sys.argv[1])
print(im.shape)
fetch_map = client.predict( fetch_map = client.predict(
feed={ feed={
"image": im, "image": im,
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import re
import os
import subprocess
def update_info(file_name, feature, info):
new_str = ""
with open(file_name, "r") as f:
for line in f.readlines():
if re.match(feature, line):
if isinstance(info, str):
line = feature + " = \"" + info.strip() + "\"\n"
else:
line = feature + " = \"" + info.decode('utf-8').strip(
) + "\"\n"
new_str = new_str + line
with open(file_name, "w") as f:
f.write(new_str)
if len(sys.argv) > 2:
update_info("paddle_serving_server_gpu/version.py", "cuda_version",
sys.argv[2])
path = "paddle_serving_" + sys.argv[1]
commit_id = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
update_info(path + "/version.py", "commit_id", commit_id)
...@@ -13,10 +13,9 @@ ...@@ -13,10 +13,9 @@
# limitations under the License. # limitations under the License.
from .audio_reader import AudioFeatureOp from .audio_reader import AudioFeatureOp
from .chinese_bert_reader import ChineseBertReader from .chinese_bert_reader import ChineseBertReader
from .frame_reader import FrameExtractOp from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, Base64ToImage
from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize
from .image_reader import CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, ResizeByFactor from .image_reader import CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, ResizeByFactor
from .image_reader import RCNNPostprocess, SegPostprocess, PadStride from .image_reader import RCNNPostprocess, SegPostprocess, PadStride, BlazeFacePostprocess
from .image_reader import DBPostProcess, FilterBoxes from .image_reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
from .lac_reader import LACReader from .lac_reader import LACReader
from .senta_reader import SentaReader from .senta_reader import SentaReader
...@@ -317,7 +317,7 @@ class RCNNPostprocess(object): ...@@ -317,7 +317,7 @@ class RCNNPostprocess(object):
self.clip_bbox([xmin, ymin, xmax, ymax]) self.clip_bbox([xmin, ymin, xmax, ymax])
w = xmax - xmin w = xmax - xmin
h = ymax - ymin h = ymax - ymin
im_shape = t['im_shape'][0][i].tolist() im_shape = t['im_shape'].tolist()
im_height, im_width = int(im_shape[0]), int(im_shape[1]) im_height, im_width = int(im_shape[0]), int(im_shape[1])
xmin *= im_width xmin *= im_width
ymin *= im_height ymin *= im_height
...@@ -420,7 +420,7 @@ class RCNNPostprocess(object): ...@@ -420,7 +420,7 @@ class RCNNPostprocess(object):
for key in image_with_bbox: for key in image_with_bbox:
if key == "image": if key == "image":
continue continue
if ".lod" in key: if ".lod" in key or "im_shape" in key:
continue continue
fetch_name = key fetch_name = key
bbox_result = self._get_bbox_result(image_with_bbox, fetch_name, bbox_result = self._get_bbox_result(image_with_bbox, fetch_name,
......
...@@ -12,4 +12,5 @@ ...@@ -12,4 +12,5 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" Paddle Serving App version string """ """ Paddle Serving App version string """
serving_app_version = "0.1.2" serving_app_version = "0.0.0"
commit_id = ""
...@@ -233,7 +233,7 @@ class Client(object): ...@@ -233,7 +233,7 @@ class Client(object):
# key)) # key))
pass pass
def predict(self, feed=None, fetch=None, need_variant_tag=False): def predict(self, feed=None, fetch=None, need_variant_tag=False, log_id=0):
self.profile_.record('py_prepro_0') self.profile_.record('py_prepro_0')
if feed is None or fetch is None: if feed is None or fetch is None:
...@@ -319,12 +319,12 @@ class Client(object): ...@@ -319,12 +319,12 @@ class Client(object):
res = self.client_handle_.numpy_predict( res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape, int_slot_batch, float_slot_batch, float_feed_names, float_shape, int_slot_batch,
int_feed_names, int_shape, fetch_names, result_batch_handle, int_feed_names, int_shape, fetch_names, result_batch_handle,
self.pid) self.pid, log_id)
elif self.has_numpy_input == False: elif self.has_numpy_input == False:
res = self.client_handle_.batch_predict( res = self.client_handle_.batch_predict(
float_slot_batch, float_feed_names, float_shape, int_slot_batch, float_slot_batch, float_feed_names, float_shape, int_slot_batch,
int_feed_names, int_shape, fetch_names, result_batch_handle, int_feed_names, int_shape, fetch_names, result_batch_handle,
self.pid) self.pid, log_id)
else: else:
raise ValueError( raise ValueError(
"Please make sure the inputs are all in list type or all in numpy.array type" "Please make sure the inputs are all in list type or all in numpy.array type"
...@@ -347,6 +347,11 @@ class Client(object): ...@@ -347,6 +347,11 @@ class Client(object):
result_map[name] = result_batch_handle.get_int64_by_name( result_map[name] = result_batch_handle.get_int64_by_name(
mi, name) mi, name)
shape = result_batch_handle.get_shape(mi, name) shape = result_batch_handle.get_shape(mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
result_map[name].shape = shape result_map[name].shape = shape
if name in self.lod_tensor_set: if name in self.lod_tensor_set:
result_map["{}.lod".format( result_map["{}.lod".format(
...@@ -354,6 +359,11 @@ class Client(object): ...@@ -354,6 +359,11 @@ class Client(object):
elif self.fetch_names_to_type_[name] == float32_type: elif self.fetch_names_to_type_[name] == float32_type:
result_map[name] = result_batch_handle.get_float_by_name( result_map[name] = result_batch_handle.get_float_by_name(
mi, name) mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name) shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape result_map[name].shape = shape
if name in self.lod_tensor_set: if name in self.lod_tensor_set:
...@@ -364,6 +374,11 @@ class Client(object): ...@@ -364,6 +374,11 @@ class Client(object):
# result_map[name] will be py::array(numpy array) # result_map[name] will be py::array(numpy array)
result_map[name] = result_batch_handle.get_int32_by_name( result_map[name] = result_batch_handle.get_int32_by_name(
mi, name) mi, name)
if result_map[name].size == 0:
raise ValueError(
"Failed to fetch, maybe the type of [{}]"
" is wrong, please check the model file".format(
name))
shape = result_batch_handle.get_shape(mi, name) shape = result_batch_handle.get_shape(mi, name)
result_map[name].shape = shape result_map[name].shape = shape
if name in self.lod_tensor_set: if name in self.lod_tensor_set:
...@@ -466,10 +481,11 @@ class MultiLangClient(object): ...@@ -466,10 +481,11 @@ class MultiLangClient(object):
if var.is_lod_tensor: if var.is_lod_tensor:
self.lod_tensor_set_.add(var.alias_name) self.lod_tensor_set_.add(var.alias_name)
def _pack_inference_request(self, feed, fetch, is_python): def _pack_inference_request(self, feed, fetch, is_python, log_id):
req = multi_lang_general_model_service_pb2.InferenceRequest() req = multi_lang_general_model_service_pb2.InferenceRequest()
req.fetch_var_names.extend(fetch) req.fetch_var_names.extend(fetch)
req.is_python = is_python req.is_python = is_python
req.log_id = log_id
feed_batch = None feed_batch = None
if isinstance(feed, dict): if isinstance(feed, dict):
feed_batch = [feed] feed_batch = [feed]
...@@ -602,12 +618,13 @@ class MultiLangClient(object): ...@@ -602,12 +618,13 @@ class MultiLangClient(object):
fetch, fetch,
need_variant_tag=False, need_variant_tag=False,
asyn=False, asyn=False,
is_python=True): is_python=True,
log_id=0):
if not asyn: if not asyn:
try: try:
self.profile_.record('py_prepro_0') self.profile_.record('py_prepro_0')
req = self._pack_inference_request( req = self._pack_inference_request(
feed, fetch, is_python=is_python) feed, fetch, is_python=is_python, log_id=log_id)
self.profile_.record('py_prepro_1') self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0') self.profile_.record('py_client_infer_0')
...@@ -626,7 +643,8 @@ class MultiLangClient(object): ...@@ -626,7 +643,8 @@ class MultiLangClient(object):
except grpc.RpcError as e: except grpc.RpcError as e:
return {"serving_status_code": e.code()} return {"serving_status_code": e.code()}
else: else:
req = self._pack_inference_request(feed, fetch, is_python=is_python) req = self._pack_inference_request(
feed, fetch, is_python=is_python, log_id=log_id)
call_future = self.stub_.Inference.future( call_future = self.stub_.Inference.future(
req, timeout=self.rpc_timeout_s_) req, timeout=self.rpc_timeout_s_)
return MultiLangPredictFuture( return MultiLangPredictFuture(
......
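The change above threads a `log_id` through `predict` (and the gRPC request) so that an individual request can be correlated with server-side logs. A minimal usage sketch, assuming a UCI-housing style client config and endpoint (both illustrative):
```
# Sketch: attach a request-scoped log_id to a prediction call.
import numpy as np
from paddle_serving_client import Client

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")  # illustrative path
client.connect(["127.0.0.1:9292"])  # illustrative endpoint
x = np.array([0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
              -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332])
# log_id defaults to 0; any integer set here is forwarded with the request.
fetch_map = client.predict(feed={"x": x}, fetch=["price"], log_id=1234)
print(fetch_map)
```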
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Usage:
Convert a paddle inference model into a model file that can be used for Paddle Serving.
Example:
python -m paddle_serving_client.convert --dirname ./inference_model
"""
import argparse
from .io import inference_model_to_serving
def parse_args(): # pylint: disable=doc-string-missing
parser = argparse.ArgumentParser("convert")
parser.add_argument(
"--dirname",
type=str,
required=True,
help='Path of saved model files. Program file and parameter files are saved in this directory.'
)
parser.add_argument(
"--serving_server",
type=str,
default="serving_server",
help='The path of model files and configuration files for server. Default: "serving_server".'
)
parser.add_argument(
"--serving_client",
type=str,
default="serving_client",
help='The path of configuration files for client. Default: "serving_client".'
)
parser.add_argument(
"--model_filename",
type=str,
default=None,
help='The name of file to load the inference program. If it is None, the default filename __model__ will be used'
)
parser.add_argument(
"--params_filename",
type=str,
default=None,
help='The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.'
)
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
inference_model_to_serving(
args.dirname,
serving_server=args.serving_server,
serving_client=args.serving_client,
model_filename=args.model_filename,
params_filename=args.params_filename)
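The same conversion can be done directly from Python through the helper that this entry point wraps; the argument meanings mirror the command-line flags above (paths are illustrative):
```
# Sketch: convert a saved inference model without going through the CLI.
from paddle_serving_client.io import inference_model_to_serving

inference_model_to_serving(
    "./inference_model",              # --dirname
    serving_server="serving_server",  # --serving_server
    serving_client="serving_client",  # --serving_client
    model_filename=None,              # --model_filename
    params_filename=None)             # --params_filename
```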
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" Paddle Serving Client version string """ """ Paddle Serving Client version string """
serving_client_version = "0.3.2" serving_client_version = "0.0.0"
serving_server_version = "0.3.2" serving_server_version = "0.0.0"
module_proto_version = "0.3.2" module_proto_version = "0.0.0"
commit_id = ""
...@@ -103,8 +103,8 @@ class OpSeqMaker(object): ...@@ -103,8 +103,8 @@ class OpSeqMaker(object):
elif len(node.dependencies) == 1: elif len(node.dependencies) == 1:
if node.dependencies[0].name != self.workflow.nodes[-1].name: if node.dependencies[0].name != self.workflow.nodes[-1].name:
raise Exception( raise Exception(
'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'. 'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'
format(node.dependencies[0].name, self.workflow.nodes[ .format(node.dependencies[0].name, self.workflow.nodes[
-1].name)) -1].name))
self.workflow.nodes.extend([node]) self.workflow.nodes.extend([node])
...@@ -157,8 +157,14 @@ class Server(object): ...@@ -157,8 +157,14 @@ class Server(object):
self.cur_path = os.getcwd() self.cur_path = os.getcwd()
self.use_local_bin = False self.use_local_bin = False
self.mkl_flag = False self.mkl_flag = False
self.product_name = None
self.container_id = None
self.model_config_paths = None # for multi-model in a workflow self.model_config_paths = None # for multi-model in a workflow
def get_fetch_list(self):
fetch_names = [var.alias_name for var in self.model_conf.fetch_var]
return fetch_names
def set_max_concurrency(self, concurrency): def set_max_concurrency(self, concurrency):
self.max_concurrency = concurrency self.max_concurrency = concurrency
...@@ -191,6 +197,16 @@ class Server(object): ...@@ -191,6 +197,16 @@ class Server(object):
def set_ir_optimize(self, flag=False): def set_ir_optimize(self, flag=False):
self.ir_optimization = flag self.ir_optimization = flag
def set_product_name(self, product_name=None):
if product_name == None:
raise ValueError("product_name can't be None.")
self.product_name = product_name
def set_container_id(self, container_id):
if container_id == None:
raise ValueError("container_id can't be None.")
self.container_id = container_id
def check_local_bin(self): def check_local_bin(self):
if "SERVING_BIN" in os.environ: if "SERVING_BIN" in os.environ:
self.use_local_bin = True self.use_local_bin = True
...@@ -254,6 +270,10 @@ class Server(object): ...@@ -254,6 +270,10 @@ class Server(object):
self.resource_conf.model_toolkit_file = self.model_toolkit_fn self.resource_conf.model_toolkit_file = self.model_toolkit_fn
self.resource_conf.general_model_path = workdir self.resource_conf.general_model_path = workdir
self.resource_conf.general_model_file = self.general_model_config_fn self.resource_conf.general_model_file = self.general_model_config_fn
if self.product_name != None:
self.resource_conf.auth_product_name = self.product_name
if self.container_id != None:
self.resource_conf.auth_container_id = self.container_id
def _write_pb_str(self, filepath, pb_obj): def _write_pb_str(self, filepath, pb_obj):
with open(filepath, "w") as fout: with open(filepath, "w") as fout:
...@@ -351,8 +371,8 @@ class Server(object): ...@@ -351,8 +371,8 @@ class Server(object):
if os.path.exists(tar_name): if os.path.exists(tar_name):
os.remove(tar_name) os.remove(tar_name)
raise SystemExit( raise SystemExit(
'Download failed, please check your network or permission of {}.'. 'Download failed, please check your network or permission of {}.'
format(self.module_path)) .format(self.module_path))
else: else:
try: try:
print('Decompressing files ..') print('Decompressing files ..')
...@@ -363,8 +383,8 @@ class Server(object): ...@@ -363,8 +383,8 @@ class Server(object):
if os.path.exists(exe_path): if os.path.exists(exe_path):
os.remove(exe_path) os.remove(exe_path)
raise SystemExit( raise SystemExit(
'Decompressing failed, please check your permission of {} or disk space left.'. 'Decompressing failed, please check your permission of {} or disk space left.'
format(self.module_path)) .format(self.module_path))
finally: finally:
os.remove(tar_name) os.remove(tar_name)
#release lock #release lock
...@@ -502,6 +522,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -502,6 +522,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
feed_names = list(request.feed_var_names) feed_names = list(request.feed_var_names)
fetch_names = list(request.fetch_var_names) fetch_names = list(request.fetch_var_names)
is_python = request.is_python is_python = request.is_python
log_id = request.log_id
feed_batch = [] feed_batch = []
for feed_inst in request.insts: for feed_inst in request.insts:
feed_dict = {} feed_dict = {}
...@@ -530,7 +551,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -530,7 +551,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
data.shape = list(feed_inst.tensor_array[idx].shape) data.shape = list(feed_inst.tensor_array[idx].shape)
feed_dict[name] = data feed_dict[name] = data
feed_batch.append(feed_dict) feed_batch.append(feed_dict)
return feed_batch, fetch_names, is_python return feed_batch, fetch_names, is_python, log_id
def _pack_inference_response(self, ret, fetch_names, is_python): def _pack_inference_response(self, ret, fetch_names, is_python):
resp = multi_lang_general_model_service_pb2.InferenceResponse() resp = multi_lang_general_model_service_pb2.InferenceResponse()
...@@ -540,7 +561,6 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -540,7 +561,6 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
results, tag = ret results, tag = ret
resp.tag = tag resp.tag = tag
resp.err_code = 0 resp.err_code = 0
if not self.is_multi_model_: if not self.is_multi_model_:
results = {'general_infer_0': results} results = {'general_infer_0': results}
for model_name, model_result in results.items(): for model_name, model_result in results.items():
...@@ -583,10 +603,13 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -583,10 +603,13 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
return resp return resp
def Inference(self, request, context): def Inference(self, request, context):
feed_dict, fetch_names, is_python = self._unpack_inference_request( feed_dict, fetch_names, is_python, log_id = \
request) self._unpack_inference_request(request)
ret = self.bclient_.predict( ret = self.bclient_.predict(
feed=feed_dict, fetch=fetch_names, need_variant_tag=True) feed=feed_dict,
fetch=fetch_names,
need_variant_tag=True,
log_id=log_id)
return self._pack_inference_response(ret, fetch_names, is_python) return self._pack_inference_response(ret, fetch_names, is_python)
def GetClientConfig(self, request, context): def GetClientConfig(self, request, context):
......
...@@ -58,6 +58,16 @@ def parse_args(): # pylint: disable=doc-string-missing ...@@ -58,6 +58,16 @@ def parse_args(): # pylint: disable=doc-string-missing
default=False, default=False,
action="store_true", action="store_true",
help="Use Multi-language-service") help="Use Multi-language-service")
parser.add_argument(
"--product_name",
type=str,
default=None,
help="product_name for authentication")
parser.add_argument(
"--container_id",
type=str,
default=None,
help="container_id for authentication")
return parser.parse_args() return parser.parse_args()
...@@ -101,6 +111,10 @@ def start_standard_model(): # pylint: disable=doc-string-missing ...@@ -101,6 +111,10 @@ def start_standard_model(): # pylint: disable=doc-string-missing
server.use_mkl(use_mkl) server.use_mkl(use_mkl)
server.set_max_body_size(max_body_size) server.set_max_body_size(max_body_size)
server.set_port(port) server.set_port(port)
if args.product_name != None:
server.set_product_name(args.product_name)
if args.container_id != None:
server.set_container_id(args.container_id)
server.load_model_config(model) server.load_model_config(model)
server.prepare_server(workdir=workdir, port=port, device=device) server.prepare_server(workdir=workdir, port=port, device=device)
......
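The new `--product_name` / `--container_id` flags feed the `set_product_name` / `set_container_id` setters added to `Server` above. A sketch of setting them programmatically; the op sequence follows the standard reader/infer/response layout, and the model directory, port and values are illustrative:
```
# Sketch: authentication fields set directly on a Server instance.
from paddle_serving_server import OpMaker, OpSeqMaker, Server

op_maker = OpMaker()
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(op_maker.create('general_reader'))
op_seq_maker.add_op(op_maker.create('general_infer'))
op_seq_maker.add_op(op_maker.create('general_response'))

server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_product_name("my_product")        # hypothetical product name
server.set_container_id("container-001")     # hypothetical container id
server.load_model_config("uci_housing_model")  # illustrative model directory
server.prepare_server(workdir="workdir", port=9292, device="cpu")
server.run_server()
```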
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" Paddle Serving Client version string """ """ Paddle Serving Client version string """
serving_client_version = "0.3.2" serving_client_version = "0.0.0"
serving_server_version = "0.3.2" serving_server_version = "0.0.0"
module_proto_version = "0.3.2" module_proto_version = "0.0.0"
commit_id = ""
...@@ -21,12 +21,36 @@ from paddle_serving_client import Client ...@@ -21,12 +21,36 @@ from paddle_serving_client import Client
from contextlib import closing from contextlib import closing
import socket import socket
from paddle_serving_server import pipeline
from paddle_serving_server.pipeline import Op
class WebService(object): class WebService(object):
def __init__(self, name="default_service"): def __init__(self, name="default_service"):
self.name = name self.name = name
# pipeline
self._server = pipeline.PipelineServer(self.name)
def get_pipeline_response(self, read_op):
return None
def prepare_pipeline_config(self, yaml_file):
# build dag
read_op = pipeline.RequestOp()
last_op = self.get_pipeline_response(read_op)
if not isinstance(last_op, Op):
raise ValueError("The return value type of `get_pipeline_response` "
"function is not Op type, please check function "
"`get_pipeline_response`.")
response_op = pipeline.ResponseOp(input_ops=[last_op])
self._server.set_response_op(response_op)
self._server.prepare_server(yaml_file)
def run_service(self):
self._server.run_server()
def load_model_config(self, model_config): def load_model_config(self, model_config):
print("This API will be deprecated later. Please do not use it")
self.model_config = model_config self.model_config = model_config
def _launch_rpc_service(self): def _launch_rpc_service(self):
...@@ -63,6 +87,7 @@ class WebService(object): ...@@ -63,6 +87,7 @@ class WebService(object):
device="cpu", device="cpu",
mem_optim=True, mem_optim=True,
ir_optim=False): ir_optim=False):
print("This API will be deprecated later. Please do not use it")
self.workdir = workdir self.workdir = workdir
self.port = port self.port = port
self.device = device self.device = device
...@@ -91,15 +116,18 @@ class WebService(object): ...@@ -91,15 +116,18 @@ class WebService(object):
request.json["fetch"]) request.json["fetch"])
if isinstance(feed, dict) and "fetch" in feed: if isinstance(feed, dict) and "fetch" in feed:
del feed["fetch"] del feed["fetch"]
if len(feed) == 0:
raise ValueError("empty input")
fetch_map = self.client.predict(feed=feed, fetch=fetch) fetch_map = self.client.predict(feed=feed, fetch=fetch)
result = self.postprocess( result = self.postprocess(
feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map) feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
result = {"result": result} result = {"result": result}
except ValueError as err: except ValueError as err:
result = {"result": err} result = {"result": str(err)}
return result return result
def run_rpc_service(self): def run_rpc_service(self):
print("This API will be deprecated later. Please do not use it")
import socket import socket
localIP = socket.gethostbyname(socket.gethostname()) localIP = socket.gethostbyname(socket.gethostname())
print("web service address:") print("web service address:")
...@@ -122,7 +150,34 @@ class WebService(object): ...@@ -122,7 +150,34 @@ class WebService(object):
self.app_instance = app_instance self.app_instance = app_instance
def run_debugger_service(self):
import socket
localIP = socket.gethostbyname(socket.gethostname())
print("web service address:")
print("http://{}:{}/{}/prediction".format(localIP, self.port,
self.name))
app_instance = Flask(__name__)
@app_instance.before_first_request
def init():
self._launch_local_predictor()
service_name = "/" + self.name + "/prediction"
@app_instance.route(service_name, methods=["POST"])
def run():
return self.get_prediction(request)
self.app_instance = app_instance
def _launch_local_predictor(self):
from paddle_serving_app.local_predict import Debugger
self.client = Debugger()
self.client.load_model_config(
"{}".format(self.model_config), gpu=False, profile=False)
def run_web_service(self): def run_web_service(self):
print("This API will be deprecated later. Please do not use it")
self.app_instance.run(host="0.0.0.0", self.app_instance.run(host="0.0.0.0",
port=self.port, port=self.port,
threaded=False, threaded=False,
...@@ -132,9 +187,11 @@ class WebService(object): ...@@ -132,9 +187,11 @@ class WebService(object):
return self.app_instance return self.app_instance
def preprocess(self, feed=[], fetch=[]): def preprocess(self, feed=[], fetch=[]):
print("This API will be deprecated later. Please do not use it")
return feed, fetch return feed, fetch
def postprocess(self, feed=[], fetch=[], fetch_map=None): def postprocess(self, feed=[], fetch=[], fetch_map=None):
print("This API will be deprecated later. Please do not use it")
for key in fetch_map: for key in fetch_map:
fetch_map[key] = fetch_map[key].tolist() fetch_map[key] = fetch_map[key].tolist()
return fetch_map return fetch_map
...@@ -73,6 +73,16 @@ def serve_args(): ...@@ -73,6 +73,16 @@ def serve_args():
default=False, default=False,
action="store_true", action="store_true",
help="Use Multi-language-service") help="Use Multi-language-service")
parser.add_argument(
"--product_name",
type=str,
default=None,
help="product_name for authentication")
parser.add_argument(
"--container_id",
type=str,
default=None,
help="container_id for authentication")
return parser.parse_args() return parser.parse_args()
...@@ -141,8 +151,8 @@ class OpSeqMaker(object): ...@@ -141,8 +151,8 @@ class OpSeqMaker(object):
elif len(node.dependencies) == 1: elif len(node.dependencies) == 1:
if node.dependencies[0].name != self.workflow.nodes[-1].name: if node.dependencies[0].name != self.workflow.nodes[-1].name:
raise Exception( raise Exception(
'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'. 'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'
format(node.dependencies[0].name, self.workflow.nodes[ .format(node.dependencies[0].name, self.workflow.nodes[
-1].name)) -1].name))
self.workflow.nodes.extend([node]) self.workflow.nodes.extend([node])
...@@ -196,6 +206,12 @@ class Server(object): ...@@ -196,6 +206,12 @@ class Server(object):
self.use_local_bin = False self.use_local_bin = False
self.gpuid = 0 self.gpuid = 0
self.model_config_paths = None # for multi-model in a workflow self.model_config_paths = None # for multi-model in a workflow
self.product_name = None
self.container_id = None
def get_fetch_list(self):
fetch_names = [var.alias_name for var in self.model_conf.fetch_var]
return fetch_names
def set_max_concurrency(self, concurrency): def set_max_concurrency(self, concurrency):
self.max_concurrency = concurrency self.max_concurrency = concurrency
...@@ -229,6 +245,16 @@ class Server(object): ...@@ -229,6 +245,16 @@ class Server(object):
def set_ir_optimize(self, flag=False): def set_ir_optimize(self, flag=False):
self.ir_optimization = flag self.ir_optimization = flag
def set_product_name(self, product_name=None):
if product_name == None:
raise ValueError("product_name can't be None.")
self.product_name = product_name
def set_container_id(self, container_id):
if container_id == None:
raise ValueError("container_id can't be None.")
self.container_id = container_id
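For reference, a hedged sketch of wiring the new authentication fields into a server instance, mirroring how `serve.py` below calls these setters after parsing `--product_name` and `--container_id`. The op names and model directory are illustrative; the GPU package exposes the same setters.
```python
# Hedged sketch: op names ("general_reader"/"general_infer"/"general_response")
# and the "serving_model" directory are placeholders for illustration.
from paddle_serving_server import OpMaker, OpSeqMaker, Server

op_maker = OpMaker()
read_op = op_maker.create('general_reader')
infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')

op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(infer_op)
op_seq_maker.add_op(response_op)

server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
# New in this change: optional authentication metadata written to resource conf
server.set_product_name("my_product")     # becomes resource_conf.auth_product_name
server.set_container_id("container-001")  # becomes resource_conf.auth_container_id
server.load_model_config("serving_model")
server.prepare_server(workdir="workdir", port=9393, device="cpu")
server.run_server()
```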
def check_local_bin(self): def check_local_bin(self):
if "SERVING_BIN" in os.environ: if "SERVING_BIN" in os.environ:
self.use_local_bin = True self.use_local_bin = True
...@@ -302,6 +328,10 @@ class Server(object): ...@@ -302,6 +328,10 @@ class Server(object):
self.resource_conf.model_toolkit_file = self.model_toolkit_fn self.resource_conf.model_toolkit_file = self.model_toolkit_fn
self.resource_conf.general_model_path = workdir self.resource_conf.general_model_path = workdir
self.resource_conf.general_model_file = self.general_model_config_fn self.resource_conf.general_model_file = self.general_model_config_fn
if self.product_name != None:
self.resource_conf.auth_product_name = self.product_name
if self.container_id != None:
self.resource_conf.auth_container_id = self.container_id
def _write_pb_str(self, filepath, pb_obj): def _write_pb_str(self, filepath, pb_obj):
with open(filepath, "w") as fout: with open(filepath, "w") as fout:
...@@ -393,8 +423,8 @@ class Server(object): ...@@ -393,8 +423,8 @@ class Server(object):
if os.path.exists(tar_name): if os.path.exists(tar_name):
os.remove(tar_name) os.remove(tar_name)
raise SystemExit( raise SystemExit(
'Download failed, please check your network or permission of {}.'. 'Download failed, please check your network or permission of {}.'
format(self.module_path)) .format(self.module_path))
else: else:
try: try:
print('Decompressing files ..') print('Decompressing files ..')
...@@ -405,8 +435,8 @@ class Server(object): ...@@ -405,8 +435,8 @@ class Server(object):
if os.path.exists(exe_path): if os.path.exists(exe_path):
os.remove(exe_path) os.remove(exe_path)
raise SystemExit( raise SystemExit(
'Decompressing failed, please check your permission of {} or disk space left.'. 'Decompressing failed, please check your permission of {} or disk space left.'
format(self.module_path)) .format(self.module_path))
finally: finally:
os.remove(tar_name) os.remove(tar_name)
#release lock #release lock
...@@ -552,6 +582,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -552,6 +582,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
feed_names = list(request.feed_var_names) feed_names = list(request.feed_var_names)
fetch_names = list(request.fetch_var_names) fetch_names = list(request.fetch_var_names)
is_python = request.is_python is_python = request.is_python
log_id = request.log_id
feed_batch = [] feed_batch = []
for feed_inst in request.insts: for feed_inst in request.insts:
feed_dict = {} feed_dict = {}
...@@ -580,7 +611,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -580,7 +611,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
data.shape = list(feed_inst.tensor_array[idx].shape) data.shape = list(feed_inst.tensor_array[idx].shape)
feed_dict[name] = data feed_dict[name] = data
feed_batch.append(feed_dict) feed_batch.append(feed_dict)
return feed_batch, fetch_names, is_python return feed_batch, fetch_names, is_python, log_id
def _pack_inference_response(self, ret, fetch_names, is_python): def _pack_inference_response(self, ret, fetch_names, is_python):
resp = multi_lang_general_model_service_pb2.InferenceResponse() resp = multi_lang_general_model_service_pb2.InferenceResponse()
...@@ -633,10 +664,13 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. ...@@ -633,10 +664,13 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
return resp return resp
def Inference(self, request, context): def Inference(self, request, context):
feed_dict, fetch_names, is_python = self._unpack_inference_request( feed_dict, fetch_names, is_python, log_id \
request) = self._unpack_inference_request(request)
ret = self.bclient_.predict( ret = self.bclient_.predict(
feed=feed_dict, fetch=fetch_names, need_variant_tag=True) feed=feed_dict,
fetch=fetch_names,
need_variant_tag=True,
log_id=log_id)
return self._pack_inference_response(ret, fetch_names, is_python) return self._pack_inference_response(ret, fetch_names, is_python)
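With `log_id` now unpacked from the request and forwarded to `bclient_.predict`, a request id can be traced end to end. A hedged client-side sketch follows; it assumes the multi-language client gained a matching `log_id` keyword (only the server side is visible in this diff), and the feed/fetch names are placeholders.
```python
# Assumption: MultiLangClient.predict accepts a log_id parameter matching the
# server-side change above; "x" and "price" are placeholder names.
import numpy as np
from paddle_serving_client import MultiLangClient

client = MultiLangClient()
client.connect(["127.0.0.1:9393"])
fetch_map = client.predict(
    feed={"x": np.random.rand(13).astype("float32")},
    fetch=["price"],
    log_id=10000)
print(fetch_map)
```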
def GetClientConfig(self, request, context): def GetClientConfig(self, request, context):
......
...@@ -65,6 +65,11 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss ...@@ -65,6 +65,11 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss
server.set_ir_optimize(ir_optim) server.set_ir_optimize(ir_optim)
server.set_max_body_size(max_body_size) server.set_max_body_size(max_body_size)
if args.product_name != None:
server.set_product_name(args.product_name)
if args.container_id != None:
server.set_container_id(args.container_id)
server.load_model_config(model) server.load_model_config(model)
server.prepare_server(workdir=workdir, port=port, device=device) server.prepare_server(workdir=workdir, port=port, device=device)
if gpuid >= 0: if gpuid >= 0:
...@@ -83,8 +88,8 @@ def start_multi_card(args): # pylint: disable=doc-string-missing ...@@ -83,8 +88,8 @@ def start_multi_card(args): # pylint: disable=doc-string-missing
for ids in gpus: for ids in gpus:
if int(ids) >= len(env_gpus): if int(ids) >= len(env_gpus):
print( print(
" Max index of gpu_ids out of range, the number of CUDA_VISIBLE_DEVICES is {}.". " Max index of gpu_ids out of range, the number of CUDA_VISIBLE_DEVICES is {}."
format(len(env_gpus))) .format(len(env_gpus)))
exit(-1) exit(-1)
else: else:
env_gpus = [] env_gpus = []
......
...@@ -12,7 +12,8 @@ ...@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
""" Paddle Serving Client version string """ """ Paddle Serving Client version string """
serving_client_version = "0.3.2" serving_client_version = "0.0.0"
serving_server_version = "0.3.2" serving_server_version = "0.0.0"
module_proto_version = "0.3.2" module_proto_version = "0.0.0"
cuda_version = "9" cuda_version = "9"
commit_id = ""
...@@ -24,17 +24,43 @@ import sys ...@@ -24,17 +24,43 @@ import sys
import numpy as np import numpy as np
import paddle_serving_server_gpu as serving import paddle_serving_server_gpu as serving
from paddle_serving_server_gpu import pipeline
from paddle_serving_server_gpu.pipeline import Op
class WebService(object): class WebService(object):
def __init__(self, name="default_service"): def __init__(self, name="default_service"):
self.name = name self.name = name
self.gpus = [] # pipeline
self.rpc_service_list = [] self._server = pipeline.PipelineServer(self.name)
self.gpus = [] # deprecated
self.rpc_service_list = [] # deprecated
def get_pipeline_response(self, read_op):
return None
def prepare_pipeline_config(self, yaml_file):
# build dag
read_op = pipeline.RequestOp()
last_op = self.get_pipeline_response(read_op)
if not isinstance(last_op, Op):
raise ValueError("The return value type of `get_pipeline_response` "
"function is not Op type, please check function "
"`get_pipeline_response`.")
response_op = pipeline.ResponseOp(input_ops=[last_op])
self._server.set_response_op(response_op)
self._server.prepare_server(yaml_file)
def run_service(self):
self._server.run_server()
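The new pipeline-style entry points above (`get_pipeline_response`, `prepare_pipeline_config`, `run_service`) are meant to be driven from a subclass. A hedged sketch of that pattern follows; the op name and `config.yml` are illustrative, and a real op would point at a model (typically via the yaml config) and usually override preprocess/postprocess.
```python
# Hedged sketch of the subclassing pattern implied by the methods above.
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server_gpu.pipeline import Op


class UciService(WebService):
    def get_pipeline_response(self, read_op):
        # read_op is the RequestOp created in prepare_pipeline_config; the op
        # returned here becomes the input of the ResponseOp.
        return Op(name="uci", input_ops=[read_op])


service = UciService(name="uci")
service.prepare_pipeline_config("config.yml")
service.run_service()
```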
def load_model_config(self, model_config): def load_model_config(self, model_config):
print("This API will be deprecated later. Please do not use it")
self.model_config = model_config self.model_config = model_config
def set_gpus(self, gpus): def set_gpus(self, gpus):
print("This API will be deprecated later. Please do not use it")
self.gpus = [int(x) for x in gpus.split(",")] self.gpus = [int(x) for x in gpus.split(",")]
def default_rpc_service(self, def default_rpc_service(self,
...@@ -88,6 +114,7 @@ class WebService(object): ...@@ -88,6 +114,7 @@ class WebService(object):
gpuid=0, gpuid=0,
mem_optim=True, mem_optim=True,
ir_optim=False): ir_optim=False):
print("This API will be deprecated later. Please do not use it")
self.workdir = workdir self.workdir = workdir
self.port = port self.port = port
self.device = device self.device = device
...@@ -151,10 +178,11 @@ class WebService(object): ...@@ -151,10 +178,11 @@ class WebService(object):
feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map) feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
result = {"result": result} result = {"result": result}
except ValueError as err: except ValueError as err:
result = {"result": err} result = {"result": str(err)}
return result return result
def run_rpc_service(self): def run_rpc_service(self):
print("This API will be deprecated later. Please do not use it")
import socket import socket
localIP = socket.gethostbyname(socket.gethostname()) localIP = socket.gethostbyname(socket.gethostname())
print("web service address:") print("web service address:")
...@@ -183,6 +211,7 @@ class WebService(object): ...@@ -183,6 +211,7 @@ class WebService(object):
# TODO: maybe change another API name: maybe run_local_predictor? # TODO: maybe change another API name: maybe run_local_predictor?
def run_debugger_service(self, gpu=False): def run_debugger_service(self, gpu=False):
print("This API will be deprecated later. Please do not use it")
import socket import socket
localIP = socket.gethostbyname(socket.gethostname()) localIP = socket.gethostbyname(socket.gethostname())
print("web service address:") print("web service address:")
...@@ -209,18 +238,21 @@ class WebService(object): ...@@ -209,18 +238,21 @@ class WebService(object):
"{}".format(self.model_config), gpu=gpu, profile=False) "{}".format(self.model_config), gpu=gpu, profile=False)
def run_web_service(self): def run_web_service(self):
print("This API will be deprecated later. Please do not use it")
self.app_instance.run(host="0.0.0.0", self.app_instance.run(host="0.0.0.0",
port=self.port, port=self.port,
threaded=False, threaded=False,
processes=1) processes=4)
def get_app_instance(self): def get_app_instance(self):
return self.app_instance return self.app_instance
def preprocess(self, feed=[], fetch=[]): def preprocess(self, feed=[], fetch=[]):
print("This API will be deprecated later. Please do not use it")
return feed, fetch return feed, fetch
def postprocess(self, feed=[], fetch=[], fetch_map=None): def postprocess(self, feed=[], fetch=[], fetch_map=None):
for key in fetch_map.iterkeys(): print("This API will be deprecated later. Please do not use it")
for key in fetch_map:
fetch_map[key] = fetch_map[key].tolist() fetch_map[key] = fetch_map[key].tolist()
return fetch_map return fetch_map
...@@ -11,8 +11,9 @@ ...@@ -11,8 +11,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from . import logger # this module must be the first to import
from operator import Op, RequestOp, ResponseOp from .operator import Op, RequestOp, ResponseOp
from pipeline_server import PipelineServer from .pipeline_server import PipelineServer
from pipeline_client import PipelineClient from .pipeline_client import PipelineClient
from analyse import Analyst from .local_rpc_service_handler import LocalRpcServiceHandler
from .analyse import Analyst
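With the imports above made package-relative, callers reach the pipeline API through the package itself. A hedged sketch of a client using `PipelineClient`; the endpoint, feed key, and fetch name are placeholders, and the exact `predict` signature is an assumption.
```python
# Hedged sketch: endpoint and keys are placeholders; PipelineClient.predict is
# assumed to take a feed_dict plus a fetch list.
from paddle_serving_server_gpu.pipeline import PipelineClient

client = PipelineClient()
client.connect(["127.0.0.1:18080"])
result = client.predict(
    feed_dict={"words": "a sentence to score"}, fetch=["prediction"])
print(result)
```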
...@@ -17,7 +17,7 @@ import copy ...@@ -17,7 +17,7 @@ import copy
import re import re
import logging import logging
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger(__name__)
class Analyst(object): class Analyst(object):
...@@ -69,7 +69,7 @@ class Analyst(object): ...@@ -69,7 +69,7 @@ class Analyst(object):
with open(self._profile_file) as f: with open(self._profile_file) as f:
for line in f.readlines(): for line in f.readlines():
line = line.strip().split("\t") line = line.strip().split("\t")
if line[0] == "PROFILE": if line[0] == "PROFILE" and len(line) >= 3:
trace_list = self._prase_line(line[1], line[2], counter) trace_list = self._prase_line(line[1], line[2], counter)
counter += 1 counter += 1
for trace in trace_list: for trace in trace_list:
...@@ -164,7 +164,7 @@ class OpAnalyst(object): ...@@ -164,7 +164,7 @@ class OpAnalyst(object):
def add(self, name_str, ts_list): def add(self, name_str, ts_list):
if self._close: if self._close:
_LOGGER.error("OpAnalyst is closed.") _LOGGER.error("Failed to add item: OpAnalyst is closed.")
return return
op_name, curr_idx, step = self._parse(name_str) op_name, curr_idx, step = self._parse(name_str)
if op_name not in self.op_time_list_dict: if op_name not in self.op_time_list_dict:
......
...@@ -12,6 +12,7 @@ ...@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# pylint: disable=doc-string-missing # pylint: disable=doc-string-missing
from time import time as _time
import threading import threading
import multiprocessing import multiprocessing
import multiprocessing.queues import multiprocessing.queues
...@@ -25,9 +26,10 @@ else: ...@@ -25,9 +26,10 @@ else:
import numpy as np import numpy as np
import logging import logging
import enum import enum
import os
import copy import copy
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger(__name__)
class ChannelDataEcode(enum.Enum): class ChannelDataEcode(enum.Enum):
...@@ -38,7 +40,8 @@ class ChannelDataEcode(enum.Enum): ...@@ -38,7 +40,8 @@ class ChannelDataEcode(enum.Enum):
RPC_PACKAGE_ERROR = 4 RPC_PACKAGE_ERROR = 4
CLIENT_ERROR = 5 CLIENT_ERROR = 5
CLOSED_ERROR = 6 CLOSED_ERROR = 6
UNKNOW = 7 NO_SERVICE = 7
UNKNOW = 8
class ChannelDataType(enum.Enum): class ChannelDataType(enum.Enum):
...@@ -68,21 +71,25 @@ class ChannelData(object): ...@@ -68,21 +71,25 @@ class ChannelData(object):
''' '''
if ecode is not None: if ecode is not None:
if data_id is None or error_info is None: if data_id is None or error_info is None:
raise ValueError("data_id and error_info cannot be None") _LOGGER.critical("Failed to generate ChannelData: data_id"
" and error_info cannot be None")
os._exit(-1)
datatype = ChannelDataType.ERROR.value datatype = ChannelDataType.ERROR.value
else: else:
if datatype == ChannelDataType.CHANNEL_NPDATA.value: if datatype == ChannelDataType.CHANNEL_NPDATA.value:
ecode, error_info = ChannelData.check_npdata(npdata) ecode, error_info = ChannelData.check_npdata(npdata)
if ecode != ChannelDataEcode.OK.value: if ecode != ChannelDataEcode.OK.value:
datatype = ChannelDataType.ERROR.value datatype = ChannelDataType.ERROR.value
_LOGGER.error(error_info) _LOGGER.error("(logid={}) {}".format(data_id, error_info))
elif datatype == ChannelDataType.DICT.value: elif datatype == ChannelDataType.DICT.value:
ecode, error_info = ChannelData.check_dictdata(dictdata) ecode, error_info = ChannelData.check_dictdata(dictdata)
if ecode != ChannelDataEcode.OK.value: if ecode != ChannelDataEcode.OK.value:
datatype = ChannelDataType.ERROR.value datatype = ChannelDataType.ERROR.value
_LOGGER.error(error_info) _LOGGER.error("(logid={}) {}".format(data_id, error_info))
else: else:
raise ValueError("datatype not match") _LOGGER.critical("(logid={}) datatype not match".format(
data_id))
os._exit(-1)
self.datatype = datatype self.datatype = datatype
self.npdata = npdata self.npdata = npdata
self.dictdata = dictdata self.dictdata = dictdata
...@@ -106,14 +113,24 @@ class ChannelData(object): ...@@ -106,14 +113,24 @@ class ChannelData(object):
for sample in dictdata: for sample in dictdata:
if not isinstance(sample, dict): if not isinstance(sample, dict):
ecode = ChannelDataEcode.TYPE_ERROR.value ecode = ChannelDataEcode.TYPE_ERROR.value
error_info = "the value of data must " \ error_info = "Failed to check data: the type of " \
"be dict, but get {}.".format(type(sample)) "data must be dict, but get {}.".format(type(sample))
break break
elif not isinstance(dictdata, dict): elif not isinstance(dictdata, dict):
# batch size = 1 # batch size = 1
ecode = ChannelDataEcode.TYPE_ERROR.value ecode = ChannelDataEcode.TYPE_ERROR.value
error_info = "the value of data must " \ error_info = "Failed to check data: the type of data must " \
"be dict, but get {}.".format(type(dictdata)) "be dict, but get {}.".format(type(dictdata))
return ecode, error_info
@staticmethod
def check_batch_npdata(batch):
ecode = ChannelDataEcode.OK.value
error_info = None
for npdata in batch:
ecode, error_info = ChannelData.check_npdata(npdata)
if ecode != ChannelDataEcode.OK.value:
break
return ecode, error_info return ecode, error_info
@staticmethod @staticmethod
...@@ -125,27 +142,30 @@ class ChannelData(object): ...@@ -125,27 +142,30 @@ class ChannelData(object):
for sample in npdata: for sample in npdata:
if not isinstance(sample, dict): if not isinstance(sample, dict):
ecode = ChannelDataEcode.TYPE_ERROR.value ecode = ChannelDataEcode.TYPE_ERROR.value
error_info = "the value of data must " \ error_info = "Failed to check data: the " \
"be dict, but get {}.".format(type(sample)) "value of data must be dict, but get {}.".format(
type(sample))
break break
for _, value in sample.items(): for _, value in sample.items():
if not isinstance(value, np.ndarray): if not isinstance(value, np.ndarray):
ecode = ChannelDataEcode.TYPE_ERROR.value ecode = ChannelDataEcode.TYPE_ERROR.value
error_info = "the value of data must " \ error_info = "Failed to check data: the" \
"be np.ndarray, but get {}.".format(type(value)) " value of data must be np.ndarray, but get {}.".format(
type(value))
return ecode, error_info return ecode, error_info
elif isinstance(npdata, dict): elif isinstance(npdata, dict):
# batch_size = 1 # batch_size = 1
for _, value in npdata.items(): for _, value in npdata.items():
if not isinstance(value, np.ndarray): if not isinstance(value, np.ndarray):
ecode = ChannelDataEcode.TYPE_ERROR.value ecode = ChannelDataEcode.TYPE_ERROR.value
error_info = "the value of data must " \ error_info = "Failed to check data: the value " \
"be np.ndarray, but get {}.".format(type(value)) "of data must be np.ndarray, but get {}.".format(
type(value))
break break
else: else:
ecode = ChannelDataEcode.TYPE_ERROR.value ecode = ChannelDataEcode.TYPE_ERROR.value
error_info = "the value of data must " \ error_info = "Failed to check data: the value of data " \
"be dict, but get {}.".format(type(npdata)) "must be dict, but get {}.".format(type(npdata))
return ecode, error_info return ecode, error_info
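In short, `check_dictdata` accepts a dict (or a list of dicts) with arbitrary values, while `check_npdata` and `check_batch_npdata` additionally require every value to be a `np.ndarray`. A small illustration follows; the import path is an assumption.
```python
# Illustration of the shapes accepted by the checkers above; the module path
# (paddle_serving_server_gpu.pipeline.channel) is an assumption.
import numpy as np
from paddle_serving_server_gpu.pipeline.channel import (ChannelData,
                                                        ChannelDataEcode)

ecode, _ = ChannelData.check_npdata(
    {"image": np.zeros((1, 3, 224, 224), dtype="float32")})
assert ecode == ChannelDataEcode.OK.value

# a plain Python list is rejected by check_npdata ...
ecode, info = ChannelData.check_npdata({"image": [1, 2, 3]})
assert ecode == ChannelDataEcode.TYPE_ERROR.value

# ... while check_dictdata only requires each sample to be a dict
ecode, _ = ChannelData.check_dictdata({"image": [1, 2, 3]})
assert ecode == ChannelDataEcode.OK.value
```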
def parse(self): def parse(self):
...@@ -157,9 +177,19 @@ class ChannelData(object): ...@@ -157,9 +177,19 @@ class ChannelData(object):
# return dict # return dict
feed = self.dictdata feed = self.dictdata
else: else:
raise TypeError("Error type({}) in datatype.".format(self.datatype)) _LOGGER.critical("Failed to parse channeldata: error " \
"type({}) in datatype.".format(self.datatype))
os._exit(-1)
return feed return feed
def __cmp__(self, other):
if self.id < other.id:
return -1
elif self.id == other.id:
return 0
else:
return 1
def __str__(self): def __str__(self):
return "type[{}], ecode[{}], id[{}]".format( return "type[{}], ecode[{}], id[{}]".format(
ChannelDataType(self.datatype).name, self.ecode, self.id) ChannelDataType(self.datatype).name, self.ecode, self.id)
...@@ -175,7 +205,7 @@ class ProcessChannel(object): ...@@ -175,7 +205,7 @@ class ProcessChannel(object):
Only when all types of Ops get the data of the same ID, Only when all types of Ops get the data of the same ID,
the data will be popped; The Op of the same type will not the data will be popped; The Op of the same type will not
get the data of the same ID. get the data of the same ID.
3. (TODO) Timeout and BatchSize are not fully supported. 3. The front function supports a timeout parameter to enable auto-batching.
Note: Note:
1. The ID of the data in the channel must be different. 1. The ID of the data in the channel must be different.
...@@ -194,16 +224,15 @@ class ProcessChannel(object): ...@@ -194,16 +224,15 @@ class ProcessChannel(object):
maintains the data obtained from queue. maintains the data obtained from queue.
""" """
def __init__(self, manager, name=None, maxsize=0, timeout=None): def __init__(self, manager, name=None, maxsize=0):
# For queue multiprocess: after putting an object on # For queue multiprocess: after putting an object on
# an empty queue there may be an infinitesimal delay # an empty queue there may be an infinitesimal delay
# before the queue's :meth:`~Queue.empty` # before the queue's :meth:`~Queue.empty`
# see more: # see more:
# - https://bugs.python.org/issue18277 # - https://bugs.python.org/issue18277
# - https://hg.python.org/cpython/rev/860fc6a2bd21 # - https://hg.python.org/cpython/rev/860fc6a2bd21
self._que = manager.Queue(maxsize=maxsize) self._que = manager.PriorityQueue(maxsize=maxsize)
self._maxsize = maxsize self._maxsize = maxsize
self._timeout = timeout
self.name = name self.name = name
self._stop = manager.Value('i', 0) self._stop = manager.Value('i', 0)
...@@ -219,6 +248,12 @@ class ProcessChannel(object): ...@@ -219,6 +248,12 @@ class ProcessChannel(object):
self._base_cursor = manager.Value('i', 0) self._base_cursor = manager.Value('i', 0)
self._output_buf = manager.list() self._output_buf = manager.list()
def get_maxsize(self):
return self._maxsize
def size(self):
return self._que.qsize()
def get_producers(self): def get_producers(self):
return self._producers return self._producers
...@@ -228,37 +263,41 @@ class ProcessChannel(object): ...@@ -228,37 +263,41 @@ class ProcessChannel(object):
def _log(self, info_str): def _log(self, info_str):
return "[{}] {}".format(self.name, info_str) return "[{}] {}".format(self.name, info_str)
def debug(self):
return self._log("p: {}, c: {}".format(self.get_producers(),
self.get_consumers()))
def add_producer(self, op_name): def add_producer(self, op_name):
""" not thread safe, and can only be called during initialization. """ """ not thread safe, and can only be called during initialization. """
if op_name in self._producers: if op_name in self._producers:
raise ValueError( _LOGGER.critical(
self._log("producer({}) is already in channel".format(op_name))) self._log("Failed to add producer: producer({})" \
" is already in channel".format(op_name)))
os._exit(-1)
self._producers.append(op_name) self._producers.append(op_name)
_LOGGER.debug(self._log("Succ add a producer: {}".format(op_name)))
def add_consumer(self, op_name): def add_consumer(self, op_name):
""" not thread safe, and can only be called during initialization. """ """ not thread safe, and can only be called during initialization. """
if op_name in self._consumer_cursors: if op_name in self._consumer_cursors:
raise ValueError( _LOGGER.critical(
self._log("consumer({}) is already in channel".format(op_name))) self._log("Failed to add consumer: consumer({})" \
" is already in channel".format(op_name)))
os._exit(-1)
self._consumer_cursors[op_name] = 0 self._consumer_cursors[op_name] = 0
if self._cursor_count.get(0) is None: if self._cursor_count.get(0) is None:
self._cursor_count[0] = 0 self._cursor_count[0] = 0
self._cursor_count[0] += 1 self._cursor_count[0] += 1
_LOGGER.debug(self._log("Succ add a consumer: {}".format(op_name)))
def push(self, channeldata, op_name=None): def push(self, channeldata, op_name=None):
_LOGGER.debug( _LOGGER.debug(
self._log("{} try to push data: {}".format(op_name, self._log("(logid={}) Op({}) Pushing data".format(channeldata.id,
channeldata.__str__()))) op_name)))
if len(self._producers) == 0: if len(self._producers) == 0:
raise Exception( _LOGGER.critical(
self._log( self._log(
"expected number of producers to be greater than 0, but the it is 0." "(logid={}) Op({}) Failed to push data: expected number"
)) " of producers to be greater than 0, but the it is 0.".
format(channeldata.id, op_name)))
os._exit(-1)
elif len(self._producers) == 1: elif len(self._producers) == 1:
with self._cv: with self._cv:
while self._stop.value == 0: while self._stop.value == 0:
...@@ -269,23 +308,23 @@ class ProcessChannel(object): ...@@ -269,23 +308,23 @@ class ProcessChannel(object):
self._cv.wait() self._cv.wait()
if self._stop.value == 1: if self._stop.value == 1:
raise ChannelStopError() raise ChannelStopError()
_LOGGER.debug(
self._log("{} channel size: {}".format(op_name,
self._que.qsize())))
self._cv.notify_all() self._cv.notify_all()
_LOGGER.debug(self._log("{} notify all".format(op_name))) _LOGGER.debug(
_LOGGER.debug(self._log("{} push data succ!".format(op_name))) self._log("(logid={}) Op({}) Pushed data into internal queue.".
format(channeldata.id, op_name)))
return True return True
elif op_name is None: elif op_name is None:
raise Exception( _LOGGER.critical(
self._log( self._log(
"There are multiple producers, so op_name cannot be None.")) "(logid={}) Op({}) Failed to push data: there are multiple "
"producers, so op_name cannot be None.".format(
channeldata.id, op_name)))
os._exit(-1)
producer_num = len(self._producers) producer_num = len(self._producers)
data_id = channeldata.id data_id = channeldata.id
put_data = None put_data = None
with self._cv: with self._cv:
_LOGGER.debug(self._log("{} get lock".format(op_name)))
if data_id not in self._input_buf: if data_id not in self._input_buf:
self._input_buf[data_id] = { self._input_buf[data_id] = {
name: None name: None
...@@ -307,14 +346,12 @@ class ProcessChannel(object): ...@@ -307,14 +346,12 @@ class ProcessChannel(object):
if put_data is None: if put_data is None:
_LOGGER.debug( _LOGGER.debug(
self._log("{} push data succ, but not push to queue.". self._log(
format(op_name))) "(logid={}) Op({}) Pushed data into input_buffer.".
format(data_id, op_name)))
else: else:
while self._stop.value == 0: while self._stop.value == 0:
try: try:
_LOGGER.debug(
self._log("{} push data succ: {}".format(
op_name, put_data.__str__())))
self._que.put(put_data, timeout=0) self._que.put(put_data, timeout=0)
break break
except Queue.Empty: except Queue.Empty:
...@@ -323,43 +360,59 @@ class ProcessChannel(object): ...@@ -323,43 +360,59 @@ class ProcessChannel(object):
raise ChannelStopError() raise ChannelStopError()
_LOGGER.debug( _LOGGER.debug(
self._log("multi | {} push data succ!".format(op_name))) self._log(
"(logid={}) Op({}) Pushed data into internal_queue.".
format(data_id, op_name)))
self._cv.notify_all() self._cv.notify_all()
return True return True
def front(self, op_name=None): def front(self, op_name=None, timeout=None):
_LOGGER.debug(self._log("{} try to get data...".format(op_name))) _LOGGER.debug(
self._log("Op({}) Getting data[?]; timeout(s)={}".format(op_name,
timeout)))
endtime = None
if timeout is not None:
if timeout <= 0:
timeout = None
else:
endtime = _time() + timeout
if len(self._consumer_cursors) == 0: if len(self._consumer_cursors) == 0:
raise Exception( _LOGGER.critical(
self._log( self._log(
"expected number of consumers to be greater than 0, but the it is 0." "Op({}) Failed to get data: expected number of consumers to be " \
)) "greater than 0, but the it is 0.".format(op_name)))
os._exit(-1)
elif len(self._consumer_cursors) == 1: elif len(self._consumer_cursors) == 1:
resp = None resp = None
with self._cv: with self._cv:
while self._stop.value == 0 and resp is None: while self._stop.value == 0 and resp is None:
try: try:
_LOGGER.debug(
self._log("{} try to get(with channel empty: {})".
format(op_name, self._que.empty())))
resp = self._que.get(timeout=0) resp = self._que.get(timeout=0)
break break
except Queue.Empty: except Queue.Empty:
_LOGGER.debug( if timeout is not None:
self._log( remaining = endtime - _time()
"{} wait for empty queue(with channel empty: {})". if remaining <= 0.0:
format(op_name, self._que.empty()))) _LOGGER.debug(
self._cv.wait() self._log("Op({}) Failed to get data: "
"timeout".format(op_name)))
raise ChannelTimeoutError()
self._cv.wait(remaining)
else:
self._cv.wait()
if self._stop.value == 1: if self._stop.value == 1:
raise ChannelStopError() raise ChannelStopError()
_LOGGER.debug( _LOGGER.debug(
self._log("{} get data succ: {}".format(op_name, resp.__str__( self._log("(logid={}) Op({}) Got data".format(resp.values()[0]
)))) .id, op_name)))
return resp return resp
elif op_name is None: elif op_name is None:
raise Exception( _LOGGER.critical(
self._log( self._log(
"There are multiple consumers, so op_name cannot be None.")) "Op({}) Failed to get data: there are multiple consumers, "
"so op_name cannot be None.".format(op_name)))
os._exit(-1)
# In output_buf, different Ops (according to op_name) have different # In output_buf, different Ops (according to op_name) have different
# cursors. In addition, there is a base_cursor. Their difference is # cursors. In addition, there is a base_cursor. Their difference is
...@@ -376,24 +429,25 @@ class ProcessChannel(object): ...@@ -376,24 +429,25 @@ class ProcessChannel(object):
# it is necessary to obtain a data from queue and add it to output_buf. # it is necessary to obtain a data from queue and add it to output_buf.
while self._stop.value == 0 and self._consumer_cursors[ while self._stop.value == 0 and self._consumer_cursors[
op_name] - self._base_cursor.value >= len(self._output_buf): op_name] - self._base_cursor.value >= len(self._output_buf):
_LOGGER.debug(
self._log(
"({}) B self._consumer_cursors: {}, self._base_cursor: {}, len(self._output_buf): {}".
format(op_name, self._consumer_cursors,
self._base_cursor.value, len(self._output_buf))))
try: try:
_LOGGER.debug(
self._log("{} try to get(with channel size: {})".format(
op_name, self._que.qsize())))
channeldata = self._que.get(timeout=0) channeldata = self._que.get(timeout=0)
self._output_buf.append(channeldata) self._output_buf.append(channeldata)
break
except Queue.Empty:
_LOGGER.debug( _LOGGER.debug(
self._log( self._log(
"{} wait for empty queue(with channel size: {})". "(logid={}) Op({}) Pop ready item into output_buffer".
format(op_name, self._que.qsize()))) format(channeldata.values()[0].id, op_name)))
self._cv.wait() break
except Queue.Empty:
if timeout is not None:
remaining = endtime - _time()
if remaining <= 0.0:
_LOGGER.debug(
self._log("Op({}) Failed to get data: timeout".
format(op_name)))
raise ChannelTimeoutError()
self._cv.wait(remaining)
else:
self._cv.wait()
if self._stop.value == 1: if self._stop.value == 1:
raise ChannelStopError() raise ChannelStopError()
...@@ -401,7 +455,6 @@ class ProcessChannel(object): ...@@ -401,7 +455,6 @@ class ProcessChannel(object):
base_cursor = self._base_cursor.value base_cursor = self._base_cursor.value
data_idx = consumer_cursor - base_cursor data_idx = consumer_cursor - base_cursor
resp = self._output_buf[data_idx] resp = self._output_buf[data_idx]
_LOGGER.debug(self._log("{} get data: {}".format(op_name, resp)))
self._cursor_count[consumer_cursor] -= 1 self._cursor_count[consumer_cursor] -= 1
if consumer_cursor == base_cursor and self._cursor_count[ if consumer_cursor == base_cursor and self._cursor_count[
...@@ -413,6 +466,7 @@ class ProcessChannel(object): ...@@ -413,6 +466,7 @@ class ProcessChannel(object):
self._base_cursor.value += 1 self._base_cursor.value += 1
# to avoid cursor overflow # to avoid cursor overflow
if self._base_cursor.value >= self._reset_max_cursor: if self._base_cursor.value >= self._reset_max_cursor:
_LOGGER.info(self._log("Reset cursor in Channel"))
self._base_cursor.value -= self._reset_max_cursor self._base_cursor.value -= self._reset_max_cursor
for name in self._consumer_cursors.keys(): for name in self._consumer_cursors.keys():
self._consumer_cursors[name] -= self._reset_max_cursor self._consumer_cursors[name] -= self._reset_max_cursor
...@@ -430,25 +484,21 @@ class ProcessChannel(object): ...@@ -430,25 +484,21 @@ class ProcessChannel(object):
self._cursor_count[new_consumer_cursor] = 0 self._cursor_count[new_consumer_cursor] = 0
self._cursor_count[new_consumer_cursor] += 1 self._cursor_count[new_consumer_cursor] += 1
_LOGGER.debug(
self._log(
"({}) A self._consumer_cursors: {}, self._base_cursor: {}, len(self._output_buf): {}".
format(op_name, self._consumer_cursors,
self._base_cursor.value, len(self._output_buf))))
_LOGGER.debug(self._log("{} notify all".format(op_name)))
self._cv.notify_all() self._cv.notify_all()
_LOGGER.debug(self._log("multi | {} get data succ!".format(op_name))) _LOGGER.debug(
return resp # reference, read only self._log("(logid={}) Op({}) Got data from output_buffer".format(
resp.values()[0].id, op_name)))
return resp
def stop(self): def stop(self):
_LOGGER.debug(self._log("stop.")) _LOGGER.info(self._log("stop."))
self._stop.value = 1 self._stop.value = 1
with self._cv: with self._cv:
self._cv.notify_all() self._cv.notify_all()
class ThreadChannel(Queue.Queue): class ThreadChannel(Queue.PriorityQueue):
""" """
(Thread version)The channel used for communication between Ops. (Thread version)The channel used for communication between Ops.
...@@ -458,7 +508,7 @@ class ThreadChannel(Queue.Queue): ...@@ -458,7 +508,7 @@ class ThreadChannel(Queue.Queue):
Only when all types of Ops get the data of the same ID, Only when all types of Ops get the data of the same ID,
the data will be popped; The Op of the same type will not the data will be popped; The Op of the same type will not
get the data of the same ID. get the data of the same ID.
3. (TODO) Timeout and BatchSize are not fully supported. 3. The front function supports a timeout parameter to enable auto-batching.
Note: Note:
1. The ID of the data in the channel must be different. 1. The ID of the data in the channel must be different.
...@@ -477,10 +527,9 @@ class ThreadChannel(Queue.Queue): ...@@ -477,10 +527,9 @@ class ThreadChannel(Queue.Queue):
maintains the data obtained from queue. maintains the data obtained from queue.
""" """
def __init__(self, name=None, maxsize=-1, timeout=None): def __init__(self, name=None, maxsize=-1):
Queue.Queue.__init__(self, maxsize=maxsize) Queue.Queue.__init__(self, maxsize=maxsize)
self._maxsize = maxsize self._maxsize = maxsize
self._timeout = timeout
self.name = name self.name = name
self._stop = False self._stop = False
...@@ -496,6 +545,12 @@ class ThreadChannel(Queue.Queue): ...@@ -496,6 +545,12 @@ class ThreadChannel(Queue.Queue):
self._base_cursor = 0 self._base_cursor = 0
self._output_buf = [] self._output_buf = []
def get_maxsize(self):
return self._maxsize
def size(self):
return self.qsize()
def get_producers(self): def get_producers(self):
return self._producers return self._producers
...@@ -505,37 +560,41 @@ class ThreadChannel(Queue.Queue): ...@@ -505,37 +560,41 @@ class ThreadChannel(Queue.Queue):
def _log(self, info_str): def _log(self, info_str):
return "[{}] {}".format(self.name, info_str) return "[{}] {}".format(self.name, info_str)
def debug(self):
return self._log("p: {}, c: {}".format(self.get_producers(),
self.get_consumers()))
def add_producer(self, op_name): def add_producer(self, op_name):
""" not thread safe, and can only be called during initialization. """ """ not thread safe, and can only be called during initialization. """
if op_name in self._producers: if op_name in self._producers:
raise ValueError( _LOGGER.critical(
self._log("producer({}) is already in channel".format(op_name))) self._log("Failed to add producer: producer({}) is "
"already in channel".format(op_name)))
os._exit(-1)
self._producers.append(op_name) self._producers.append(op_name)
_LOGGER.debug(self._log("Succ add a producer: {}".format(op_name)))
def add_consumer(self, op_name): def add_consumer(self, op_name):
""" not thread safe, and can only be called during initialization. """ """ not thread safe, and can only be called during initialization. """
if op_name in self._consumer_cursors: if op_name in self._consumer_cursors:
raise ValueError( _LOGGER.critical(
self._log("consumer({}) is already in channel".format(op_name))) self._log("Failed to add consumer: consumer({}) is "
"already in channel".format(op_name)))
os._exit(-1)
self._consumer_cursors[op_name] = 0 self._consumer_cursors[op_name] = 0
if self._cursor_count.get(0) is None: if self._cursor_count.get(0) is None:
self._cursor_count[0] = 0 self._cursor_count[0] = 0
self._cursor_count[0] += 1 self._cursor_count[0] += 1
_LOGGER.debug(self._log("Succ add a consumer: {}".format(op_name)))
def push(self, channeldata, op_name=None): def push(self, channeldata, op_name=None):
_LOGGER.debug( _LOGGER.debug(
self._log("{} try to push data: {}".format(op_name, self._log("(logid={}) Op({}) Pushing data".format(channeldata.id,
channeldata.__str__()))) op_name)))
if len(self._producers) == 0: if len(self._producers) == 0:
raise Exception( _LOGGER.critical(
self._log( self._log(
"expected number of producers to be greater than 0, but the it is 0." "(logid={}) Op({}) Failed to push data: expected number of "
)) "producers to be greater than 0, but the it is 0.".format(
channeldata.id, op_name)))
os._exit(-1)
elif len(self._producers) == 1: elif len(self._producers) == 1:
with self._cv: with self._cv:
while self._stop is False: while self._stop is False:
...@@ -547,18 +606,22 @@ class ThreadChannel(Queue.Queue): ...@@ -547,18 +606,22 @@ class ThreadChannel(Queue.Queue):
if self._stop: if self._stop:
raise ChannelStopError() raise ChannelStopError()
self._cv.notify_all() self._cv.notify_all()
_LOGGER.debug(self._log("{} push data succ!".format(op_name))) _LOGGER.debug(
self._log("(logid={}) Op({}) Pushed data into internal_queue.".
format(channeldata.id, op_name)))
return True return True
elif op_name is None: elif op_name is None:
raise Exception( _LOGGER.critical(
self._log( self._log(
"There are multiple producers, so op_name cannot be None.")) "(logid={}) Op({}) Failed to push data: there are multiple"
" producers, so op_name cannot be None.".format(
channeldata.id, op_name)))
os._exit(-1)
producer_num = len(self._producers) producer_num = len(self._producers)
data_id = channeldata.id data_id = channeldata.id
put_data = None put_data = None
with self._cv: with self._cv:
_LOGGER.debug(self._log("{} get lock".format(op_name)))
if data_id not in self._input_buf: if data_id not in self._input_buf:
self._input_buf[data_id] = { self._input_buf[data_id] = {
name: None name: None
...@@ -575,8 +638,9 @@ class ThreadChannel(Queue.Queue): ...@@ -575,8 +638,9 @@ class ThreadChannel(Queue.Queue):
if put_data is None: if put_data is None:
_LOGGER.debug( _LOGGER.debug(
self._log("{} push data succ, but not push to queue.". self._log(
format(op_name))) "(logid={}) Op({}) Pushed data into input_buffer.".
format(data_id, op_name)))
else: else:
while self._stop is False: while self._stop is False:
try: try:
...@@ -588,17 +652,29 @@ class ThreadChannel(Queue.Queue): ...@@ -588,17 +652,29 @@ class ThreadChannel(Queue.Queue):
raise ChannelStopError() raise ChannelStopError()
_LOGGER.debug( _LOGGER.debug(
self._log("multi | {} push data succ!".format(op_name))) self._log(
"(logid={}) Op({}) Pushed data into internal_queue.".
format(data_id, op_name)))
self._cv.notify_all() self._cv.notify_all()
return True return True
def front(self, op_name=None): def front(self, op_name=None, timeout=None):
_LOGGER.debug(self._log("{} try to get data".format(op_name))) _LOGGER.debug(
self._log("Op({}) Getting data[?]; timeout(s)={}".format(op_name,
timeout)))
endtime = None
if timeout is not None:
if timeout <= 0:
timeout = None
else:
endtime = _time() + timeout
if len(self._consumer_cursors) == 0: if len(self._consumer_cursors) == 0:
raise Exception( _LOGGER.critical(
self._log( self._log(
"expected number of consumers to be greater than 0, but the it is 0." "Op({}) Failed to get data: expected number of consumers to be "
)) "greater than 0, but the it is 0.".format(op_name)))
os._exit(-1)
elif len(self._consumer_cursors) == 1: elif len(self._consumer_cursors) == 1:
resp = None resp = None
with self._cv: with self._cv:
...@@ -607,17 +683,29 @@ class ThreadChannel(Queue.Queue): ...@@ -607,17 +683,29 @@ class ThreadChannel(Queue.Queue):
resp = self.get(timeout=0) resp = self.get(timeout=0)
break break
except Queue.Empty: except Queue.Empty:
self._cv.wait() if timeout is not None:
remaining = endtime - _time()
if remaining <= 0.0:
_LOGGER.debug(
self._log(
"Op({}) Failed to get data: timeout".
format(op_name)))
raise ChannelTimeoutError()
self._cv.wait(remaining)
else:
self._cv.wait()
if self._stop: if self._stop:
raise ChannelStopError() raise ChannelStopError()
_LOGGER.debug( _LOGGER.debug(
self._log("{} get data succ: {}".format(op_name, resp.__str__( self._log("(logid={}) Op({}) Got data".format(resp.values()[0]
)))) .id, op_name)))
return resp return resp
elif op_name is None: elif op_name is None:
raise Exception( _LOGGER.critical(
self._log( self._log("Op({}) Failed to get data: there are multiple "
"There are multiple consumers, so op_name cannot be None.")) "consumers, so op_name cannot be None.".format(
op_name)))
os._exit(-1)
# In output_buf, different Ops (according to op_name) have different # In output_buf, different Ops (according to op_name) have different
# cursors. In addition, there is a base_cursor. Their difference is # cursors. In addition, there is a base_cursor. Their difference is
...@@ -637,9 +725,22 @@ class ThreadChannel(Queue.Queue): ...@@ -637,9 +725,22 @@ class ThreadChannel(Queue.Queue):
try: try:
channeldata = self.get(timeout=0) channeldata = self.get(timeout=0)
self._output_buf.append(channeldata) self._output_buf.append(channeldata)
_LOGGER.debug(
self._log(
"(logid={}) Op({}) Pop ready item into output_buffer".
format(channeldata.values()[0].id, op_name)))
break break
except Queue.Empty: except Queue.Empty:
self._cv.wait() if timeout is not None:
remaining = endtime - _time()
if remaining <= 0.0:
_LOGGER.debug(
self._log("Op({}) Failed to get data: timeout".
format(op_name)))
raise ChannelTimeoutError()
self._cv.wait(remaining)
else:
self._cv.wait()
if self._stop: if self._stop:
raise ChannelStopError() raise ChannelStopError()
...@@ -659,6 +760,7 @@ class ThreadChannel(Queue.Queue): ...@@ -659,6 +760,7 @@ class ThreadChannel(Queue.Queue):
self._base_cursor += 1 self._base_cursor += 1
# to avoid cursor overflow # to avoid cursor overflow
if self._base_cursor >= self._reset_max_cursor: if self._base_cursor >= self._reset_max_cursor:
_LOGGER.info(self._log("Reset cursor in Channel"))
self._base_cursor -= self._reset_max_cursor self._base_cursor -= self._reset_max_cursor
for name in self._consumer_cursors: for name in self._consumer_cursors:
self._consumer_cursors[name] -= self._reset_max_cursor self._consumer_cursors[name] -= self._reset_max_cursor
...@@ -668,7 +770,6 @@ class ThreadChannel(Queue.Queue): ...@@ -668,7 +770,6 @@ class ThreadChannel(Queue.Queue):
} }
else: else:
resp = copy.deepcopy(self._output_buf[data_idx]) resp = copy.deepcopy(self._output_buf[data_idx])
_LOGGER.debug(self._log("{} get data: {}".format(op_name, resp)))
self._consumer_cursors[op_name] += 1 self._consumer_cursors[op_name] += 1
new_consumer_cursor = self._consumer_cursors[op_name] new_consumer_cursor = self._consumer_cursors[op_name]
...@@ -678,16 +779,23 @@ class ThreadChannel(Queue.Queue): ...@@ -678,16 +779,23 @@ class ThreadChannel(Queue.Queue):
self._cv.notify_all() self._cv.notify_all()
_LOGGER.debug(self._log("multi | {} get data succ!".format(op_name))) _LOGGER.debug(
self._log("(logid={}) Op({}) Got data from output_buffer".format(
resp.values()[0].id, op_name)))
return resp return resp
def stop(self): def stop(self):
_LOGGER.debug(self._log("stop.")) _LOGGER.info(self._log("stop."))
self._stop = True self._stop = True
with self._cv: with self._cv:
self._cv.notify_all() self._cv.notify_all()
class ChannelTimeoutError(RuntimeError):
def __init__(self):
pass
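The timeout-aware `front()` plus `ChannelTimeoutError` is what enables auto-batching in the Ops: keep pulling until the batch is full or the per-batch deadline expires. A hedged sketch of that loop, with illustrative sizes and an assumed import path:
```python
# Hedged sketch of the auto-batching loop enabled by ChannelTimeoutError;
# input_channel is any Thread/ProcessChannel, sizes are illustrative.
from time import time as _time

from paddle_serving_server_gpu.pipeline.channel import ChannelTimeoutError


def collect_batch(input_channel, op_name, batch_size=8, batch_timeout=0.5):
    """Pull items until the batch is full or the per-batch deadline expires."""
    batch = []
    deadline = _time() + batch_timeout
    while len(batch) < batch_size:
        remaining = deadline - _time()
        if remaining <= 0:
            break
        try:
            batch.append(input_channel.front(op_name, timeout=remaining))
        except ChannelTimeoutError:
            break  # emit whatever has been collected so far
    return batch
```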
class ChannelStopError(RuntimeError): class ChannelStopError(RuntimeError):
def __init__(self): def __init__(self):
pass pass
...@@ -24,42 +24,45 @@ else: ...@@ -24,42 +24,45 @@ else:
raise Exception("Error Python version") raise Exception("Error Python version")
import os import os
import logging import logging
import collections
from .operator import Op, RequestOp, ResponseOp, VirtualOp from .operator import Op, RequestOp, ResponseOp, VirtualOp
from .channel import (ThreadChannel, ProcessChannel, ChannelData, from .channel import (ThreadChannel, ProcessChannel, ChannelData,
ChannelDataEcode, ChannelDataType, ChannelStopError) ChannelDataEcode, ChannelDataType, ChannelStopError)
from .profiler import TimeProfiler from .profiler import TimeProfiler, PerformanceTracer
from .util import NameGenerator from .util import NameGenerator, ThreadIdGenerator, PipelineProcSyncManager
from .proto import pipeline_service_pb2
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger(__name__)
class DAGExecutor(object): class DAGExecutor(object):
def __init__(self, response_op, dag_config, show_info): def __init__(self, response_op, server_conf, worker_idx):
self._retry = dag_config.get('retry', 1) build_dag_each_worker = server_conf["build_dag_each_worker"]
server_worker_num = server_conf["worker_num"]
dag_conf = server_conf["dag"]
client_type = dag_config.get('client_type', 'brpc') self._retry = dag_conf["retry"]
self._server_use_profile = dag_config.get('use_profile', False) client_type = dag_conf["client_type"]
channel_size = dag_config.get('channel_size', 0) self._server_use_profile = dag_conf["use_profile"]
self._is_thread_op = dag_config.get('is_thread_op', True) channel_size = dag_conf["channel_size"]
self._is_thread_op = dag_conf["is_thread_op"]
if show_info and self._server_use_profile: tracer_conf = dag_conf["tracer"]
_LOGGER.info("================= PROFILER ================") tracer_interval_s = tracer_conf["interval_s"]
if self._is_thread_op:
_LOGGER.info("op: thread")
_LOGGER.info("profile mode: sync")
else:
_LOGGER.info("op: process")
_LOGGER.info("profile mode: asyn")
_LOGGER.info("-------------------------------------------")
self.name = "@G" self.name = "@DAGExecutor"
self._profiler = TimeProfiler() self._profiler = TimeProfiler()
self._profiler.enable(True) self._profiler.enable(True)
self._tracer = None
if tracer_interval_s >= 1:
self._tracer = PerformanceTracer(
self._is_thread_op, tracer_interval_s, server_worker_num)
self._dag = DAG(self.name, response_op, self._server_use_profile, self._dag = DAG(self.name, response_op, self._server_use_profile,
self._is_thread_op, client_type, channel_size, self._is_thread_op, client_type, channel_size,
show_info) build_dag_each_worker, self._tracer)
(in_channel, out_channel, pack_rpc_func, (in_channel, out_channel, pack_rpc_func,
unpack_rpc_func) = self._dag.build() unpack_rpc_func) = self._dag.build()
self._dag.start() self._dag.start()
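The constructor now reads its options from a nested `server_conf` rather than a flat `dag_config`. A hedged sketch of the structure implied by the lookups above; the key names are taken from the code, the values are illustrative and would normally come from the PipelineServer yaml config.
```python
# Shape of server_conf implied by the reads above; values are illustrative.
server_conf = {
    "worker_num": 1,
    "build_dag_each_worker": False,
    "dag": {
        "retry": 1,
        "client_type": "brpc",
        "use_profile": False,
        "channel_size": 0,
        "is_thread_op": True,
        "tracer": {
            "interval_s": -1,  # values below 1 disable the PerformanceTracer
        },
    },
}
```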
...@@ -69,15 +72,23 @@ class DAGExecutor(object): ...@@ -69,15 +72,23 @@ class DAGExecutor(object):
self._pack_rpc_func = pack_rpc_func self._pack_rpc_func = pack_rpc_func
self._unpack_rpc_func = unpack_rpc_func self._unpack_rpc_func = unpack_rpc_func
_LOGGER.debug(self._log(in_channel.debug())) if self._tracer is not None:
_LOGGER.debug(self._log(out_channel.debug())) self._tracer.start()
# generate id: data_id == request_id == log_id
base_counter = 0
gen_id_step = 1
if build_dag_each_worker:
base_counter = worker_idx
gen_id_step = server_worker_num
self._id_generator = ThreadIdGenerator(
max_id=1000000000000000000,
base_counter=base_counter,
step=gen_id_step)
self._id_lock = threading.Lock()
self._id_counter = 0
self._reset_max_id = 1000000000000000000
self._cv_pool = {} self._cv_pool = {}
self._cv_for_cv_pool = threading.Condition() self._cv_for_cv_pool = threading.Condition()
self._fetch_buffer = None self._fetch_buffer = {}
self._recive_func = None self._recive_func = None
self._client_profile_key = "pipeline.profile" self._client_profile_key = "pipeline.profile"
...@@ -86,32 +97,38 @@ class DAGExecutor(object): ...@@ -86,32 +97,38 @@ class DAGExecutor(object):
def start(self): def start(self):
self._recive_func = threading.Thread( self._recive_func = threading.Thread(
target=DAGExecutor._recive_out_channel_func, args=(self, )) target=DAGExecutor._recive_out_channel_func, args=(self, ))
self._recive_func.daemon = True
self._recive_func.start() self._recive_func.start()
_LOGGER.debug("[DAG Executor] Start recive thread")
def stop(self): def stop(self):
self._dag.stop() self._dag.stop()
self._dag.join() self._dag.join()
_LOGGER.info("[DAG Executor] Stop")
def _get_next_data_id(self): def _get_next_data_id(self):
with self._id_lock: data_id = self._id_generator.next()
if self._id_counter >= self._reset_max_id: cond_v = threading.Condition()
self._id_counter -= self._reset_max_id with self._cv_for_cv_pool:
self._id_counter += 1 self._cv_pool[data_id] = cond_v
return self._id_counter - 1 self._fetch_buffer[data_id] = None
return data_id, cond_v
def _set_in_channel(self, in_channel): def _set_in_channel(self, in_channel):
if not isinstance(in_channel, (ThreadChannel, ProcessChannel)): if not isinstance(in_channel, (ThreadChannel, ProcessChannel)):
raise TypeError( _LOGGER.critical("[DAG Executor] Failed to set in_channel: "
self._log('in_channel must be Channel type, but get {}'.format( "in_channel must be Channel type, but get {}".
type(in_channel)))) format(type(in_channel)))
os._exit(-1)
in_channel.add_producer(self.name) in_channel.add_producer(self.name)
self._in_channel = in_channel self._in_channel = in_channel
def _set_out_channel(self, out_channel): def _set_out_channel(self, out_channel):
if not isinstance(out_channel, (ThreadChannel, ProcessChannel)): if not isinstance(out_channel, (ThreadChannel, ProcessChannel)):
raise TypeError( _LOGGER.critical("[DAG Executor] Failed to set out_channel: "
self._log('out_channel must be Channel type, but get {}'.format( "must be Channel type, but get {}".format(
type(out_channel)))) type(out_channel)))
os._exit(-1)
out_channel.add_consumer(self.name) out_channel.add_consumer(self.name)
self._out_channel = out_channel self._out_channel = out_channel
...@@ -121,7 +138,7 @@ class DAGExecutor(object): ...@@ -121,7 +138,7 @@ class DAGExecutor(object):
try: try:
channeldata_dict = self._out_channel.front(self.name) channeldata_dict = self._out_channel.front(self.name)
except ChannelStopError: except ChannelStopError:
_LOGGER.debug(self._log("stop.")) _LOGGER.info("[DAG Executor] Stop.")
with self._cv_for_cv_pool: with self._cv_for_cv_pool:
for data_id, cv in self._cv_pool.items(): for data_id, cv in self._cv_pool.items():
closed_errror_data = ChannelData( closed_errror_data = ChannelData(
...@@ -129,46 +146,61 @@ class DAGExecutor(object): ...@@ -129,46 +146,61 @@ class DAGExecutor(object):
error_info="dag closed.", error_info="dag closed.",
data_id=data_id) data_id=data_id)
with cv: with cv:
self._fetch_buffer = closed_errror_data self._fetch_buffer[data_id] = closed_errror_data
cv.notify_all() cv.notify_all()
break break
if len(channeldata_dict) != 1: if len(channeldata_dict) != 1:
_LOGGER.error("out_channel cannot have multiple input ops") _LOGGER.critical(
"[DAG Executor] Failed to fetch result: out_channel "
"cannot have multiple input ops")
os._exit(-1) os._exit(-1)
(_, channeldata), = channeldata_dict.items() (_, channeldata), = channeldata_dict.items()
if not isinstance(channeldata, ChannelData): if not isinstance(channeldata, ChannelData):
raise TypeError( _LOGGER.critical(
self._log('data must be ChannelData type, but get {}'. "[DAG Executor] Failed to fetch result: data in out_channel"
format(type(channeldata)))) " must be ChannelData type, but get {}"
.format(type(channeldata)))
os._exit(-1)
data_id = channeldata.id data_id = channeldata.id
_LOGGER.debug("recive thread fetch data: {}".format(data_id)) _LOGGER.debug("(logid={}) [recive thread] Fetched data".format(
data_id))
with self._cv_for_cv_pool: with self._cv_for_cv_pool:
cv = self._cv_pool[data_id] cond_v = self._cv_pool[data_id]
with cv: with cond_v:
self._fetch_buffer = channeldata self._fetch_buffer[data_id] = channeldata
cv.notify_all() cond_v.notify_all()
def _get_channeldata_from_fetch_buffer(self, data_id): def _get_channeldata_from_fetch_buffer(self, data_id, cond_v):
resp = None ready_data = None
cv = threading.Condition()
with self._cv_for_cv_pool: with cond_v:
self._cv_pool[data_id] = cv with self._cv_for_cv_pool:
with cv: if self._fetch_buffer[data_id] is not None:
cv.wait() # The requested data is already ready
_LOGGER.debug("resp func get lock (data_id: {})".format(data_id)) ready_data = self._fetch_buffer[data_id]
resp = copy.deepcopy(self._fetch_buffer) self._cv_pool.pop(data_id)
with self._cv_for_cv_pool: self._fetch_buffer.pop(data_id)
self._cv_pool.pop(data_id) if ready_data is None:
return resp # Wait for data ready
cond_v.wait()
with self._cv_for_cv_pool:
ready_data = self._fetch_buffer[data_id]
self._cv_pool.pop(data_id)
self._fetch_buffer.pop(data_id)
_LOGGER.debug("(logid={}) [resp thread] Got data".format(data_id))
return ready_data
def _pack_channeldata(self, rpc_request, data_id): def _pack_channeldata(self, rpc_request, data_id):
_LOGGER.debug(self._log('start inferce'))
dictdata = None dictdata = None
try: try:
dictdata = self._unpack_rpc_func(rpc_request) dictdata = self._unpack_rpc_func(rpc_request)
except Exception as e: except Exception as e:
_LOGGER.error(
"(logid={}) Failed to parse RPC request package: {}"
.format(data_id, e),
exc_info=True)
return ChannelData( return ChannelData(
ecode=ChannelDataEcode.RPC_PACKAGE_ERROR.value, ecode=ChannelDataEcode.RPC_PACKAGE_ERROR.value,
error_info="rpc package error: {}".format(e), error_info="rpc package error: {}".format(e),
...@@ -181,96 +213,132 @@ class DAGExecutor(object): ...@@ -181,96 +213,132 @@ class DAGExecutor(object):
if key == self._client_profile_key: if key == self._client_profile_key:
profile_value = rpc_request.value[idx] profile_value = rpc_request.value[idx]
break break
client_need_profile = (profile_value == self._client_profile_value)
_LOGGER.debug("(logid={}) Need profile in client: {}".format(
data_id, client_need_profile))
return ChannelData( return ChannelData(
datatype=ChannelDataType.DICT.value, datatype=ChannelDataType.DICT.value,
dictdata=dictdata, dictdata=dictdata,
data_id=data_id, data_id=data_id,
client_need_profile=( client_need_profile=client_need_profile)
profile_value == self._client_profile_value))
def call(self, rpc_request):
if self._tracer is not None:
trace_buffer = self._tracer.data_buffer()
data_id, cond_v = self._get_next_data_id()
_LOGGER.info("(logid={}) Succ generate id".format(data_id))
start_call, end_call = None, None
if not self._is_thread_op:
start_call = self._profiler.record("call_{}#DAG-{}_0".format(
data_id, data_id))
else:
start_call = self._profiler.record("call_{}#DAG_0".format(data_id))
_LOGGER.debug("(logid={}) Parsing RPC request package".format(data_id))
self._profiler.record("prepack_{}#{}_0".format(data_id, self.name))
req_channeldata = self._pack_channeldata(rpc_request, data_id)
self._profiler.record("prepack_{}#{}_1".format(data_id, self.name))
resp_channeldata = None
for i in range(self._retry):
_LOGGER.debug("(logid={}) Pushing data into Graph engine".format(
data_id))
try:
self._in_channel.push(req_channeldata, self.name)
except ChannelStopError:
_LOGGER.debug("[DAG Executor] Stop")
with self._cv_for_cv_pool:
self._cv_pool.pop(data_id)
return self._pack_for_rpc_resp(
ChannelData(
ecode=ChannelDataEcode.CLOSED_ERROR.value,
error_info="dag closed.",
data_id=data_id))
_LOGGER.debug("(logid={}) Wait for Graph engine...".format(data_id))
resp_channeldata = self._get_channeldata_from_fetch_buffer(data_id,
cond_v)
if resp_channeldata.ecode == ChannelDataEcode.OK.value:
_LOGGER.info("(logid={}) Succ predict".format(data_id))
break
else:
_LOGGER.error("(logid={}) Failed to predict: {}"
.format(data_id, resp_channeldata.error_info))
if resp_channeldata.ecode != ChannelDataEcode.TIMEOUT.value:
break
if i + 1 < self._retry:
_LOGGER.warning("(logid={}) DAGExecutor retry({}/{})".format(
data_id, i + 1, self._retry))
_LOGGER.debug("(logid={}) Packing RPC response package".format(data_id))
self._profiler.record("postpack_{}#{}_0".format(data_id, self.name))
rpc_resp = self._pack_for_rpc_resp(resp_channeldata)
self._profiler.record("postpack_{}#{}_1".format(data_id, self.name))
if not self._is_thread_op:
end_call = self._profiler.record("call_{}#DAG-{}_1".format(data_id,
data_id))
else:
end_call = self._profiler.record("call_{}#DAG_1".format(data_id))
if self._tracer is not None:
trace_buffer.put({
"name": "DAG",
"id": data_id,
"succ": resp_channeldata.ecode == ChannelDataEcode.OK.value,
"actions": {
"call_{}".format(data_id): end_call - start_call,
},
})
profile_str = self._profiler.gen_profile_str()
if self._server_use_profile:
sys.stderr.write(profile_str)
# add profile info into rpc_resp
profile_value = ""
if resp_channeldata.client_need_profile:
profile_set = resp_channeldata.profile_data_set
profile_set.add(profile_str)
profile_value = "".join(list(profile_set))
rpc_resp.key.append(self._client_profile_key)
rpc_resp.value.append(profile_value)
return rpc_resp
def _pack_for_rpc_resp(self, channeldata):
try:
return self._pack_rpc_func(channeldata)
except Exception as e:
_LOGGER.error(
"(logid={}) Failed to pack RPC response package: {}"
.format(channeldata.id, e),
exc_info=True)
resp = pipeline_service_pb2.Response()
resp.ecode = ChannelDataEcode.RPC_PACKAGE_ERROR.value
resp.error_info = "rpc package error: {}".format(e)
return resp
class DAG(object):
def __init__(self, request_name, response_op, use_profile, is_thread_op,
client_type, channel_size, build_dag_each_worker, tracer):
self._request_name = request_name
self._response_op = response_op
self._use_profile = use_profile
self._is_thread_op = is_thread_op
self._channel_size = channel_size
self._client_type = client_type
self._build_dag_each_worker = build_dag_each_worker
self._tracer = tracer
if not self._is_thread_op:
self._manager = PipelineProcSyncManager()
_LOGGER.info("[DAG] Succ init")
@staticmethod
def get_use_ops(response_op):
unique_names = set()
used_ops = set()
succ_ops_of_use_op = {}  # {op_name: succ_ops}
@@ -288,8 +356,10 @@ class DAG(object):
used_ops.add(pred_op)
# check the name of op is globally unique
if pred_op.name in unique_names:
_LOGGER.critical("Failed to get used Ops: the"
" name of Op must be unique: {}".
format(pred_op.name))
os._exit(-1)
unique_names.add(pred_op.name)
return used_ops, succ_ops_of_use_op
@@ -301,10 +371,13 @@ class DAG(object):
else:
channel = ProcessChannel(
self._manager, name=name_gen.next(), maxsize=self._channel_size)
_LOGGER.debug("[DAG] Generate channel: {}".format(channel.name))
return channel
def _gen_virtual_op(self, name_gen):
vir_op = VirtualOp(name=name_gen.next())
_LOGGER.debug("[DAG] Generate virtual_op: {}".format(vir_op.name))
return vir_op
def _topo_sort(self, used_ops, response_op, out_degree_ops):
out_degree_num = {
@@ -318,7 +391,9 @@ class DAG(object):
if len(op.get_input_ops()) == 0:
zero_indegree_num += 1
if zero_indegree_num != 1:
_LOGGER.critical("Failed to topo sort: DAG contains "
"multiple RequestOps")
os._exit(-1)
last_op = response_op.get_input_ops()[0]
ques[que_idx].put(last_op)
@@ -342,37 +417,47 @@ class DAG(object):
break
que_idx = (que_idx + 1) % 2
if sorted_op_num < len(used_ops):
_LOGGER.critical("Failed to topo sort: not legal DAG")
os._exit(-1)
return dag_views, last_op
def _build_dag(self, response_op):
if response_op is None:
_LOGGER.critical("Failed to build DAG: ResponseOp"
" has not been set.")
os._exit(-1)
used_ops, out_degree_ops = DAG.get_use_ops(response_op)
if not self._build_dag_each_worker:
_LOGGER.info("================= USED OP =================")
for op in used_ops:
if not isinstance(op, RequestOp):
_LOGGER.info(op.name)
_LOGGER.info("-------------------------------------------")
if len(used_ops) <= 1:
_LOGGER.critical(
"Failed to build DAG: besides RequestOp and ResponseOp, "
"there should be at least one Op in DAG.")
os._exit(-1)
if self._build_dag_each_worker:
_LOGGER.info("Because `build_dag_each_worker` mode is used, "
"Auto-batching is set to the default config: "
"batch_size=1, auto_batching_timeout=None")
for op in used_ops:
op.use_default_auto_batching_config()
dag_views, last_op = self._topo_sort(used_ops, response_op,
out_degree_ops)
dag_views = list(reversed(dag_views))
if not self._build_dag_each_worker:
_LOGGER.debug("================== DAG ====================")
for idx, view in enumerate(dag_views):
_LOGGER.debug("(VIEW {})".format(idx))
for op in view:
_LOGGER.debug(" [{}]".format(op.name))
for out_op in out_degree_ops[op.name]:
_LOGGER.debug(" - {}".format(out_op.name))
_LOGGER.debug("-------------------------------------------")
# create channels and virtual ops
virtual_op_name_gen = NameGenerator("vir")
@@ -414,7 +499,6 @@ class DAG(object):
continue
channel = self._gen_channel(channel_name_gen)
channels.append(channel)
op.add_input_channel(channel)
pred_ops = pred_op_of_next_view_op[op.name]
if v_idx == 0:
@@ -422,8 +506,6 @@ class DAG(object):
else:
# if pred_op is virtual op, it will use ancestors as producers to channel
for pred_op in pred_ops:
pred_op.add_output_channel(channel)
processed_op.add(op.name)
# find same input op to combine channel
@@ -439,8 +521,6 @@ class DAG(object):
same_flag = False
break
if same_flag:
other_op.add_input_channel(channel)
processed_op.add(other_op.name)
output_channel = self._gen_channel(channel_name_gen)
@@ -458,14 +538,19 @@ class DAG(object):
actual_ops.append(op)
for c in channels:
_LOGGER.debug("Channel({}):\n\t- producers: {}\n\t- consumers: {}"
.format(c.name, c.get_producers(), c.get_consumers()))
return (actual_ops, channels, input_channel, output_channel, pack_func,
unpack_func)
def get_channels(self):
return self._channels
def build(self):
(actual_ops, channels, input_channel, output_channel, pack_func,
unpack_func) = self._build_dag(self._response_op)
_LOGGER.info("[DAG] Succ build DAG")
self._actual_ops = actual_ops
self._channels = channels
@@ -474,18 +559,24 @@ class DAG(object):
self._pack_func = pack_func
self._unpack_func = unpack_func
if self._tracer is not None:
self._tracer.set_channels(self._channels)
return self._input_channel, self._output_channel, self._pack_func, self._unpack_func
def start(self):
self._threads_or_proces = []
for op in self._actual_ops:
op.use_profiler(self._use_profile)
op.set_tracer(self._tracer)
if self._is_thread_op:
self._threads_or_proces.extend(
op.start_with_thread(self._client_type))
else:
self._threads_or_proces.extend(
op.start_with_process(self._client_type))
_LOGGER.info("[DAG] start")
# not join yet
return self._threads_or_proces
...
@@ -11,17 +11,3 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import re
import os
new_str = ""
with open("paddle_serving_server_gpu/version.py", "r") as f:
for line in f.readlines():
if re.match("cuda_version", line):
line = re.sub(r"\d+", sys.argv[1], line)
new_str = new_str + line
with open("paddle_serving_server_gpu/version.py", "w") as f:
f.write(new_str)
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";
package baidu.paddle_serving.pipeline_serving;
option go_package = ".;pipeline_serving";
import "google/api/annotations.proto";
message Response {
repeated string key = 1;
repeated string value = 2;
int32 ecode = 3;
string error_info = 4;
};
message Request {
repeated string key = 1;
repeated string value = 2;
string name = 3;
}
service PipelineService {
rpc inference(Request) returns (Response) {
option (google.api.http) = {
post : "/{name=*}/prediction"
body : "*"
};
}
};
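The `google.api.http` annotation above lets grpc-gateway expose the same `inference` RPC over REST. A minimal sketch of calling that HTTP route from Python follows; the host, the proxy port (18080) and the service name "uci" are illustrative assumptions, not values defined in this commit.

# hypothetical check of the HTTP proxy route POST /{name}/prediction
import json
import requests

url = "http://127.0.0.1:18080/uci/prediction"
req = {"key": ["x"], "value": ["[0.1, 0.2]"]}  # Request.key / Request.value are parallel string lists
resp = requests.post(url, data=json.dumps(req))
print(resp.json())  # expected fields: key, value, ecode, error_info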
// Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"C"
"flag"
"net/http"
"log"
"strconv"
"golang.org/x/net/context"
"github.com/grpc-ecosystem/grpc-gateway/runtime"
"google.golang.org/grpc"
gw "./proto"
)
//export run_proxy_server
func run_proxy_server(grpc_port int, http_port int) error {
var (
pipelineEndpoint = flag.String("pipeline_endpoint", "localhost:" + strconv.Itoa(grpc_port), "endpoint of PipelineService")
)
ctx := context.Background()
ctx, cancel := context.WithCancel(ctx)
defer cancel()
mux := runtime.NewServeMux()
opts := []grpc.DialOption{grpc.WithInsecure()}
err := gw.RegisterPipelineServiceHandlerFromEndpoint(ctx, mux, *pipelineEndpoint, opts)
if err != nil {
return err
}
log.Println("start proxy service")
return http.ListenAndServe(":" + strconv.Itoa(http_port), mux) // proxy port
}
func main() {}
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import logging
import multiprocessing
try:
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server
PACKAGE_VERSION = "GPU"
except ImportError:
from paddle_serving_server import OpMaker, OpSeqMaker, Server
PACKAGE_VERSION = "CPU"
from . import util
_LOGGER = logging.getLogger(__name__)
_workdir_name_gen = util.NameGenerator("workdir_")
class LocalRpcServiceHandler(object):
def __init__(self,
model_config,
workdir="",
thread_num=2,
devices="",
mem_optim=True,
ir_optim=False,
available_port_generator=None):
if available_port_generator is None:
available_port_generator = util.GetAvailablePortGenerator()
self._model_config = model_config
self._port_list = []
if devices == "":
# cpu
devices = [-1]
self._port_list.append(available_port_generator.next())
_LOGGER.info("Model({}) will be launch in cpu device. Port({})"
.format(model_config, self._port_list))
else:
# gpu
if PACKAGE_VERSION == "CPU":
raise ValueError(
"You are using the CPU version package("
"paddle-serving-server), unable to set devices")
devices = [int(x) for x in devices.split(",")]
for _ in devices:
self._port_list.append(available_port_generator.next())
_LOGGER.info("Model({}) will be launch in gpu device: {}. Port({})"
.format(model_config, devices, self._port_list))
self._workdir = workdir
self._devices = devices
self._thread_num = thread_num
self._mem_optim = mem_optim
self._ir_optim = ir_optim
self._rpc_service_list = []
self._server_pros = []
self._fetch_vars = None
def get_fetch_list(self):
return self._fetch_vars
def get_port_list(self):
return self._port_list
def get_client_config(self):
return os.path.join(self._model_config, "serving_server_conf.prototxt")
def _prepare_one_server(self, workdir, port, gpuid, thread_num, mem_optim,
ir_optim):
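# Builds a single general_reader -> general_infer -> general_response server
# for one device; gpuid == -1 selects CPU, otherwise the server is bound to
# that GPU card.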
device = "gpu"
if gpuid == -1:
device = "cpu"
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.load_model_config(self._model_config)
if gpuid >= 0:
server.set_gpuid(gpuid)
server.prepare_server(workdir=workdir, port=port, device=device)
if self._fetch_vars is None:
self._fetch_vars = server.get_fetch_list()
return server
def _start_one_server(self, service_idx):
self._rpc_service_list[service_idx].run_server()
def prepare_server(self):
for i, device_id in enumerate(self._devices):
if self._workdir != "":
workdir = "{}_{}".format(self._workdir, i)
else:
workdir = _workdir_name_gen.next()
self._rpc_service_list.append(
self._prepare_one_server(
workdir,
self._port_list[i],
device_id,
thread_num=self._thread_num,
mem_optim=self._mem_optim,
ir_optim=self._ir_optim))
def start_server(self):
for i, service in enumerate(self._rpc_service_list):
p = multiprocessing.Process(
target=self._start_one_server, args=(i, ))
p.daemon = True
self._server_pros.append(p)
for p in self._server_pros:
p.start()
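A minimal usage sketch of the handler above; the model directory name and worker settings are assumptions for illustration and are not part of this file.

handler = LocalRpcServiceHandler(
model_config="uci_housing_model",  # assumed local serving model directory
workdir="workdir",
thread_num=2,
devices="",  # "" -> one CPU server; "0,1" -> one server per GPU card
mem_optim=True,
ir_optim=False)
handler.prepare_server()  # builds one Server per device and fills the fetch list
handler.start_server()  # launches daemon processes that call run_server()
print(handler.get_port_list())  # ports chosen by GetAvailablePortGenerator
print(handler.get_client_config())  # serving_server_conf.prototxt under model_config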
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import logging.config
import os
class SectionLevelFilter(object):
def __init__(self, levels):
self._levels = levels
def filter(self, logRecord):
return logRecord.levelno in self._levels
log_dir = "PipelineServingLogs"
if not os.path.exists(log_dir):
os.makedirs(log_dir)
logger_config = {
"version": 1,
"formatters": {
"normal_fmt": {
"format":
"%(levelname)s %(asctime)s [%(filename)s:%(lineno)d] %(message)s",
},
"tracer_fmt": {
"format": "%(asctime)s %(message)s",
},
},
"handlers": {
"f_pipeline.log": {
"class": "logging.FileHandler",
"level": "INFO",
"formatter": "normal_fmt",
"filename": os.path.join(log_dir, "pipeline.log"),
},
"f_pipeline.log.wf": {
"class": "logging.FileHandler",
"level": "WARNING",
"formatter": "normal_fmt",
"filename": os.path.join(log_dir, "pipeline.log.wf"),
},
"f_tracer.log": {
"class": "logging.FileHandler",
"level": "INFO",
"formatter": "tracer_fmt",
"filename": os.path.join(log_dir, "pipeline.tracer"),
},
},
"loggers": {
# propagate = True
".".join(__name__.split(".")[:-1] + ["profiler"]): {
"level": "INFO",
"handlers": ["f_tracer.log"],
},
},
"root": {
"level": "DEBUG",
"handlers": ["f_pipeline.log", "f_pipeline.log.wf"],
},
}
logging.config.dictConfig(logger_config)
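A small sketch of how pipeline modules are expected to pick up this configuration (the module name is illustrative): ordinary loggers propagate to the root handlers, while the sibling "profiler" logger is routed to pipeline.tracer by the dictConfig above.

import logging

_LOGGER = logging.getLogger(__name__)  # e.g. paddle_serving_server.pipeline.operator
_LOGGER.info("written to PipelineServingLogs/pipeline.log")
_LOGGER.warning("also copied to PipelineServingLogs/pipeline.log.wf")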
@@ -12,7 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from time import time as _time
import time
import threading
import multiprocessing
from paddle_serving_client import MultiLangClient, Client
@@ -21,16 +22,25 @@ import logging
import func_timeout
import os
import sys
import collections
import numpy as np
from numpy import *
if sys.version_info.major == 2:
import Queue
elif sys.version_info.major == 3:
import queue as Queue
else:
raise Exception("Error Python version")
from .proto import pipeline_service_pb2
from .channel import (ThreadChannel, ProcessChannel, ChannelDataEcode,
ChannelData, ChannelDataType, ChannelStopError,
ChannelTimeoutError)
from .util import NameGenerator
from .profiler import UnsafeTimeProfiler as TimeProfiler
from . import local_rpc_service_handler
_LOGGER = logging.getLogger(__name__)
_op_name_gen = NameGenerator("Op")
@@ -38,60 +48,187 @@ class Op(object):
def __init__(self,
name=None,
input_ops=[],
server_endpoints=None,
fetch_list=None,
client_config=None,
concurrency=None,
timeout=None,
retry=None,
batch_size=None,
auto_batching_timeout=None,
local_rpc_service_handler=None):
# In __init__, all the parameters are just saved and Op is not initialized
if name is None:
name = _op_name_gen.next()
self.name = name  # to identify the type of OP, it must be globally unique
self.concurrency = concurrency  # amount of concurrency
self.set_input_ops(input_ops)
self._local_rpc_service_handler = local_rpc_service_handler
self._server_endpoints = server_endpoints
self._fetch_names = fetch_list
self._client_config = client_config
self._timeout = timeout
self._retry = max(1, retry)
self._batch_size = batch_size
self._auto_batching_timeout = auto_batching_timeout
self._input = None
self._outputs = []
self._server_use_profile = False
self._tracer = None
# only for thread op
self._for_init_op_lock = threading.Lock()
self._for_close_op_lock = threading.Lock()
self._succ_init_op = False
self._succ_close_op = False
def init_from_dict(self, conf):
# init op
if self.concurrency is None:
self.concurrency = conf["concurrency"]
if self._retry is None:
self._retry = conf["retry"]
if self._fetch_names is None:
self._fetch_names = conf.get("fetch_list")
if self._client_config is None:
self._client_config = conf.get("client_config")
if self._timeout is None:
self._timeout = conf["timeout"]
if self._timeout > 0:
self._timeout = self._timeout / 1000.0
else:
self._timeout = -1
if self._batch_size is None:
self._batch_size = conf["batch_size"]
if self._auto_batching_timeout is None:
self._auto_batching_timeout = conf["auto_batching_timeout"]
if self._auto_batching_timeout <= 0 or self._batch_size == 1:
_LOGGER.warning(
self._log(
"Because auto_batching_timeout <= 0 or batch_size == 1,"
" set auto_batching_timeout to None."))
self._auto_batching_timeout = None
else:
self._auto_batching_timeout = self._auto_batching_timeout / 1000.0
if self._server_endpoints is None:
server_endpoints = conf.get("server_endpoints", [])
if len(server_endpoints) != 0:
# remote service
self.with_serving = True
self._server_endpoints = server_endpoints
else:
if self._local_rpc_service_handler is None:
local_service_conf = conf.get("local_service_conf")
_LOGGER.info("local_service_conf: {}".format(
local_service_conf))
model_config = local_service_conf.get("model_config")
_LOGGER.info("model_config: {}".format(model_config))
if model_config is None:
self.with_serving = False
else:
# local rpc service
self.with_serving = True
service_handler = local_rpc_service_handler.LocalRpcServiceHandler(
model_config=model_config,
workdir=local_service_conf["workdir"],
thread_num=local_service_conf["thread_num"],
devices=local_service_conf["devices"],
mem_optim=local_service_conf["mem_optim"],
ir_optim=local_service_conf["ir_optim"])
service_handler.prepare_server() # get fetch_list
serivce_ports = service_handler.get_port_list()
self._server_endpoints = [
"127.0.0.1:{}".format(p) for p in serivce_ports
]
if self._client_config is None:
self._client_config = service_handler.get_client_config(
)
if self._fetch_names is None:
self._fetch_names = service_handler.get_fetch_list()
self._local_rpc_service_handler = service_handler
else:
self.with_serving = True
self._local_rpc_service_handler.prepare_server(
) # get fetch_list
serivce_ports = self._local_rpc_service_handler.get_port_list(
)
self._server_endpoints = [
"127.0.0.1:{}".format(p) for p in serivce_ports
]
if self._client_config is None:
self._client_config = self._local_rpc_service_handler.get_client_config(
)
if self._fetch_names is None:
self._fetch_names = self._local_rpc_service_handler.get_fetch_list(
)
else:
self.with_serving = True
if not isinstance(self, RequestOp) and not isinstance(self, ResponseOp):
_LOGGER.info(
self._log("\n\tinput_ops: {},"
"\n\tserver_endpoints: {}"
"\n\tfetch_list: {}"
"\n\tclient_config: {}"
"\n\tconcurrency: {},"
"\n\ttimeout(s): {},"
"\n\tretry: {},"
"\n\tbatch_size: {},"
"\n\tauto_batching_timeout(s): {}".format(
", ".join([op.name for op in self._input_ops
]), self._server_endpoints,
self._fetch_names, self._client_config,
self.concurrency, self._timeout, self._retry,
self._batch_size, self._auto_batching_timeout)))
def launch_local_rpc_service(self):
if self._local_rpc_service_handler is None:
_LOGGER.warning(
self._log("Failed to launch local rpc"
" service: local_rpc_service_handler is None."))
return
port = self._local_rpc_service_handler.get_port_list()
self._local_rpc_service_handler.start_server()
_LOGGER.info("Op({}) use local rpc service at port: {}"
.format(self.name, port))
def use_default_auto_batching_config(self):
if self._batch_size != 1:
_LOGGER.warning("Op({}) reset batch_size=1 (original: {})"
.format(self.name, self._batch_size))
self._batch_size = 1
if self._auto_batching_timeout != None:
_LOGGER.warning(
"Op({}) reset auto_batching_timeout=None (original: {})"
.format(self.name, self._auto_batching_timeout))
self._auto_batching_timeout = None
def use_profiler(self, use_profile):
self._server_use_profile = use_profile
def set_tracer(self, tracer):
self._tracer = tracer
def init_client(self, client_type, client_config, server_endpoints,
fetch_names):
if self.with_serving == False:
_LOGGER.info("Op({}) has no client (and it also do not "
"run the process function)".format(self.name))
return None
if client_type == 'brpc':
client = Client()
client.load_client_config(client_config)
elif client_type == 'grpc':
client = MultiLangClient()
else:
raise ValueError("Failed to init client: unknow client "
"type {}".format(client_type))
client.connect(server_endpoints)
self._fetch_names = fetch_names
return client
@@ -105,16 +242,19 @@ class Op(object):
self._input_ops = []
for op in ops:
if not isinstance(op, Op):
_LOGGER.critical(
self._log("Failed to set input_ops: input op "
"must be Op type, not {}".format(type(op))))
os._exit(-1)
self._input_ops.append(op)
def add_input_channel(self, channel):
if not isinstance(channel, (ThreadChannel, ProcessChannel)):
_LOGGER.critical(
self._log("Failed to set input_channel: input "
"channel must be Channel type, not {}".format(
type(channel))))
os._exit(-1)
channel.add_consumer(self.name)
self._input = channel
@@ -126,9 +266,10 @@ class Op(object):
def add_output_channel(self, channel):
if not isinstance(channel, (ThreadChannel, ProcessChannel)):
_LOGGER.critical(
self._log("Failed to add output_channel: output channel "
"must be Channel type, not {}".format(type(channel))))
os._exit(-1)
channel.add_producer(self.name)
self._outputs.append(channel)
@@ -141,21 +282,28 @@ class Op(object):
def preprocess(self, input_dicts):
# multiple previous Op
if len(input_dicts) != 1:
_LOGGER.critical(
self._log(
"Failed to run preprocess: this Op has multiple previous "
"inputs. Please override this func."))
os._exit(-1)
(_, input_dict), = input_dicts.items()
return input_dict
def process(self, feed_batch, typical_logid):
err, err_info = ChannelData.check_batch_npdata(feed_batch)
if err != 0:
_LOGGER.critical(
self._log("Failed to run process: {}. Please override "
"preprocess func.".format(err_info)))
os._exit(-1)
call_result = self.client.predict(
feed=feed_batch, fetch=self._fetch_names, log_id=typical_logid)
if isinstance(self.client, MultiLangClient):
if call_result is None or call_result["serving_status_code"] != 0:
return None
call_result.pop("serving_status_code")
return call_result
def postprocess(self, input_dict, fetch_dict):
@@ -184,43 +332,51 @@ class Op(object):
data,
channels,
name=None,
profile_str=None,
client_need_profile=False,
profile_set=None):
if name is None:
name = self.name
# add profile into channeldata
if client_need_profile and profile_set is not None:
if profile_str is not None:
profile_set.add(profile_str)
data.add_profile(profile_set)
for channel in channels:
channel.push(data, name)
def start_with_process(self, client_type):
trace_buffer = None
if self._tracer is not None:
trace_buffer = self._tracer.data_buffer()
proces = []
for concurrency_idx in range(self.concurrency):
p = multiprocessing.Process(
target=self._run,
args=(concurrency_idx, self._get_input_channel(),
self._get_output_channels(), client_type, False,
trace_buffer))
p.daemon = True
p.start()
proces.append(p)
return proces
def start_with_thread(self, client_type):
trace_buffer = None
if self._tracer is not None:
trace_buffer = self._tracer.data_buffer()
threads = []
for concurrency_idx in range(self.concurrency):
t = threading.Thread(
target=self._run,
args=(concurrency_idx, self._get_input_channel(),
self._get_output_channels(), client_type, True,
trace_buffer))
# When a process exits, it attempts to terminate
# all of its daemonic child processes.
t.daemon = True
t.start()
threads.append(t)
return threads
@@ -228,252 +384,459 @@ class Op(object):
def init_op(self):
pass
def _run_preprocess(self, parsed_data_dict, op_info_prefix):
_LOGGER.debug("{} Running preprocess".format(op_info_prefix))
preped_data_dict = collections.OrderedDict()
err_channeldata_dict = collections.OrderedDict()
for data_id, parsed_data in parsed_data_dict.items():
preped_data, error_channeldata = None, None
try:
preped_data = self.preprocess(parsed_data)
except TypeError as e:
# Error type in channeldata.datatype
error_info = "(logid={}) {} Failed to preprocess: {}".format(
data_id, op_info_prefix, e)
_LOGGER.error(error_info, exc_info=True)
error_channeldata = ChannelData(
ecode=ChannelDataEcode.TYPE_ERROR.value,
error_info=error_info,
data_id=data_id)
except Exception as e:
error_info = "(logid={}) {} Failed to preprocess: {}".format(
data_id, op_info_prefix, e)
_LOGGER.error(error_info, exc_info=True)
error_channeldata = ChannelData(
ecode=ChannelDataEcode.UNKNOW.value,
error_info=error_info,
data_id=data_id)
if error_channeldata is not None:
err_channeldata_dict[data_id] = error_channeldata
else:
preped_data_dict[data_id] = preped_data
_LOGGER.debug("{} Succ preprocess".format(op_info_prefix))
return preped_data_dict, err_channeldata_dict
def _run_process(self, preped_data_dict, op_info_prefix):
_LOGGER.debug("{} Running process".format(op_info_prefix))
midped_data_dict = collections.OrderedDict()
err_channeldata_dict = collections.OrderedDict()
if self.with_serving:
data_ids = preped_data_dict.keys()
typical_logid = data_ids[0]
if len(data_ids) != 1:
for data_id in data_ids:
_LOGGER.info(
"(logid={}) {} During access to PaddleServingService,"
" we selected logid={} (from batch: {}) as a "
"representative for logging.".format(
data_id, op_info_prefix, typical_logid, data_ids))
# combine samples to batch
one_input = preped_data_dict[data_ids[0]]
feed_batch = []
input_offset = None
if isinstance(one_input, dict):
# sample input
feed_batch = [preped_data_dict[data_id] for data_id in data_ids]
input_offset = list(range(len(data_ids) + 1))
elif isinstance(one_input, list):
# batch input
input_offset = [0]
for data_id in data_ids:
batch_input = preped_data_dict[data_id]
offset = input_offset[-1] + len(batch_input)
feed_batch += batch_input
input_offset.append(offset)
else:
_LOGGER.critical(
"{} Failed to process: expect input type is dict(sample"
" input) or list(batch input), but get {}".format(
op_info_prefix, type(one_input)))
os._exit(-1)
midped_batch = None
ecode = ChannelDataEcode.OK.value
if self._timeout <= 0:
try:
midped_batch = self.process(feed_batch, typical_logid)
except Exception as e:
ecode = ChannelDataEcode.UNKNOW.value
error_info = "(logid={}) {} Failed to process(batch: {}): {}".format(
typical_logid, op_info_prefix, data_ids, e)
_LOGGER.error(error_info, exc_info=True)
else:
for i in range(self._retry):
try:
midped_batch = func_timeout.func_timeout(
self._timeout,
self.process,
args=(feed_batch, typical_logid))
except func_timeout.FunctionTimedOut as e:
if i + 1 >= self._retry:
ecode = ChannelDataEcode.TIMEOUT.value
error_info = "(logid={}) {} Failed to process(batch: {}): " \
"exceeded retry count.".format(
typical_logid, op_info_prefix, data_ids)
_LOGGER.error(error_info)
else:
_LOGGER.warning(
"(logid={}) {} Failed to process(batch: {}): timeout,"
" and retrying({}/{})...".format(
typical_logid, op_info_prefix, data_ids, i +
1, self._retry))
except Exception as e:
ecode = ChannelDataEcode.UNKNOW.value
error_info = "(logid={}) {} Failed to process(batch: {}): {}".format(
typical_logid, op_info_prefix, data_ids, e)
_LOGGER.error(error_info, exc_info=True)
break
else:
break
if ecode != ChannelDataEcode.OK.value:
for data_id in data_ids:
err_channeldata_dict[data_id] = ChannelData(
ecode=ecode, error_info=error_info, data_id=data_id)
elif midped_batch is None:
# op client return None
error_info = "(logid={}) {} Failed to predict, please check if " \
"PaddleServingService is working properly.".format(
typical_logid, op_info_prefix)
_LOGGER.error(error_info)
for data_id in data_ids:
err_channeldata_dict[data_id] = ChannelData(
ecode=ChannelDataEcode.CLIENT_ERROR.value,
error_info=error_info,
data_id=data_id)
else:
# transform np format to dict format
var_names = midped_batch.keys()
lod_var_names = set()
lod_offset_names = set()
for name in var_names:
lod_offset_name = "{}.lod".format(name)
if lod_offset_name in var_names:
_LOGGER.debug("(logid={}) {} {} is LodTensor".format(
typical_logid, op_info_prefix, name))
lod_var_names.add(name)
lod_offset_names.add(lod_offset_name)
for idx, data_id in enumerate(data_ids):
midped_data_dict[data_id] = {}
for name, value in midped_batch.items():
if name in lod_offset_names:
continue
if name in lod_var_names:
# lodtensor
lod_offset_name = "{}.lod".format(name)
lod_offset = midped_batch[lod_offset_name]
for idx, data_id in enumerate(data_ids):
data_offset_left = input_offset[idx]
data_offset_right = input_offset[idx + 1]
lod_offset_left = lod_offset[data_offset_left]
lod_offset_right = lod_offset[data_offset_right]
midped_data_dict[data_id][name] = value[
lod_offset_left:lod_offset_right]
midped_data_dict[data_id][lod_offset_name] = \
lod_offset[data_offset_left:data_offset_right + 1] - lod_offset[data_offset_left]
else:
# normal tensor
for idx, data_id in enumerate(data_ids):
left = input_offset[idx]
right = input_offset[idx + 1]
midped_data_dict[data_id][name] = value[left:right]
else:
midped_data_dict = preped_data_dict
_LOGGER.debug("{} Succ process".format(op_info_prefix))
return midped_data_dict, err_channeldata_dict
def _run_postprocess(self, parsed_data_dict, midped_data_dict,
op_info_prefix):
_LOGGER.debug("{} Running postprocess".format(op_info_prefix))
postped_data_dict = collections.OrderedDict()
err_channeldata_dict = collections.OrderedDict()
for data_id, midped_data in midped_data_dict.items():
postped_data, err_channeldata = None, None
try:
postped_data = self.postprocess(parsed_data_dict[data_id],
midped_data)
except Exception as e:
error_info = "(logid={}) {} Failed to postprocess: {}".format(
data_id, op_info_prefix, e)
_LOGGER.error(error_info, exc_info=True)
err_channeldata = ChannelData(
ecode=ChannelDataEcode.UNKNOW.value,
error_info=error_info,
data_id=data_id)
if err_channeldata is not None:
err_channeldata_dict[data_id] = err_channeldata
continue
else:
if not isinstance(postped_data, dict):
error_info = "(logid={}) {} Failed to postprocess: " \
"output of postprocess funticon must be " \
"dict type, but get {}".format(
data_id, op_info_prefix,
type(postped_data))
_LOGGER.error(error_info)
err_channeldata = ChannelData(
ecode=ChannelDataEcode.UNKNOW.value,
error_info=error_info,
data_id=data_id)
err_channeldata_dict[data_id] = err_channeldata
continue
output_data = None
err, _ = ChannelData.check_npdata(postped_data)
if err == 0:
output_data = ChannelData(
ChannelDataType.CHANNEL_NPDATA.value,
npdata=postped_data,
data_id=data_id)
else:
output_data = ChannelData(
ChannelDataType.DICT.value,
dictdata=postped_data,
data_id=data_id)
postped_data_dict[data_id] = output_data
_LOGGER.debug("{} Succ postprocess".format(op_info_prefix))
return postped_data_dict, err_channeldata_dict
def _auto_batching_generator(self, input_channel, op_name, batch_size,
timeout, op_info_prefix):
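# Greedily pulls up to batch_size channeldata dicts from the input channel;
# when auto_batching_timeout is set, a partially filled batch is yielded once
# the deadline passes instead of waiting for a full batch.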
while True:
batch = []
while len(batch) == 0:
endtime = None
if timeout is not None:
endtime = _time() + timeout
for idx in range(batch_size):
try:
channeldata_dict = None
if timeout is not None:
remaining = endtime - _time()
if remaining <= 0.0:
_LOGGER.debug("{} Failed to generate batch: "
"timeout".format(op_info_prefix))
break
channeldata_dict = input_channel.front(op_name,
timeout)
else:
channeldata_dict = input_channel.front(op_name)
batch.append(channeldata_dict)
except ChannelTimeoutError:
_LOGGER.debug("{} Failed to generate batch: "
"timeout".format(op_info_prefix))
break
_LOGGER.debug("{} Got actual batch_size: {}".format(op_info_prefix,
len(batch)))
yield batch
def _parse_channeldata_batch(self, batch, output_channels):
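# Splits a batch of channeldata into per-request parsed data; requests that
# already carry an error from a predecessor Op are pushed straight to the
# output channels and excluded from further processing.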
parsed_data_dict = collections.OrderedDict()
need_profile_dict = {}
profile_dict = {}
for channeldata_dict in batch:
(data_id, error_channeldata, parsed_data,
client_need_profile, profile_set) = \
self._parse_channeldata(channeldata_dict)
if error_channeldata is None:
parsed_data_dict[data_id] = parsed_data
need_profile_dict[data_id] = client_need_profile
profile_dict[data_id] = profile_set
else:
# error data in predecessor Op
# (error_channeldata with profile info)
self._push_to_output_channels(error_channeldata,
output_channels)
return parsed_data_dict, need_profile_dict, profile_dict
def _run(self, concurrency_idx, input_channel, output_channels, client_type,
is_thread_op, trace_buffer):
op_info_prefix = "[{}|{}]".format(self.name, concurrency_idx)
tid = threading.current_thread().ident
# init op
profiler = None
try:
profiler = self._initialize(is_thread_op, client_type,
concurrency_idx)
except Exception as e:
_LOGGER.critical(
"{} Failed to init op: {}".format(op_info_prefix, e),
exc_info=True)
os._exit(-1)
_LOGGER.info("{} Succ init".format(op_info_prefix))
batch_generator = self._auto_batching_generator(
input_channel=input_channel,
op_name=self.name,
batch_size=self._batch_size,
timeout=self._auto_batching_timeout,
op_info_prefix=op_info_prefix)
start, end = None, None
trace_que = collections.deque()
while True:
start = int(round(_time() * 1000000))
try:
channeldata_dict_batch = next(batch_generator)
except ChannelStopError:
_LOGGER.debug("{} Stop.".format(op_info_prefix))
self._finalize(is_thread_op)
break
end = int(round(_time() * 1000000))
in_time = end - start
# parse channeldata batch
try:
parsed_data_dict, need_profile_dict, profile_dict \
= self._parse_channeldata_batch(
channeldata_dict_batch, output_channels)
except ChannelStopError:
_LOGGER.debug("{} Stop.".format(op_info_prefix))
self._finalize(is_thread_op)
break
if len(parsed_data_dict) == 0:
# data in the whole batch is all error data
continue
# preprocess
start = profiler.record("prep#{}_0".format(op_info_prefix))
preped_data_dict, err_channeldata_dict \
= self._run_preprocess(parsed_data_dict, op_info_prefix)
end = profiler.record("prep#{}_1".format(op_info_prefix))
prep_time = end - start
try:
for data_id, err_channeldata in err_channeldata_dict.items():
self._push_to_output_channels(
data=err_channeldata,
channels=output_channels,
client_need_profile=need_profile_dict[data_id],
profile_set=profile_dict[data_id])
except ChannelStopError:
_LOGGER.debug("{} Stop.".format(op_info_prefix))
self._finalize(is_thread_op)
break
if len(preped_data_dict) == 0:
continue
# process
start = profiler.record("midp#{}_0".format(op_info_prefix))
midped_data_dict, err_channeldata_dict \
= self._run_process(preped_data_dict, op_info_prefix)
end = profiler.record("midp#{}_1".format(op_info_prefix))
midp_time = end - start
try:
for data_id, err_channeldata in err_channeldata_dict.items():
self._push_to_output_channels(
data=err_channeldata,
channels=output_channels,
client_need_profile=need_profile_dict[data_id],
profile_set=profile_dict[data_id])
except ChannelStopError:
_LOGGER.debug("{} Stop.".format(op_info_prefix))
self._finalize(is_thread_op)
break
if len(midped_data_dict) == 0:
continue
# postprocess
start = profiler.record("postp#{}_0".format(op_info_prefix))
postped_data_dict, err_channeldata_dict \
= self._run_postprocess(
parsed_data_dict, midped_data_dict, op_info_prefix)
end = profiler.record("postp#{}_1".format(op_info_prefix))
postp_time = end - start
try:
for data_id, err_channeldata in err_channeldata_dict.items():
self._push_to_output_channels(
data=err_channeldata,
channels=output_channels,
client_need_profile=need_profile_dict[data_id],
profile_set=profile_dict[data_id])
except ChannelStopError:
_LOGGER.debug("{} Stop.".format(op_info_prefix))
self._finalize(is_thread_op)
break
if len(postped_data_dict) == 0:
continue
# push data to channel (if run succ)
start = int(round(_time() * 1000000))
try:
profile_str = profiler.gen_profile_str()
for data_id, postped_data in postped_data_dict.items():
if self._server_use_profile:
sys.stderr.write(profile_str)
self._push_to_output_channels(
data=postped_data,
channels=output_channels,
profile_str=profile_str,
client_need_profile=need_profile_dict[data_id],
profile_set=profile_dict[data_id])
except ChannelStopError:
_LOGGER.debug("{} Stop.".format(op_info_prefix))
self._finalize(is_thread_op)
break
end = int(round(_time() * 1000000))
out_time = end - start
if trace_buffer is not None:
trace_que.append({
"name": self.name,
"actions": {
"in": in_time,
"prep": prep_time,
"midp": midp_time,
"postp": postp_time,
"out": out_time,
}
})
while trace_que:
info = trace_que[0]
try:
trace_buffer.put_nowait(info)
trace_que.popleft()
except Queue.Full:
break
def _initialize(self, is_thread_op, client_type, concurrency_idx):
if is_thread_op:
with self._for_init_op_lock:
if not self._succ_init_op:
# for the threaded version of Op, each thread cannot get its concurrency_idx
self.concurrency_idx = None
# init client
self.client = self.init_client(
client_type, self._client_config,
self._server_endpoints, self._fetch_names)
# user defined
self.init_op()
self._succ_init_op = True
self._succ_close_op = False
else:
self.concurrency_idx = concurrency_idx
# init client
self.client = self.init_client(client_type, self._client_config,
self._server_endpoints,
self._fetch_names)
# user defined
self.init_op()
# use a separate TimeProfiler per thread or process
profiler = TimeProfiler()
profiler.enable(True)
return profiler
def _finalize(self, is_thread_op):
if is_thread_op:
with self._for_close_op_lock:
if not self._succ_close_op:
self._profiler = None
self.client = None
self._succ_init_op = False
self._succ_close_op = True
def _log(self, info): def _log(self, info):
return "{} {}".format(self.name, info) return "{} {}".format(self.name, info)
...@@ -483,13 +846,13 @@ class RequestOp(Op): ...@@ -483,13 +846,13 @@ class RequestOp(Op):
""" RequestOp do not run preprocess, process, postprocess. """ """ RequestOp do not run preprocess, process, postprocess. """
def __init__(self): def __init__(self):
# PipelineService.name = "@G" # PipelineService.name = "@DAGExecutor"
super(RequestOp, self).__init__(name="@G", input_ops=[]) super(RequestOp, self).__init__(name="@DAGExecutor", input_ops=[])
# init op # init op
try: try:
self.init_op() self.init_op()
except Exception as e: except Exception as e:
_LOGGER.error(e) _LOGGER.critical("Op(Request) Failed to init: {}".format(e))
os._exit(-1) os._exit(-1)
def unpack_request_package(self, request): def unpack_request_package(self, request):
...@@ -497,7 +860,9 @@ class RequestOp(Op): ...@@ -497,7 +860,9 @@ class RequestOp(Op):
for idx, key in enumerate(request.key): for idx, key in enumerate(request.key):
data = request.value[idx] data = request.value[idx]
try: try:
data = eval(data) evaled_data = eval(data)
if isinstance(evaled_data, np.ndarray):
data = evaled_data
except Exception as e: except Exception as e:
pass pass
dictdata[key] = data dictdata[key] = data
...@@ -508,12 +873,14 @@ class ResponseOp(Op): ...@@ -508,12 +873,14 @@ class ResponseOp(Op):
""" ResponseOp do not run preprocess, process, postprocess. """ """ ResponseOp do not run preprocess, process, postprocess. """
def __init__(self, input_ops): def __init__(self, input_ops):
super(ResponseOp, self).__init__(name="@R", input_ops=input_ops) super(ResponseOp, self).__init__(
name="@DAGExecutor", input_ops=input_ops)
# init op # init op
try: try:
self.init_op() self.init_op()
except Exception as e: except Exception as e:
_LOGGER.error(e) _LOGGER.critical("Op(ResponseOp) Failed to init: {}".format(
e, exc_info=True))
os._exit(-1) os._exit(-1)
def pack_response_package(self, channeldata): def pack_response_package(self, channeldata):
...@@ -524,7 +891,7 @@ class ResponseOp(Op): ...@@ -524,7 +891,7 @@ class ResponseOp(Op):
feed = channeldata.parse() feed = channeldata.parse()
# ndarray to string: # ndarray to string:
# https://stackoverflow.com/questions/30167538/convert-a-numpy-ndarray-to-stringor-bytes-and-convert-it-back-to-numpy-ndarray # https://stackoverflow.com/questions/30167538/convert-a-numpy-ndarray-to-stringor-bytes-and-convert-it-back-to-numpy-ndarray
np.set_printoptions(threshold=np.nan) np.set_printoptions(threshold=sys.maxsize)
for name, var in feed.items(): for name, var in feed.items():
resp.value.append(var.__repr__()) resp.value.append(var.__repr__())
resp.key.append(name) resp.key.append(name)
...@@ -536,14 +903,19 @@ class ResponseOp(Op): ...@@ -536,14 +903,19 @@ class ResponseOp(Op):
resp.error_info = self._log( resp.error_info = self._log(
"fetch var type must be str({}).".format( "fetch var type must be str({}).".format(
type(var))) type(var)))
_LOGGER.error("(logid={}) Failed to pack RPC "
"response package: {}".format(
channeldata.id, resp.error_info))
break break
resp.value.append(var) resp.value.append(var)
resp.key.append(name) resp.key.append(name)
else: else:
resp.ecode = ChannelDataEcode.TYPE_ERROR.value resp.ecode = ChannelDataEcode.TYPE_ERROR.value
resp.error_info = self._log( resp.error_info = self._log(
"Error type({}) in datatype.".format(channeldata.datatype)) "error type({}) in datatype.".format(channeldata.datatype))
_LOGGER.error(resp.error_info) _LOGGER.error("(logid={}) Failed to pack RPC response"
" package: {}".format(channeldata.id,
resp.error_info))
else: else:
resp.error_info = channeldata.error_info resp.error_info = channeldata.error_info
return resp return resp
...@@ -561,6 +933,7 @@ class VirtualOp(Op): ...@@ -561,6 +933,7 @@ class VirtualOp(Op):
self._virtual_pred_ops.append(op) self._virtual_pred_ops.append(op)
def _actual_pred_op_names(self, op): def _actual_pred_op_names(self, op):
# can use disjoint-set, but it's not necessary
if not isinstance(op, VirtualOp): if not isinstance(op, VirtualOp):
return [op.name] return [op.name]
names = [] names = []
...@@ -570,9 +943,11 @@ class VirtualOp(Op): ...@@ -570,9 +943,11 @@ class VirtualOp(Op):
def add_output_channel(self, channel): def add_output_channel(self, channel):
if not isinstance(channel, (ThreadChannel, ProcessChannel)): if not isinstance(channel, (ThreadChannel, ProcessChannel)):
raise TypeError( _LOGGER.critical(
self._log('output channel must be Channel type, not {}'.format( self._log("Failed to add output_channel: output_channel"
type(channel)))) " must be Channel type, not {}".format(
type(channel))))
os._exit(-1)
for op in self._virtual_pred_ops: for op in self._virtual_pred_ops:
for op_name in self._actual_pred_op_names(op): for op_name in self._actual_pred_op_names(op):
channel.add_producer(op_name) channel.add_producer(op_name)
...@@ -580,27 +955,31 @@ class VirtualOp(Op): ...@@ -580,27 +955,31 @@ class VirtualOp(Op):
def _run(self, concurrency_idx, input_channel, output_channels, client_type, def _run(self, concurrency_idx, input_channel, output_channels, client_type,
is_thread_op): is_thread_op):
def get_log_func(op_info_prefix):
def log_func(info_str):
return "{} {}".format(op_info_prefix, info_str)
return log_func
op_info_prefix = "[{}|{}]".format(self.name, concurrency_idx) op_info_prefix = "[{}|{}]".format(self.name, concurrency_idx)
log = get_log_func(op_info_prefix) log = get_log_func(op_info_prefix)
tid = threading.current_thread().ident tid = threading.current_thread().ident
batch_generator = self._auto_batching_generator(
input_channel=input_channel,
op_name=self.name,
batch_size=1,
timeout=None,
log_func=log)
while True: while True:
try: try:
channeldata_dict = input_channel.front(self.name) channeldata_dict_batch = next(batch_generator)
except ChannelStopError: except ChannelStopError:
_LOGGER.debug(log("stop.")) _LOGGER.debug("{} Stop.".format(op_info_prefix))
self._finalize(is_thread_op)
break break
try: try:
for name, data in channeldata_dict.items(): for channeldata_dict in channeldata_dict_batch:
self._push_to_output_channels( for name, data in channeldata_dict.items():
data, channels=output_channels, name=name) self._push_to_output_channels(
data, channels=output_channels, name=name)
except ChannelStopError: except ChannelStopError:
_LOGGER.debug(log("stop.")) _LOGGER.debug("{} Stop.".format(op_info_prefix))
self._finalize(is_thread_op)
break break
...@@ -18,10 +18,11 @@ import numpy as np ...@@ -18,10 +18,11 @@ import numpy as np
from numpy import * from numpy import *
import logging import logging
import functools import functools
from .channel import ChannelDataEcode
from .proto import pipeline_service_pb2 from .proto import pipeline_service_pb2
from .proto import pipeline_service_pb2_grpc from .proto import pipeline_service_pb2_grpc
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger(__name__)
class PipelineClient(object): class PipelineClient(object):
...@@ -41,11 +42,12 @@ class PipelineClient(object): ...@@ -41,11 +42,12 @@ class PipelineClient(object):
def _pack_request_package(self, feed_dict, profile): def _pack_request_package(self, feed_dict, profile):
req = pipeline_service_pb2.Request() req = pipeline_service_pb2.Request()
np.set_printoptions(threshold=sys.maxsize)
for key, value in feed_dict.items(): for key, value in feed_dict.items():
req.key.append(key) req.key.append(key)
if isinstance(value, np.ndarray): if isinstance(value, np.ndarray):
req.value.append(value.__repr__()) req.value.append(value.__repr__())
elif isinstance(value, str): elif isinstance(value, (str, unicode)):
req.value.append(value) req.value.append(value)
elif isinstance(value, list): elif isinstance(value, list):
req.value.append(np.array(value).__repr__()) req.value.append(np.array(value).__repr__())
...@@ -59,7 +61,11 @@ class PipelineClient(object): ...@@ -59,7 +61,11 @@ class PipelineClient(object):
def _unpack_response_package(self, resp, fetch): def _unpack_response_package(self, resp, fetch):
if resp.ecode != 0: if resp.ecode != 0:
return {"ecode": resp.ecode, "error_info": resp.error_info} return {
"ecode": resp.ecode,
"ecode_desc": ChannelDataEcode(resp.ecode),
"error_info": resp.error_info,
}
fetch_map = {"ecode": resp.ecode} fetch_map = {"ecode": resp.ecode}
for idx, key in enumerate(resp.key): for idx, key in enumerate(resp.key):
if key == self._profile_key: if key == self._profile_key:
...@@ -70,7 +76,9 @@ class PipelineClient(object): ...@@ -70,7 +76,9 @@ class PipelineClient(object):
continue continue
data = resp.value[idx] data = resp.value[idx]
try: try:
data = eval(data) evaled_data = eval(data)
if isinstance(evaled_data, np.ndarray):
data = evaled_data
except Exception as e: except Exception as e:
pass pass
fetch_map[key] = data fetch_map[key] = data
......
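# --- Editorial usage sketch (not part of the patch) -----------------------
# The client serializes ndarray feeds with __repr__() and the receiving side
# recovers them with eval(), which is why both modules do `from numpy import *`
# and raise the numpy print threshold. A minimal, self-contained round trip of
# that convention (the array contents are made up):
import sys
from numpy import *              # brings array/float32/... into scope for eval()
import numpy as np

np.set_printoptions(threshold=sys.maxsize)   # avoid "..." truncation in repr

value = np.arange(6, dtype="float32").reshape(2, 3)
wire = value.__repr__()          # what _pack_request_package appends to req.value
restored = eval(wire)            # what _unpack_response_package does on the way back
assert isinstance(restored, np.ndarray)
assert np.array_equal(value, restored)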
...@@ -15,34 +15,41 @@ ...@@ -15,34 +15,41 @@
from concurrent import futures from concurrent import futures
import grpc import grpc
import logging import logging
import json
import socket import socket
import contextlib import contextlib
from contextlib import closing from contextlib import closing
import multiprocessing import multiprocessing
import yaml import yaml
from .proto import pipeline_service_pb2_grpc from .proto import pipeline_service_pb2_grpc, pipeline_service_pb2
from .operator import ResponseOp from . import operator
from .dag import DAGExecutor from . import dag
from . import util
from . import channel
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger(__name__)
class PipelineService(pipeline_service_pb2_grpc.PipelineServiceServicer): class PipelineServicer(pipeline_service_pb2_grpc.PipelineServiceServicer):
def __init__(self, response_op, dag_config, show_info): def __init__(self, name, response_op, dag_conf, worker_idx=-1):
super(PipelineService, self).__init__() super(PipelineServicer, self).__init__()
self._name = name
# init dag executor # init dag executor
self._dag_executor = DAGExecutor( self._dag_executor = dag.DAGExecutor(response_op, dag_conf, worker_idx)
response_op, dag_config, show_info=show_info)
self._dag_executor.start() self._dag_executor.start()
_LOGGER.info("[PipelineServicer] succ init")
def inference(self, request, context): def inference(self, request, context):
if request.name != "" and request.name != self._name:
resp = pipeline_service_pb2.Response()
resp.ecode = channel.ChannelDataEcode.NO_SERVICE.value
resp.error_info = "Failed to inference: Service name error."
return resp
resp = self._dag_executor.call(request) resp = self._dag_executor.call(request)
return resp return resp
def __del__(self):
self._dag_executor.stop()
@contextlib.contextmanager @contextlib.contextmanager
def _reserve_port(port): def _reserve_port(port):
...@@ -59,80 +66,375 @@ def _reserve_port(port): ...@@ -59,80 +66,375 @@ def _reserve_port(port):
class PipelineServer(object): class PipelineServer(object):
def __init__(self): def __init__(self, name=None):
self._port = None self._name = name # for grpc-gateway path
self._rpc_port = None
self._worker_num = None self._worker_num = None
self._response_op = None self._response_op = None
self._proxy_server = None
def _grpc_gateway(self, grpc_port, http_port):
import os
from ctypes import cdll
from . import gateway
lib_path = os.path.join(
os.path.dirname(gateway.__file__), "libproxy_server.so")
proxy_server = cdll.LoadLibrary(lib_path)
proxy_server.run_proxy_server(grpc_port, http_port)
def _run_grpc_gateway(self, grpc_port, http_port):
if http_port <= 0:
_LOGGER.info("Ignore grpc_gateway configuration.")
return
if not util.AvailablePortGenerator.port_is_available(http_port):
raise SystemExit("Failed to run grpc-gateway: prot {} "
"is already used".format(http_port))
if self._proxy_server is not None:
raise RuntimeError("Proxy server has been started.")
self._proxy_server = multiprocessing.Process(
target=self._grpc_gateway, args=(
grpc_port,
http_port, ))
self._proxy_server.daemon = True
self._proxy_server.start()
def set_response_op(self, response_op): def set_response_op(self, response_op):
if not isinstance(response_op, ResponseOp): if not isinstance(response_op, operator.ResponseOp):
raise Exception("response_op must be ResponseOp type.") raise Exception("Failed to set response_op: response_op "
"must be ResponseOp type.")
if len(response_op.get_input_ops()) != 1: if len(response_op.get_input_ops()) != 1:
raise Exception("response_op can only have one previous op.") raise Exception("Failed to set response_op: response_op "
"can only have one previous op.")
self._response_op = response_op self._response_op = response_op
self._used_op, _ = dag.DAG.get_use_ops(self._response_op)
def prepare_server(self, yml_file=None, yml_dict=None):
conf = ServerYamlConfChecker.load_server_yaml_conf(
yml_file=yml_file, yml_dict=yml_dict)
self._rpc_port = conf.get("rpc_port")
self._http_port = conf.get("http_port")
if self._rpc_port is None:
if self._http_port is None:
raise SystemExit("Failed to prepare_server: rpc_port or "
"http_port can not be None.")
else:
# http mode: generate rpc_port
if not util.AvailablePortGenerator.port_is_available(
self._http_port):
raise SystemExit("Failed to prepare_server: http_port({}) "
"is already used".format(self._http_port))
self._rpc_port = util.GetAvailablePortGenerator().next()
else:
if not util.AvailablePortGenerator.port_is_available(
self._rpc_port):
raise SystemExit("Failed to prepare_server: prot {} "
"is already used".format(self._rpc_port))
if self._http_port is None:
# rpc mode
pass
else:
# http mode
if not util.AvailablePortGenerator.port_is_available(
self._http_port):
raise SystemExit("Failed to prepare_server: http_port({}) "
"is already used".format(self._http_port))
self._worker_num = conf["worker_num"]
self._build_dag_each_worker = conf["build_dag_each_worker"]
self._init_ops(conf["op"])
def _port_is_available(self, port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port))
return result != 0
def prepare_server(self, yml_file):
with open(yml_file) as f:
yml_config = yaml.load(f.read())
self._port = yml_config.get('port')
if self._port is None:
raise SystemExit("Please set *port* in [{}] yaml file.".format(
yml_file))
if not self._port_is_available(self._port):
raise SystemExit("Prot {} is already used".format(self._port))
self._worker_num = yml_config.get('worker_num', 1)
self._build_dag_each_worker = yml_config.get('build_dag_each_worker',
False)
_LOGGER.info("============= PIPELINE SERVER =============") _LOGGER.info("============= PIPELINE SERVER =============")
_LOGGER.info("port: {}".format(self._port)) _LOGGER.info("\n{}".format(
_LOGGER.info("worker_num: {}".format(self._worker_num)) json.dumps(
servicer_info = "build_dag_each_worker: {}".format( conf, indent=4, separators=(',', ':'))))
self._build_dag_each_worker)
if self._build_dag_each_worker is True: if self._build_dag_each_worker is True:
servicer_info += " (Make sure that install grpcio whl with --no-binary flag)" _LOGGER.warning(
_LOGGER.info(servicer_info) "(Make sure that install grpcio whl with --no-binary flag: "
"pip install grpcio --no-binary grpcio)")
_LOGGER.info("-------------------------------------------") _LOGGER.info("-------------------------------------------")
self._dag_config = yml_config.get("dag", {}) self._conf = conf
self._start_local_rpc_service()
def _init_ops(self, op_conf):
default_conf = {
"concurrency": 1,
"timeout": -1,
"retry": 1,
"batch_size": 1,
"auto_batching_timeout": -1,
"local_service_conf": {
"workdir": "",
"thread_num": 2,
"devices": "",
"mem_optim": True,
"ir_optim": False,
},
}
for op in self._used_op:
if not isinstance(op, operator.RequestOp) and not isinstance(
op, operator.ResponseOp):
conf = op_conf.get(op.name, default_conf)
op.init_from_dict(conf)
def _start_local_rpc_service(self):
# only brpc now
if self._conf["dag"]["client_type"] != "brpc":
_LOGGER.warning("Local service version must be brpc type now.")
for op in self._used_op:
if not isinstance(op, operator.RequestOp):
op.launch_local_rpc_service()
def run_server(self): def run_server(self):
if self._build_dag_each_worker: if self._build_dag_each_worker:
with _reserve_port(self._port) as port: with _reserve_port(self._rpc_port) as port:
bind_address = 'localhost:{}'.format(port) bind_address = 'localhost:{}'.format(port)
workers = [] workers = []
for i in range(self._worker_num): for i in range(self._worker_num):
show_info = (i == 0) show_info = (i == 0)
worker = multiprocessing.Process( worker = multiprocessing.Process(
target=self._run_server_func, target=self._run_server_func,
args=(bind_address, self._response_op, args=(bind_address, self._response_op, self._conf, i))
self._dag_config))
worker.start() worker.start()
workers.append(worker) workers.append(worker)
self._run_grpc_gateway(
grpc_port=self._rpc_port,
http_port=self._http_port) # start grpc_gateway
for worker in workers: for worker in workers:
worker.join() worker.join()
else: else:
server = grpc.server( server = grpc.server(
futures.ThreadPoolExecutor(max_workers=self._worker_num)) futures.ThreadPoolExecutor(max_workers=self._worker_num),
options=[('grpc.max_send_message_length', 256 * 1024 * 1024),
('grpc.max_receive_message_length', 256 * 1024 * 1024)
])
pipeline_service_pb2_grpc.add_PipelineServiceServicer_to_server( pipeline_service_pb2_grpc.add_PipelineServiceServicer_to_server(
PipelineService(self._response_op, self._dag_config, True), PipelineServicer(self._name, self._response_op, self._conf),
server) server)
server.add_insecure_port('[::]:{}'.format(self._port)) server.add_insecure_port('[::]:{}'.format(self._rpc_port))
server.start() server.start()
self._run_grpc_gateway(
grpc_port=self._rpc_port,
http_port=self._http_port) # start grpc_gateway
server.wait_for_termination() server.wait_for_termination()
def _run_server_func(self, bind_address, response_op, dag_config): def _run_server_func(self, bind_address, response_op, dag_conf, worker_idx):
options = (('grpc.so_reuseport', 1), ) options = [('grpc.so_reuseport', 1),
('grpc.max_send_message_length', 256 * 1024 * 1024),
('grpc.max_receive_message_length', 256 * 1024 * 1024)]
server = grpc.server( server = grpc.server(
futures.ThreadPoolExecutor( futures.ThreadPoolExecutor(
max_workers=1, ), options=options) max_workers=1, ), options=options)
pipeline_service_pb2_grpc.add_PipelineServiceServicer_to_server( pipeline_service_pb2_grpc.add_PipelineServiceServicer_to_server(
PipelineService(response_op, dag_config, False), server) PipelineServicer(self._name, response_op, dag_conf, worker_idx),
server)
server.add_insecure_port(bind_address) server.add_insecure_port(bind_address)
server.start() server.start()
server.wait_for_termination() server.wait_for_termination()
class ServerYamlConfChecker(object):
def __init__(self):
pass
@staticmethod
def load_server_yaml_conf(yml_file=None, yml_dict=None):
if yml_file is not None and yml_dict is not None:
raise SystemExit("Failed to prepare_server: only one of yml_file"
" or yml_dict can be selected as the parameter.")
if yml_file is not None:
with open(yml_file) as f:
conf = yaml.load(f.read())
elif yml_dict is not None:
conf = yml_dict
else:
raise SystemExit("Failed to prepare_server: yml_file or yml_dict"
" can not be None.")
ServerYamlConfChecker.check_server_conf(conf)
ServerYamlConfChecker.check_dag_conf(conf["dag"])
ServerYamlConfChecker.check_tracer_conf(conf["dag"]["tracer"])
for op_name in conf["op"]:
ServerYamlConfChecker.check_op_conf(conf["op"][op_name])
ServerYamlConfChecker.check_local_service_conf(conf["op"][op_name][
"local_service_conf"])
return conf
@staticmethod
def check_conf(conf, default_conf, conf_type, conf_qualification):
ServerYamlConfChecker.fill_with_default_conf(conf, default_conf)
ServerYamlConfChecker.check_conf_type(conf, conf_type)
ServerYamlConfChecker.check_conf_qualification(conf, conf_qualification)
@staticmethod
def check_server_conf(conf):
default_conf = {
# "rpc_port": 9292,
"worker_num": 1,
"build_dag_each_worker": False,
#"http_port": 0,
"dag": {},
"op": {},
}
conf_type = {
"rpc_port": int,
"http_port": int,
"worker_num": int,
"build_dag_each_worker": bool,
"grpc_gateway_port": int,
}
conf_qualification = {
"rpc_port": [(">=", 1024), ("<=", 65535)],
"http_port": [(">=", 1024), ("<=", 65535)],
"worker_num": (">=", 1),
}
ServerYamlConfChecker.check_conf(conf, default_conf, conf_type,
conf_qualification)
@staticmethod
def check_local_service_conf(conf):
default_conf = {
"workdir": "",
"thread_num": 2,
"devices": "",
"mem_optim": True,
"ir_optim": False,
}
conf_type = {
"model_config": str,
"workdir": str,
"thread_num": int,
"devices": str,
"mem_optim": bool,
"ir_optim": bool,
}
conf_qualification = {"thread_num": (">=", 1), }
ServerYamlConfChecker.check_conf(conf, default_conf, conf_type,
conf_qualification)
@staticmethod
def check_op_conf(conf):
default_conf = {
"concurrency": 1,
"timeout": -1,
"retry": 1,
"batch_size": 1,
"auto_batching_timeout": -1,
"local_service_conf": {},
}
conf_type = {
"server_endpoints": list,
"fetch_list": list,
"client_config": str,
"concurrency": int,
"timeout": int,
"retry": int,
"batch_size": int,
"auto_batching_timeout": int,
}
conf_qualification = {
"concurrency": (">=", 1),
"retry": (">=", 1),
"batch_size": (">=", 1),
}
ServerYamlConfChecker.check_conf(conf, default_conf, conf_type,
conf_qualification)
@staticmethod
def check_tracer_conf(conf):
default_conf = {"interval_s": -1, }
conf_type = {"interval_s": int, }
conf_qualification = {}
ServerYamlConfChecker.check_conf(conf, default_conf, conf_type,
conf_qualification)
@staticmethod
def check_dag_conf(conf):
default_conf = {
"retry": 1,
"client_type": "brpc",
"use_profile": False,
"channel_size": 0,
"is_thread_op": True,
"tracer": {},
}
conf_type = {
"retry": int,
"client_type": str,
"use_profile": bool,
"channel_size": int,
"is_thread_op": bool,
}
conf_qualification = {
"retry": (">=", 1),
"client_type": ("in", ["brpc", "grpc"]),
"channel_size": (">=", 0),
}
ServerYamlConfChecker.check_conf(conf, default_conf, conf_type,
conf_qualification)
@staticmethod
def fill_with_default_conf(conf, default_conf):
for key, val in default_conf.items():
if conf.get(key) is None:
_LOGGER.warning("[CONF] {} not set, use default: {}"
.format(key, val))
conf[key] = val
@staticmethod
def check_conf_type(conf, conf_type):
for key, val in conf_type.items():
if key not in conf:
continue
if not isinstance(conf[key], val):
raise SystemExit("[CONF] {} must be {} type, but get {}."
.format(key, val, type(conf[key])))
@staticmethod
def check_conf_qualification(conf, conf_qualification):
for key, qualification in conf_qualification.items():
if key not in conf:
continue
if not isinstance(qualification, list):
qualification = [qualification]
if not ServerYamlConfChecker.qualification_check(conf[key],
qualification):
raise SystemExit("[CONF] {} must be {}, but get {}."
.format(key, ", ".join([
"{} {}"
.format(q[0], q[1]) for q in qualification
]), conf[key]))
@staticmethod
def qualification_check(value, qualifications):
if not isinstance(qualifications, list):
qualifications = [qualifications]
ok = True
for q in qualifications:
operator, limit = q
if operator == "<":
ok = value < limit
elif operator == "==":
ok = value == limit
elif operator == ">":
ok = value > limit
elif operator == "<=":
ok = value <= limit
elif operator == ">=":
ok = value >= limit
elif operator == "in":
ok = value in limit
else:
raise SystemExit("unknow operator: {}".format(operator))
if ok == False:
break
return ok
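# --- Editorial usage sketch (not part of the patch) -----------------------
# How the checker is meant to be driven: feed a minimal dict through
# load_server_yaml_conf and let fill_with_default_conf supply the rest.
# The op name "uci" and every value below are hypothetical.
conf = ServerYamlConfChecker.load_server_yaml_conf(yml_dict={
    "rpc_port": 18080,
    "worker_num": 4,
    "dag": {"is_thread_op": False},
    "op": {"uci": {"concurrency": 2, "local_service_conf": {}}},
})
assert conf["build_dag_each_worker"] is False    # filled in from default_conf
assert conf["dag"]["retry"] == 1                 # likewise
assert ServerYamlConfChecker.qualification_check(
    conf["rpc_port"], [(">=", 1024), ("<=", 65535)])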
...@@ -22,10 +22,165 @@ elif sys.version_info.major == 3: ...@@ -22,10 +22,165 @@ elif sys.version_info.major == 3:
import queue as Queue import queue as Queue
else: else:
raise Exception("Error Python version") raise Exception("Error Python version")
from time import time as _time
import time import time
import threading import threading
import multiprocessing
_LOGGER = logging.getLogger() _LOGGER = logging.getLogger(__name__)
_LOGGER.propagate = False
class PerformanceTracer(object):
def __init__(self, is_thread_mode, interval_s, server_worker_num):
self._is_thread_mode = is_thread_mode
if is_thread_mode:
# Because a Channel created in thread mode cannot be
# accessed across processes, the PerformanceTracer also
# runs in thread mode in that case. However, performance
# may be affected by the GIL.
self._data_buffer = Queue.Queue()
else:
self._data_buffer = multiprocessing.Manager().Queue()
self._interval_s = interval_s
self._thrd = None
self._proc = None
self._channels = []
# The size of data in Channel will not exceed server_worker_num
self._server_worker_num = server_worker_num
def data_buffer(self):
return self._data_buffer
def start(self):
if self._is_thread_mode:
self._thrd = threading.Thread(
target=self._trace_func, args=(self._channels, ))
self._thrd.daemon = True
self._thrd.start()
else:
self._proc = multiprocessing.Process(
target=self._trace_func, args=(self._channels, ))
self._proc.daemon = True
self._proc.start()
def set_channels(self, channels):
self._channels = channels
def _trace_func(self, channels):
all_actions = ["in", "prep", "midp", "postp", "out"]
calcu_actions = ["prep", "midp", "postp"]
while True:
op_cost = {}
err_request = []
err_count = 0
_LOGGER.info("==================== TRACER ======================")
# op
while True:
try:
item = self._data_buffer.get_nowait()
name = item["name"]
actions = item["actions"]
if name == "DAG":
succ = item["succ"]
req_id = item["id"]
if not succ:
err_count += 1
err_request.append(req_id)
if name not in op_cost:
op_cost[name] = {}
for action, cost in actions.items():
if action not in op_cost[name]:
op_cost[name][action] = []
op_cost[name][action].append(cost)
except Queue.Empty:
break
if len(op_cost) != 0:
for name in op_cost:
tot_cost, calcu_cost = 0.0, 0.0
for action, costs in op_cost[name].items():
op_cost[name][action] = sum(costs) / (1e3 * len(costs))
tot_cost += op_cost[name][action]
if name != "DAG":
_LOGGER.info("Op({}):".format(name))
for action in all_actions:
if action in op_cost[name]:
_LOGGER.info("\t{}[{} ms]".format(
action, op_cost[name][action]))
for action in calcu_actions:
if action in op_cost[name]:
calcu_cost += op_cost[name][action]
_LOGGER.info("\tidle[{}]".format(1 - 1.0 * calcu_cost /
tot_cost))
if "DAG" in op_cost:
calls = op_cost["DAG"].values()
calls.sort()
tot = len(calls)
qps = 1.0 * tot / self._interval_s
ave_cost = sum(calls) / tot
latencys = [50, 60, 70, 80, 90, 95, 99]
_LOGGER.info("DAGExecutor:")
_LOGGER.info("\tQuery count[{}]".format(tot))
_LOGGER.info("\tQPS[{} q/s]".format(qps))
_LOGGER.info("\tSucc[{}]".format(1 - 1.0 * err_count / tot))
_LOGGER.info("\tError req[{}]".format(", ".join(
[str(x) for x in err_request])))
_LOGGER.info("\tLatency:")
_LOGGER.info("\t\tave[{} ms]".format(ave_cost))
for latency in latencys:
_LOGGER.info("\t\t.{}[{} ms]".format(latency, calls[int(
tot * latency / 100.0)]))
# channel
_LOGGER.info("Channel (server worker num[{}]):".format(
self._server_worker_num))
for channel in channels:
_LOGGER.info("\t{}(In: {}, Out: {}) size[{}/{}]".format(
channel.name,
channel.get_producers(),
channel.get_consumers(),
channel.size(), channel.get_maxsize()))
time.sleep(self._interval_s)
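# --- Editorial usage sketch (not part of the patch) -----------------------
# What producers are expected to push into the tracer buffer: each Op reports
# its per-stage costs in microseconds, and the DAG executor reports success
# per request. Names, ids and costs below are illustrative only.
tracer = PerformanceTracer(
    is_thread_mode=False, interval_s=10, server_worker_num=4)
buf = tracer.data_buffer()
buf.put({
    "name": "uci",               # hypothetical Op name
    "actions": {"in": 120, "prep": 800, "midp": 5200, "postp": 300, "out": 90},
})
buf.put({
    "name": "DAG", "succ": True, "id": 0,
    "actions": {"call_0": 6800},
})
tracer.set_channels([])          # channel stats are skipped in this toy example
tracer.start()                   # spawns the reporting loop as a daemon process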
class UnsafeTimeProfiler(object):
""" thread unsafe profiler """
def __init__(self):
self.pid = os.getpid()
self.print_head = 'PROFILE\tpid:{}\t'.format(self.pid)
self.time_record = [self.print_head]
self._enable = False
def enable(self, enable):
self._enable = enable
def record(self, name):
if self._enable is False:
return
timestamp = int(round(_time() * 1000000))
self.time_record.append('{}:{} '.format(name, timestamp))
return timestamp
def print_profile(self):
if self._enable is False:
return
sys.stderr.write(self.gen_profile_str())
def gen_profile_str(self):
if self._enable is False:
return
self.time_record.append('\n')
profile_str = ''.join(self.time_record)
self.time_record = [self.print_head]
return profile_str
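# --- Editorial usage sketch (not part of the patch) -----------------------
# UnsafeTimeProfiler trades thread safety for speed: it appends
# "name:timestamp" pairs to a list and flushes them as a single PROFILE line.
# Stage names are made up; the module-level imports (os, sys, _time) are assumed.
up = UnsafeTimeProfiler()
up.enable(True)
start = up.record("prep#op0_0")          # returns the microsecond timestamp
# ... run preprocessing here ...
end = up.record("prep#op0_1")
print("prep cost: {} us".format(end - start))
profile_line = up.gen_profile_str()      # "PROFILE\tpid:...\tprep#op0_0:... prep#op0_1:... \n"
sys.stderr.write(profile_line)           # or attach it to the response, as the Op does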
class TimeProfiler(object): class TimeProfiler(object):
...@@ -42,12 +197,13 @@ class TimeProfiler(object): ...@@ -42,12 +197,13 @@ class TimeProfiler(object):
def record(self, name_with_tag): def record(self, name_with_tag):
if self._enable is False: if self._enable is False:
return return
timestamp = int(round(time.time() * 1000000)) timestamp = int(round(_time() * 1000000))
name_with_tag = name_with_tag.split("_") name_with_tag = name_with_tag.split("_")
tag = name_with_tag[-1] tag = name_with_tag[-1]
name = '_'.join(name_with_tag[:-1]) name = '_'.join(name_with_tag[:-1])
with self._lock: with self._lock:
self._time_record.put((name, tag, timestamp)) self._time_record.put((name, tag, timestamp))
return timestamp
def print_profile(self): def print_profile(self):
if self._enable is False: if self._enable is False:
......
...@@ -18,6 +18,7 @@ package baidu.paddle_serving.pipeline_serving; ...@@ -18,6 +18,7 @@ package baidu.paddle_serving.pipeline_serving;
message Request { message Request {
repeated string key = 1; repeated string key = 1;
repeated string value = 2; repeated string value = 2;
optional string name = 3;
}; };
message Response { message Response {
......
...@@ -13,13 +13,131 @@ ...@@ -13,13 +13,131 @@
# limitations under the License. # limitations under the License.
import sys import sys
import logging
import threading
import multiprocessing
import multiprocessing.managers
from contextlib import closing
import socket
if sys.version_info.major == 2:
import Queue
from Queue import PriorityQueue
elif sys.version_info.major == 3:
import queue as Queue
from queue import PriorityQueue
else:
raise Exception("Error Python version")
_LOGGER = logging.getLogger(__name__)
class AvailablePortGenerator(object):
def __init__(self, start_port=12000):
self._curr_port = start_port
@staticmethod
def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
result = sock.connect_ex(('0.0.0.0', port))
if result != 0:
return True
else:
return False
def next(self):
while not AvailablePortGenerator.port_is_available(self._curr_port):
self._curr_port += 1
self._curr_port += 1
return self._curr_port - 1
_AvailablePortGenerator = AvailablePortGenerator()
def GetAvailablePortGenerator():
return _AvailablePortGenerator
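# --- Editorial usage sketch (not part of the patch) -----------------------
# port_is_available() probes the port with connect(); a refused connection
# means nothing is listening there, so the port can be taken. The port
# numbers below are illustrative.
gen = GetAvailablePortGenerator()        # module-level singleton
rpc_port = gen.next()                    # first free port >= 12000
assert AvailablePortGenerator.port_is_available(rpc_port)

http_port = 18082                        # hypothetical user-configured port
if not AvailablePortGenerator.port_is_available(http_port):
    raise SystemExit("port {} is already used".format(http_port))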
class NameGenerator(object): class NameGenerator(object):
# use unsafe-id-generator
def __init__(self, prefix): def __init__(self, prefix):
self._idx = -1 self._idx = -1
self._prefix = prefix self._prefix = prefix
self._id_generator = UnsafeIdGenerator(1000000000000000000)
def next(self):
next_id = self._id_generator.next()
return "{}{}".format(self._prefix, next_id)
class UnsafeIdGenerator(object):
def __init__(self, max_id, base_counter=0, step=1):
self._base_counter = base_counter
self._counter = self._base_counter
self._step = step
self._max_id = max_id # for reset
def next(self): def next(self):
self._idx += 1 if self._counter >= self._max_id:
return "{}{}".format(self._prefix, self._idx) self._counter = self._base_counter
_LOGGER.info("Reset Id: {}".format(self._counter))
next_id = self._counter
self._counter += self._step
return next_id
class ThreadIdGenerator(UnsafeIdGenerator):
def __init__(self, max_id, base_counter=0, step=1, lock=None):
# if you pass in your own lock, it may need to be a reentrant lock
self._lock = lock
if self._lock is None:
self._lock = threading.Lock()
super(ThreadIdGenerator, self).__init__(max_id, base_counter, step)
def next(self):
next_id = None
with self._lock:
if self._counter >= self._max_id:
self._counter = self._base_counter
_LOGGER.info("Reset Id: {}".format(self._counter))
next_id = self._counter
self._counter += self._step
return next_id
class ProcessIdGenerator(UnsafeIdGenerator):
def __init__(self, max_id, base_counter=0, step=1, lock=None):
# if you pass in your own lock, it may need to be a reentrant lock
self._lock = lock
if self._lock is None:
self._lock = multiprocessing.Lock()
self._base_counter = base_counter
self._counter = multiprocessing.Manager().Value('i', base_counter)
self._step = step
self._max_id = max_id
def next(self):
next_id = None
with self._lock:
if self._counter.value >= self._max_id:
self._counter.value = self._base_counter
_LOGGER.info("Reset Id: {}".format(self._counter.value))
next_id = self._counter.value
self._counter.value += self._step
return next_id
def PipelineProcSyncManager():
"""
add PriorityQueue into SyncManager, see more:
https://stackoverflow.com/questions/25324560/strange-queue-priorityqueue-behaviour-with-multiprocessing-in-python-2-7-6?answertab=active#tab-top
"""
class PipelineManager(multiprocessing.managers.SyncManager):
pass
PipelineManager.register("PriorityQueue", PriorityQueue)
m = PipelineManager()
m.start()
return m
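# --- Editorial usage sketch (not part of the patch) -----------------------
# The custom SyncManager exists so a PriorityQueue can be shared across
# processes; ProcessIdGenerator hands out request ids the same way. Values
# below are illustrative.
m = PipelineProcSyncManager()
que = m.PriorityQueue()                  # proxy object usable from child processes
que.put((1, "high-priority item"))
que.put((5, "low-priority item"))
print(que.get())                         # -> (1, 'high-priority item')

id_gen = ProcessIdGenerator(max_id=1000000000)
print(id_gen.next(), id_gen.next())      # -> 0 1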
numpy>=1.12, <=1.16.4 ; python_version<"3.5" numpy>=1.12, <=1.16.4 ; python_version<"3.5"
shapely==1.7.0
wheel>=0.34.0, <0.35.0
setuptools>=44.1.0
opencv-python==4.2.0.32
google>=2.0.3 google>=2.0.3
opencv-python==4.2.0.32
protobuf>=3.12.2 protobuf>=3.12.2
grpcio-tools>=1.28.1 grpcio-tools>=1.28.1
grpcio>=1.28.1 grpcio>=1.28.1
......
...@@ -16,7 +16,6 @@ from __future__ import absolute_import ...@@ -16,7 +16,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import platform
import os import os
from setuptools import setup, Distribution, Extension from setuptools import setup, Distribution, Extension
...@@ -24,26 +23,17 @@ from setuptools import find_packages ...@@ -24,26 +23,17 @@ from setuptools import find_packages
from setuptools import setup from setuptools import setup
from paddle_serving_app.version import serving_app_version from paddle_serving_app.version import serving_app_version
from pkg_resources import DistributionNotFound, get_distribution from pkg_resources import DistributionNotFound, get_distribution
import util
def python_version(): max_version, mid_version, min_version = util.python_version()
return [int(v) for v in platform.python_version().split(".")]
def find_package(pkgname):
try:
get_distribution(pkgname)
return True
except DistributionNotFound:
return False
max_version, mid_version, min_version = python_version()
if '${PACK}' == 'ON': if '${PACK}' == 'ON':
copy_lib() copy_lib()
REQUIRED_PACKAGES = [ REQUIRED_PACKAGES = [
'six >= 1.10.0', 'sentencepiece', 'opencv-python', 'pillow', 'six >= 1.10.0', 'sentencepiece', 'opencv-python<=4.2.0.32', 'pillow',
'shapely', 'pyclipper' 'shapely<=1.6.1', 'pyclipper'
] ]
packages=['paddle_serving_app', packages=['paddle_serving_app',
......
...@@ -16,7 +16,6 @@ from __future__ import absolute_import ...@@ -16,7 +16,6 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import platform
import os import os
import sys import sys
...@@ -24,20 +23,10 @@ from setuptools import setup, Distribution, Extension ...@@ -24,20 +23,10 @@ from setuptools import setup, Distribution, Extension
from setuptools import find_packages from setuptools import find_packages
from setuptools import setup from setuptools import setup
from paddle_serving_client.version import serving_client_version from paddle_serving_client.version import serving_client_version
from pkg_resources import DistributionNotFound, get_distribution import util
py_version = sys.version_info py_version = sys.version_info
def python_version():
return [int(v) for v in platform.python_version().split(".")]
def find_package(pkgname):
try:
get_distribution(pkgname)
return True
except DistributionNotFound:
return False
def copy_lib(): def copy_lib():
if py_version[0] == 2: if py_version[0] == 2:
lib_list = ['libpython2.7.so.1.0', 'libssl.so.10', 'libcrypto.so.10'] lib_list = ['libpython2.7.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
...@@ -51,18 +40,20 @@ def copy_lib(): ...@@ -51,18 +40,20 @@ def copy_lib():
text = r.read() text = r.read()
os.popen('cp {} ./paddle_serving_client/lib'.format(text.strip().split(' ')[1])) os.popen('cp {} ./paddle_serving_client/lib'.format(text.strip().split(' ')[1]))
max_version, mid_version, min_version = python_version() max_version, mid_version, min_version = util.python_version()
# gen pipeline proto code
util.gen_pipeline_code("paddle_serving_client")
if '${PACK}' == 'ON': if '${PACK}' == 'ON':
copy_lib() copy_lib()
REQUIRED_PACKAGES = [ REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.11.0', 'numpy >= 1.12', 'grpcio >= 1.28.1', 'six >= 1.10.0', 'protobuf >= 3.11.0', 'numpy >= 1.12', 'grpcio >= 1.28.1',
'grpcio-tools >= 1.28.1' 'grpcio-tools >= 1.28.1'
] ]
if not find_package("paddlepaddle") and not find_package("paddlepaddle-gpu"): if not util.find_package("paddlepaddle") and not util.find_package("paddlepaddle-gpu"):
REQUIRED_PACKAGES.append("paddlepaddle") REQUIRED_PACKAGES.append("paddlepaddle")
...@@ -72,8 +63,10 @@ packages=['paddle_serving_client', ...@@ -72,8 +63,10 @@ packages=['paddle_serving_client',
'paddle_serving_client.metric', 'paddle_serving_client.metric',
'paddle_serving_client.utils', 'paddle_serving_client.utils',
'paddle_serving_client.pipeline', 'paddle_serving_client.pipeline',
'paddle_serving_client.pipeline.proto'] 'paddle_serving_client.pipeline.proto',
package_data={'paddle_serving_client': ['serving_client.so','lib/*'],} 'paddle_serving_client.pipeline.gateway',
'paddle_serving_client.pipeline.gateway.proto']
package_data={'paddle_serving_client': ['serving_client.so', 'lib/*', 'pipeline/gateway/libproxy_server.so'],}
package_dir={'paddle_serving_client': package_dir={'paddle_serving_client':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client', '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client',
'paddle_serving_client.proto': 'paddle_serving_client.proto':
...@@ -87,7 +80,11 @@ package_dir={'paddle_serving_client': ...@@ -87,7 +80,11 @@ package_dir={'paddle_serving_client':
'paddle_serving_client.pipeline': 'paddle_serving_client.pipeline':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/pipeline', '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/pipeline',
'paddle_serving_client.pipeline.proto': 'paddle_serving_client.pipeline.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/pipeline/proto'} '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/pipeline/proto',
'paddle_serving_client.pipeline.gateway':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/pipeline/gateway',
'paddle_serving_client.pipeline.gateway.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/pipeline/gateway/proto'}
setup( setup(
name='paddle-serving-client', name='paddle-serving-client',
......
...@@ -16,17 +16,14 @@ from __future__ import absolute_import ...@@ -16,17 +16,14 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import platform
from setuptools import setup, Distribution, Extension from setuptools import setup, Distribution, Extension
from setuptools import find_packages from setuptools import find_packages
from setuptools import setup from setuptools import setup
from paddle_serving.version import serving_client_version from paddle_serving.version import serving_client_version
from grpc_tools import protoc
import util
def python_version(): max_version, mid_version, min_version = util.python_version()
return [int(v) for v in platform.python_version().split(".")]
max_version, mid_version, min_version = python_version()
REQUIRED_PACKAGES = [ REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.1.0','paddlepaddle' 'six >= 1.10.0', 'protobuf >= 3.1.0','paddlepaddle'
......
...@@ -16,25 +16,16 @@ from __future__ import absolute_import ...@@ -16,25 +16,16 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import platform
from setuptools import setup, Distribution, Extension from setuptools import setup, Distribution, Extension
from setuptools import find_packages from setuptools import find_packages
from setuptools import setup from setuptools import setup
from paddle_serving_server.version import serving_server_version from paddle_serving_server.version import serving_server_version
from pkg_resources import DistributionNotFound, get_distribution import util
def find_package(pkgname):
try:
get_distribution(pkgname)
return True
except DistributionNotFound:
return False
def python_version(): max_version, mid_version, min_version = util.python_version()
return [int(v) for v in platform.python_version().split(".")]
max_version, mid_version, min_version = python_version() # gen pipeline proto code
util.gen_pipeline_code("paddle_serving_server")
REQUIRED_PACKAGES = [ REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1', 'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
...@@ -44,7 +35,9 @@ REQUIRED_PACKAGES = [ ...@@ -44,7 +35,9 @@ REQUIRED_PACKAGES = [
packages=['paddle_serving_server', packages=['paddle_serving_server',
'paddle_serving_server.proto', 'paddle_serving_server.proto',
'paddle_serving_server.pipeline', 'paddle_serving_server.pipeline',
'paddle_serving_server.pipeline.proto'] 'paddle_serving_server.pipeline.proto',
'paddle_serving_server.pipeline.gateway',
'paddle_serving_server.pipeline.gateway.proto']
package_dir={'paddle_serving_server': package_dir={'paddle_serving_server':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server', '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server',
...@@ -53,7 +46,13 @@ package_dir={'paddle_serving_server': ...@@ -53,7 +46,13 @@ package_dir={'paddle_serving_server':
'paddle_serving_server.pipeline': 'paddle_serving_server.pipeline':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline', '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline',
'paddle_serving_server.pipeline.proto': 'paddle_serving_server.pipeline.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/proto'} '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/proto',
'paddle_serving_server.pipeline.gateway':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/gateway',
'paddle_serving_server.pipeline.gateway.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/gateway/proto'}
package_data={'paddle_serving_server': ['pipeline/gateway/libproxy_server.so'],}
setup( setup(
name='paddle-serving-server', name='paddle-serving-server',
...@@ -65,6 +64,7 @@ setup( ...@@ -65,6 +64,7 @@ setup(
author_email='guru4elephant@gmail.com', author_email='guru4elephant@gmail.com',
install_requires=REQUIRED_PACKAGES, install_requires=REQUIRED_PACKAGES,
packages=packages, packages=packages,
package_data=package_data,
package_dir=package_dir, package_dir=package_dir,
# PyPI package information. # PyPI package information.
classifiers=[ classifiers=[
......
...@@ -16,25 +16,16 @@ from __future__ import absolute_import ...@@ -16,25 +16,16 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import platform
from setuptools import setup, Distribution, Extension from setuptools import setup, Distribution, Extension
from setuptools import find_packages from setuptools import find_packages
from setuptools import setup from setuptools import setup
from paddle_serving_server_gpu.version import serving_server_version from paddle_serving_server_gpu.version import serving_server_version
from pkg_resources import DistributionNotFound, get_distribution import util
def find_package(pkgname):
try:
get_distribution(pkgname)
return True
except DistributionNotFound:
return False
def python_version(): max_version, mid_version, min_version = util.python_version()
return [int(v) for v in platform.python_version().split(".")]
max_version, mid_version, min_version = python_version() # gen pipeline proto code
util.gen_pipeline_code("paddle_serving_server_gpu")
REQUIRED_PACKAGES = [ REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1', 'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
...@@ -44,7 +35,9 @@ REQUIRED_PACKAGES = [ ...@@ -44,7 +35,9 @@ REQUIRED_PACKAGES = [
packages=['paddle_serving_server_gpu', packages=['paddle_serving_server_gpu',
'paddle_serving_server_gpu.proto', 'paddle_serving_server_gpu.proto',
'paddle_serving_server_gpu.pipeline', 'paddle_serving_server_gpu.pipeline',
'paddle_serving_server_gpu.pipeline.proto'] 'paddle_serving_server_gpu.pipeline.proto',
'paddle_serving_server_gpu.pipeline.gateway',
'paddle_serving_server_gpu.pipeline.gateway.proto']
package_dir={'paddle_serving_server_gpu': package_dir={'paddle_serving_server_gpu':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu', '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu',
...@@ -53,7 +46,13 @@ package_dir={'paddle_serving_server_gpu': ...@@ -53,7 +46,13 @@ package_dir={'paddle_serving_server_gpu':
'paddle_serving_server_gpu.pipeline': 'paddle_serving_server_gpu.pipeline':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline', '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline',
'paddle_serving_server_gpu.pipeline.proto': 'paddle_serving_server_gpu.pipeline.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/proto'} '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/proto',
'paddle_serving_server_gpu.pipeline.gateway':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/gateway',
'paddle_serving_server_gpu.pipeline.gateway.proto':
'${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/gateway/proto'}
package_data={'paddle_serving_server_gpu': ['pipeline/gateway/libproxy_server.so'],}
setup( setup(
name='paddle-serving-server-gpu', name='paddle-serving-server-gpu',
...@@ -65,6 +64,7 @@ setup( ...@@ -65,6 +64,7 @@ setup(
author_email='guru4elephant@gmail.com', author_email='guru4elephant@gmail.com',
install_requires=REQUIRED_PACKAGES, install_requires=REQUIRED_PACKAGES,
packages=packages, packages=packages,
package_data=package_data,
package_dir=package_dir, package_dir=package_dir,
# PyPI package information. # PyPI package information.
classifiers=[ classifiers=[
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pkg_resources import DistributionNotFound, get_distribution
from grpc_tools import protoc
import os
import platform
def python_version():
return [int(v) for v in platform.python_version().split(".")]
def find_package(pkgname):
try:
get_distribution(pkgname)
return True
except DistributionNotFound:
return False
def gen_pipeline_code(package_name):
# pipeline service proto
protoc.main((
'',
'-I.',
'--python_out=.',
'--grpc_python_out=.',
'{}/pipeline/proto/pipeline_service.proto'.format(package_name), ))
# pipeline grpc-gateway proto
# *.pb.go
ret = os.system(
"cd {}/pipeline/gateway/proto/ && "
"../../../../../third_party/install/protobuf/bin/protoc -I. "
"-I$GOPATH/src "
"-I$GOPATH/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis "
"--go_out=plugins=grpc:. "
"gateway.proto".format(package_name))
if ret != 0:
exit(1)
# *.gw.go
ret = os.system(
"cd {}/pipeline/gateway/proto/ && "
"../../../../../third_party/install/protobuf/bin/protoc -I. "
"-I$GOPATH/src "
"-I$GOPATH/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis "
"--grpc-gateway_out=logtostderr=true:. "
"gateway.proto".format(package_name))
if ret != 0:
exit(1)
# pipeline grpc-gateway shared-lib
ret = os.system(
"cd {}/pipeline/gateway && "
"go build -buildmode=c-shared -o libproxy_server.so proxy_server.go".
format(package_name))
if ret != 0:
exit(1)
...@@ -12,4 +12,5 @@ RUN yum -y install wget && \ ...@@ -12,4 +12,5 @@ RUN yum -y install wget && \
RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python get-pip.py && rm get-pip.py && \ python get-pip.py && rm get-pip.py && \
localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \ localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
echo "export LANG=en_US.utf8" >> /root/.bashrc echo "export LANG=en_US.utf8" >> /root/.bashrc && \
echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
...@@ -31,7 +31,6 @@ RUN yum -y install wget && \ ...@@ -31,7 +31,6 @@ RUN yum -y install wget && \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python get-pip.py && \ python get-pip.py && \
rm get-pip.py && \ rm get-pip.py && \
pip install google protobuf setuptools wheel flask numpy==1.16.4 && \
wget https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tgz && \ wget https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tgz && \
tar -zxf Python-3.6.8.tgz && \ tar -zxf Python-3.6.8.tgz && \
cd Python-3.6.8 && \ cd Python-3.6.8 && \
...@@ -42,7 +41,7 @@ RUN yum -y install wget && \ ...@@ -42,7 +41,7 @@ RUN yum -y install wget && \
echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \ echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
source /root/.bashrc && \ source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \ cd .. && rm -rf Python-3.6.8* && \
pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \ yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all && \ yum clean all && \
echo "export LANG=en_US.utf8" >> /root/.bashrc echo "export LANG=en_US.utf8" >> /root/.bashrc && \
echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
...@@ -31,7 +31,6 @@ RUN yum -y install wget && \ ...@@ -31,7 +31,6 @@ RUN yum -y install wget && \
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \
python get-pip.py && \ python get-pip.py && \
rm get-pip.py && \ rm get-pip.py && \
pip install google protobuf setuptools wheel flask numpy==1.16.4 && \
wget https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tgz && \ wget https://www.python.org/ftp/python/3.6.8/Python-3.6.8.tgz && \
tar -zxf Python-3.6.8.tgz && \ tar -zxf Python-3.6.8.tgz && \
cd Python-3.6.8 && \ cd Python-3.6.8 && \
...@@ -42,8 +41,8 @@ RUN yum -y install wget && \ ...@@ -42,8 +41,8 @@ RUN yum -y install wget && \
echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \ echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
source /root/.bashrc && \ source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \ cd .. && rm -rf Python-3.6.8* && \
pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \ yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all && \ yum clean all && \
localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \ localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
echo "export LANG=en_US.utf8" >> /root/.bashrc echo "export LANG=en_US.utf8" >> /root/.bashrc && \
echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
...@@ -23,7 +23,6 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ ...@@ -23,7 +23,6 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
RUN yum -y install python-devel sqlite-devel >/dev/null \ RUN yum -y install python-devel sqlite-devel >/dev/null \
&& curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
&& python get-pip.py >/dev/null \ && python get-pip.py >/dev/null \
&& pip install google protobuf setuptools wheel flask >/dev/null \
&& rm get-pip.py && rm get-pip.py
RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2 \ RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2 \
...@@ -35,8 +34,7 @@ RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2 ...@@ -35,8 +34,7 @@ RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2
&& cd .. \ && cd .. \
&& rm -rf patchelf-0.10* && rm -rf patchelf-0.10*
RUN yum install -y python3 python3-devel \ RUN yum install -y python3 python3-devel
&& pip3 install google protobuf setuptools wheel flask
RUN yum -y update >/dev/null \ RUN yum -y update >/dev/null \
&& yum -y install dnf >/dev/null \ && yum -y install dnf >/dev/null \
......
...@@ -18,6 +18,7 @@ RUN ln -s /usr/local/cuda-10.0/lib64/libcublas.so.10.0 /usr/local/cuda-10.0/lib6 ...@@ -18,6 +18,7 @@ RUN ln -s /usr/local/cuda-10.0/lib64/libcublas.so.10.0 /usr/local/cuda-10.0/lib6
ln -s /usr/local/cuda-10.0/targets/x86_64-linux/lib/libcudnn.so.7 /usr/local/cuda-10.0/targets/x86_64-linux/lib/libcudnn.so && \ ln -s /usr/local/cuda-10.0/targets/x86_64-linux/lib/libcudnn.so.7 /usr/local/cuda-10.0/targets/x86_64-linux/lib/libcudnn.so && \
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-10.0/targets/x86_64-linux/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \ echo 'export LD_LIBRARY_PATH=/usr/local/cuda-10.0/targets/x86_64-linux/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
echo "export LANG=en_US.utf8" >> /root/.bashrc && \ echo "export LANG=en_US.utf8" >> /root/.bashrc && \
echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc && \
mkdir -p /usr/local/cuda/extras mkdir -p /usr/local/cuda/extras
COPY --from=builder /usr/local/cuda/extras/CUPTI /usr/local/cuda/extras/CUPTI COPY --from=builder /usr/local/cuda/extras/CUPTI /usr/local/cuda/extras/CUPTI
...@@ -23,13 +23,12 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ ...@@ -23,13 +23,12 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
RUN yum -y install python-devel sqlite-devel \ RUN yum -y install python-devel sqlite-devel \
&& curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
&& python get-pip.py >/dev/null \ && python get-pip.py >/dev/null \
&& pip install google protobuf setuptools wheel flask >/dev/null \
&& rm get-pip.py && rm get-pip.py
RUN yum install -y python3 python3-devel \ RUN yum install -y python3 python3-devel \
&& pip3 install google protobuf setuptools wheel flask \
&& yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
&& yum clean all && yum clean all
RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
&& echo "export LANG=en_US.utf8" >> /root/.bashrc && echo "export LANG=en_US.utf8" >> /root/.bashrc \
&& echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
...@@ -18,6 +18,7 @@ RUN ln -s /usr/local/cuda-9.0/lib64/libcublas.so.9.0 /usr/local/cuda-9.0/lib64/l ...@@ -18,6 +18,7 @@ RUN ln -s /usr/local/cuda-9.0/lib64/libcublas.so.9.0 /usr/local/cuda-9.0/lib64/l
ln -s /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so.7 /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so && \ ln -s /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so.7 /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so && \
echo 'export LD_LIBRARY_PATH=/usr/local/cuda-9.0/targets/x86_64-linux/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \ echo 'export LD_LIBRARY_PATH=/usr/local/cuda-9.0/targets/x86_64-linux/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
echo "export LANG=en_US.utf8" >> /root/.bashrc && \ echo "export LANG=en_US.utf8" >> /root/.bashrc && \
echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc && \
mkdir -p /usr/local/cuda/extras mkdir -p /usr/local/cuda/extras
COPY --from=builder /usr/local/cuda/extras/CUPTI /usr/local/cuda/extras/CUPTI COPY --from=builder /usr/local/cuda/extras/CUPTI /usr/local/cuda/extras/CUPTI
...@@ -22,13 +22,12 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ ...@@ -22,13 +22,12 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
RUN yum -y install python-devel sqlite-devel \ RUN yum -y install python-devel sqlite-devel \
&& curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
&& python get-pip.py >/dev/null \ && python get-pip.py >/dev/null \
&& pip install google protobuf setuptools wheel flask >/dev/null \
&& rm get-pip.py && rm get-pip.py
RUN yum install -y python3 python3-devel \ RUN yum install -y python3 python3-devel \
&& pip3 install google protobuf setuptools wheel flask \
&& yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
&& yum clean all && yum clean all
RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
&& echo "export LANG=en_US.utf8" >> /root/.bashrc && echo "export LANG=en_US.utf8" >> /root/.bashrc \
&& echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
...@@ -19,13 +19,12 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ ...@@ -19,13 +19,12 @@ RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
RUN yum -y install python-devel sqlite-devel \ RUN yum -y install python-devel sqlite-devel \
&& curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
&& python get-pip.py >/dev/null \ && python get-pip.py >/dev/null \
&& pip install google protobuf setuptools wheel flask >/dev/null \
&& rm get-pip.py && rm get-pip.py
RUN yum install -y python3 python3-devel \ RUN yum install -y python3 python3-devel \
&& pip3 install google protobuf setuptools wheel flask \
&& yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
&& yum clean all && yum clean all
RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
&& echo "export LANG=en_US.utf8" >> /root/.bashrc && echo "export LANG=en_US.utf8" >> /root/.bashrc \
&& echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
...@@ -19,6 +19,13 @@ function init() { ...@@ -19,6 +19,13 @@ function init() {
cd Serving cd Serving
export SERVING_WORKDIR=$PWD export SERVING_WORKDIR=$PWD
$PYTHONROOT/bin/python -m pip install -r python/requirements.txt $PYTHONROOT/bin/python -m pip install -r python/requirements.txt
export GOPATH=$HOME/go
export PATH=$PATH:$GOPATH/bin
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
go get -u github.com/golang/protobuf/protoc-gen-go
go get -u google.golang.org/grpc
} }
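init() now also prepares the Go toolchain used to build the gRPC gateway. A small sanity-check sketch, assuming it runs in the same shell after init(); it is not part of the committed script:

```shell
# Sketch: confirm the protoc plugins fetched by the `go get` lines above landed on PATH.
export GOPATH=$HOME/go
export PATH=$PATH:$GOPATH/bin
for plugin in protoc-gen-grpc-gateway protoc-gen-swagger protoc-gen-go; do
    command -v "$plugin" >/dev/null || { echo "$plugin not found on PATH"; exit 1; }
done
```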
function check_cmd() { function check_cmd() {
...@@ -298,7 +305,6 @@ function python_test_bert() { ...@@ -298,7 +305,6 @@ function python_test_bert() {
cd bert # pwd: /Serving/python/examples/bert cd bert # pwd: /Serving/python/examples/bert
case $TYPE in case $TYPE in
CPU) CPU)
pip install paddlehub
# Because download from paddlehub may timeout, # Because download from paddlehub may timeout,
# download the model from bos(max_seq_len=128). # download the model from bos(max_seq_len=128).
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
...@@ -306,14 +312,12 @@ function python_test_bert() { ...@@ -306,14 +312,12 @@ function python_test_bert() {
sh get_data.sh sh get_data.sh
check_cmd "python -m paddle_serving_server.serve --model bert_chinese_L-12_H-768_A-12_model --port 9292 &" check_cmd "python -m paddle_serving_server.serve --model bert_chinese_L-12_H-768_A-12_model --port 9292 &"
sleep 5 sleep 5
pip install paddle_serving_app
check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_chinese_L-12_H-768_A-12_client/serving_client_conf.prototxt" check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_chinese_L-12_H-768_A-12_client/serving_client_conf.prototxt"
kill_server_process kill_server_process
echo "bert RPC inference pass" echo "bert RPC inference pass"
;; ;;
GPU) GPU)
export CUDA_VISIBLE_DEVICES=0 export CUDA_VISIBLE_DEVICES=0
pip install paddlehub
# Because download from paddlehub may timeout, # Because download from paddlehub may timeout,
# download the model from bos(max_seq_len=128). # download the model from bos(max_seq_len=128).
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
...@@ -321,7 +325,6 @@ function python_test_bert() { ...@@ -321,7 +325,6 @@ function python_test_bert() {
sh get_data.sh sh get_data.sh
check_cmd "python -m paddle_serving_server_gpu.serve --model bert_chinese_L-12_H-768_A-12_model --port 9292 --gpu_ids 0 &" check_cmd "python -m paddle_serving_server_gpu.serve --model bert_chinese_L-12_H-768_A-12_model --port 9292 --gpu_ids 0 &"
sleep 5 sleep 5
pip install paddle_serving_app
check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_chinese_L-12_H-768_A-12_client/serving_client_conf.prototxt" check_cmd "head -n 10 data-c.txt | python bert_client.py --model bert_chinese_L-12_H-768_A-12_client/serving_client_conf.prototxt"
kill_server_process kill_server_process
echo "bert RPC inference pass" echo "bert RPC inference pass"
...@@ -760,13 +763,14 @@ function python_test_resnet50(){ ...@@ -760,13 +763,14 @@ function python_test_resnet50(){
} }
function python_test_pipeline(){ function python_test_pipeline(){
# pwd:/ Serving/python/examples # pwd: /Serving/python/examples
local TYPE=$1 local TYPE=$1
export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
unsetproxy unsetproxy
cd pipeline/imdb_model_ensemble cd pipeline # pwd: /Serving/python/examples/pipeline
case $TYPE in case $TYPE in
CPU) CPU)
cd imdb_model_ensemble # pwd: /Serving/python/examples/pipeline/imdb_model_ensemble
# start paddle serving service (brpc) # start paddle serving service (brpc)
sh get_data.sh sh get_data.sh
python -m paddle_serving_server.serve --model imdb_cnn_model --port 9292 --workdir test9292 &> cnn.log & python -m paddle_serving_server.serve --model imdb_cnn_model --port 9292 --workdir test9292 &> cnn.log &
...@@ -775,8 +779,8 @@ function python_test_pipeline(){ ...@@ -775,8 +779,8 @@ function python_test_pipeline(){
# test: thread servicer & thread op # test: thread servicer & thread op
cat << EOF > config.yml cat << EOF > config.yml
port: 18080 rpc_port: 18080
worker_num: 2 worker_num: 4
build_dag_each_worker: false build_dag_each_worker: false
dag: dag:
is_thread_op: true is_thread_op: true
...@@ -792,8 +796,8 @@ EOF ...@@ -792,8 +796,8 @@ EOF
# test: thread servicer & process op # test: thread servicer & process op
cat << EOF > config.yml cat << EOF > config.yml
port: 18080 rpc_port: 18080
worker_num: 2 worker_num: 4
build_dag_each_worker: false build_dag_each_worker: false
dag: dag:
is_thread_op: false is_thread_op: false
...@@ -807,13 +811,13 @@ EOF ...@@ -807,13 +811,13 @@ EOF
ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill
kill_process_by_port 18080 kill_process_by_port 18080
# test: process servicer & thread op # test: process servicer & process op
cat << EOF > config.yml cat << EOF > config.yml
port: 18080 rpc_port: 18080
worker_num: 2 worker_num: 4
build_dag_each_worker: true build_dag_each_worker: false
dag: dag:
is_thread_op: flase is_thread_op: false
client_type: brpc client_type: brpc
retry: 1 retry: 1
use_profile: false use_profile: false
...@@ -823,12 +827,14 @@ EOF ...@@ -823,12 +827,14 @@ EOF
check_cmd "python test_pipeline_client.py" check_cmd "python test_pipeline_client.py"
ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill
kill_process_by_port 18080 kill_process_by_port 18080
# test: process servicer & process op # test: process servicer & thread op
pip uninstall grpcio -y
pip install grpcio --no-binary=grpcio
cat << EOF > config.yml cat << EOF > config.yml
port: 18080 rpc_port: 18080
worker_num: 2 worker_num: 4
build_dag_each_worker: false build_dag_each_worker: true
dag: dag:
is_thread_op: false is_thread_op: false
client_type: brpc client_type: brpc
...@@ -840,7 +846,7 @@ EOF ...@@ -840,7 +846,7 @@ EOF
check_cmd "python test_pipeline_client.py" check_cmd "python test_pipeline_client.py"
ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill ps -ef | grep "pipeline_server" | grep -v grep | awk '{print $2}' | xargs kill
kill_process_by_port 18080 kill_process_by_port 18080
kill_server_process kill_server_process
kill_process_by_port 9292 kill_process_by_port 9292
kill_process_by_port 9393 kill_process_by_port 9393
...@@ -850,8 +856,8 @@ EOF ...@@ -850,8 +856,8 @@ EOF
python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 --use_multilang --workdir test9393 &> bow.log & python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 --use_multilang --workdir test9393 &> bow.log &
sleep 5 sleep 5
cat << EOF > config.yml cat << EOF > config.yml
port: 18080 rpc_port: 18080
worker_num: 2 worker_num: 4
build_dag_each_worker: false build_dag_each_worker: false
dag: dag:
is_thread_op: false is_thread_op: false
...@@ -867,16 +873,47 @@ EOF ...@@ -867,16 +873,47 @@ EOF
kill_server_process kill_server_process
kill_process_by_port 9292 kill_process_by_port 9292
kill_process_by_port 9393 kill_process_by_port 9393
cd ..
cd simple_web_service # pwd: /Serving/python/examples/pipeline/simple_web_service
sh get_data.sh
python web_service.py >/dev/null &
sleep 5
curl -X POST -k http://localhost:18080/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
# check http code
http_code=`curl -X POST -k -d '{"key":["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}' -s -w "%{http_code}" -o /dev/null http://localhost:18080/uci/prediction`
if [ ${http_code} -ne 200 ]; then
echo "HTTP status code -ne 200"
exit 1
fi
ps -ef | grep "web_service" | grep -v grep | awk '{print $2}' | xargs kill
ps -ef | grep "pipeline" | grep -v grep | awk '{print $2}' | xargs kill
kill_server_process
cd ..
;; ;;
GPU) GPU)
echo "pipeline ignore GPU test" cd simple_web_service # pwd: /Serving/python/examples/pipeline/simple_web_service
sh get_data.sh
python web_service.py >/dev/null &
sleep 5
curl -X POST -k http://localhost:18080/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
# check http code
http_code=`curl -X POST -k -d '{"key":["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}' -s -w "%{http_code}" -o /dev/null http://localhost:18080/uci/prediction`
if [ ${http_code} -ne 200 ]; then
echo "HTTP status code -ne 200"
exit 1
fi
ps -ef | grep "web_service" | grep -v grep | awk '{print $2}' | xargs kill
ps -ef | grep "pipeline" | grep -v grep | awk '{print $2}' | xargs kill
kill_server_process
cd .. # pwd: /Serving/python/examples/pipeline
;; ;;
*) *)
echo "error type" echo "error type"
exit 1 exit 1
;; ;;
esac esac
cd ../../ cd ..
setproxy setproxy
unset SERVING_BIN unset SERVING_BIN
} }
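For reference, the full shape of the pipeline test `config.yml` after this change, pieced together from the hunks above; exact indentation and field order are assumptions, not the committed file:

```shell
# Sketch of the updated pipeline test config (rpc_port replaces port, worker_num raised to 4).
cat << EOF > config.yml
rpc_port: 18080
worker_num: 4
build_dag_each_worker: false
dag:
    is_thread_op: true
    client_type: brpc
    retry: 1
    use_profile: false
EOF
```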
...@@ -926,118 +963,8 @@ function monitor_test() { ...@@ -926,118 +963,8 @@ function monitor_test() {
mkdir _monitor_test && cd _monitor_test # pwd: /Serving/_monitor_test mkdir _monitor_test && cd _monitor_test # pwd: /Serving/_monitor_test
case $TYPE in case $TYPE in
CPU): CPU):
pip install pyftpdlib # The CPU part and GPU part are identical.
mkdir remote_path # In order to avoid Travis CI timeout (50 min), the CPU version is not checked
mkdir local_path
cd remote_path # pwd: /Serving/_monitor_test/remote_path
check_cmd "python -m pyftpdlib -p 8000 &>/dev/null &"
cd .. # pwd: /Serving/_monitor_test
# type: ftp
# remote_path: /
# remote_model_name: uci_housing.tar.gz
# local_tmp_path: ___tmp
# local_path: local_path
cd remote_path # pwd: /Serving/_monitor_test/remote_path
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
touch donefile
cd .. # pwd: /Serving/_monitor_test
mkdir -p local_path/uci_housing_model
python -m paddle_serving_server.monitor \
--type='ftp' --ftp_host='127.0.0.1' --ftp_port='8000' \
--remote_path='/' --remote_model_name='uci_housing.tar.gz' \
--remote_donefile_name='donefile' --local_path='local_path' \
--local_model_name='uci_housing_model' --local_timestamp_file='fluid_time_file' \
--local_tmp_path='___tmp' --unpacked_filename='uci_housing_model' \
--interval='1' >/dev/null &
sleep 10
if [ ! -f "local_path/uci_housing_model/fluid_time_file" ]; then
echo "local_path/uci_housing_model/fluid_time_file not exist."
exit 1
fi
ps -ef | grep "monitor" | grep -v grep | awk '{print $2}' | xargs kill
rm -rf remote_path/*
rm -rf local_path/*
# type: ftp
# remote_path: /tmp_dir
# remote_model_name: uci_housing_model
# local_tmp_path: ___tmp
# local_path: local_path
mkdir -p remote_path/tmp_dir && cd remote_path/tmp_dir # pwd: /Serving/_monitor_test/remote_path/tmp_dir
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
tar -xzf uci_housing.tar.gz
touch donefile
cd ../.. # pwd: /Serving/_monitor_test
mkdir -p local_path/uci_housing_model
python -m paddle_serving_server.monitor \
--type='ftp' --ftp_host='127.0.0.1' --ftp_port='8000' \
--remote_path='/tmp_dir' --remote_model_name='uci_housing_model' \
--remote_donefile_name='donefile' --local_path='local_path' \
--local_model_name='uci_housing_model' --local_timestamp_file='fluid_time_file' \
--local_tmp_path='___tmp' --interval='1' >/dev/null &
sleep 10
if [ ! -f "local_path/uci_housing_model/fluid_time_file" ]; then
echo "local_path/uci_housing_model/fluid_time_file not exist."
exit 1
fi
ps -ef | grep "monitor" | grep -v grep | awk '{print $2}' | xargs kill
rm -rf remote_path/*
rm -rf local_path/*
# type: general
# remote_path: /
# remote_model_name: uci_housing.tar.gz
# local_tmp_path: ___tmp
# local_path: local_path
cd remote_path # pwd: /Serving/_monitor_test/remote_path
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
touch donefile
cd .. # pwd: /Serving/_monitor_test
mkdir -p local_path/uci_housing_model
python -m paddle_serving_server.monitor \
--type='general' --general_host='ftp://127.0.0.1:8000' \
--remote_path='/' --remote_model_name='uci_housing.tar.gz' \
--remote_donefile_name='donefile' --local_path='local_path' \
--local_model_name='uci_housing_model' --local_timestamp_file='fluid_time_file' \
--local_tmp_path='___tmp' --unpacked_filename='uci_housing_model' \
--interval='1' >/dev/null &
sleep 10
if [ ! -f "local_path/uci_housing_model/fluid_time_file" ]; then
echo "local_path/uci_housing_model/fluid_time_file not exist."
exit 1
fi
ps -ef | grep "monitor" | grep -v grep | awk '{print $2}' | xargs kill
rm -rf remote_path/*
rm -rf local_path/*
# type: general
# remote_path: /tmp_dir
# remote_model_name: uci_housing_model
# local_tmp_path: ___tmp
# local_path: local_path
mkdir -p remote_path/tmp_dir && cd remote_path/tmp_dir # pwd: /Serving/_monitor_test/remote_path/tmp_dir
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
tar -xzf uci_housing.tar.gz
touch donefile
cd ../.. # pwd: /Serving/_monitor_test
mkdir -p local_path/uci_housing_model
python -m paddle_serving_server.monitor \
--type='general' --general_host='ftp://127.0.0.1:8000' \
--remote_path='/tmp_dir' --remote_model_name='uci_housing_model' \
--remote_donefile_name='donefile' --local_path='local_path' \
--local_model_name='uci_housing_model' --local_timestamp_file='fluid_time_file' \
--local_tmp_path='___tmp' --interval='1' >/dev/null &
sleep 10
if [ ! -f "local_path/uci_housing_model/fluid_time_file" ]; then
echo "local_path/uci_housing_model/fluid_time_file not exist."
exit 1
fi
ps -ef | grep "monitor" | grep -v grep | awk '{print $2}' | xargs kill
rm -rf remote_path/*
rm -rf local_path/*
ps -ef | grep "pyftpdlib" | grep -v grep | awk '{print $2}' | xargs kill
;; ;;
GPU): GPU):
pip install pyftpdlib pip install pyftpdlib
......