未验证 提交 f7f6dd0b 编写于 作者: J Jiawei Wang 提交者: GitHub

Merge branch 'develop' into dependabot/maven/java/examples/junit-junit-4.13.1

......@@ -54,6 +54,7 @@ option(SERVER "Compile Paddle Serving Server" OFF)
option(APP "Compile Paddle Serving App package" OFF)
option(WITH_ELASTIC_CTR "Compile ELASITC-CTR solution" OFF)
option(PACK "Compile for whl" OFF)
option(WITH_TRT "Compile Paddle Serving with TRT" OFF)
set(WITH_MKLML ${WITH_MKL})
if (NOT DEFINED WITH_MKLDNN)
......
......@@ -128,6 +128,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `mem_optim_off` | - | - | Disable memory / graphic memory optimization |
| `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |
Here, we use `curl` to send a HTTP POST request to the service we just started. Users can use any python library to send HTTP POST as well, e.g, [requests](https://requests.readthedocs.io/en/master/).
</center>
......
......@@ -124,6 +124,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
| `mem_optim_off` | - | - | Disable memory optimization |
| `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
| `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |
我们使用 `curl` 命令来发送HTTP POST请求给刚刚启动的服务。用户也可以调用python库来发送HTTP POST请求,请参考英文文档 [requests](https://requests.readthedocs.io/en/master/)。
</center>
......
......@@ -34,7 +34,11 @@ message( "WITH_GPU = ${WITH_GPU}")
SET(PADDLE_VERSION "1.8.4")
if (WITH_GPU)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
if (WITH_TRT)
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6")
else()
SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
endif()
else()
if (WITH_AVX)
if (WITH_MKLML)
......@@ -50,21 +54,38 @@ endif()
SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/fluid_inference.tgz")
MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}")
if (WITH_GPU OR WITH_MKLML)
ExternalProject_Add(
"extern_paddle"
${EXTERNAL_PROJECT_LOG_ARGS}
URL "${PADDLE_LIB_PATH}"
PREFIX "${PADDLE_SOURCES_DIR}"
DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so
)
if (WITH_TRT)
ExternalProject_Add(
"extern_paddle"
${EXTERNAL_PROJECT_LOG_ARGS}
URL "${PADDLE_LIB_PATH}"
PREFIX "${PADDLE_SOURCES_DIR}"
DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party
)
else()
ExternalProject_Add(
"extern_paddle"
${EXTERNAL_PROJECT_LOG_ARGS}
URL "${PADDLE_LIB_PATH}"
PREFIX "${PADDLE_SOURCES_DIR}"
DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
UPDATE_COMMAND ""
INSTALL_COMMAND
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so
)
endif()
else()
ExternalProject_Add(
"extern_paddle"
......@@ -92,8 +113,16 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
if (WITH_TRT)
ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer.so)
ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer_plugin.so)
endif()
ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)
......@@ -101,4 +130,9 @@ SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/thir
LIST(APPEND external_project_dependencies paddle)
LIST(APPEND paddle_depend_libs
xxhash)
xxhash)
if(WITH_TRT)
LIST(APPEND paddle_depend_libs
nvinfer nvinfer_plugin)
endif()
......@@ -44,6 +44,7 @@ message EngineDesc {
optional bool static_optimization = 14;
optional bool force_update_static_cache = 15;
optional bool enable_ir_optimization = 16;
optional bool use_trt = 17;
};
// model_toolkit conf
......
......@@ -12,8 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License
#execute_process(COMMAND go env -w GO111MODULE=off)
add_subdirectory(cube-server)
add_subdirectory(cube-api)
add_subdirectory(cube-builder)
add_subdirectory(cube-transfer)
add_subdirectory(cube-agent)
#add_subdirectory(cube-transfer)
#add_subdirectory(cube-agent)
......@@ -218,25 +218,15 @@ class PredictorClient {
int destroy_predictor();
int batch_predict(
const std::vector<std::vector<std::vector<float>>>& float_feed_batch,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int>>& float_shape,
const std::vector<std::vector<std::vector<int64_t>>>& int_feed_batch,
const std::vector<std::string>& int_feed_name,
const std::vector<std::vector<int>>& int_shape,
const std::vector<std::string>& fetch_name,
PredictorRes& predict_res_batch, // NOLINT
const int& pid,
const uint64_t log_id);
int numpy_predict(
const std::vector<std::vector<py::array_t<float>>>& float_feed_batch,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int>>& float_shape,
const std::vector<std::vector<int>>& float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>>& int_feed_batch,
const std::vector<std::string>& int_feed_name,
const std::vector<std::vector<int>>& int_shape,
const std::vector<std::vector<int>>& int_lod_slot_batch,
const std::vector<std::string>& fetch_name,
PredictorRes& predict_res_batch, // NOLINT
const int& pid,
......
......@@ -137,227 +137,15 @@ int PredictorClient::create_predictor() {
return 0;
}
int PredictorClient::batch_predict(
const std::vector<std::vector<std::vector<float>>> &float_feed_batch,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<std::vector<int64_t>>> &int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
predict_res_batch.clear();
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
int fetch_name_num = fetch_name.size();
_api.thrd_initialize();
std::string variant_tag;
_predictor = _api.fetch_predictor("general_model", &variant_tag);
predict_res_batch.set_variant_tag(variant_tag);
VLOG(2) << "fetch general model predictor done.";
VLOG(2) << "float feed name size: " << float_feed_name.size();
VLOG(2) << "int feed name size: " << int_feed_name.size();
VLOG(2) << "max body size : " << brpc::fLU64::FLAGS_max_body_size;
Request req;
req.set_log_id(log_id);
for (auto &name : fetch_name) {
req.add_fetch_var_names(name);
}
for (int bi = 0; bi < batch_size; bi++) {
VLOG(2) << "prepare batch " << bi;
std::vector<Tensor *> tensor_vec;
FeedInst *inst = req.add_insts();
std::vector<std::vector<float>> float_feed = float_feed_batch[bi];
std::vector<std::vector<int64_t>> int_feed = int_feed_batch[bi];
for (auto &name : float_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
for (auto &name : int_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
VLOG(2) << "batch [" << bi << "] int_feed_name and float_feed_name "
<< "prepared";
int vec_idx = 0;
VLOG(2) << "tensor_vec size " << tensor_vec.size() << " float shape "
<< float_shape.size();
for (auto &name : float_feed_name) {
int idx = _feed_name_to_idx[name];
Tensor *tensor = tensor_vec[idx];
VLOG(2) << "prepare float feed " << name << " shape size "
<< float_shape[vec_idx].size();
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
tensor->set_elem_type(1);
for (uint32_t j = 0; j < float_feed[vec_idx].size(); ++j) {
tensor->add_float_data(float_feed[vec_idx][j]);
}
vec_idx++;
}
VLOG(2) << "batch [" << bi << "] "
<< "float feed value prepared";
vec_idx = 0;
for (auto &name : int_feed_name) {
int idx = _feed_name_to_idx[name];
Tensor *tensor = tensor_vec[idx];
if (_type[idx] == 0) {
VLOG(2) << "prepare int64 feed " << name << " shape size "
<< int_shape[vec_idx].size();
VLOG(3) << "feed var name " << name << " index " << vec_idx
<< "first data " << int_feed[vec_idx][0];
for (uint32_t j = 0; j < int_feed[vec_idx].size(); ++j) {
tensor->add_int64_data(int_feed[vec_idx][j]);
}
} else if (_type[idx] == 2) {
VLOG(2) << "prepare int32 feed " << name << " shape size "
<< int_shape[vec_idx].size();
VLOG(3) << "feed var name " << name << " index " << vec_idx
<< "first data " << int32_t(int_feed[vec_idx][0]);
for (uint32_t j = 0; j < int_feed[vec_idx].size(); ++j) {
tensor->add_int_data(int32_t(int_feed[vec_idx][j]));
}
}
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
vec_idx++;
}
VLOG(2) << "batch [" << bi << "] "
<< "int feed value prepared";
}
int64_t preprocess_end = timeline.TimeStampUS();
int64_t client_infer_start = timeline.TimeStampUS();
Response res;
int64_t client_infer_end = 0;
int64_t postprocess_start = 0;
int64_t postprocess_end = 0;
if (FLAGS_profile_client) {
if (FLAGS_profile_server) {
req.set_profile_server(true);
}
}
res.Clear();
if (_predictor->inference(&req, &res) != 0) {
LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
_api.thrd_clear();
return -1;
} else {
client_infer_end = timeline.TimeStampUS();
postprocess_start = client_infer_end;
VLOG(2) << "get model output num";
uint32_t model_num = res.outputs_size();
VLOG(2) << "model num: " << model_num;
for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) {
VLOG(2) << "process model output index: " << m_idx;
auto output = res.outputs(m_idx);
ModelRes model;
model.set_engine_name(output.engine_name());
int idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
int shape_size = output.insts(0).tensor_array(idx).shape_size();
VLOG(2) << "fetch var " << name << " index " << idx << " shape size "
<< shape_size;
model._shape_map[name].resize(shape_size);
for (int i = 0; i < shape_size; ++i) {
model._shape_map[name][i] =
output.insts(0).tensor_array(idx).shape(i);
}
int lod_size = output.insts(0).tensor_array(idx).lod_size();
if (lod_size > 0) {
model._lod_map[name].resize(lod_size);
for (int i = 0; i < lod_size; ++i) {
model._lod_map[name][i] = output.insts(0).tensor_array(idx).lod(i);
}
}
idx += 1;
}
idx = 0;
for (auto &name : fetch_name) {
// int idx = _fetch_name_to_idx[name];
if (_fetch_name_to_type[name] == 0) {
VLOG(2) << "ferch var " << name << "type int64";
int size = output.insts(0).tensor_array(idx).int64_data_size();
model._int64_value_map[name] = std::vector<int64_t>(
output.insts(0).tensor_array(idx).int64_data().begin(),
output.insts(0).tensor_array(idx).int64_data().begin() + size);
} else if (_fetch_name_to_type[name] == 1) {
VLOG(2) << "fetch var " << name << "type float";
int size = output.insts(0).tensor_array(idx).float_data_size();
model._float_value_map[name] = std::vector<float>(
output.insts(0).tensor_array(idx).float_data().begin(),
output.insts(0).tensor_array(idx).float_data().begin() + size);
} else if (_fetch_name_to_type[name] == 2) {
VLOG(2) << "fetch var " << name << "type int32";
int size = output.insts(0).tensor_array(idx).int_data_size();
model._int32_value_map[name] = std::vector<int32_t>(
output.insts(0).tensor_array(idx).int_data().begin(),
output.insts(0).tensor_array(idx).int_data().begin() + size);
}
idx += 1;
}
predict_res_batch.add_model_res(std::move(model));
}
postprocess_end = timeline.TimeStampUS();
}
if (FLAGS_profile_client) {
std::ostringstream oss;
oss << "PROFILE\t"
<< "pid:" << pid << "\t"
<< "prepro_0:" << preprocess_start << " "
<< "prepro_1:" << preprocess_end << " "
<< "client_infer_0:" << client_infer_start << " "
<< "client_infer_1:" << client_infer_end << " ";
if (FLAGS_profile_server) {
int op_num = res.profile_time_size() / 2;
for (int i = 0; i < op_num; ++i) {
oss << "op" << i << "_0:" << res.profile_time(i * 2) << " ";
oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
}
}
oss << "postpro_0:" << postprocess_start << " ";
oss << "postpro_1:" << postprocess_end;
fprintf(stderr, "%s\n", oss.str().c_str());
}
_api.thrd_clear();
return 0;
}
int PredictorClient::numpy_predict(
const std::vector<std::vector<py::array_t<float>>> &float_feed_batch,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>> &int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
......@@ -412,6 +200,9 @@ int PredictorClient::numpy_predict(
for (uint32_t j = 0; j < float_shape[vec_idx].size(); ++j) {
tensor->add_shape(float_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < float_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(float_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(1);
const int float_shape_size = float_shape[vec_idx].size();
switch (float_shape_size) {
......@@ -470,6 +261,9 @@ int PredictorClient::numpy_predict(
for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) {
tensor->add_shape(int_shape[vec_idx][j]);
}
for (uint32_t j = 0; j < int_lod_slot_batch[vec_idx].size(); ++j) {
tensor->add_lod(int_lod_slot_batch[vec_idx][j]);
}
tensor->set_elem_type(_type[idx]);
if (_type[idx] == 0) {
......
......@@ -95,42 +95,18 @@ PYBIND11_MODULE(serving_client, m) {
[](PredictorClient &self) { self.create_predictor(); })
.def("destroy_predictor",
[](PredictorClient &self) { self.destroy_predictor(); })
.def("batch_predict",
[](PredictorClient &self,
const std::vector<std::vector<std::vector<float>>>
&float_feed_batch,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<std::vector<int64_t>>>
&int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
const uint64_t log_id) {
return self.batch_predict(float_feed_batch,
float_feed_name,
float_shape,
int_feed_batch,
int_feed_name,
int_shape,
fetch_name,
predict_res_batch,
pid,
log_id);
},
py::call_guard<py::gil_scoped_release>())
.def("numpy_predict",
[](PredictorClient &self,
const std::vector<std::vector<py::array_t<float>>>
&float_feed_batch,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int>> &float_shape,
const std::vector<std::vector<int>> &float_lod_slot_batch,
const std::vector<std::vector<py::array_t<int64_t>>>
&int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::vector<int>> &int_shape,
const std::vector<std::vector<int>> &int_lod_slot_batch,
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid,
......@@ -138,9 +114,11 @@ PYBIND11_MODULE(serving_client, m) {
return self.numpy_predict(float_feed_batch,
float_feed_name,
float_shape,
float_lod_slot_batch,
int_feed_batch,
int_feed_name,
int_shape,
int_lod_slot_batch,
fetch_name,
predict_res_batch,
pid,
......
......@@ -9,7 +9,7 @@ endif()
target_include_directories(serving PUBLIC
${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
)
include_directories(${CUDNN_ROOT}/include/)
if(WITH_GPU)
target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine
-Wl,--no-whole-archive)
......@@ -29,7 +29,11 @@ if(WITH_GPU)
endif()
if(WITH_MKL OR WITH_GPU)
if (WITH_TRT)
target_link_libraries(serving -liomp5 -lmklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
else()
target_link_libraries(serving -liomp5 -lmklml_intel -lmkldnn -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
endif()
else()
target_link_libraries(serving openblas -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
endif()
......
......@@ -73,8 +73,6 @@ int GeneralReaderOp::inference() {
// reade request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
uint64_t log_id = req->log_id();
int batch_size = req->insts_size();
int input_var_num = 0;
std::vector<int64_t> elem_type;
std::vector<int64_t> elem_size;
......@@ -83,7 +81,6 @@ int GeneralReaderOp::inference() {
GeneralBlob *res = mutable_data<GeneralBlob>();
TensorVector *out = &res->tensor_vector;
res->SetBatchSize(batch_size);
res->SetLogId(log_id);
if (!res) {
......@@ -98,11 +95,11 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id
<< ") start to call load general model_conf op";
baidu::paddle_serving::predictor::Resource &resource =
baidu::paddle_serving::predictor::Resource::instance();
VLOG(2) << "(logid=" << log_id << ") get resource pointer done.";
std::shared_ptr<PaddleGeneralModelConfig> model_config =
resource.get_general_model_config();
......@@ -122,13 +119,11 @@ int GeneralReaderOp::inference() {
elem_type.resize(var_num);
elem_size.resize(var_num);
capacity.resize(var_num);
// prepare basic information for input
for (int i = 0; i < var_num; ++i) {
paddle::PaddleTensor lod_tensor;
elem_type[i] = req->insts(0).tensor_array(i).elem_type();
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] has elem type: " << elem_type[i];
VLOG(2) << "var[" << i << "] has elem type: " << elem_type[i];
if (elem_type[i] == 0) { // int64
elem_size[i] = sizeof(int64_t);
lod_tensor.dtype = paddle::PaddleDType::INT64;
......@@ -139,13 +134,24 @@ int GeneralReaderOp::inference() {
elem_size[i] = sizeof(int32_t);
lod_tensor.dtype = paddle::PaddleDType::INT32;
}
if (model_config->_is_lod_feed[i]) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
// implement lod tensor here
if (req->insts(0).tensor_array(i).lod_size() > 0) {
VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor";
lod_tensor.lod.resize(1);
for (int k = 0; k < req->insts(0).tensor_array(i).lod_size(); ++k) {
lod_tensor.lod[0].push_back(req->insts(0).tensor_array(i).lod(k));
}
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
VLOG(2) << "(logid=" << log_id << ") shape for var[" << i
<< "]: " << dim;
capacity[i] *= dim;
lod_tensor.shape.push_back(dim);
}
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor, capacity: " << capacity[i];
} else {
lod_tensor.shape.push_back(batch_size);
capacity[i] = 1;
for (int k = 0; k < req->insts(0).tensor_array(i).shape_size(); ++k) {
int dim = req->insts(0).tensor_array(i).shape(k);
......@@ -160,51 +166,40 @@ int GeneralReaderOp::inference() {
lod_tensor.name = model_config->_feed_name[i];
out->push_back(lod_tensor);
}
// specify the memory needed for output tensor_vector
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
int tensor_size = 0;
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = 0;
if (tensor.int64_data_size() > 0) {
data_len = tensor.int64_data_size();
} else if (tensor.float_data_size() > 0) {
data_len = tensor.float_data_size();
} else if (tensor.int_data_size() > 0) {
data_len = tensor.int_data_size();
}
VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
<< "]: " << data_len;
tensor_size += data_len;
int cur_len = out->at(i).lod[0].back();
VLOG(2) << "(logid=" << log_id << ") current len: " << cur_len;
int sample_len = 0;
if (tensor.shape_size() == 1) {
sample_len = data_len;
} else {
sample_len = tensor.shape(0);
}
out->at(i).lod[0].push_back(cur_len + sample_len);
VLOG(2) << "(logid=" << log_id << ") new len: " << cur_len + sample_len;
}
out->at(i).data.Resize(tensor_size * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back()};
for (int j = 1; j < req->insts(0).tensor_array(i).shape_size(); ++j) {
out->at(i).shape.push_back(req->insts(0).tensor_array(i).shape(j));
const Tensor &tensor = req->insts(0).tensor_array(i);
int data_len = 0;
if (tensor.int64_data_size() > 0) {
data_len = tensor.int64_data_size();
} else if (tensor.float_data_size() > 0) {
data_len = tensor.float_data_size();
} else if (tensor.int_data_size() > 0) {
data_len = tensor.int_data_size();
}
if (out->at(i).shape.size() == 1) {
out->at(i).shape.push_back(1);
VLOG(2) << "(logid=" << log_id << ") tensor size for var[" << i
<< "]: " << data_len;
tensor_size += data_len;
int cur_len = out->at(i).lod[0].back();
VLOG(2) << "(logid=" << log_id << ") current len: " << cur_len;
int sample_len = 0;
if (tensor.shape_size() == 1) {
sample_len = data_len;
} else {
sample_len = tensor.shape(0);
}
VLOG(2) << "(logid=" << log_id << ") new len: " << cur_len + sample_len;
out->at(i).data.Resize(tensor_size * elem_size[i]);
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
out->at(i).data.Resize(batch_size * capacity[i] * elem_size[i]);
out->at(i).data.Resize(capacity[i] * elem_size[i]);
VLOG(2) << "(logid=" << log_id << ") var[" << i
<< "] is tensor and capacity=" << batch_size * capacity[i];
<< "] is tensor and capacity=" << capacity[i];
}
}
......@@ -215,58 +210,36 @@ int GeneralReaderOp::inference() {
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << req->insts(0).tensor_array(i).int64_data(0);
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
int elem_num = req->insts(j).tensor_array(i).int64_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).int64_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
int elem_num = req->insts(0).tensor_array(i).int64_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(0).tensor_array(i).int64_data(k);
}
} else if (elem_type[i] == 1) {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << req->insts(0).tensor_array(i).float_data(0);
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
int elem_num = req->insts(j).tensor_array(i).float_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).float_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
int elem_num = req->insts(0).tensor_array(i).float_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(0).tensor_array(i).float_data(k);
}
} else if (elem_type[i] == 2) {
int32_t *dst_ptr = static_cast<int32_t *>(out->at(i).data.data());
VLOG(2) << "(logid=" << log_id << ") first element data in var[" << i
<< "] is " << req->insts(0).tensor_array(i).int_data(0);
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
int elem_num = req->insts(j).tensor_array(i).int_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
} else {
offset += capacity[i];
}
int elem_num = req->insts(0).tensor_array(i).int_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] = req->insts(0).tensor_array(i).int_data(k);
}
}
}
VLOG(2) << "(logid=" << log_id << ") output size: " << out->size();
timeline.Pause();
int64_t end = timeline.TimeStampUS();
res->p_size = 0;
res->_batch_size = batch_size;
res->_batch_size = 1;
AddBlobInfo(res, start);
AddBlobInfo(res, end);
......
......@@ -13,7 +13,9 @@ set_source_files_properties(
PROPERTIES
COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure)
if (WITH_TRT)
add_definitions(-DWITH_TRT)
endif()
target_link_libraries(pdserving
brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
......
......@@ -38,6 +38,7 @@ class InferEngineCreationParams {
_enable_ir_optimization = false;
_static_optimization = false;
_force_update_static_cache = false;
_use_trt = false;
}
void set_path(const std::string& path) { _path = path; }
......@@ -50,12 +51,16 @@ class InferEngineCreationParams {
_enable_ir_optimization = enable_ir_optimization;
}
void set_use_trt(bool use_trt) { _use_trt = use_trt; }
bool enable_memory_optimization() const {
return _enable_memory_optimization;
}
bool enable_ir_optimization() const { return _enable_ir_optimization; }
bool use_trt() const { return _use_trt; }
void set_static_optimization(bool static_optimization = false) {
_static_optimization = static_optimization;
}
......@@ -86,6 +91,7 @@ class InferEngineCreationParams {
bool _enable_ir_optimization;
bool _static_optimization;
bool _force_update_static_cache;
bool _use_trt;
};
class InferEngine {
......@@ -172,6 +178,10 @@ class ReloadableInferEngine : public InferEngine {
force_update_static_cache);
}
if (conf.has_use_trt()) {
_infer_engine_params.set_use_trt(conf.use_trt());
}
if (!check_need_reload() || load(_infer_engine_params) != 0) {
LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
return -1;
......@@ -553,8 +563,12 @@ class CloneDBReloadableInferEngine
};
template <typename FluidFamilyCore>
#ifdef WITH_TRT
class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
#else
class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
public:
#endif
public: // NOLINT
FluidInferEngine() {}
~FluidInferEngine() {}
......
......@@ -51,8 +51,8 @@ class WeightedRandomRender : public EndpointRouterBase {
new (std::nothrow) Factory<WeightedRandomRender, EndpointRouterBase>();
if (factory == NULL) {
RAW_LOG(ERROR,
"Failed regist factory: WeightedRandomRender->EndpointRouterBase \
in macro!");
"Failed regist factory: WeightedRandomRender->EndpointRouterBase "
"in macro!");
return -1;
}
......@@ -63,8 +63,8 @@ class WeightedRandomRender : public EndpointRouterBase {
if (FactoryPool<EndpointRouterBase>::instance().register_factory(
"WeightedRandomRender", factory) != 0) {
RAW_LOG(INFO,
"Factory has been registed: \
WeightedRandomRender->EndpointRouterBase.");
"Factory has been registed: "
"WeightedRandomRender->EndpointRouterBase.");
}
return 0;
......
......@@ -75,10 +75,12 @@ export PATH=$PATH:$GOPATH/bin
## Get go packages
```shell
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
go get -u github.com/golang/protobuf/protoc-gen-go
go get -u google.golang.org/grpc
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0
```
......@@ -89,9 +91,9 @@ go get -u google.golang.org/grpc
``` shell
mkdir server-build-cpu && cd server-build-cpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DSERVER=ON ..
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DSERVER=ON ..
make -j10
```
......@@ -102,10 +104,28 @@ you can execute `make install` to put targets under directory `./output`, you ne
``` shell
mkdir server-build-gpu && cd server-build-gpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DSERVER=ON \
-DWITH_GPU=ON ..
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
-DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
-DSERVER=ON \
-DWITH_GPU=ON ..
make -j10
```
### Integrated TRT version paddle inference library
```
mkdir server-build-trt && cd server-build-trt
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
-DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
-DSERVER=ON \
-DWITH_GPU=ON \
-DWITH_TRT=ON ..
make -j10
```
......@@ -134,7 +154,10 @@ execute `make install` to put targets under directory `./output`
```bash
mkdir app-build && cd app-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DAPP=ON ..
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DAPP=ON ..
make
```
......@@ -165,7 +188,9 @@ Please use the example under `python/examples` to verify.
| WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF |
| WITH_MKL | Compile Paddle Serving with MKL support | OFF |
| WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
| CUDNN_ROOT | Define CuDNN library and header path | |
| CUDNN_LIBRARY | Define CuDNN library and header path | |
| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
| TENSORRT_ROOT | Define TensorRT PATH | |
| CLIENT | Compile Paddle Serving Client | OFF |
| SERVER | Compile Paddle Serving Server | OFF |
| APP | Compile Paddle Serving App package | OFF |
......@@ -180,7 +205,8 @@ To compile the Paddle Serving GPU version on bare metal, you need to install the
- CUDA
- CuDNN
- NCCL2
To compile the TensorRT version, you need to install the TensorRT library.
Note here:
......@@ -190,21 +216,12 @@ Note here:
The following is the base library version matching relationship used by the PaddlePaddle release version for reference:
| | CUDA | CuDNN | NCCL2 |
| :----: | :-----: | :----------------------: | :----: |
| CUDA 8 | 8.0.61 | CuDNN 7.1.2 for CUDA 8.0 | 2.1.4 |
| CUDA 9 | 9.0.176 | CuDNN 7.3.1 for CUDA 9.0 | 2.2.12 |
| | CUDA | CuDNN | TensorRT |
| :----: | :-----: | :----------------------: | :----: |
| post9 | 9.0 | CuDNN 7.3.1 for CUDA 9.0 | |
| post10 | 10.0 | CuDNN 7.5.1 for CUDA 10.0| |
| trt | 10.1 | CuDNN 7.5.1 for CUDA 10.1| 6.0.1.5 |
### How to make the compiler detect the CuDNN library
Download the corresponding CUDNN version from NVIDIA developer official website and decompressing it, add `-DCUDNN_ROOT` to cmake command, to specify the path of CUDNN.
### How to make the compiler detect the nccl library
After downloading the corresponding version of the nccl2 library from the NVIDIA developer official website and decompressing it, add the following environment variables (take nccl2.1.4 as an example):
```shell
export C_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$CPLUS_INCLUDE_PATH
export LD_LIBRARY_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/lib/:$LD_LIBRARY_PATH
```
......@@ -72,10 +72,12 @@ export PATH=$PATH:$GOPATH/bin
## 获取 Go packages
```shell
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
go get -u github.com/golang/protobuf/protoc-gen-go
go get -u google.golang.org/grpc
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0
```
......@@ -85,7 +87,10 @@ go get -u google.golang.org/grpc
``` shell
mkdir server-build-cpu && cd server-build-cpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DSERVER=ON ..
make -j10
```
......@@ -95,21 +100,44 @@ make -j10
``` shell
mkdir server-build-gpu && cd server-build-gpu
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
-DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
-DSERVER=ON \
-DWITH_GPU=ON ..
make -j10
```
执行`make install`可以把目标产出放在`./output`目录下。
### 集成TensorRT版本Paddle Inference Library
**注意:** 编译成功后,需要设置`SERVING_BIN`路径,详见后面的[注意事项](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项)
```
mkdir server-build-trt && cd server-build-trt
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
-DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
-DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
-DSERVER=ON \
-DWITH_GPU=ON \
-DWITH_TRT=ON ..
make -j10
```
执行`make install`可以把目标产出放在`./output`目录下。
**注意:** 编译成功后,需要设置`SERVING_BIN`路径,详见后面的[注意事项](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项)
## 编译Client部分
``` shell
mkdir client-build && cd client-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DCLIENT=ON ..
make -j10
```
......@@ -121,7 +149,11 @@ make -j10
```bash
mkdir app-build && cd app-build
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCMAKE_INSTALL_PREFIX=./output -DAPP=ON ..
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
-DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
-DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-DCMAKE_INSTALL_PREFIX=./output \
-DAPP=ON ..
make
```
......@@ -152,7 +184,10 @@ make
| WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF |
| WITH_MKL | Compile Paddle Serving with MKL support | OFF |
| WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
| CUDNN_ROOT | Define CuDNN library and header path | |
| WITH_TRT | Compile Paddle Serving with TensorRT | OFF |
| CUDNN_LIBRARY | Define CuDNN library and header path | |
| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
| TENSORRT_ROOT | Define TensorRT PATH | |
| CLIENT | Compile Paddle Serving Client | OFF |
| SERVER | Compile Paddle Serving Server | OFF |
| APP | Compile Paddle Serving App package | OFF |
......@@ -167,7 +202,8 @@ Paddle Serving通过PaddlePaddle预测库支持在GPU上做预测。WITH_GPU选
- CUDA
- CuDNN
- NCCL2
编译TensorRT版本,需要安装TensorRT库。
这里要注意的是:
......@@ -176,21 +212,12 @@ Paddle Serving通过PaddlePaddle预测库支持在GPU上做预测。WITH_GPU选
以下是PaddlePaddle发布版本所使用的基础库版本匹配关系,供参考:
| | CUDA | CuDNN | NCCL2 |
| :----: | :-----: | :----------------------: | :----: |
| CUDA 8 | 8.0.61 | CuDNN 7.1.2 for CUDA 8.0 | 2.1.4 |
| CUDA 9 | 9.0.176 | CuDNN 7.3.1 for CUDA 9.0 | 2.2.12 |
| | CUDA | CuDNN | TensorRT |
| :----: | :-----: | :----------------------: | :----: |
| post9 | 9.0 | CuDNN 7.3.1 for CUDA 9.0 | |
| post10 | 10.0 | CuDNN 7.5.1 for CUDA 10.0| |
| trt | 10.1 | CuDNN 7.5.1 for CUDA 10.1| 6.0.1.5 |
### 如何让Paddle Serving编译系统探测到CuDNN库
从NVIDIA developer官网下载对应版本CuDNN并在本地解压后,在cmake编译命令中增加`-DCUDNN_ROOT`参数,指定CuDNN库所在路径。
### 如何让Paddle Serving编译系统探测到nccl库
从NVIDIA developer官网下载对应版本nccl2库并解压后,增加如下环境变量 (以nccl2.1.4为例):
```shell
export C_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$CPLUS_INCLUDE_PATH
export LD_LIBRARY_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/lib/:$LD_LIBRARY_PATH
```
从NVIDIA developer官网下载对应版本CuDNN并在本地解压后,在cmake编译命令中增加`-DCUDNN_LIBRARY`参数,指定CuDNN库所在路径。
# FAQ
- Q: 如何调整RPC服务的等待时间,避免超时?
A: 使用set_rpc_timeout_ms设置更长的等待时间,单位为毫秒,默认时间为20秒。
示例:
```
from paddle_serving_client import Client
client = Client()
client.load_client_config(sys.argv[1])
client.set_rpc_timeout_ms(100000)
client.connect(["127.0.0.1:9393"])
```
## 基础知识
- Q: 如何使用自己编译的Paddle Serving进行预测
#### Q: Paddle Serving 、Paddle Inference、PaddleHub Serving三者的区别及联系
A: 通过pip命令安装自己编译出的whl包,并设置SERVING_BIN环境变量为编译出的serving二进制文件路径
**A:** paddle serving是远程服务,即发起预测的设备(手机、浏览器、客户端等)与实际预测的硬件不在一起。 paddle inference是一个library,适合嵌入到一个大系统中保证预测效率,paddle serving调用了paddle inference做远程服务。paddlehub serving可以认为是一个示例,都会使用paddle serving作为统一预测服务入口。如果在web端交互,一般是调用远程服务的形式,可以使用paddle serving的web service搭建
- Q: 执行GPU预测时遇到InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
#### Q: paddle-serving是否支持Int32支持
A: 将显卡驱动对应的libcuda.so的目录添加到LD_LIBRARY_PATH环境变量中
**A:** 在protobuf定feed_type和fetch_type编号与数据类型对应如下
- Q: 执行GPU预测时遇到ExternalError: Cudnn error, CUDNN_STATUS_BAD_PARAM at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/operators/batch_norm_op.cu:198)
​ 0-int64
A: 将cudnn的lib64路径添加到LD_LIBRARY_PATH,安装自pypi的Paddle Serving中post9版使用的是cudnn 7.3,post10使用的是cudnn 7.5。如果是使用自己编译的Paddle Serving,可以在log/serving.INFO日志文件中查看对应的cudnn版本。
​ 1-float32
- Q: 执行GPU预测时遇到Error: Failed to find dynamic library: libcublas.so
​ 2-int32
A: 将cuda的lib64路径添加到LD_LIBRARY_PATH, post9版本的Paddle Serving使用的是cuda 9.0,post10版本使用的cuda 10.0。
#### Q: paddle-serving是否支持windows和Linux环境下的多线程调用
- Q: 部署和预测中的日志信息在哪里查看?
**A:** 客户端可以发起多线程访问调用服务端
- A: server端的日志分为两部分,一部分打印到标准输出,一部分打印到启动服务时的目录下的log/serving.INFO文件中。
#### Q: paddle-serving如何修改消息大小限制
client端的日志直接打印到标准输出。
**A:** 在server端和client但通过FLAGS_max_body_size来扩大数据量限制,单位为字节,默认为64MB
通过在部署服务之前 'export GLOG_v=3'可以输出更为详细的日志信息。
#### Q: paddle-serving客户端目前支持哪些语言
**A:** java c++ python
#### Q: paddle-serving目前支持哪些协议
**A:** http rpc
## 编译问题
#### Q: 如何使用自己编译的Paddle Serving进行预测?
**A:** 通过pip命令安装自己编译出的whl包,并设置SERVING_BIN环境变量为编译出的serving二进制文件路径。
#### Q: 使用Java客户端,mvn compile过程出现"No compiler is provided in this environment. Perhaps you are running on a JRE rather than a JDK?"错误
**A:** 没有安装JDK,或者JAVA_HOME路径配置错误(正确配置是JDK路径,常见错误配置成JRE路径,例如正确路径参考JAVA_HOME="/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.262.b10-0.el7_8.x86_64/")。Java JDK安装参考https://segmentfault.com/a/1190000015389941
## 部署问题
#### Q: GPU环境运行Serving报错,GPU count is: 0。
```
terminate called after throwing an instance of 'paddle::platform::EnforceNotMet'
what():
--------------------------------------------
C++ Call Stacks (More useful to developers):
--------------------------------------------
0 std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int)
1 paddle::platform::SetDeviceId(int)
2 paddle::AnalysisConfig::fraction_of_gpu_memory_for_pool() const
3 std::unique_ptr<paddle::PaddlePredictor, std::default_delete<paddle::PaddlePredictor> > paddle::CreatePaddlePredictor<paddle::AnalysisConfig, (paddle::PaddleEngineKind)2>(paddle::AnalysisConfig const&)
4 std::unique_ptr<paddle::PaddlePredictor, std::default_delete<paddle::PaddlePredictor> > paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(paddle::AnalysisConfig const&)
----------------------
Error Message Summary:
----------------------
InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
[Hint: Expected id < GetCUDADeviceCount(), but received id:0 >= GetCUDADeviceCount():0.] at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/platform/gpu_info.cc:211)
```
**A:** libcuda.so没有链接成功。首先在机器上找到libcuda.so,ldd检查libnvidia版本与nvidia-smi中版本一致(libnvidia-fatbinaryloader.so.418.39,与NVIDIA-SMI 418.39 Driver Version: 418.39),然后用export导出libcuda.so的路径即可(例如libcuda.so在/usr/lib64/,export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib64/)
#### Q: 遇到 GPU not found, please check your environment or use cpu version by "pip install paddle_serving_server"
**A:** 检查环境中是否有N卡:ls /dev/ | grep nvidia
#### Q: 目前Paddle Serving支持哪些镜像环境?
**A:** 目前(0.4.0)仅支持CentOS,具体列表查阅[这里](https://github.com/PaddlePaddle/Serving/blob/develop/doc/DOCKER_IMAGES.md)
#### Q: python编译的GCC版本与serving的版本不匹配
**A:**:1)使用[GPU docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md#gpunvidia-docker)解决环境问题
​ 2)修改anaconda的虚拟环境下安装的python的gcc版本[参考](https://www.jianshu.com/p/c498b3d86f77)
#### Q: paddle-serving是否支持本地离线安装
**A:** 支持离线部署,需要把一些相关的[依赖包](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE.md)提前准备安装好
## 预测问题
#### Q: 使用GPU第一次预测时特别慢,如何调整RPC服务的等待时间避免超时?
**A:** GPU第一次预测需要初始化。使用set_rpc_timeout_ms设置更长的等待时间,单位为毫秒,默认时间为20秒。
示例:
```
from paddle_serving_client import Client
client = Client()
client.load_client_config(sys.argv[1])
client.set_rpc_timeout_ms(100000)
client.connect(["127.0.0.1:9393"])
```
#### Q: 执行GPU预测时遇到InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
**A:** 将显卡驱动对应的libcuda.so的目录添加到LD_LIBRARY_PATH环境变量中
#### Q: 执行GPU预测时遇到ExternalError: Cudnn error, CUDNN_STATUS_BAD_PARAM at (../batch_norm_op.cu:198)
**A:** 将cudnn的lib64路径添加到LD_LIBRARY_PATH,安装自pypi的Paddle Serving中post9版使用的是cudnn 7.3,post10使用的是cudnn 7.5。如果是使用自己编译的Paddle Serving,可以在log/serving.INFO日志文件中查看对应的cudnn版本。
#### Q: 执行GPU预测时遇到Error: Failed to find dynamic library: libcublas.so
**A:** 将cuda的lib64路径添加到LD_LIBRARY_PATH, post9版本的Paddle Serving使用的是cuda 9.0,post10版本使用的cuda 10.0。
#### Q: Client端fetch的变量名如何设置
**A:** 可以查看配置文件serving_server_conf.prototxt,获取需要的变量名
#### Q: 如何使用多语言客户端
**A:** 多语言客户端要与多语言服务端配套使用。当前版本下(0.4.0),服务端需要将Server改为MultiLangServer(如果是以命令行启动的话只需要添加--use_multilang参数),Python客户端需要将Client改为MultiLangClient,同时去除load_client_config的过程。[Java客户端参考文档](https://github.com/PaddlePaddle/Serving/blob/develop/doc/JAVA_SDK_CN.md)
#### Q: 如何在Windows下使用Paddle Serving
**A:** 当前版本(0.4.0)在Windows上可以运行多语言RPC客户端,或使用HTTP方式访问。如果使用多语言RPC客户端,需要在Linux环境(比如本机容器,或远程Linux机器)中运行多语言服务端;如果使用HTTP方式,需要在Linux环境中运行普通服务端
#### Q: libnvinfer.so: cannot open shared object file: No such file or directory)
**A:** 参考该文档安装TensorRT: https://blog.csdn.net/hesongzefairy/article/details/105343525
## 日志排查
#### Q: 部署和预测中的日志信息在哪里查看?
**A:** server端的日志分为两部分,一部分打印到标准输出,一部分打印到启动服务时的目录下的log/serving.INFO文件中。
client端的日志直接打印到标准输出。
通过在部署服务之前 'export GLOG_v=3'可以输出更为详细的日志信息。
#### Q: paddle-serving启动成功后,相关的日志在哪里设置
**A:** 1)警告是glog组件打印的,告知glog初始化之前日志打印在STDERR
​ 2)一般采用GLOG_v方式启动服务同时设置日志级别。
例如:
```
GLOG_v=2 python -m paddle_serving_server.serve --model xxx_conf/ --port 9999
```
#### Q: (GLOG_v=2下)Server端日志一切正常,但Client端始终得不到正确的预测结果
**A:** 可能是配置文件有问题,检查下配置文件(is_load_tensor,fetch_type等有没有问题)
#### Q: 如何给Server传递Logid
**A:** Logid默认为0(后续应该有自动生成Logid的计划,当前版本0.4.0),Client端通过在predict函数中指定log_id参数传递
## 性能优化
# gRPC接口
# gRPC接口使用介绍
- [1.与bRPC接口对比](#1与brpc接口对比)
- [1.1 服务端对比](#11-服务端对比)
- [1.2 客服端对比](#12-客服端对比)
- [1.3 其他](#13-其他)
- [2.示例:线性回归预测服务](#2示例线性回归预测服务)
- [获取数据](#获取数据)
- [开启 gRPC 服务端](#开启-grpc-服务端)
- [客户端预测](#客户端预测)
- [同步预测](#同步预测)
- [异步预测](#异步预测)
- [Batch 预测](#batch-预测)
- [通用 pb 预测](#通用-pb-预测)
- [预测超时](#预测超时)
- [List 输入](#list-输入)
- [3.更多示例](#3更多示例)
使用gRPC接口,Client端可以在Win/Linux/MacOS平台上调用不同语言。gRPC 接口实现结构如下:
![](https://github.com/PaddlePaddle/Serving/blob/develop/doc/grpc_impl.png)
## 1.与bRPC接口对比
#### 1.1 服务端对比
* gRPC Server 端 `load_model_config` 函数添加 `client_config_path` 参数:
gRPC 接口实现形式类似 Web Service:
![](grpc_impl.png)
## 与bRPC接口对比
1. gRPC Server 端 `load_model_config` 函数添加 `client_config_path` 参数:
```python
```
def load_model_config(self, server_config_paths, client_config_path=None)
```
在一些例子中 bRPC Server 端与 bRPC Client 端的配置文件可能不同(如 在cube local 中,Client 端的数据先交给 cube,经过 cube 处理后再交给预测库),此时 gRPC Server 端需要手动设置 gRPC Client 端的配置`client_config_path`
**`client_config_path` 默认为 `<server_config_path>/serving_server_conf.prototxt`。**
在一些例子中 bRPC Server 端与 bRPC Client 端的配置文件可能是不同的(如 cube local 例子中,Client 端的数据先交给 cube,经过 cube 处理后再交给预测库),所以 gRPC Server 端需要获取 gRPC Client 端的配置;同时为了取消 gRPC Client 端手动加载配置文件的过程,所以设计 gRPC Server 端同时加载两个配置文件。`client_config_path` 默认为 `<server_config_path>/serving_server_conf.prototxt`
#### 1.2 客服端对比
2. gRPC Client 端取消 `load_client_config` 步骤:
* gRPC Client 端取消 `load_client_config` 步骤:
`connect` 步骤通过 RPC 获取相应的 prototxt(从任意一个 endpoint 获取即可)。
3. gRPC Client 需要通过 RPC 方式设置 timeout 时间(调用形式与 bRPC Client保持一致)
* gRPC Client 需要通过 RPC 方式设置 timeout 时间(调用形式与 bRPC Client保持一致)
因为 bRPC Client 在 `connect` 后无法更改 timeout 时间,所以当 gRPC Server 收到变更 timeout 的调用请求时会重新创建 bRPC Client 实例以变更 bRPC Client timeout时间,同时 gRPC Client 会设置 gRPC 的 deadline 时间。
**注意,设置 timeout 接口和 Inference 接口不能同时调用(非线程安全),出于性能考虑暂时不加锁。**
4. gRPC Client 端 `predict` 函数添加 `asyn``is_python` 参数:
* gRPC Client 端 `predict` 函数添加 `asyn``is_python` 参数:
```python
```
def predict(self, feed, fetch, need_variant_tag=False, asyn=False, is_python=True)
```
其中,`asyn` 为异步调用选项。当 `asyn=True` 时为异步调用,返回 `MultiLangPredictFuture` 对象,通过 `MultiLangPredictFuture.result()` 阻塞获取预测值;当 `asyn=Fasle` 为同步调用。
1. `asyn` 为异步调用选项。当 `asyn=True` 时为异步调用,返回 `MultiLangPredictFuture` 对象,通过 `MultiLangPredictFuture.result()` 阻塞获取预测值;当 `asyn=Fasle` 为同步调用。
2. `is_python` 为 proto 格式选项。当 `is_python=True` 时,基于 numpy bytes 格式进行数据传输,目前只适用于 Python;当 `is_python=False` 时,以普通数据格式传输,更加通用。使用 numpy bytes 格式传输耗时比普通数据格式小很多(详见 [#654](https://github.com/PaddlePaddle/Serving/pull/654))。
#### 1.3 其他
* 异常处理:当 gRPC Server 端的 bRPC Client 预测失败(返回 `None`)时,gRPC Client 端同样返回None。其他 gRPC 异常会在 Client 内部捕获,并在返回的 fetch_map 中添加一个 "status_code" 字段来区分是否预测正常(参考 timeout 样例)。
* 由于 gRPC 只支持 pick_first 和 round_robin 负载均衡策略,ABTEST 特性还未打齐。
* 系统兼容性:
* [x] CentOS
* [x] macOS
* [x] Windows
* 已经支持的客户端语言:
- Python
- Java
- Go
## 2.示例:线性回归预测服务
以下是采用gRPC实现的关于线性回归预测的一个示例,具体代码详见此[链接](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/grpc_impl_example/fit_a_line)
#### 获取数据
```shell
sh get_data.sh
```
#### 开启 gRPC 服务端
``` shell
python test_server.py uci_housing_model/
```
也可以通过下面的一行代码开启默认 gRPC 服务:
```shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang
```
注:--use_multilang参数用来启用多语言客户端
### 客户端预测
#### 同步预测
``` shell
python test_sync_client.py
```
#### 异步预测
``` shell
python test_asyn_client.py
```
#### Batch 预测
``` shell
python test_batch_client.py
```
`is_python` 为 proto 格式选项。当 `is_python=True` 时,基于 numpy bytes 格式进行数据传输,目前只适用于 Python;当 `is_python=False` 时,以普通数据格式传输,更加通用。使用 numpy bytes 格式传输耗时比普通数据格式小很多(详见 [#654](https://github.com/PaddlePaddle/Serving/pull/654))。
#### 通用 pb 预测
5. 异常处理:当 gRPC Server 端的 bRPC Client 预测失败(返回 `None`)时,gRPC Client 端同样返回None。其他 gRPC 异常会在 Client 内部捕获,并在返回的 fetch_map 中添加一个 "status_code" 字段来区分是否预测正常(参考 timeout 样例)。
``` shell
python test_general_pb_client.py
```
6. 由于 gRPC 只支持 pick_first 和 round_robin 负载均衡策略,ABTEST 特性还未打齐。
#### 预测超时
7. 经测试,gRPC 版本可以在 Windows、macOS 平台使用。
``` shell
python test_timeout_client.py
```
8. 计划支持的客户端语言:
#### List 输入
- [x] Python
- [ ] Java
- [ ] Go
- [ ] JavaScript
``` shell
python test_list_input_client.py
```
## Python 端的一些例子
## 3.更多示例
详见 `python/examples/grpc_impl_example` 下的示例文件。
详见[`python/examples/grpc_impl_example`](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/grpc_impl_example)下的示例文件。
......@@ -24,13 +24,13 @@ inference_model_dir = "your_inference_model"
serving_client_dir = "serving_client_dir"
serving_server_dir = "serving_server_dir"
feed_var_names, fetch_var_names = inference_model_to_serving(
inference_model_dir, serving_client_dir, serving_server_dir)
inference_model_dir, serving_server_dir, serving_client_dir)
```
if your model file and params file are both standalone, please use the following api.
```
feed_var_names, fetch_var_names = inference_model_to_serving(
inference_model_dir, serving_client_dir, serving_server_dir,
inference_model_dir, serving_server_dir, serving_client_dir,
model_filename="model", params_filename="params")
```
......@@ -23,11 +23,11 @@ inference_model_dir = "your_inference_model"
serving_client_dir = "serving_client_dir"
serving_server_dir = "serving_server_dir"
feed_var_names, fetch_var_names = inference_model_to_serving(
inference_model_dir, serving_client_dir, serving_server_dir)
inference_model_dir, serving_server_dir, serving_client_dir)
```
如果模型中有模型描述文件`model_filename` 和 模型参数文件`params_filename`,那么请用
```
feed_var_names, fetch_var_names = inference_model_to_serving(
inference_model_dir, serving_client_dir, serving_server_dir,
inference_model_dir, serving_server_dir, serving_client_dir,
model_filename="model", params_filename="params")
```
......@@ -18,6 +18,8 @@ https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py2-none-an
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py3-none-any.whl
#cuda 10.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py3-none-any.whl
#cuda10.1 with TensorRT 6
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py3-none-any.whl
```
### Python 2
```
......@@ -25,6 +27,8 @@ https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py2-none-any.whl
#cuda 10.0
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py2-none-any.whl
##cuda10.1 with TensorRT 6
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py2-none-any.whl
```
## Client
......
## Java Demo
### Install package
```
mvn compile
mvn install
cd examples
mvn compile
mvn install
```
### Start Server
take the fit_a_line demo as example
```
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #CPU
python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #GPU
```
### Client Predict
```
java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample fit_a_line
```
The Java example also contains the prediction client of Bert, Model_enaemble, asyn_predict, batch_predict, Cube_local, Cube_quant, and Yolov4 models.
## Java 示例
### 安装客户端依赖
```
mvn compile
mvn install
cd examples
mvn compile
mvn install
```
### 启动服务端
以fit_a_line模型为例
```
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #CPU
python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #GPU
```
### 客户端预测
```
java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample fit_a_line
```
java示例中还包含了bert、model_enaemble、asyn_predict、batch_predict、cube_local、cube_quant、yolov4模型的预测客户端。
......@@ -23,7 +23,6 @@
#include "core/configure/inferencer_configure.pb.h"
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
//#include "predictor/framework/infer.h"
namespace baidu {
namespace paddle_serving {
......
......@@ -2,6 +2,7 @@ FILE(GLOB fluid_gpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
add_library(fluid_gpu_engine ${fluid_gpu_engine_srcs})
target_include_directories(fluid_gpu_engine PUBLIC
${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
add_dependencies(fluid_gpu_engine pdserving extern_paddle configure)
target_link_libraries(fluid_gpu_engine pdserving paddle_fluid iomp5 mklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
......
......@@ -190,7 +190,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
paddle::AnalysisConfig analysis_config;
analysis_config.SetModel(data_path);
analysis_config.EnableUseGpu(100, FLAGS_gpuid);
analysis_config.EnableUseGpu(1500, FLAGS_gpuid);
analysis_config.SwitchSpecifyInputNames(true);
analysis_config.SetCpuMathLibraryNumThreads(1);
......@@ -198,12 +198,68 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
analysis_config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
#if 0 // todo: support flexible shape
int min_seq_len = 1;
int max_seq_len = 512;
int opt_seq_len = 128;
int head_number = 12;
int batch = 50;
std::vector<int> min_in_shape = {batch, min_seq_len, 1};
std::vector<int> max_in_shape = {batch, max_seq_len, 1};
std::vector<int> opt_in_shape = {batch, opt_seq_len, 1};
std::string input1_name = "src_text_a_ids";
std::string input2_name = "pos_text_a_ids";
std::string input3_name = "sent_text_a_ids";
std::string input4_name = "stack_0.tmp_0";
std::map<std::string, std::vector<int>> min_input_shape = {
{input1_name, min_in_shape},
{input2_name, min_in_shape},
{input3_name, min_in_shape},
{input4_name, {batch, head_number, min_seq_len, min_seq_len}},
};
std::map<std::string, std::vector<int>> max_input_shape = {
{input1_name, max_in_shape},
{input2_name, max_in_shape},
{input3_name, max_in_shape},
{input4_name, {batch, head_number, max_seq_len, max_seq_len}},
};
std::map<std::string, std::vector<int>> opt_input_shape = {
{input1_name, opt_in_shape},
{input2_name, opt_in_shape},
{input3_name, opt_in_shape},
{input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
};
analysis_config.SetTRTDynamicShapeInfo(
min_input_shape, max_input_shape, opt_input_shape);
#endif
int max_batch = 32;
int min_subgraph_size = 3;
if (params.use_trt()) {
analysis_config.EnableTensorRtEngine(
1 << 20,
max_batch,
min_subgraph_size,
paddle::AnalysisConfig::Precision::kFloat32,
false,
false);
LOG(INFO) << "create TensorRT predictor";
} else {
analysis_config.SwitchIrOptim(false);
}
if (params.enable_memory_optimization()) {
analysis_config.EnableMemoryOptim();
}
if (params.enable_ir_optimization()) {
analysis_config.SwitchIrOptim(true);
} else {
analysis_config.SwitchIrOptim(false);
}
}
AutoLock lock(GlobalPaddleCreateMutex::instance());
_core =
paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......
......@@ -80,6 +80,16 @@ if (SERVER)
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
elseif(WITH_TRT)
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
COMMAND cp -r
${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
"server_gpu" trt
COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
else()
add_custom_command(
OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
......
......@@ -18,16 +18,20 @@ import sys
from paddle_serving_client import Client
from paddle_serving_client.utils import benchmark_args
from paddle_serving_app.reader import ChineseBertReader
import numpy as np
args = benchmark_args()
reader = ChineseBertReader({"max_seq_len": 128})
fetch = ["pooled_output"]
endpoint_list = ["127.0.0.1:9292"]
endpoint_list = ['127.0.0.1:9292']
client = Client()
client.load_client_config(args.model)
client.connect(endpoint_list)
for line in sys.stdin:
feed_dict = reader.process(line)
result = client.predict(feed=feed_dict, fetch=fetch)
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape((128, 1))
#print(feed_dict)
result = client.predict(feed=feed_dict, fetch=fetch, batch=False)
print(result)
......@@ -13,10 +13,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.web_service import WebService
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
import numpy as np
class BertService(WebService):
......@@ -27,18 +28,21 @@ class BertService(WebService):
})
def preprocess(self, feed=[], fetch=[]):
feed_res = [
self.reader.process(ins["words"].encode("utf-8")) for ins in feed
]
return feed_res, fetch
feed_res = []
is_batch = False
for ins in feed:
feed_dict = self.reader.process(ins["words"].encode("utf-8"))
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape(
(len(feed_dict[key]), 1))
feed_res.append(feed_dict)
return feed_res, fetch, is_batch
bert_service = BertService(name="bert")
bert_service.load()
bert_service.load_model_config(sys.argv[1])
gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
bert_service.set_gpus(gpu_ids)
bert_service.prepare_server(
workdir="workdir", port=int(sys.argv[2]), device="gpu")
workdir="workdir", port=int(sys.argv[2]), device="cpu")
bert_service.run_rpc_service()
bert_service.run_web_service()
......@@ -15,6 +15,7 @@
from paddle_serving_client import Client
from paddle_serving_app.reader import ChineseBertReader
import sys
import numpy as np
client = Client()
client.load_client_config("./bert_seq32_client/serving_client_conf.prototxt")
......@@ -28,12 +29,21 @@ expected_shape = {
"pooled_output": (4, 768)
}
batch_size = 4
feed_batch = []
feed_batch = {}
batch_len = 0
for line in sys.stdin:
feed = reader.process(line)
if batch_len == 0:
for key in feed.keys():
val_len = len(feed[key])
feed_batch[key] = np.array(feed[key]).reshape((1, val_len, 1))
continue
if len(feed_batch) < batch_size:
feed_batch.append(feed)
for key in feed.keys():
np.concatenate([
feed_batch[key], np.array(feed[key]).reshape((1, val_len, 1))
])
else:
fetch_map = client.predict(feed=feed_batch, fetch=fetch)
feed_batch = []
......
......@@ -20,7 +20,7 @@ import os
import time
import criteo_reader as criteo
from paddle_serving_client.metric import auc
import numpy as np
import sys
py_version = sys.version_info[0]
......@@ -49,7 +49,8 @@ for ei in range(1000):
data = reader().__next__()
feed_dict = {}
for i in range(1, 27):
feed_dict["sparse_{}".format(i - 1)] = data[0][i]
feed_dict["sparse_{}".format(i - 1)] = np.array(data[0][i]).reshape(-1)
feed_dict["sparse_{}.lod".format(i - 1)] = [0, len(data[0][i])]
fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
end = time.time()
print(end - start)
......@@ -19,6 +19,7 @@ import os
import criteo as criteo
import time
from paddle_serving_client.metric import auc
import numpy as np
py_version = sys.version_info[0]
......@@ -41,10 +42,15 @@ for ei in range(10000):
else:
data = reader().__next__()
feed_dict = {}
feed_dict['dense_input'] = data[0][0]
feed_dict['dense_input'] = np.array(data[0][0]).astype("float32").reshape(
1, 13)
feed_dict['dense_input.lod'] = [0, 1]
for i in range(1, 27):
feed_dict["embedding_{}.tmp_0".format(i - 1)] = data[0][i]
fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
tmp_data = np.array(data[0][i]).astype(np.int64)
feed_dict["embedding_{}.tmp_0".format(i - 1)] = tmp_data.reshape(
(1, len(data[0][i])))
feed_dict["embedding_{}.tmp_0.lod".format(i - 1)] = [0, 1]
fetch_map = client.predict(feed=feed_dict, fetch=["prob"], batch=True)
prob_list.append(fetch_map['prob'][0][1])
label_list.append(data[0][-1][0])
......
......@@ -36,6 +36,7 @@ fetch_map = client.predict(
"im_info": np.array(list(im.shape[1:]) + [1.0]),
"im_shape": np.array(list(im.shape[1:]) + [1.0])
},
fetch=["multiclass_nms"])
fetch=["multiclass_nms"],
batch=False)
fetch_map["image"] = sys.argv[3]
postprocess(fetch_map)
......@@ -27,5 +27,10 @@ test_reader = paddle.batch(
batch_size=1)
for data in test_reader():
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
import numpy as np
new_data = np.zeros((1, 1, 13)).astype("float32")
new_data[0] = data[0][0]
fetch_map = client.predict(
feed={"x": new_data}, fetch=["price"], batch=True)
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
print(fetch_map)
......@@ -15,6 +15,7 @@
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
import paddle
import numpy as np
def single_func(idx, resource):
......@@ -26,6 +27,7 @@ def single_func(idx, resource):
0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584,
0.6283, 0.4919, 0.1856, 0.0795, -0.0332
]
x = np.array(x)
for i in range(1000):
fetch_map = client.predict(feed={"x": x}, fetch=["price"])
if fetch_map is None:
......
......@@ -38,7 +38,8 @@ start = time.time()
image_file = "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg"
for i in range(10):
img = seq(image_file)
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
fetch_map = client.predict(
feed={"image": img}, fetch=["score"], batch=False)
prob = max(fetch_map["score"][0])
label = label_dict[fetch_map["score"][0].tolist().index(prob)].strip(
).replace(",", "")
......
......@@ -13,6 +13,7 @@
# limitations under the License.
import sys
from paddle_serving_client import Client
import numpy as np
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
if len(sys.argv) != 4:
......@@ -43,12 +44,13 @@ class ImageService(WebService):
def preprocess(self, feed=[], fetch=[]):
feed_batch = []
is_batch = True
for ins in feed:
if "image" not in ins:
raise ("feed data error!")
img = self.seq(ins["image"])
feed_batch.append({"image": img})
return feed_batch, fetch
feed_batch.append({"image": img[np.newaxis, :]})
return feed_batch, fetch, is_batch
def postprocess(self, feed=[], fetch=[], fetch_map={}):
score_list = fetch_map["score"]
......
......@@ -17,7 +17,8 @@ import os
import sys
import time
import requests
from paddle_serving_app.reader import IMDBDataset
import numpy as np
from paddle_serving_app.reader.imdb_reader import IMDBDataset
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import MultiThreadRunner, benchmark_args, show_latency
......@@ -47,11 +48,17 @@ def single_func(idx, resource):
for i in range(1000):
if args.batch_size >= 1:
feed_batch = []
feed = {"words": [], "words.lod": [0]}
for bi in range(args.batch_size):
word_ids, label = imdb_dataset.get_words_and_label(dataset[
bi])
feed_batch.append({"words": word_ids})
result = client.predict(feed=feed_batch, fetch=["prediction"])
feed["words.lod"].append(feed["words.lod"][-1] + len(
word_ids))
feed["words"].extend(word_ids)
feed["words"] = np.array(feed["words"]).reshape(
len(feed["words"]), 1)
result = client.predict(
feed=feed, fetch=["prediction"], batch=True)
if result is None:
raise ("predict failed.")
else:
......
......@@ -13,8 +13,9 @@
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
from paddle_serving_app.reader import IMDBDataset
from paddle_serving_app.reader.imdb_reader import IMDBDataset
import sys
import numpy as np
client = Client()
client.load_client_config(sys.argv[1])
......@@ -28,7 +29,12 @@ imdb_dataset.load_resource(sys.argv[2])
for line in sys.stdin:
word_ids, label = imdb_dataset.get_words_and_label(line)
feed = {"words": word_ids}
word_len = len(word_ids)
feed = {
"words": np.array(word_ids).reshape(word_len, 1),
"words.lod": [0, word_len]
}
#print(feed)
fetch = ["prediction"]
fetch_map = client.predict(feed=feed, fetch=fetch)
fetch_map = client.predict(feed=feed, fetch=fetch, batch=True)
print("{} {}".format(fetch_map["prediction"][0], label[0]))
......@@ -14,8 +14,9 @@
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import IMDBDataset
from paddle_serving_app.reader.imdb_reader import IMDBDataset
import sys
import numpy as np
class IMDBService(WebService):
......@@ -26,10 +27,16 @@ class IMDBService(WebService):
self.dataset.load_resource(args["dict_file_path"])
def preprocess(self, feed={}, fetch=[]):
res_feed = [{
"words": self.dataset.get_words_only(ins["words"])
} for ins in feed]
return res_feed, fetch
feed_batch = []
words_lod = [0]
is_batch = True
for ins in feed:
words = self.dataset.get_words_only(ins["words"])
words = np.array(words).reshape(len(words), 1)
words_lod.append(words_lod[-1] + len(words))
feed_batch.append(words)
feed = {"words": np.concatenate(feed_batch), "words.lod": words_lod}
return feed, fetch, is_batch
imdb_service = IMDBService(name="imdb")
......
......@@ -19,6 +19,7 @@ from paddle_serving_app.reader import LACReader
import sys
import os
import io
import numpy as np
client = Client()
client.load_client_config(sys.argv[1])
......@@ -31,7 +32,17 @@ for line in sys.stdin:
feed_data = reader.process(line)
if len(feed_data) <= 0:
continue
fetch_map = client.predict(feed={"words": feed_data}, fetch=["crf_decode"])
print(feed_data)
#fetch_map = client.predict(feed={"words": np.array(feed_data).reshape(len(feed_data), 1), "words.lod": [0, len(feed_data)]}, fetch=["crf_decode"], batch=True)
fetch_map = client.predict(
feed={
"words": np.array(feed_data + feed_data).reshape(
len(feed_data) * 2, 1),
"words.lod": [0, len(feed_data), 2 * len(feed_data)]
},
fetch=["crf_decode"],
batch=True)
print(fetch_map)
begin = fetch_map['crf_decode.lod'][0]
end = fetch_map['crf_decode.lod'][1]
segs = reader.parse_result(line, fetch_map["crf_decode"][begin:end])
......
......@@ -15,6 +15,7 @@
from paddle_serving_server.web_service import WebService
import sys
from paddle_serving_app.reader import LACReader
import numpy as np
class LACService(WebService):
......@@ -23,13 +24,21 @@ class LACService(WebService):
def preprocess(self, feed={}, fetch=[]):
feed_batch = []
fetch = ["crf_decode"]
lod_info = [0]
is_batch = True
for ins in feed:
if "words" not in ins:
raise ("feed data error!")
feed_data = self.reader.process(ins["words"])
feed_batch.append({"words": feed_data})
fetch = ["crf_decode"]
return feed_batch, fetch
feed_batch.append(np.array(feed_data).reshape(len(feed_data), 1))
lod_info.append(lod_info[-1] + len(feed_data))
feed_dict = {
"words": np.concatenate(
feed_batch, axis=0),
"words.lod": lod_info
}
return feed_dict, fetch, is_batch
def postprocess(self, feed={}, fetch=[], fetch_map={}):
batch_ret = []
......
......@@ -34,9 +34,9 @@ python ocr_web_server.py gpu
```
python ocr_web_client.py
```
If you want a faster web service, please try Web Debugger Service
If you want a faster web service, please try Web LocalPredictor Service
## Web Debugger Service
## Web LocalPredictor Service
```
#choose one of cpu/gpu commands as following
#for cpu user
......@@ -45,7 +45,7 @@ python ocr_debugger_server.py cpu
python ocr_debugger_server.py gpu
```
## Web Debugger Client Prediction
## Web LocalPredictor Client Prediction
```
python ocr_web_client.py
```
......@@ -61,7 +61,7 @@ Dataset: RCTW 500 sample images
| engine | client read image(ms) | client-server tras time(ms) | server read image(ms) | det pre(ms) | det infer(ms) | det post(ms) | rec pre(ms) | rec infer(ms) | rec post(ms) | server-client trans time(ms) | server side time consumption(ms) | server side overhead(ms) | total time(ms) |
|------------------------------|----------------|----------------------------|------------------|--------------------|------------------|--------------------|--------------------|------------------|--------------------|--------------------------|--------------------|--------------|---------------|
| Serving web service | 8.69 | 13.41 | 109.97 | 2.82 | 87.76 | 4.29 | 3.98 | 78.51 | 3.66 | 4.12 | 181.02 | 136.49 | 317.51 |
| Serving Debugger web service | 8.73 | 16.42 | 115.27 | 2.93 | 20.63 | 3.97 | 4.48 | 13.84 | 3.60 | 6.91 | 49.45 | 147.33 | 196.78 |
| Serving LocalPredictor web service | 8.73 | 16.42 | 115.27 | 2.93 | 20.63 | 3.97 | 4.48 | 13.84 | 3.60 | 6.91 | 49.45 | 147.33 | 196.78 |
## Appendix: For Users who want to launch Det or Rec only
if you are going to detect images not recognize it or directly recognize the words from images. We also provide Det and Rec server for you.
......
......@@ -34,8 +34,8 @@ python ocr_web_server.py gpu
python ocr_web_client.py
```
如果用户需要更快的执行速度,请尝试Debugger版Web服务
## 启动Debugger版Web服务
如果用户需要更快的执行速度,请尝试LocalPredictor版Web服务
## 启动LocalPredictor版Web服务
```
#根据CPU/GPU设备选择一种启动方式
#for cpu user
......@@ -60,7 +60,7 @@ GPU: Nvidia Tesla V100单卡
| engine | 客户端读图(ms) | 客户端发送请求到服务端(ms) | 服务端读图(ms) | 检测预处理耗时(ms) | 检测模型耗时(ms) | 检测后处理耗时(ms) | 识别预处理耗时(ms) | 识别模型耗时(ms) | 识别后处理耗时(ms) | 服务端回传客户端时间(ms) | 服务端整体耗时(ms) | 空跑耗时(ms) | 整体耗时(ms) |
|------------------------------|----------------|----------------------------|------------------|--------------------|------------------|--------------------|--------------------|------------------|--------------------|--------------------------|--------------------|--------------|---------------|
| Serving web service | 8.69 | 13.41 | 109.97 | 2.82 | 87.76 | 4.29 | 3.98 | 78.51 | 3.66 | 4.12 | 181.02 | 136.49 | 317.51 |
| Serving Debugger web service | 8.73 | 16.42 | 115.27 | 2.93 | 20.63 | 3.97 | 4.48 | 13.84 | 3.60 | 6.91 | 49.45 | 147.33 | 196.78 |
| Serving LocalPredictor web service | 8.73 | 16.42 | 115.27 | 2.93 | 20.63 | 3.97 | 4.48 | 13.84 | 3.60 | 6.91 | 49.45 | 147.33 | 196.78 |
## 附录: 检测/识别单服务启动
......
......@@ -26,7 +26,7 @@ if sys.argv[1] == 'gpu':
from paddle_serving_server_gpu.web_service import WebService
elif sys.argv[1] == 'cpu':
from paddle_serving_server.web_service import WebService
from paddle_serving_app.local_predict import Debugger
from paddle_serving_app.local_predict import LocalPredictor
import time
import re
import base64
......@@ -39,7 +39,7 @@ class OCRService(WebService):
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.det_client = Debugger()
self.det_client = LocalPredictor()
if sys.argv[1] == 'gpu':
self.det_client.load_model_config(
det_model_config, gpu=True, profile=False)
......
# Imagenet Pipeline WebService
This document will takes Imagenet service as an example to introduce how to use Pipeline WebService.
## Get model
```
sh get_model.sh
```
## Start server
```
python resnet50_web_service.py &>log.txt &
```
## RPC test
```
python pipeline_rpc_client.py
```
# Imagenet Pipeline WebService
这里以 Uci 服务为例来介绍 Pipeline WebService 的使用。
## 获取模型
```
sh get_data.sh
```
## 启动服务
```
python web_service.py &>log.txt &
```
## 测试
```
curl -X POST -k http://localhost:18082/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
```
#worker_num, 最大并发数。当build_dag_each_worker=True时, 框架会创建worker_num个进程,每个进程内构建grpcSever和DAG
##当build_dag_each_worker=False时,框架会设置主线程grpc线程池的max_workers=worker_num
worker_num: 1
#http端口, rpc_port和http_port不允许同时为空。当rpc_port可用且http_port为空时,不自动生成http_port
http_port: 18082
rpc_port: 9999
dag:
#op资源类型, True, 为线程模型;False,为进程模型
is_thread_op: False
op:
imagenet:
#当op配置没有server_endpoints时,从local_service_conf读取本地服务配置
local_service_conf:
#并发数,is_thread_op=True时,为线程并发;否则为进程并发
concurrency: 2
#uci模型路径
model_config: ResNet50_vd_model
#计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡
devices: "0" # "0,1"
#client类型,包括brpc, grpc和local_predictor.local_predictor不启动Serving服务,进程内预测
client_type: local_predictor
#Fetch结果列表,以client_config中fetch_var的alias_name为准
fetch_list: ["score"]
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/ResNet50_vd.tar.gz
tar -xzvf ResNet50_vd.tar.gz
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imagenet-example/image_data.tar.gz
tar -xzvf image_data.tar.gz
tench, Tinca tinca,
goldfish, Carassius auratus,
great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias,
tiger shark, Galeocerdo cuvieri,
hammerhead, hammerhead shark,
electric ray, crampfish, numbfish, torpedo,
stingray,
cock,
hen,
ostrich, Struthio camelus,
brambling, Fringilla montifringilla,
goldfinch, Carduelis carduelis,
house finch, linnet, Carpodacus mexicanus,
junco, snowbird,
indigo bunting, indigo finch, indigo bird, Passerina cyanea,
robin, American robin, Turdus migratorius,
bulbul,
jay,
magpie,
chickadee,
water ouzel, dipper,
kite,
bald eagle, American eagle, Haliaeetus leucocephalus,
vulture,
great grey owl, great gray owl, Strix nebulosa,
European fire salamander, Salamandra salamandra,
common newt, Triturus vulgaris,
eft,
spotted salamander, Ambystoma maculatum,
axolotl, mud puppy, Ambystoma mexicanum,
bullfrog, Rana catesbeiana,
tree frog, tree-frog,
tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui,
loggerhead, loggerhead turtle, Caretta caretta,
leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea,
mud turtle,
terrapin,
box turtle, box tortoise,
banded gecko,
common iguana, iguana, Iguana iguana,
American chameleon, anole, Anolis carolinensis,
whiptail, whiptail lizard,
agama,
frilled lizard, Chlamydosaurus kingi,
alligator lizard,
Gila monster, Heloderma suspectum,
green lizard, Lacerta viridis,
African chameleon, Chamaeleo chamaeleon,
Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis,
African crocodile, Nile crocodile, Crocodylus niloticus,
American alligator, Alligator mississipiensis,
triceratops,
thunder snake, worm snake, Carphophis amoenus,
ringneck snake, ring-necked snake, ring snake,
hognose snake, puff adder, sand viper,
green snake, grass snake,
king snake, kingsnake,
garter snake, grass snake,
water snake,
vine snake,
night snake, Hypsiglena torquata,
boa constrictor, Constrictor constrictor,
rock python, rock snake, Python sebae,
Indian cobra, Naja naja,
green mamba,
sea snake,
horned viper, cerastes, sand viper, horned asp, Cerastes cornutus,
diamondback, diamondback rattlesnake, Crotalus adamanteus,
sidewinder, horned rattlesnake, Crotalus cerastes,
trilobite,
harvestman, daddy longlegs, Phalangium opilio,
scorpion,
black and gold garden spider, Argiope aurantia,
barn spider, Araneus cavaticus,
garden spider, Aranea diademata,
black widow, Latrodectus mactans,
tarantula,
wolf spider, hunting spider,
tick,
centipede,
black grouse,
ptarmigan,
ruffed grouse, partridge, Bonasa umbellus,
prairie chicken, prairie grouse, prairie fowl,
peacock,
quail,
partridge,
African grey, African gray, Psittacus erithacus,
macaw,
sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita,
lorikeet,
coucal,
bee eater,
hornbill,
hummingbird,
jacamar,
toucan,
drake,
red-breasted merganser, Mergus serrator,
goose,
black swan, Cygnus atratus,
tusker,
echidna, spiny anteater, anteater,
platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus,
wallaby, brush kangaroo,
koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus,
wombat,
jellyfish,
sea anemone, anemone,
brain coral,
flatworm, platyhelminth,
nematode, nematode worm, roundworm,
conch,
snail,
slug,
sea slug, nudibranch,
chiton, coat-of-mail shell, sea cradle, polyplacophore,
chambered nautilus, pearly nautilus, nautilus,
Dungeness crab, Cancer magister,
rock crab, Cancer irroratus,
fiddler crab,
king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica,
American lobster, Northern lobster, Maine lobster, Homarus americanus,
spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish,
crayfish, crawfish, crawdad, crawdaddy,
hermit crab,
isopod,
white stork, Ciconia ciconia,
black stork, Ciconia nigra,
spoonbill,
flamingo,
little blue heron, Egretta caerulea,
American egret, great white heron, Egretta albus,
bittern,
crane,
limpkin, Aramus pictus,
European gallinule, Porphyrio porphyrio,
American coot, marsh hen, mud hen, water hen, Fulica americana,
bustard,
ruddy turnstone, Arenaria interpres,
red-backed sandpiper, dunlin, Erolia alpina,
redshank, Tringa totanus,
dowitcher,
oystercatcher, oyster catcher,
pelican,
king penguin, Aptenodytes patagonica,
albatross, mollymawk,
grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus,
killer whale, killer, orca, grampus, sea wolf, Orcinus orca,
dugong, Dugong dugon,
sea lion,
Chihuahua,
Japanese spaniel,
Maltese dog, Maltese terrier, Maltese,
Pekinese, Pekingese, Peke,
Shih-Tzu,
Blenheim spaniel,
papillon,
toy terrier,
Rhodesian ridgeback,
Afghan hound, Afghan,
basset, basset hound,
beagle,
bloodhound, sleuthhound,
bluetick,
black-and-tan coonhound,
Walker hound, Walker foxhound,
English foxhound,
redbone,
borzoi, Russian wolfhound,
Irish wolfhound,
Italian greyhound,
whippet,
Ibizan hound, Ibizan Podenco,
Norwegian elkhound, elkhound,
otterhound, otter hound,
Saluki, gazelle hound,
Scottish deerhound, deerhound,
Weimaraner,
Staffordshire bullterrier, Staffordshire bull terrier,
American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier,
Bedlington terrier,
Border terrier,
Kerry blue terrier,
Irish terrier,
Norfolk terrier,
Norwich terrier,
Yorkshire terrier,
wire-haired fox terrier,
Lakeland terrier,
Sealyham terrier, Sealyham,
Airedale, Airedale terrier,
cairn, cairn terrier,
Australian terrier,
Dandie Dinmont, Dandie Dinmont terrier,
Boston bull, Boston terrier,
miniature schnauzer,
giant schnauzer,
standard schnauzer,
Scotch terrier, Scottish terrier, Scottie,
Tibetan terrier, chrysanthemum dog,
silky terrier, Sydney silky,
soft-coated wheaten terrier,
West Highland white terrier,
Lhasa, Lhasa apso,
flat-coated retriever,
curly-coated retriever,
golden retriever,
Labrador retriever,
Chesapeake Bay retriever,
German short-haired pointer,
vizsla, Hungarian pointer,
English setter,
Irish setter, red setter,
Gordon setter,
Brittany spaniel,
clumber, clumber spaniel,
English springer, English springer spaniel,
Welsh springer spaniel,
cocker spaniel, English cocker spaniel, cocker,
Sussex spaniel,
Irish water spaniel,
kuvasz,
schipperke,
groenendael,
malinois,
briard,
kelpie,
komondor,
Old English sheepdog, bobtail,
Shetland sheepdog, Shetland sheep dog, Shetland,
collie,
Border collie,
Bouvier des Flandres, Bouviers des Flandres,
Rottweiler,
German shepherd, German shepherd dog, German police dog, alsatian,
Doberman, Doberman pinscher,
miniature pinscher,
Greater Swiss Mountain dog,
Bernese mountain dog,
Appenzeller,
EntleBucher,
boxer,
bull mastiff,
Tibetan mastiff,
French bulldog,
Great Dane,
Saint Bernard, St Bernard,
Eskimo dog, husky,
malamute, malemute, Alaskan malamute,
Siberian husky,
dalmatian, coach dog, carriage dog,
affenpinscher, monkey pinscher, monkey dog,
basenji,
pug, pug-dog,
Leonberg,
Newfoundland, Newfoundland dog,
Great Pyrenees,
Samoyed, Samoyede,
Pomeranian,
chow, chow chow,
keeshond,
Brabancon griffon,
Pembroke, Pembroke Welsh corgi,
Cardigan, Cardigan Welsh corgi,
toy poodle,
miniature poodle,
standard poodle,
Mexican hairless,
timber wolf, grey wolf, gray wolf, Canis lupus,
white wolf, Arctic wolf, Canis lupus tundrarum,
red wolf, maned wolf, Canis rufus, Canis niger,
coyote, prairie wolf, brush wolf, Canis latrans,
dingo, warrigal, warragal, Canis dingo,
dhole, Cuon alpinus,
African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus,
hyena, hyaena,
red fox, Vulpes vulpes,
kit fox, Vulpes macrotis,
Arctic fox, white fox, Alopex lagopus,
grey fox, gray fox, Urocyon cinereoargenteus,
tabby, tabby cat,
tiger cat,
Persian cat,
Siamese cat, Siamese,
Egyptian cat,
cougar, puma, catamount, mountain lion, painter, panther, Felis concolor,
lynx, catamount,
leopard, Panthera pardus,
snow leopard, ounce, Panthera uncia,
jaguar, panther, Panthera onca, Felis onca,
lion, king of beasts, Panthera leo,
tiger, Panthera tigris,
cheetah, chetah, Acinonyx jubatus,
brown bear, bruin, Ursus arctos,
American black bear, black bear, Ursus americanus, Euarctos americanus,
ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus,
sloth bear, Melursus ursinus, Ursus ursinus,
mongoose,
meerkat, mierkat,
tiger beetle,
ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle,
ground beetle, carabid beetle,
long-horned beetle, longicorn, longicorn beetle,
leaf beetle, chrysomelid,
dung beetle,
rhinoceros beetle,
weevil,
fly,
bee,
ant, emmet, pismire,
grasshopper, hopper,
cricket,
walking stick, walkingstick, stick insect,
cockroach, roach,
mantis, mantid,
cicada, cicala,
leafhopper,
lacewing, lacewing fly,
"dragonfly, darning needle, devils darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
damselfly,
admiral,
ringlet, ringlet butterfly,
monarch, monarch butterfly, milkweed butterfly, Danaus plexippus,
cabbage butterfly,
sulphur butterfly, sulfur butterfly,
lycaenid, lycaenid butterfly,
starfish, sea star,
sea urchin,
sea cucumber, holothurian,
wood rabbit, cottontail, cottontail rabbit,
hare,
Angora, Angora rabbit,
hamster,
porcupine, hedgehog,
fox squirrel, eastern fox squirrel, Sciurus niger,
marmot,
beaver,
guinea pig, Cavia cobaya,
sorrel,
zebra,
hog, pig, grunter, squealer, Sus scrofa,
wild boar, boar, Sus scrofa,
warthog,
hippopotamus, hippo, river horse, Hippopotamus amphibius,
ox,
water buffalo, water ox, Asiatic buffalo, Bubalus bubalis,
bison,
ram, tup,
bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis,
ibex, Capra ibex,
hartebeest,
impala, Aepyceros melampus,
gazelle,
Arabian camel, dromedary, Camelus dromedarius,
llama,
weasel,
mink,
polecat, fitch, foulmart, foumart, Mustela putorius,
black-footed ferret, ferret, Mustela nigripes,
otter,
skunk, polecat, wood pussy,
badger,
armadillo,
three-toed sloth, ai, Bradypus tridactylus,
orangutan, orang, orangutang, Pongo pygmaeus,
gorilla, Gorilla gorilla,
chimpanzee, chimp, Pan troglodytes,
gibbon, Hylobates lar,
siamang, Hylobates syndactylus, Symphalangus syndactylus,
guenon, guenon monkey,
patas, hussar monkey, Erythrocebus patas,
baboon,
macaque,
langur,
colobus, colobus monkey,
proboscis monkey, Nasalis larvatus,
marmoset,
capuchin, ringtail, Cebus capucinus,
howler monkey, howler,
titi, titi monkey,
spider monkey, Ateles geoffroyi,
squirrel monkey, Saimiri sciureus,
Madagascar cat, ring-tailed lemur, Lemur catta,
indri, indris, Indri indri, Indri brevicaudatus,
Indian elephant, Elephas maximus,
African elephant, Loxodonta africana,
lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens,
giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca,
barracouta, snoek,
eel,
coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch,
rock beauty, Holocanthus tricolor,
anemone fish,
sturgeon,
gar, garfish, garpike, billfish, Lepisosteus osseus,
lionfish,
puffer, pufferfish, blowfish, globefish,
abacus,
abaya,
"academic gown, academic robe, judges robe",
accordion, piano accordion, squeeze box,
acoustic guitar,
aircraft carrier, carrier, flattop, attack aircraft carrier,
airliner,
airship, dirigible,
altar,
ambulance,
amphibian, amphibious vehicle,
analog clock,
apiary, bee house,
apron,
ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin,
assault rifle, assault gun,
backpack, back pack, knapsack, packsack, rucksack, haversack,
bakery, bakeshop, bakehouse,
balance beam, beam,
balloon,
ballpoint, ballpoint pen, ballpen, Biro,
Band Aid,
banjo,
bannister, banister, balustrade, balusters, handrail,
barbell,
barber chair,
barbershop,
barn,
barometer,
barrel, cask,
barrow, garden cart, lawn cart, wheelbarrow,
baseball,
basketball,
bassinet,
bassoon,
bathing cap, swimming cap,
bath towel,
bathtub, bathing tub, bath, tub,
beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon,
beacon, lighthouse, beacon light, pharos,
beaker,
bearskin, busby, shako,
beer bottle,
beer glass,
bell cote, bell cot,
bib,
bicycle-built-for-two, tandem bicycle, tandem,
bikini, two-piece,
binder, ring-binder,
binoculars, field glasses, opera glasses,
birdhouse,
boathouse,
bobsled, bobsleigh, bob,
bolo tie, bolo, bola tie, bola,
bonnet, poke bonnet,
bookcase,
bookshop, bookstore, bookstall,
bottlecap,
bow,
bow tie, bow-tie, bowtie,
brass, memorial tablet, plaque,
brassiere, bra, bandeau,
breakwater, groin, groyne, mole, bulwark, seawall, jetty,
breastplate, aegis, egis,
broom,
bucket, pail,
buckle,
bulletproof vest,
bullet train, bullet,
butcher shop, meat market,
cab, hack, taxi, taxicab,
caldron, cauldron,
candle, taper, wax light,
cannon,
canoe,
can opener, tin opener,
cardigan,
car mirror,
carousel, carrousel, merry-go-round, roundabout, whirligig,
"carpenters kit, tool kit",
carton,
car wheel,
cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM,
cassette,
cassette player,
castle,
catamaran,
CD player,
cello, violoncello,
cellular telephone, cellular phone, cellphone, cell, mobile phone,
chain,
chainlink fence,
chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour,
chain saw, chainsaw,
chest,
chiffonier, commode,
chime, bell, gong,
china cabinet, china closet,
Christmas stocking,
church, church building,
cinema, movie theater, movie theatre, movie house, picture palace,
cleaver, meat cleaver, chopper,
cliff dwelling,
cloak,
clog, geta, patten, sabot,
cocktail shaker,
coffee mug,
coffeepot,
coil, spiral, volute, whorl, helix,
combination lock,
computer keyboard, keypad,
confectionery, confectionary, candy store,
container ship, containership, container vessel,
convertible,
corkscrew, bottle screw,
cornet, horn, trumpet, trump,
cowboy boot,
cowboy hat, ten-gallon hat,
cradle,
crane,
crash helmet,
crate,
crib, cot,
Crock Pot,
croquet ball,
crutch,
cuirass,
dam, dike, dyke,
desk,
desktop computer,
dial telephone, dial phone,
diaper, nappy, napkin,
digital clock,
digital watch,
dining table, board,
dishrag, dishcloth,
dishwasher, dish washer, dishwashing machine,
disk brake, disc brake,
dock, dockage, docking facility,
dogsled, dog sled, dog sleigh,
dome,
doormat, welcome mat,
drilling platform, offshore rig,
drum, membranophone, tympan,
drumstick,
dumbbell,
Dutch oven,
electric fan, blower,
electric guitar,
electric locomotive,
entertainment center,
envelope,
espresso maker,
face powder,
feather boa, boa,
file, file cabinet, filing cabinet,
fireboat,
fire engine, fire truck,
fire screen, fireguard,
flagpole, flagstaff,
flute, transverse flute,
folding chair,
football helmet,
forklift,
fountain,
fountain pen,
four-poster,
freight car,
French horn, horn,
frying pan, frypan, skillet,
fur coat,
garbage truck, dustcart,
gasmask, respirator, gas helmet,
gas pump, gasoline pump, petrol pump, island dispenser,
goblet,
go-kart,
golf ball,
golfcart, golf cart,
gondola,
gong, tam-tam,
gown,
grand piano, grand,
greenhouse, nursery, glasshouse,
grille, radiator grille,
grocery store, grocery, food market, market,
guillotine,
hair slide,
hair spray,
half track,
hammer,
hamper,
hand blower, blow dryer, blow drier, hair dryer, hair drier,
hand-held computer, hand-held microcomputer,
handkerchief, hankie, hanky, hankey,
hard disc, hard disk, fixed disk,
harmonica, mouth organ, harp, mouth harp,
harp,
harvester, reaper,
hatchet,
holster,
home theater, home theatre,
honeycomb,
hook, claw,
hoopskirt, crinoline,
horizontal bar, high bar,
horse cart, horse-cart,
hourglass,
iPod,
iron, smoothing iron,
"jack-o-lantern",
jean, blue jean, denim,
jeep, landrover,
jersey, T-shirt, tee shirt,
jigsaw puzzle,
jinrikisha, ricksha, rickshaw,
joystick,
kimono,
knee pad,
knot,
lab coat, laboratory coat,
ladle,
lampshade, lamp shade,
laptop, laptop computer,
lawn mower, mower,
lens cap, lens cover,
letter opener, paper knife, paperknife,
library,
lifeboat,
lighter, light, igniter, ignitor,
limousine, limo,
liner, ocean liner,
lipstick, lip rouge,
Loafer,
lotion,
loudspeaker, speaker, speaker unit, loudspeaker system, speaker system,
"loupe, jewelers loupe",
lumbermill, sawmill,
magnetic compass,
mailbag, postbag,
mailbox, letter box,
maillot,
maillot, tank suit,
manhole cover,
maraca,
marimba, xylophone,
mask,
matchstick,
maypole,
maze, labyrinth,
measuring cup,
medicine chest, medicine cabinet,
megalith, megalithic structure,
microphone, mike,
microwave, microwave oven,
military uniform,
milk can,
minibus,
miniskirt, mini,
minivan,
missile,
mitten,
mixing bowl,
mobile home, manufactured home,
Model T,
modem,
monastery,
monitor,
moped,
mortar,
mortarboard,
mosque,
mosquito net,
motor scooter, scooter,
mountain bike, all-terrain bike, off-roader,
mountain tent,
mouse, computer mouse,
mousetrap,
moving van,
muzzle,
nail,
neck brace,
necklace,
nipple,
notebook, notebook computer,
obelisk,
oboe, hautboy, hautbois,
ocarina, sweet potato,
odometer, hodometer, mileometer, milometer,
oil filter,
organ, pipe organ,
oscilloscope, scope, cathode-ray oscilloscope, CRO,
overskirt,
oxcart,
oxygen mask,
packet,
paddle, boat paddle,
paddlewheel, paddle wheel,
padlock,
paintbrush,
"pajama, pyjama, pjs, jammies",
palace,
panpipe, pandean pipe, syrinx,
paper towel,
parachute, chute,
parallel bars, bars,
park bench,
parking meter,
passenger car, coach, carriage,
patio, terrace,
pay-phone, pay-station,
pedestal, plinth, footstall,
pencil box, pencil case,
pencil sharpener,
perfume, essence,
Petri dish,
photocopier,
pick, plectrum, plectron,
pickelhaube,
picket fence, paling,
pickup, pickup truck,
pier,
piggy bank, penny bank,
pill bottle,
pillow,
ping-pong ball,
pinwheel,
pirate, pirate ship,
pitcher, ewer,
"plane, carpenters plane, woodworking plane",
planetarium,
plastic bag,
plate rack,
plow, plough,
"plunger, plumbers helper",
Polaroid camera, Polaroid Land camera,
pole,
police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria,
poncho,
pool table, billiard table, snooker table,
pop bottle, soda bottle,
pot, flowerpot,
"potters wheel",
power drill,
prayer rug, prayer mat,
printer,
prison, prison house,
projectile, missile,
projector,
puck, hockey puck,
punching bag, punch bag, punching ball, punchball,
purse,
quill, quill pen,
quilt, comforter, comfort, puff,
racer, race car, racing car,
racket, racquet,
radiator,
radio, wireless,
radio telescope, radio reflector,
rain barrel,
recreational vehicle, RV, R.V.,
reel,
reflex camera,
refrigerator, icebox,
remote control, remote,
restaurant, eating house, eating place, eatery,
revolver, six-gun, six-shooter,
rifle,
rocking chair, rocker,
rotisserie,
rubber eraser, rubber, pencil eraser,
rugby ball,
rule, ruler,
running shoe,
safe,
safety pin,
saltshaker, salt shaker,
sandal,
sarong,
sax, saxophone,
scabbard,
scale, weighing machine,
school bus,
schooner,
scoreboard,
screen, CRT screen,
screw,
screwdriver,
seat belt, seatbelt,
sewing machine,
shield, buckler,
shoe shop, shoe-shop, shoe store,
shoji,
shopping basket,
shopping cart,
shovel,
shower cap,
shower curtain,
ski,
ski mask,
sleeping bag,
slide rule, slipstick,
sliding door,
slot, one-armed bandit,
snorkel,
snowmobile,
snowplow, snowplough,
soap dispenser,
soccer ball,
sock,
solar dish, solar collector, solar furnace,
sombrero,
soup bowl,
space bar,
space heater,
space shuttle,
spatula,
speedboat,
"spider web, spiders web",
spindle,
sports car, sport car,
spotlight, spot,
stage,
steam locomotive,
steel arch bridge,
steel drum,
stethoscope,
stole,
stone wall,
stopwatch, stop watch,
stove,
strainer,
streetcar, tram, tramcar, trolley, trolley car,
stretcher,
studio couch, day bed,
stupa, tope,
submarine, pigboat, sub, U-boat,
suit, suit of clothes,
sundial,
sunglass,
sunglasses, dark glasses, shades,
sunscreen, sunblock, sun blocker,
suspension bridge,
swab, swob, mop,
sweatshirt,
swimming trunks, bathing trunks,
swing,
switch, electric switch, electrical switch,
syringe,
table lamp,
tank, army tank, armored combat vehicle, armoured combat vehicle,
tape player,
teapot,
teddy, teddy bear,
television, television system,
tennis ball,
thatch, thatched roof,
theater curtain, theatre curtain,
thimble,
thresher, thrasher, threshing machine,
throne,
tile roof,
toaster,
tobacco shop, tobacconist shop, tobacconist,
toilet seat,
torch,
totem pole,
tow truck, tow car, wrecker,
toyshop,
tractor,
trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi,
tray,
trench coat,
tricycle, trike, velocipede,
trimaran,
tripod,
triumphal arch,
trolleybus, trolley coach, trackless trolley,
trombone,
tub, vat,
turnstile,
typewriter keyboard,
umbrella,
unicycle, monocycle,
upright, upright piano,
vacuum, vacuum cleaner,
vase,
vault,
velvet,
vending machine,
vestment,
viaduct,
violin, fiddle,
volleyball,
waffle iron,
wall clock,
wallet, billfold, notecase, pocketbook,
wardrobe, closet, press,
warplane, military plane,
washbasin, handbasin, washbowl, lavabo, wash-hand basin,
washer, automatic washer, washing machine,
water bottle,
water jug,
water tower,
whiskey jug,
whistle,
wig,
window screen,
window shade,
Windsor tie,
wine bottle,
wing,
wok,
wooden spoon,
wool, woolen, woollen,
worm fence, snake fence, snake-rail fence, Virginia fence,
wreck,
yawl,
yurt,
web site, website, internet site, site,
comic book,
crossword puzzle, crossword,
street sign,
traffic light, traffic signal, stoplight,
book jacket, dust cover, dust jacket, dust wrapper,
menu,
plate,
guacamole,
consomme,
hot pot, hotpot,
trifle,
ice cream, icecream,
ice lolly, lolly, lollipop, popsicle,
French loaf,
bagel, beigel,
pretzel,
cheeseburger,
hotdog, hot dog, red hot,
mashed potato,
head cabbage,
broccoli,
cauliflower,
zucchini, courgette,
spaghetti squash,
acorn squash,
butternut squash,
cucumber, cuke,
artichoke, globe artichoke,
bell pepper,
cardoon,
mushroom,
Granny Smith,
strawberry,
orange,
lemon,
fig,
pineapple, ananas,
banana,
jackfruit, jak, jack,
custard apple,
pomegranate,
hay,
carbonara,
chocolate sauce, chocolate syrup,
dough,
meat loaf, meatloaf,
pizza, pizza pie,
potpie,
burrito,
red wine,
espresso,
cup,
eggnog,
alp,
bubble,
cliff, drop, drop-off,
coral reef,
geyser,
lakeside, lakeshore,
promontory, headland, head, foreland,
sandbar, sand bar,
seashore, coast, seacoast, sea-coast,
valley, vale,
volcano,
ballplayer, baseball player,
groom, bridegroom,
scuba diver,
rapeseed,
daisy,
"yellow ladys slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
corn,
acorn,
hip, rose hip, rosehip,
buckeye, horse chestnut, conker,
coral fungus,
agaric,
gyromitra,
stinkhorn, carrion fungus,
earthstar,
hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa,
bolete,
ear, spike, capitulum,
toilet tissue, toilet paper, bathroom tissue
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server_gpu.pipeline import PipelineClient
import numpy as np
import requests
import json
import cv2
import base64
import os
client = PipelineClient()
client.connect(['127.0.0.1:9999'])
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
with open("daisy.jpg", 'rb') as file:
image_data = file.read()
image = cv2_to_base64(image_data)
for i in range(1):
ret = client.predict(feed_dict={"image": image}, fetch=["label", "prob"])
print(ret)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
try:
from paddle_serving_server_gpu.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
import logging
import numpy as np
import base64, cv2
class ImagenetOp(Op):
def init_op(self):
self.seq = Sequential([
Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225],
True)
])
self.label_dict = {}
label_idx = 0
with open("imagenet.label") as fin:
for line in fin:
self.label_dict[label_idx] = line.strip()
label_idx += 1
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
img = self.seq(im)
return {"image": img[np.newaxis, :].copy()}, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id):
print(fetch_dict)
score_list = fetch_dict["score"]
result = {"label": [], "prob": []}
for score in score_list:
score = score.tolist()
max_score = max(score)
result["label"].append(self.label_dict[score.index(max_score)]
.strip().replace(",", ""))
result["prob"].append(max_score)
result["label"] = str(result["label"])
result["prob"] = str(result["prob"])
return result, None, ""
class ImageService(WebService):
def get_pipeline_response(self, read_op):
image_op = ImagenetOp(name="imagenet", input_ops=[read_op])
return image_op
uci_service = ImageService(name="imagenet")
uci_service.prepare_pipeline_config("config.yml")
uci_service.run_service()
......@@ -8,8 +8,8 @@ sh get_data.sh
## 启动服务
```
python -m paddle_serving_server_gpu.serve --model imdb_cnn_model --port 9292 &> cnn.log &
python -m paddle_serving_server_gpu.serve --model imdb_bow_model --port 9393 &> bow.log &
python -m paddle_serving_server.serve --model imdb_cnn_model --port 9292 &> cnn.log &
python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 &> bow.log &
python test_pipeline_server.py &>pipeline.log &
```
......@@ -17,8 +17,3 @@ python test_pipeline_server.py &>pipeline.log &
```
python test_pipeline_client.py
```
## HTTP 测试
```
curl -X POST -k http://localhost:9999/prediction -d '{"key": ["words"], "value": ["i am very sad | 0"]}'
```
rpc_port: 18085
#rpc端口, rpc_port和http_port不允许同时为空。当rpc_port为空且http_port不为空时,会自动将rpc_port设置为http_port+1
rpc_port: 18070
#http端口, rpc_port和http_port不允许同时为空。当rpc_port可用且http_port为空时,不自动生成http_port
http_port: 18071
#worker_num, 最大并发数。当build_dag_each_worker=True时, 框架会创建worker_num个进程,每个进程内构建grpcSever和DAG
#当build_dag_each_worker=False时,框架会设置主线程grpc线程池的max_workers=worker_num
worker_num: 4
build_dag_each_worker: false
http_port: 9999
#build_dag_each_worker, False,框架在进程内创建一条DAG;True,框架会每个进程内创建多个独立的DAG
build_dag_each_worker: False
dag:
is_thread_op: false
client_type: brpc
#op资源类型, True, 为线程模型;False,为进程模型
is_thread_op: True
#重试次数
retry: 1
use_profile: false
#使用性能分析, True,生成Timeline性能数据,对性能有一定影响;False为不使用
use_profile: False
#channel的最大长度,默认为0
channel_size: 0
#tracer, 跟踪框架吞吐,每个OP和channel的工作情况。无tracer时不生成数据
tracer:
#每次trace的时间间隔,单位秒/s
interval_s: 10
op:
bow:
#并发数,is_thread_op=True时,为线程并发;否则为进程并发
concurrency: 1
#client连接类型,brpc
client_type: brpc
#Serving交互重试次数,默认不重试
retry: 1
#Serving交互超时时间, 单位ms
timeout: 3000
#Serving IPs
server_endpoints: ["127.0.0.1:9393"]
#bow模型client端配置
client_config: "imdb_bow_client_conf/serving_client_conf.prototxt"
#Fetch结果列表,以client_config中fetch_var的alias_name为准
fetch_list: ["prediction"]
#批量查询Serving的数量, 默认1。batch_size>1要设置auto_batching_timeout,否则不足batch_size时会阻塞
batch_size: 1
#批量查询超时,与batch_size配合使用
auto_batching_timeout: 2000
cnn:
#并发数,is_thread_op=True时,为线程并发;否则为进程并发
concurrency: 1
#client连接类型,brpc
client_type: brpc
#Serving交互重试次数,默认不重试
retry: 1
#超时时间, 单位ms
timeout: 3000
#Serving IPs
server_endpoints: ["127.0.0.1:9292"]
#cnn模型client端配置
client_config: "imdb_cnn_client_conf/serving_client_conf.prototxt"
#Fetch结果列表,以client_config中fetch_var的alias_name为准
fetch_list: ["prediction"]
#批量查询Serving的数量, 默认1。batch_size>1要设置auto_batching_timeout,否则不足batch_size时会阻塞
batch_size: 1
#批量查询超时,与batch_size配合使用
auto_batching_timeout: 2000
combine:
#并发数,is_thread_op=True时,为线程并发;否则为进程并发
concurrency: 1
#Serving交互重试次数,默认不重试
retry: 1
#超时时间, 单位ms
timeout: 3000
#批量查询Serving的数量, 默认1。batch_size>1要设置auto_batching_timeout,否则不足batch_size时会阻塞
batch_size: 1
#批量查询超时,与batch_size配合使用
auto_batching_timeout: 2000
......@@ -15,21 +15,22 @@ from paddle_serving_server.pipeline import PipelineClient
import numpy as np
client = PipelineClient()
client.connect(['127.0.0.1:18080'])
client.connect(['127.0.0.1:18070'])
words = 'i am very sad | 0'
futures = []
for i in range(4):
for i in range(100):
futures.append(
client.predict(
feed_dict={"words": words},
feed_dict={"words": words,
"logid": 10000 + i},
fetch=["prediction"],
asyn=True,
profile=False))
for f in futures:
res = f.result()
if res["ecode"] != 0:
if res.err_no != 0:
print("predict failed: {}".format(res))
print(res)
......@@ -15,10 +15,14 @@
from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server.pipeline import PipelineServer
from paddle_serving_server.pipeline.proto import pipeline_service_pb2
from paddle_serving_server.pipeline.channel import ChannelDataEcode
from paddle_serving_server.pipeline.channel import ChannelDataErrcode
import numpy as np
from paddle_serving_app.reader import IMDBDataset
from paddle_serving_app.reader.imdb_reader import IMDBDataset
import logging
try:
from paddle_serving_server.web_service import WebService
except ImportError:
from paddle_serving_server_gpu.web_service import WebService
_LOGGER = logging.getLogger()
user_handler = logging.StreamHandler()
......@@ -41,74 +45,68 @@ class ImdbRequestOp(RequestOp):
continue
words = request.value[idx]
word_ids, _ = self.imdb_dataset.get_words_and_label(words)
dictdata[key] = np.array(word_ids)
return dictdata
word_len = len(word_ids)
dictdata[key] = np.array(word_ids).reshape(word_len, 1)
dictdata["{}.lod".format(key)] = np.array([0, word_len])
log_id = None
if request.logid is not None:
log_id = request.logid
return dictdata, log_id, None, ""
class CombineOp(Op):
def preprocess(self, input_data):
def preprocess(self, input_data, data_id, log_id):
#_LOGGER.info("Enter CombineOp::preprocess")
combined_prediction = 0
for op_name, data in input_data.items():
_LOGGER.info("{}: {}".format(op_name, data["prediction"]))
combined_prediction += data["prediction"]
data = {"prediction": combined_prediction / 2}
return data
return data, False, None, ""
class ImdbResponseOp(ResponseOp):
# Here ImdbResponseOp is consistent with the default ResponseOp implementation
def pack_response_package(self, channeldata):
resp = pipeline_service_pb2.Response()
resp.ecode = channeldata.ecode
if resp.ecode == ChannelDataEcode.OK.value:
resp.err_no = channeldata.error_code
if resp.err_no == ChannelDataErrcode.OK.value:
feed = channeldata.parse()
# ndarray to string
for name, var in feed.items():
resp.value.append(var.__repr__())
resp.key.append(name)
else:
resp.error_info = channeldata.error_info
resp.err_msg = channeldata.error_info
return resp
read_op = ImdbRequestOp()
bow_op = Op(name="bow",
input_ops=[read_op],
server_endpoints=["127.0.0.1:9393"],
fetch_list=["prediction"],
client_config="imdb_bow_client_conf/serving_client_conf.prototxt",
concurrency=1,
timeout=-1,
retry=1,
batch_size=3,
auto_batching_timeout=1000)
cnn_op = Op(name="cnn",
input_ops=[read_op],
server_endpoints=["127.0.0.1:9292"],
fetch_list=["prediction"],
client_config="imdb_cnn_client_conf/serving_client_conf.prototxt",
concurrency=1,
timeout=-1,
retry=1,
batch_size=1,
auto_batching_timeout=None)
combine_op = CombineOp(
name="combine",
input_ops=[bow_op, cnn_op],
concurrency=1,
timeout=-1,
retry=1,
batch_size=2,
auto_batching_timeout=None)
class BowOp(Op):
def init_op(self):
pass
class CnnOp(Op):
def init_op(self):
pass
bow_op = BowOp("bow", input_ops=[read_op])
cnn_op = CnnOp("cnn", input_ops=[read_op])
combine_op = CombineOp("combine", input_ops=[bow_op, cnn_op])
# fetch output of bow_op
# response_op = ImdbResponseOp(input_ops=[bow_op])
#response_op = ImdbResponseOp(input_ops=[bow_op])
# fetch output of combine_op
response_op = ImdbResponseOp(input_ops=[combine_op])
# use default ResponseOp implementation
# response_op = ResponseOp(input_ops=[combine_op])
#response_op = ResponseOp(input_ops=[combine_op])
server = PipelineServer()
server.set_response_op(response_op)
......
......@@ -28,31 +28,9 @@ python web_service.py &>log.txt &
python pipeline_http_client.py
```
<!--
## More (PipelineServing)
You can choose one of the following versions to start Service.
### Remote Service Version
```
python -m paddle_serving_server_gpu.serve --model ocr_det_model --port 12000 --gpu_id 0 &> det.log &
python -m paddle_serving_server_gpu.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python remote_service_pipeline_server.py &>pipeline.log &
```
### Local Service Version
```
python local_service_pipeline_server.py &>pipeline.log &
```
### Hybrid Service Version
```
python -m paddle_serving_server_gpu.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python hybrid_service_pipeline_server.py &>pipeline.log &
```
## Client Prediction
### RPC
......
......@@ -31,26 +31,6 @@ python pipeline_http_client.py
<!--
## 其他 (PipelineServing)
你可以选择下面任意一种版本启动服务。
### 远程服务版本
```
python -m paddle_serving_server.serve --model ocr_det_model --port 12000 --gpu_id 0 &> det.log &
python -m paddle_serving_server.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python remote_service_pipeline_server.py &>pipeline.log &
```
### 本地服务版本
```
python local_service_pipeline_server.py &>pipeline.log &
```
### 混合服务版本
```
python -m paddle_serving_server_gpu.serve --model ocr_rec_model --port 12001 --gpu_id 0 &> rec.log &
python hybrid_service_pipeline_server.py &>pipeline.log &
```
## 启动客户端
### RPC
......
rpc_port: 18080
worker_num: 4
build_dag_each_worker: false
#rpc端口, rpc_port和http_port不允许同时为空。当rpc_port为空且http_port不为空时,会自动将rpc_port设置为http_port+1
rpc_port: 18090
#http端口, rpc_port和http_port不允许同时为空。当rpc_port可用且http_port为空时,不自动生成http_port
http_port: 9999
#worker_num, 最大并发数。当build_dag_each_worker=True时, 框架会创建worker_num个进程,每个进程内构建grpcSever和DAG
##当build_dag_each_worker=False时,框架会设置主线程grpc线程池的max_workers=worker_num
worker_num: 1
#build_dag_each_worker, False,框架在进程内创建一条DAG;True,框架会每个进程内创建多个独立的DAG
build_dag_each_worker: false
dag:
is_thread_op: false
client_type: brpc
#op资源类型, True, 为线程模型;False,为进程模型
is_thread_op: False
#重试次数
retry: 1
#使用性能分析, True,生成Timeline性能数据,对性能有一定影响;False为不使用
use_profile: false
op:
det:
#并发数,is_thread_op=True时,为线程并发;否则为进程并发
concurrency: 2
#当op配置没有server_endpoints时,从local_service_conf读取本地服务配置
local_service_conf:
#client类型,包括brpc, grpc和local_predictor.local_predictor不启动Serving服务,进程内预测
client_type: local_predictor
#det模型路径
model_config: ocr_det_model
#Fetch结果列表,以client_config中fetch_var的alias_name为准
fetch_list: ["concat_1.tmp_0"]
#计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡
devices: "0"
rec:
concurrency: 1
#并发数,is_thread_op=True时,为线程并发;否则为进程并发
concurrency: 2
#超时时间, 单位ms
timeout: -1
#Serving交互重试次数,默认不重试
retry: 1
#当op配置没有server_endpoints时,从local_service_conf读取本地服务配置
local_service_conf:
#client类型,包括brpc, grpc和local_predictor。local_predictor不启动Serving服务,进程内预测
client_type: local_predictor
#rec模型路径
model_config: ocr_rec_model
#Fetch结果列表,以client_config中fetch_var的alias_name为准
fetch_list: ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"]
#计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡
devices: "0"
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server_gpu.pipeline import PipelineServer
from paddle_serving_server_gpu.pipeline.proto import pipeline_service_pb2
from paddle_serving_server_gpu.pipeline.channel import ChannelDataEcode
from paddle_serving_server_gpu.pipeline import LocalRpcServiceHandler
import numpy as np
import cv2
import time
import base64
import json
from paddle_serving_app.reader import OCRReader
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
import time
import re
import base64
import logging
_LOGGER = logging.getLogger()
class DetOp(Op):
def init_op(self):
self.det_preprocess = Sequential([
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img}
def postprocess(self, input_dicts, fetch_dict):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
return out_dict
class RecOp(Op):
def init_op(self):
self.ocr_reader = OCRReader()
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
dt_boxes = self.sorted_boxes(dt_boxes)
feed_list = []
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = self.get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
def postprocess(self, input_dicts, fetch_dict):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
read_op = RequestOp()
det_op = DetOp(
name="det",
input_ops=[read_op],
local_rpc_service_handler=LocalRpcServiceHandler(
model_config="ocr_det_model",
workdir="det_workdir", # defalut: "workdir"
thread_num=2, # defalut: 2
devices="0", # gpu0. defalut: "" (cpu)
mem_optim=True, # defalut: True
ir_optim=False, # defalut: False
available_port_generator=None), # defalut: None
concurrency=1)
rec_op = RecOp(
name="rec",
input_ops=[det_op],
server_endpoints=["127.0.0.1:12001"],
fetch_list=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"],
client_config="ocr_rec_client/serving_client_conf.prototxt",
concurrency=1)
response_op = ResponseOp(input_ops=[rec_op])
server = PipelineServer("ocr")
server.set_response_op(response_op)
server.prepare_server('config.yml')
server.run_server()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server_gpu.pipeline import PipelineServer
from paddle_serving_server_gpu.pipeline.proto import pipeline_service_pb2
from paddle_serving_server_gpu.pipeline.channel import ChannelDataEcode
from paddle_serving_server_gpu.pipeline import LocalRpcServiceHandler
import numpy as np
import cv2
import time
import base64
import json
from paddle_serving_app.reader import OCRReader
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
import time
import re
import base64
import logging
_LOGGER = logging.getLogger()
class DetOp(Op):
def init_op(self):
self.det_preprocess = Sequential([
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img}
def postprocess(self, input_dicts, fetch_dict):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
return out_dict
class RecOp(Op):
def init_op(self):
self.ocr_reader = OCRReader()
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
dt_boxes = self.sorted_boxes(dt_boxes)
feed_list = []
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = self.get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
def postprocess(self, input_dicts, fetch_dict):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
read_op = RequestOp()
det_op = DetOp(
name="det",
input_ops=[read_op],
local_rpc_service_handler=LocalRpcServiceHandler(
model_config="ocr_det_model",
workdir="det_workdir", # defalut: "workdir"
thread_num=2, # defalut: 2
devices="0", # gpu0. defalut: "" (cpu)
mem_optim=True, # defalut: True
ir_optim=False, # defalut: False
available_port_generator=None), # defalut: None
concurrency=1)
rec_op = RecOp(
name="rec",
input_ops=[det_op],
local_rpc_service_handler=LocalRpcServiceHandler(
model_config="ocr_rec_model"),
concurrency=1)
response_op = ResponseOp(input_ops=[rec_op])
server = PipelineServer("ocr")
server.set_response_op(response_op)
server.prepare_server('config.yml')
server.run_server()
......@@ -11,7 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server_gpu.pipeline import PipelineClient
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
import requests
import json
......
......@@ -20,7 +20,7 @@ import base64
import os
client = PipelineClient()
client.connect(['127.0.0.1:18080'])
client.connect(['127.0.0.1:18090'])
def cv2_to_base64(image):
......@@ -33,6 +33,6 @@ for img_file in os.listdir(test_img_dir):
image_data = file.read()
image = cv2_to_base64(image_data)
for i in range(4):
for i in range(1):
ret = client.predict(feed_dict={"image": image}, fetch=["res"])
print(ret)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.pipeline import Op, RequestOp, ResponseOp
from paddle_serving_server_gpu.pipeline import PipelineServer
from paddle_serving_server_gpu.pipeline.proto import pipeline_service_pb2
from paddle_serving_server_gpu.pipeline.channel import ChannelDataEcode
import numpy as np
import cv2
import time
import base64
import json
from paddle_serving_app.reader import OCRReader
from paddle_serving_app.reader import Sequential, ResizeByFactor
from paddle_serving_app.reader import Div, Normalize, Transpose
from paddle_serving_app.reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
import time
import re
import base64
import logging
_LOGGER = logging.getLogger()
class DetOp(Op):
def init_op(self):
self.det_preprocess = Sequential([
ResizeByFactor(32, 960), Div(255),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), Transpose(
(2, 0, 1))
])
self.filter_func = FilterBoxes(10, 10)
self.post_func = DBPostProcess({
"thresh": 0.3,
"box_thresh": 0.5,
"max_candidates": 1000,
"unclip_ratio": 1.5,
"min_size": 3
})
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
# Note: class variables(self.var) can only be used in process op mode
self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img}
def postprocess(self, input_dicts, fetch_dict):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
]
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
return out_dict
class RecOp(Op):
def init_op(self):
self.ocr_reader = OCRReader()
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
dt_boxes = self.sorted_boxes(dt_boxes)
feed_list = []
img_list = []
max_wh_ratio = 0
for i, dtbox in enumerate(dt_boxes):
boximg = self.get_rotate_crop_image(im, dt_boxes[i])
img_list.append(boximg)
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
def postprocess(self, input_dicts, fetch_dict):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
read_op = RequestOp()
det_op = DetOp(
name="det",
input_ops=[read_op],
server_endpoints=["127.0.0.1:12000"],
fetch_list=["concat_1.tmp_0"],
client_config="ocr_det_client/serving_client_conf.prototxt",
concurrency=1)
rec_op = RecOp(
name="rec",
input_ops=[det_op],
server_endpoints=["127.0.0.1:12001"],
fetch_list=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"],
client_config="ocr_rec_client/serving_client_conf.prototxt",
concurrency=1)
response_op = ResponseOp(input_ops=[rec_op])
server = PipelineServer("ocr")
server.set_response_op(response_op)
server.prepare_server('config.yml')
server.run_server()
......@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from paddle_serving_server_gpu.web_service import WebService, Op
except ImportError:
from paddle_serving_server.web_service import WebService, Op
except ImportError:
from paddle_serving_server_gpu.web_service import WebService, Op
import logging
import numpy as np
import cv2
......@@ -43,7 +43,7 @@ class DetOp(Op):
"min_size": 3
})
def preprocess(self, input_dicts):
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
data = base64.b64decode(input_dict["image"].encode('utf8'))
data = np.fromstring(data, np.uint8)
......@@ -52,9 +52,9 @@ class DetOp(Op):
self.ori_h, self.ori_w, _ = self.im.shape
det_img = self.det_preprocess(self.im)
_, self.new_h, self.new_w = det_img.shape
return {"image": det_img}
return {"image": det_img[np.newaxis, :].copy()}, False, None, ""
def postprocess(self, input_dicts, fetch_dict):
def postprocess(self, input_dicts, fetch_dict, log_id):
det_out = fetch_dict["concat_1.tmp_0"]
ratio_list = [
float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w
......@@ -62,7 +62,8 @@ class DetOp(Op):
dt_boxes_list = self.post_func(det_out, [ratio_list])
dt_boxes = self.filter_func(dt_boxes_list[0], [self.ori_h, self.ori_w])
out_dict = {"dt_boxes": dt_boxes, "image": self.im}
return out_dict
print("out dict", out_dict)
return out_dict, None, ""
class RecOp(Op):
......@@ -71,7 +72,7 @@ class RecOp(Op):
self.get_rotate_crop_image = GetRotateCropImage()
self.sorted_boxes = SortedBoxes()
def preprocess(self, input_dicts):
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
im = input_dict["image"]
dt_boxes = input_dict["dt_boxes"]
......@@ -85,19 +86,22 @@ class RecOp(Op):
h, w = boximg.shape[0:2]
wh_ratio = w * 1.0 / h
max_wh_ratio = max(max_wh_ratio, wh_ratio)
for img in img_list:
_, w, h = self.ocr_reader.resize_norm_img(img_list[0],
max_wh_ratio).shape
imgs = np.zeros((len(img_list), 3, w, h)).astype('float32')
for id, img in enumerate(img_list):
norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
feed = {"image": norm_img}
feed_list.append(feed)
return feed_list
imgs[id] = norm_img
feed = {"image": imgs.copy()}
return feed, False, None, ""
def postprocess(self, input_dicts, fetch_dict):
def postprocess(self, input_dicts, fetch_dict, log_id):
rec_res = self.ocr_reader.postprocess(fetch_dict, with_score=True)
res_lst = []
for res in rec_res:
res_lst.append(res[0])
res = {"res": str(res_lst)}
return res
return res, None, ""
class OcrService(WebService):
......
......@@ -15,5 +15,5 @@ python web_service.py &>log.txt &
## Http test
```
curl -X POST -k http://localhost:18080/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
curl -X POST -k http://localhost:18082/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
```
......@@ -15,5 +15,5 @@ python web_service.py &>log.txt &
## 测试
```
curl -X POST -k http://localhost:18080/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
curl -X POST -k http://localhost:18082/uci/prediction -d '{"key": ["x"], "value": ["0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332"]}'
```
worker_num: 4
http_port: 18080
#worker_num, 最大并发数。当build_dag_each_worker=True时, 框架会创建worker_num个进程,每个进程内构建grpcSever和DAG
##当build_dag_each_worker=False时,框架会设置主线程grpc线程池的max_workers=worker_num
worker_num: 1
#http端口, rpc_port和http_port不允许同时为空。当rpc_port可用且http_port为空时,不自动生成http_port
http_port: 18082
dag:
is_thread_op: false
#op资源类型, True, 为线程模型;False,为进程模型
is_thread_op: False
op:
uci:
#当op配置没有server_endpoints时,从local_service_conf读取本地服务配置
local_service_conf:
#并发数,is_thread_op=True时,为线程并发;否则为进程并发
concurrency: 2
#uci模型路径
model_config: uci_housing_model
devices: "" # "0,1"
#计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡
devices: "0" # "0,1"
#client类型,包括brpc, grpc和local_predictor.local_predictor不启动Serving服务,进程内预测
client_type: local_predictor
#Fetch结果列表,以client_config中fetch_var的alias_name为准
fetch_list: ["price"]
......@@ -25,19 +25,25 @@ class UciOp(Op):
def init_op(self):
self.separator = ","
def preprocess(self, input_dicts):
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
_LOGGER.info(input_dict)
_LOGGER.error("UciOp::preprocess >>> log_id:{}, input:{}".format(
log_id, input_dict))
x_value = input_dict["x"]
proc_dict = {}
if isinstance(x_value, (str, unicode)):
input_dict["x"] = np.array(
[float(x.strip()) for x in x_value.split(self.separator)])
return input_dict
[float(x.strip())
for x in x_value.split(self.separator)]).reshape(1, 13)
_LOGGER.error("input_dict:{}".format(input_dict))
def postprocess(self, input_dicts, fetch_dict):
# _LOGGER.info(fetch_dict)
return input_dict, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id):
_LOGGER.info("UciOp::postprocess >>> log_id:{}, fetch_dict:{}".format(
log_id, fetch_dict))
fetch_dict["price"] = str(fetch_dict["price"][0][0])
return fetch_dict
return fetch_dict, None, ""
class UciService(WebService):
......
......@@ -14,10 +14,10 @@
from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
from paddle_serving_app.local_predict import Debugger
from paddle_serving_app.local_predict import LocalPredictor
import sys
debugger = Debugger()
debugger = LocalPredictor()
debugger.load_model_config(sys.argv[1], gpu=True)
seq = Sequential([
......
......@@ -18,7 +18,7 @@ from paddle_serving_client import Client
from paddle_serving_app.reader import LACReader, SentaReader
import os
import sys
import numpy as np
#senta_web_service.py
from paddle_serving_server.web_service import WebService
from paddle_serving_client import Client
......@@ -36,20 +36,36 @@ class SentaService(WebService):
#定义senta模型预测服务的预处理,调用顺序:lac reader->lac模型预测->预测结果后处理->senta reader
def preprocess(self, feed=[], fetch=[]):
feed_data = [{
"words": self.lac_reader.process(x["words"])
} for x in feed]
lac_result = self.lac_client.predict(
feed=feed_data, fetch=["crf_decode"])
feed_batch = []
is_batch = True
words_lod = [0]
for ins in feed:
if "words" not in ins:
raise ("feed data error!")
feed_data = self.lac_reader.process(ins["words"])
words_lod.append(words_lod[-1] + len(feed_data))
feed_batch.append(np.array(feed_data).reshape(len(feed_data), 1))
words = np.concatenate(feed_batch, axis=0)
lac_result = self.lac_client.predict(
feed={"words": words,
"words.lod": words_lod},
fetch=["crf_decode"],
batch=True)
result_lod = lac_result["crf_decode.lod"]
feed_batch = []
words_lod = [0]
for i in range(len(feed)):
segs = self.lac_reader.parse_result(
feed[i]["words"],
lac_result["crf_decode"][result_lod[i]:result_lod[i + 1]])
feed_data = self.senta_reader.process(segs)
feed_batch.append({"words": feed_data})
return feed_batch, fetch
feed_batch.append(np.array(feed_data).reshape(len(feed_data), 1))
words_lod.append(words_lod[-1] + len(feed_data))
return {
"words": np.concatenate(feed_batch),
"words.lod": words_lod
}, fetch, is_batch
senta_service = SentaService(name="senta")
......
#UNET_BENCHMARK 使用说明
## 功能
* benchmark测试
## 注意事项
* 示例图片(可以有多张)请放置于与img_data路径中,支持jpg,jpeg
* 图片张数应该大于等于并发数量
## TODO
* http benchmark
#!/bin/bash
python unet_benchmark.py --thread 1 --batch_size 1 --model ../unet_client/serving_client_conf.prototxt
# thread/batch can be modified as you wish
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
unet bench mark script
20201130 first edition by cg82616424
"""
from __future__ import unicode_literals, absolute_import
import os
import time
import json
import requests
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
from paddle_serving_app.reader import Sequential, File2Image, Resize, Transpose, BGR2RGB, SegPostprocess
args = benchmark_args()
def get_img_names(path):
"""
Brief:
get img files(jpg) under this path
if any exception happened return None
Args:
path (string): image file path
Returns:
list: images names under this folder
"""
if not os.path.exists(path):
return None
if not os.path.isdir(path):
return None
list_name = []
for f_handler in os.listdir(path):
file_path = os.path.join(path, f_handler)
if os.path.isdir(file_path):
continue
else:
if not file_path.endswith(".jpeg") and not file_path.endswith(
".jpg"):
continue
list_name.append(file_path)
return list_name
def preprocess_img(img_list):
"""
Brief:
prepare img data for benchmark
Args:
img_list(list): list for img file path
Returns:
image content binary list after preprocess
"""
preprocess = Sequential([File2Image(), Resize((512, 512))])
result_list = []
for img in img_list:
img_tmp = preprocess(img)
result_list.append(img_tmp)
return result_list
def benckmark_worker(idx, resource):
"""
Brief:
benchmark single worker for unet
Args:
idx(int): worker idx ,use idx to select backend unet service
resource(dict): unet serving endpoint dict
Returns:
latency
TODO:
http benckmarks
"""
profile_flags = False
latency_flags = False
postprocess = SegPostprocess(2)
if os.getenv("FLAGS_profile_client"):
profile_flags = True
if os.getenv("FLAGS_serving_latency"):
latency_flags = True
latency_list = []
client_handler = Client()
client_handler.load_client_config(args.model)
client_handler.connect(
[resource["endpoint"][idx % len(resource["endpoint"])]])
start = time.time()
turns = resource["turns"]
img_list = resource["img_list"]
for i in range(turns):
if args.batch_size >= 1:
l_start = time.time()
feed_batch = []
b_start = time.time()
for bi in range(args.batch_size):
feed_batch.append({"image": img_list[bi]})
b_end = time.time()
if profile_flags:
sys.stderr.write(
"PROFILE\tpid:{}\tunt_pre_0:{} unet_pre_1:{}\n".format(
os.getpid(),
int(round(b_start * 1000000)),
int(round(b_end * 1000000))))
result = client_handler.predict(
feed={"image": img_list[bi]}, fetch=["output"])
#result["filename"] = "./img_data/N0060.jpg" % (os.getpid(), idx, time.time())
#postprocess(result) # if you want to measure post process time, you have to uncomment this line
l_end = time.time()
if latency_flags:
latency_list.append(l_end * 1000 - l_start * 1000)
else:
print("unsupport batch size {}".format(args.batch_size))
end = time.time()
if latency_flags:
return [[end - start], latency_list]
else:
return [[end - start]]
if __name__ == '__main__':
"""
usage:
"""
img_file_list = get_img_names("./img_data")
img_content_list = preprocess_img(img_file_list)
multi_thread_runner = MultiThreadRunner()
endpoint_list = ["127.0.0.1:9494"]
turns = 1
start = time.time()
result = multi_thread_runner.run(benckmark_worker, args.thread, {
"endpoint": endpoint_list,
"turns": turns,
"img_list": img_content_list
})
end = time.time()
total_cost = end - start
avg_cost = 0
for i in range(args.thread):
avg_cost += result[0][i]
avg_cost = avg_cost / args.thread
print("total cost: {}s".format(total_cost))
print("each thread cost: {}s. ".format(avg_cost))
print("qps: {}samples/s".format(args.batch_size * args.thread * turns /
total_cost))
if os.getenv("FLAGS_serving_latency"):
show_latency(result[1])
......@@ -35,6 +35,7 @@ fetch_map = client.predict(
"image": im,
"im_size": np.array(list(im.shape[1:])),
},
fetch=["save_infer_model/scale_0.tmp_0"])
fetch=["save_infer_model/scale_0.tmp_0"],
batch=False)
fetch_map["image"] = sys.argv[1]
postprocess(fetch_map)
......@@ -160,10 +160,10 @@ Therefore, a local prediction tool is built into the paddle_serving_app, which i
Taking [fit_a_line prediction service](../examples/fit_a_line) as an example, the following code can be used to run local prediction.
```python
from paddle_serving_app.local_predict import Debugger
from paddle_serving_app.local_predict import LocalPredictor
import numpy as np
debugger = Debugger()
debugger = LocalPredictor()
debugger.load_model_config("./uci_housing_model", gpu=False)
data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
-0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
......
......@@ -147,10 +147,10 @@ Paddle Serving框架的server预测op使用了Paddle 的预测框架,在部署
[fit_a_line预测服务](../examples/fit_a_line)为例,使用以下代码即可执行本地预测。
```python
from paddle_serving_app.local_predict import Debugger
from paddle_serving_app.local_predict import LocalPredictor
import numpy as np
debugger = Debugger()
debugger = LocalPredictor()
debugger.load_model_config("./uci_housing_model", gpu=False)
data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
-0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
......
......@@ -31,7 +31,13 @@ logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)
class Debugger(object):
class LocalPredictor(object):
"""
Prediction in the current process of the local environment, in process
call, Compared with RPC/HTTP, LocalPredictor has better performance,
because of no network and packaging load.
"""
def __init__(self):
self.feed_names_ = []
self.fetch_names_ = []
......@@ -42,13 +48,41 @@ class Debugger(object):
self.fetch_names_to_idx_ = {}
self.fetch_names_to_type_ = {}
def load_model_config(self, model_path, gpu=False, profile=True, cpu_num=1):
def load_model_config(self,
model_path,
use_gpu=False,
gpu_id=0,
use_profile=False,
thread_num=1,
mem_optim=True,
ir_optim=False,
use_trt=False,
use_feed_fetch_ops=False):
"""
Load model config and set the engine config for the paddle predictor
Args:
model_path: model config path.
use_gpu: calculating with gpu, False default.
gpu_id: gpu id, 0 default.
use_profile: use predictor profiles, False default.
thread_num: thread nums, default 1.
mem_optim: memory optimization, True default.
ir_optim: open calculation chart optimization, False default.
use_trt: use nvidia TensorRT optimization, False default
use_feed_fetch_ops: use feed/fetch ops, False default.
"""
client_config = "{}/serving_server_conf.prototxt".format(model_path)
model_conf = m_config.GeneralModelConfig()
f = open(client_config, 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
config = AnalysisConfig(model_path)
logger.info("load_model_config params: model_path:{}, use_gpu:{},\
gpu_id:{}, use_profile:{}, thread_num:{}, mem_optim:{}, ir_optim:{},\
use_trt:{}, use_feed_fetch_ops:{}".format(
model_path, use_gpu, gpu_id, use_profile, thread_num, mem_optim,
ir_optim, use_trt, use_feed_fetch_ops))
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
......@@ -64,19 +98,43 @@ class Debugger(object):
self.fetch_names_to_idx_[var.alias_name] = i
self.fetch_names_to_type_[var.alias_name] = var.fetch_type
if not gpu:
config.disable_gpu()
else:
config.enable_use_gpu(100, 0)
if profile:
if use_profile:
config.enable_profile()
if mem_optim:
config.enable_memory_optim()
config.switch_ir_optim(ir_optim)
config.set_cpu_math_library_num_threads(thread_num)
config.switch_use_feed_fetch_ops(use_feed_fetch_ops)
config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
config.set_cpu_math_library_num_threads(cpu_num)
config.switch_ir_optim(False)
config.switch_use_feed_fetch_ops(False)
if not use_gpu:
config.disable_gpu()
else:
config.enable_use_gpu(100, gpu_id)
if use_trt:
config.enable_tensorrt_engine(
workspace_size=1 << 20,
max_batch_size=32,
min_subgraph_size=3,
use_static=False,
use_calib_mode=False)
self.predictor = create_paddle_predictor(config)
def predict(self, feed=None, fetch=None):
def predict(self, feed=None, fetch=None, batch=False, log_id=0):
"""
Predict locally
Args:
feed: feed var
fetch: fetch var
batch: batch data or not, False default.If batch is False, a new
dimension is added to header of the shape[np.newaxis].
log_id: for logging
Returns:
fetch_map: dict
"""
if feed is None or fetch is None:
raise ValueError("You should specify feed and fetch for prediction")
fetch_list = []
......@@ -121,10 +179,19 @@ class Debugger(object):
name])
if self.feed_types_[name] == 0:
feed[name] = feed[name].astype("int64")
else:
elif self.feed_types_[name] == 1:
feed[name] = feed[name].astype("float32")
elif self.feed_types_[name] == 2:
feed[name] = feed[name].astype("int32")
else:
raise ValueError("local predictor receives wrong data type")
input_tensor = self.predictor.get_input_tensor(name)
input_tensor.copy_from_cpu(feed[name])
if "{}.lod".format(name) in feed:
input_tensor.set_lod([feed["{}.lod".format(name)]])
if batch == False:
input_tensor.copy_from_cpu(feed[name][np.newaxis, :])
else:
input_tensor.copy_from_cpu(feed[name])
output_tensors = []
output_names = self.predictor.get_output_names()
for output_name in output_names:
......@@ -139,5 +206,6 @@ class Debugger(object):
for i, name in enumerate(fetch):
fetch_map[name] = outputs[i]
if len(output_tensors[i].lod()) > 0:
fetch_map[name + ".lod"] = output_tensors[i].lod()[0]
fetch_map[name + ".lod"] = np.array(output_tensors[i].lod()[
0]).astype('int32')
return fetch_map
......@@ -18,5 +18,5 @@ from .image_reader import RCNNPostprocess, SegPostprocess, PadStride, BlazeFaceP
from .image_reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
from .lac_reader import LACReader
from .senta_reader import SentaReader
from .imdb_reader import IMDBDataset
#from .imdb_reader import IMDBDataset
from .ocr_reader import OCRReader
......@@ -22,18 +22,17 @@ import yaml
import copy
import argparse
import logging
import paddle.fluid as fluid
import json
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
precision_map = {
'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
}
#precision_map = {
# 'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
# 'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
# 'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
#}
class Resize(object):
......
......@@ -233,7 +233,12 @@ class Client(object):
# key))
pass
def predict(self, feed=None, fetch=None, need_variant_tag=False, log_id=0):
def predict(self,
feed=None,
fetch=None,
batch=False,
need_variant_tag=False,
log_id=0):
self.profile_.record('py_prepro_0')
if feed is None or fetch is None:
......@@ -260,7 +265,10 @@ class Client(object):
int_feed_names = []
float_feed_names = []
int_shape = []
int_lod_slot_batch = []
float_lod_slot_batch = []
float_shape = []
fetch_names = []
counter = 0
batch_size = len(feed_batch)
......@@ -277,31 +285,56 @@ class Client(object):
for i, feed_i in enumerate(feed_batch):
int_slot = []
float_slot = []
int_lod_slot = []
float_lod_slot = []
for key in feed_i:
if key not in self.feed_names_:
if ".lod" not in key and key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if ".lod" in key:
continue
#if not isinstance(feed_i[key], np.ndarray):
self.shape_check(feed_i, key)
if self.feed_types_[key] in int_type:
if i == 0:
int_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
int_shape.append(list(feed_i[key].shape))
shape_lst.extend(list(feed_i[key].shape))
int_shape.append(shape_lst)
else:
int_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
int_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
int_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
int_slot.append(feed_i[key])
self.has_numpy_input = True
else:
int_slot.append(feed_i[key])
self.all_numpy_input = False
elif self.feed_types_[key] in float_type:
if i == 0:
float_feed_names.append(key)
shape_lst = []
if batch == False:
feed_i[key] = feed_i[key][np.newaxis, :]
if isinstance(feed_i[key], np.ndarray):
float_shape.append(list(feed_i[key].shape))
shape_lst.extend(list(feed_i[key].shape))
float_shape.append(shape_lst)
else:
float_shape.append(self.feed_shapes_[key])
if "{}.lod".format(key) in feed_i:
float_lod_slot_batch.append(feed_i["{}.lod".format(
key)])
else:
float_lod_slot_batch.append([])
if isinstance(feed_i[key], np.ndarray):
float_slot.append(feed_i[key])
self.has_numpy_input = True
......@@ -310,6 +343,8 @@ class Client(object):
self.all_numpy_input = False
int_slot_batch.append(int_slot)
float_slot_batch.append(float_slot)
int_lod_slot_batch.append(int_lod_slot)
float_lod_slot_batch.append(float_lod_slot)
self.profile_.record('py_prepro_1')
self.profile_.record('py_client_infer_0')
......@@ -317,14 +352,13 @@ class Client(object):
result_batch_handle = self.predictorres_constructor()
if self.all_numpy_input:
res = self.client_handle_.numpy_predict(
float_slot_batch, float_feed_names, float_shape, int_slot_batch,
int_feed_names, int_shape, fetch_names, result_batch_handle,
self.pid, log_id)
float_slot_batch, float_feed_names, float_shape,
float_lod_slot_batch, int_slot_batch, int_feed_names, int_shape,
int_lod_slot_batch, fetch_names, result_batch_handle, self.pid,
log_id)
elif self.has_numpy_input == False:
res = self.client_handle_.batch_predict(
float_slot_batch, float_feed_names, float_shape, int_slot_batch,
int_feed_names, int_shape, fetch_names, result_batch_handle,
self.pid, log_id)
raise ValueError(
"Please make sure all of your inputs are numpy array")
else:
raise ValueError(
"Please make sure the inputs are all in list type or all in numpy.array type"
......
......@@ -92,9 +92,12 @@ def save_model(server_model_folder,
fetch_var.shape.extend(tmp_shape)
config.fetch_var.extend([fetch_var])
cmd = "mkdir -p {}".format(client_config_folder)
os.system(cmd)
try:
save_dirname = os.path.normpath(client_config_folder)
os.makedirs(save_dirname)
except OSError as e:
if e.errno != errno.EEXIST:
raise
with open("{}/serving_client_conf.prototxt".format(client_config_folder),
"w") as fout:
fout.write(str(config))
......
......@@ -23,13 +23,13 @@ import paddle_serving_server as paddle_serving_server
from .version import serving_server_version
from contextlib import closing
import collections
import fcntl
import shutil
import numpy as np
import grpc
from .proto import multi_lang_general_model_service_pb2
import sys
if sys.platform.startswith('win') is False:
import fcntl
sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
......@@ -584,7 +584,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
else:
raise Exception("error type.")
tensor.shape.extend(list(model_result[name].shape))
if name in self.lod_tensor_set_:
if "{}.lod".format(name) in model_result:
tensor.lod.extend(model_result["{}.lod".format(name)]
.tolist())
inst.tensor_array.append(tensor)
......
......@@ -52,6 +52,20 @@ class WebService(object):
def load_model_config(self, model_config):
print("This API will be deprecated later. Please do not use it")
self.model_config = model_config
import os
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
if os.path.isdir(model_config):
client_config = "{}/serving_server_conf.prototxt".format(
model_config)
elif os.path.isfile(path):
client_config = model_config
model_conf = m_config.GeneralModelConfig()
f = open(client_config, 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.feed_names = [var.alias_name for var in model_conf.feed_var]
self.fetch_names = [var.alias_name for var in model_conf.fetch_var]
def _launch_rpc_service(self):
op_maker = OpMaker()
......@@ -112,13 +126,14 @@ class WebService(object):
if "fetch" not in request.json:
abort(400)
try:
feed, fetch = self.preprocess(request.json["feed"],
request.json["fetch"])
feed, fetch, is_batch = self.preprocess(request.json["feed"],
request.json["fetch"])
if isinstance(feed, dict) and "fetch" in feed:
del feed["fetch"]
if len(feed) == 0:
raise ValueError("empty input")
fetch_map = self.client.predict(feed=feed, fetch=fetch)
fetch_map = self.client.predict(
feed=feed, fetch=fetch, batch=is_batch)
result = self.postprocess(
feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
result = {"result": result}
......@@ -171,24 +186,22 @@ class WebService(object):
self.app_instance = app_instance
def _launch_local_predictor(self):
from paddle_serving_app.local_predict import Debugger
self.client = Debugger()
from paddle_serving_app.local_predict import LocalPredictor
self.client = LocalPredictor()
self.client.load_model_config(
"{}".format(self.model_config), gpu=False, profile=False)
def run_web_service(self):
print("This API will be deprecated later. Please do not use it")
self.app_instance.run(host="0.0.0.0",
port=self.port,
threaded=False,
processes=1)
self.app_instance.run(host="0.0.0.0", port=self.port, threaded=True)
def get_app_instance(self):
return self.app_instance
def preprocess(self, feed=[], fetch=[]):
print("This API will be deprecated later. Please do not use it")
return feed, fetch
is_batch = True
return feed, fetch, is_batch
def postprocess(self, feed=[], fetch=[], fetch_map=None):
print("This API will be deprecated later. Please do not use it")
......
......@@ -73,6 +73,8 @@ def serve_args():
default=False,
action="store_true",
help="Use Multi-language-service")
parser.add_argument(
"--use_trt", default=False, action="store_true", help="Use TensorRT")
parser.add_argument(
"--product_name",
type=str,
......@@ -205,6 +207,7 @@ class Server(object):
self.cur_path = os.getcwd()
self.use_local_bin = False
self.gpuid = 0
self.use_trt = False
self.model_config_paths = None # for multi-model in a workflow
self.product_name = None
self.container_id = None
......@@ -271,6 +274,9 @@ class Server(object):
def set_gpuid(self, gpuid=0):
self.gpuid = gpuid
def set_trt(self):
self.use_trt = True
def _prepare_engine(self, model_config_paths, device):
if self.model_toolkit_conf == None:
self.model_toolkit_conf = server_sdk.ModelToolkitConf()
......@@ -290,6 +296,7 @@ class Server(object):
engine.enable_ir_optimization = self.ir_optimization
engine.static_optimization = False
engine.force_update_static_cache = False
engine.use_trt = self.use_trt
if device == "cpu":
engine.type = "FLUID_CPU_ANALYSIS_DIR"
......@@ -396,7 +403,10 @@ class Server(object):
for line in version_file.readlines():
if re.match("cuda_version", line):
cuda_version = line.split("\"")[1]
device_version = "serving-gpu-cuda" + cuda_version + "-"
if cuda_version != "trt":
device_version = "serving-gpu-cuda" + cuda_version + "-"
else:
device_version = "serving-gpu-" + cuda_version + "-"
folder_name = device_version + serving_server_version
tar_name = folder_name + ".tar.gz"
......@@ -645,7 +655,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
else:
raise Exception("error type.")
tensor.shape.extend(list(model_result[name].shape))
if name in self.lod_tensor_set_:
if "{}.lod".format(name) in model_result:
tensor.lod.extend(model_result["{}.lod".format(name)]
.tolist())
inst.tensor_array.append(tensor)
......
......@@ -64,6 +64,8 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.set_max_body_size(max_body_size)
if args.use_trt:
server.set_trt()
if args.product_name != None:
server.set_product_name(args.product_name)
......
......@@ -58,6 +58,20 @@ class WebService(object):
def load_model_config(self, model_config):
print("This API will be deprecated later. Please do not use it")
self.model_config = model_config
import os
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
if os.path.isdir(model_config):
client_config = "{}/serving_server_conf.prototxt".format(
model_config)
elif os.path.isfile(path):
client_config = model_config
model_conf = m_config.GeneralModelConfig()
f = open(client_config, 'r')
model_conf = google.protobuf.text_format.Merge(
str(f.read()), model_conf)
self.feed_names = [var.alias_name for var in model_conf.feed_var]
self.fetch_names = [var.alias_name for var in model_conf.fetch_var]
def set_gpus(self, gpus):
print("This API will be deprecated later. Please do not use it")
......@@ -167,13 +181,14 @@ class WebService(object):
if "fetch" not in request.json:
abort(400)
try:
feed, fetch = self.preprocess(request.json["feed"],
request.json["fetch"])
feed, fetch, is_batch = self.preprocess(request.json["feed"],
request.json["fetch"])
if isinstance(feed, dict) and "fetch" in feed:
del feed["fetch"]
if len(feed) == 0:
raise ValueError("empty input")
fetch_map = self.client.predict(feed=feed, fetch=fetch)
fetch_map = self.client.predict(
feed=feed, fetch=fetch, batch=is_batch)
result = self.postprocess(
feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
result = {"result": result}
......@@ -232,24 +247,22 @@ class WebService(object):
self.app_instance = app_instance
def _launch_local_predictor(self, gpu):
from paddle_serving_app.local_predict import Debugger
self.client = Debugger()
from paddle_serving_app.local_predict import LocalPredictor
self.client = LocalPredictor()
self.client.load_model_config(
"{}".format(self.model_config), gpu=gpu, profile=False)
def run_web_service(self):
print("This API will be deprecated later. Please do not use it")
self.app_instance.run(host="0.0.0.0",
port=self.port,
threaded=False,
processes=4)
self.app_instance.run(host="0.0.0.0", port=self.port, threaded=True)
def get_app_instance(self):
return self.app_instance
def preprocess(self, feed=[], fetch=[]):
print("This API will be deprecated later. Please do not use it")
return feed, fetch
is_batch = True
return feed, fetch, is_batch
def postprocess(self, feed=[], fetch=[], fetch_map=None):
print("This API will be deprecated later. Please do not use it")
......
......@@ -15,5 +15,5 @@ from . import logger # this module must be the first to import
from .operator import Op, RequestOp, ResponseOp
from .pipeline_server import PipelineServer
from .pipeline_client import PipelineClient
from .local_rpc_service_handler import LocalRpcServiceHandler
from .local_service_handler import LocalServiceHandler
from .analyse import Analyst
......@@ -32,7 +32,10 @@ import copy
_LOGGER = logging.getLogger(__name__)
class ChannelDataEcode(enum.Enum):
class ChannelDataErrcode(enum.Enum):
"""
ChannelData error code
"""
OK = 0
TIMEOUT = 1
NOT_IMPLEMENTED = 2
......@@ -42,9 +45,21 @@ class ChannelDataEcode(enum.Enum):
CLOSED_ERROR = 6
NO_SERVICE = 7
UNKNOW = 8
PRODUCT_ERROR = 9
class ProductErrCode(enum.Enum):
"""
ProductErrCode is a base class for recording business error code.
product developers inherit this class and extend more error codes.
"""
pass
class ChannelDataType(enum.Enum):
"""
Channel data type
"""
DICT = 0
CHANNEL_NPDATA = 1
ERROR = 2
......@@ -56,20 +71,23 @@ class ChannelData(object):
npdata=None,
dictdata=None,
data_id=None,
ecode=None,
log_id=None,
error_code=None,
error_info=None,
prod_error_code=None,
prod_error_info=None,
client_need_profile=False):
'''
There are several ways to use it:
1. ChannelData(ChannelDataType.CHANNEL_NPDATA.value, npdata, data_id)
2. ChannelData(ChannelDataType.DICT.value, dictdata, data_id)
3. ChannelData(ecode, error_info, data_id)
1. ChannelData(ChannelDataType.CHANNEL_NPDATA.value, npdata, data_id, log_id)
2. ChannelData(ChannelDataType.DICT.value, dictdata, data_id, log_id)
3. ChannelData(error_code, error_info, prod_error_code, prod_error_info, data_id, log_id)
Protobufs are not pickle-able:
https://stackoverflow.com/questions/55344376/how-to-import-protobuf-module
'''
if ecode is not None:
if error_code is not None or prod_error_code is not None:
if data_id is None or error_info is None:
_LOGGER.critical("Failed to generate ChannelData: data_id"
" and error_info cannot be None")
......@@ -77,25 +95,30 @@ class ChannelData(object):
datatype = ChannelDataType.ERROR.value
else:
if datatype == ChannelDataType.CHANNEL_NPDATA.value:
ecode, error_info = ChannelData.check_npdata(npdata)
if ecode != ChannelDataEcode.OK.value:
error_code, error_info = ChannelData.check_npdata(npdata)
if error_code != ChannelDataErrcode.OK.value:
datatype = ChannelDataType.ERROR.value
_LOGGER.error("(logid={}) {}".format(data_id, error_info))
_LOGGER.error("(data_id={} log_id={}) {}".format(
data_id, log_id, error_info))
elif datatype == ChannelDataType.DICT.value:
ecode, error_info = ChannelData.check_dictdata(dictdata)
if ecode != ChannelDataEcode.OK.value:
error_code, error_info = ChannelData.check_dictdata(dictdata)
if error_code != ChannelDataErrcode.OK.value:
datatype = ChannelDataType.ERROR.value
_LOGGER.error("(logid={}) {}".format(data_id, error_info))
_LOGGER.error("(data_id={} log_id={}) {}".format(
data_id, log_id, error_info))
else:
_LOGGER.critical("(logid={}) datatype not match".format(
data_id))
_LOGGER.critical("(data_id={} log_id={}) datatype not match".
format(data_id, log_id))
os._exit(-1)
self.datatype = datatype
self.npdata = npdata
self.dictdata = dictdata
self.id = data_id
self.ecode = ecode
self.log_id = log_id
self.error_code = error_code
self.error_info = error_info
self.prod_error_code = prod_error_code
self.prod_error_info = prod_error_info
self.client_need_profile = client_need_profile
self.profile_data_set = set()
......@@ -106,67 +129,67 @@ class ChannelData(object):
@staticmethod
def check_dictdata(dictdata):
ecode = ChannelDataEcode.OK.value
error_code = ChannelDataErrcode.OK.value
error_info = None
if isinstance(dictdata, list):
# batch data
for sample in dictdata:
if not isinstance(sample, dict):
ecode = ChannelDataEcode.TYPE_ERROR.value
error_code = ChannelDataErrcode.TYPE_ERROR.value
error_info = "Failed to check data: the type of " \
"data must be dict, but get {}.".format(type(sample))
break
elif not isinstance(dictdata, dict):
# batch size = 1
ecode = ChannelDataEcode.TYPE_ERROR.value
error_code = ChannelDataErrcode.TYPE_ERROR.value
error_info = "Failed to check data: the type of data must " \
"be dict, but get {}.".format(type(dictdata))
return ecode, error_info
return error_code, error_info
@staticmethod
def check_batch_npdata(batch):
ecode = ChannelDataEcode.OK.value
error_code = ChannelDataErrcode.OK.value
error_info = None
for npdata in batch:
ecode, error_info = ChannelData.check_npdata(npdata)
if ecode != ChannelDataEcode.OK.value:
error_code, error_info = ChannelData.check_npdata(npdata)
if error_code != ChannelDataErrcode.OK.value:
break
return ecode, error_info
return error_code, error_info
@staticmethod
def check_npdata(npdata):
ecode = ChannelDataEcode.OK.value
error_code = ChannelDataErrcode.OK.value
error_info = None
if isinstance(npdata, list):
# batch data
for sample in npdata:
if not isinstance(sample, dict):
ecode = ChannelDataEcode.TYPE_ERROR.value
error_code = ChannelDataErrcode.TYPE_ERROR.value
error_info = "Failed to check data: the " \
"value of data must be dict, but get {}.".format(
type(sample))
break
for _, value in sample.items():
if not isinstance(value, np.ndarray):
ecode = ChannelDataEcode.TYPE_ERROR.value
error_code = ChannelDataErrcode.TYPE_ERROR.value
error_info = "Failed to check data: the" \
" value of data must be np.ndarray, but get {}.".format(
type(value))
return ecode, error_info
return error_code, error_info
elif isinstance(npdata, dict):
# batch_size = 1
for _, value in npdata.items():
if not isinstance(value, np.ndarray):
ecode = ChannelDataEcode.TYPE_ERROR.value
error_code = ChannelDataErrcode.TYPE_ERROR.value
error_info = "Failed to check data: the value " \
"of data must be np.ndarray, but get {}.".format(
type(value))
break
else:
ecode = ChannelDataEcode.TYPE_ERROR.value
error_code = ChannelDataErrcode.TYPE_ERROR.value
error_info = "Failed to check data: the value of data " \
"must be dict, but get {}.".format(type(npdata))
return ecode, error_info
return error_code, error_info
def parse(self):
feed = None
......@@ -191,8 +214,9 @@ class ChannelData(object):
return 1
def __str__(self):
return "type[{}], ecode[{}], id[{}]".format(
ChannelDataType(self.datatype).name, self.ecode, self.id)
return "type[{}], error_code[{}], data_id[{}], log_id[{}], dict_data[{}]".format(
ChannelDataType(self.datatype).name, self.error_code, self.id,
self.log_id, str(self.dictdata))
class ProcessChannel(object):
......@@ -289,14 +313,14 @@ class ProcessChannel(object):
def push(self, channeldata, op_name=None):
_LOGGER.debug(
self._log("(logid={}) Op({}) Pushing data".format(channeldata.id,
op_name)))
self._log("(data_id={} log_id={}) Op({}) Enter channel::push".
format(channeldata.id, channeldata.log_id, op_name)))
if len(self._producers) == 0:
_LOGGER.critical(
self._log(
"(logid={}) Op({}) Failed to push data: expected number"
"(data_id={} log_id={}) Op({}) Failed to push data: expected number"
" of producers to be greater than 0, but the it is 0.".
format(channeldata.id, op_name)))
format(channeldata.id, channeldata.log_id, op_name)))
os._exit(-1)
elif len(self._producers) == 1:
with self._cv:
......@@ -310,19 +334,21 @@ class ProcessChannel(object):
raise ChannelStopError()
self._cv.notify_all()
_LOGGER.debug(
self._log("(logid={}) Op({}) Pushed data into internal queue.".
format(channeldata.id, op_name)))
self._log(
"(data_id={} log_id={}) Op({}) Pushed data into internal queue.".
format(channeldata.id, channeldata.log_id, op_name)))
return True
elif op_name is None:
_LOGGER.critical(
self._log(
"(logid={}) Op({}) Failed to push data: there are multiple "
"(data_id={} log_id={}) Op({}) Failed to push data: there are multiple "
"producers, so op_name cannot be None.".format(
channeldata.id, op_name)))
channeldata.id, channeldata.log_id, op_name)))
os._exit(-1)
producer_num = len(self._producers)
data_id = channeldata.id
log_id = channeldata.log_id
put_data = None
with self._cv:
if data_id not in self._input_buf:
......@@ -347,8 +373,8 @@ class ProcessChannel(object):
if put_data is None:
_LOGGER.debug(
self._log(
"(logid={}) Op({}) Pushed data into input_buffer.".
format(data_id, op_name)))
"(data_id={} log_id={}) Op({}) Pushed data into input_buffer.".
format(data_id, log_id, op_name)))
else:
while self._stop.value == 0:
try:
......@@ -361,8 +387,8 @@ class ProcessChannel(object):
_LOGGER.debug(
self._log(
"(logid={}) Op({}) Pushed data into internal_queue.".
format(data_id, op_name)))
"(data_id={} log_id={}) Op({}) Pushed data into internal_queue.".
format(data_id, log_id, op_name)))
self._cv.notify_all()
return True
......@@ -404,8 +430,8 @@ class ProcessChannel(object):
if self._stop.value == 1:
raise ChannelStopError()
_LOGGER.debug(
self._log("(logid={}) Op({}) Got data".format(resp.values()[0]
.id, op_name)))
self._log("(data_id={} log_id={}) Op({}) Got data".format(
resp.values()[0].id, resp.values()[0].log_id, op_name)))
return resp
elif op_name is None:
_LOGGER.critical(
......@@ -434,8 +460,9 @@ class ProcessChannel(object):
self._output_buf.append(channeldata)
_LOGGER.debug(
self._log(
"(logid={}) Op({}) Pop ready item into output_buffer".
format(channeldata.values()[0].id, op_name)))
"(data_id={} log_id={}) Op({}) Pop ready item into output_buffer".
format(channeldata.values()[0].id,
channeldata.values()[0].log_id, op_name)))
break
except Queue.Empty:
if timeout is not None:
......@@ -487,8 +514,9 @@ class ProcessChannel(object):
self._cv.notify_all()
_LOGGER.debug(
self._log("(logid={}) Op({}) Got data from output_buffer".format(
resp.values()[0].id, op_name)))
self._log(
"(data_id={} log_id={}) Op({}) Got data from output_buffer".
format(resp.values()[0].id, resp.values()[0].log_id, op_name)))
return resp
def stop(self):
......@@ -586,14 +614,14 @@ class ThreadChannel(Queue.PriorityQueue):
def push(self, channeldata, op_name=None):
_LOGGER.debug(
self._log("(logid={}) Op({}) Pushing data".format(channeldata.id,
op_name)))
self._log("(data_id={} log_id={}) Op({}) Pushing data".format(
channeldata.id, channeldata.log_id, op_name)))
if len(self._producers) == 0:
_LOGGER.critical(
self._log(
"(logid={}) Op({}) Failed to push data: expected number of "
"(data_id={} log_id={}) Op({}) Failed to push data: expected number of "
"producers to be greater than 0, but the it is 0.".format(
channeldata.id, op_name)))
channeldata.id, channeldata.log_id, op_name)))
os._exit(-1)
elif len(self._producers) == 1:
with self._cv:
......@@ -607,19 +635,21 @@ class ThreadChannel(Queue.PriorityQueue):
raise ChannelStopError()
self._cv.notify_all()
_LOGGER.debug(
self._log("(logid={}) Op({}) Pushed data into internal_queue.".
format(channeldata.id, op_name)))
self._log(
"(data_id={} log_id={}) Op({}) Pushed data into internal_queue.".
format(channeldata.id, channeldata.log_id, op_name)))
return True
elif op_name is None:
_LOGGER.critical(
self._log(
"(logid={}) Op({}) Failed to push data: there are multiple"
"(data_id={} log_id={}) Op({}) Failed to push data: there are multiple"
" producers, so op_name cannot be None.".format(
channeldata.id, op_name)))
channeldata.id, channeldata.log_id, op_name)))
os._exit(-1)
producer_num = len(self._producers)
data_id = channeldata.id
log_id = channeldata.log_id
put_data = None
with self._cv:
if data_id not in self._input_buf:
......@@ -639,8 +669,8 @@ class ThreadChannel(Queue.PriorityQueue):
if put_data is None:
_LOGGER.debug(
self._log(
"(logid={}) Op({}) Pushed data into input_buffer.".
format(data_id, op_name)))
"(data_id={} log_id={}) Op({}) Pushed data into input_buffer.".
format(data_id, log_id, op_name)))
else:
while self._stop is False:
try:
......@@ -653,8 +683,8 @@ class ThreadChannel(Queue.PriorityQueue):
_LOGGER.debug(
self._log(
"(logid={}) Op({}) Pushed data into internal_queue.".
format(data_id, op_name)))
"(data_id={} log_id={}) Op({}) Pushed data into internal_queue.".
format(data_id, log_id, op_name)))
self._cv.notify_all()
return True
......@@ -697,8 +727,8 @@ class ThreadChannel(Queue.PriorityQueue):
if self._stop:
raise ChannelStopError()
_LOGGER.debug(
self._log("(logid={}) Op({}) Got data".format(resp.values()[0]
.id, op_name)))
self._log("(data_id={} log_id={}) Op({}) Got data".format(
resp.values()[0].id, resp.values()[0].log_id, op_name)))
return resp
elif op_name is None:
_LOGGER.critical(
......@@ -727,8 +757,9 @@ class ThreadChannel(Queue.PriorityQueue):
self._output_buf.append(channeldata)
_LOGGER.debug(
self._log(
"(logid={}) Op({}) Pop ready item into output_buffer".
format(channeldata.values()[0].id, op_name)))
"(data_id={} log_id={}) Op({}) Pop ready item into output_buffer".
format(channeldata.values()[0].id,
channeldata.values()[0].log_id, op_name)))
break
except Queue.Empty:
if timeout is not None:
......@@ -780,8 +811,9 @@ class ThreadChannel(Queue.PriorityQueue):
self._cv.notify_all()
_LOGGER.debug(
self._log("(logid={}) Op({}) Got data from output_buffer".format(
resp.values()[0].id, op_name)))
self._log(
"(data_id={} log_id={}) Op({}) Got data from output_buffer".
format(resp.values()[0].id, resp.values()[0].log_id, op_name)))
return resp
def stop(self):
......
......@@ -25,10 +25,12 @@ else:
import os
import logging
import collections
import json
from .operator import Op, RequestOp, ResponseOp, VirtualOp
from .channel import (ThreadChannel, ProcessChannel, ChannelData,
ChannelDataEcode, ChannelDataType, ChannelStopError)
ChannelDataErrcode, ChannelDataType, ChannelStopError,
ProductErrCode)
from .profiler import TimeProfiler, PerformanceTracer
from .util import NameGenerator, ThreadIdGenerator, PipelineProcSyncManager
from .proto import pipeline_service_pb2
......@@ -37,13 +39,28 @@ _LOGGER = logging.getLogger(__name__)
class DAGExecutor(object):
"""
DAG Executor, the service entrance of DAG.
"""
def __init__(self, response_op, server_conf, worker_idx):
"""
Initialize DAGExecutor.
Args:
response_op: Response OP
server_conf: server conf. config.yaml
worker_idx: DAGExecutor index, PipelineServer creates many
DAGExecutors when _build_dag_each_worker is true.
Returns:
None.
"""
build_dag_each_worker = server_conf["build_dag_each_worker"]
server_worker_num = server_conf["worker_num"]
dag_conf = server_conf["dag"]
self._retry = dag_conf["retry"]
client_type = dag_conf["client_type"]
self._server_use_profile = dag_conf["use_profile"]
channel_size = dag_conf["channel_size"]
self._is_thread_op = dag_conf["is_thread_op"]
......@@ -61,8 +78,8 @@ class DAGExecutor(object):
self._is_thread_op, tracer_interval_s, server_worker_num)
self._dag = DAG(self.name, response_op, self._server_use_profile,
self._is_thread_op, client_type, channel_size,
build_dag_each_worker, self._tracer)
self._is_thread_op, channel_size, build_dag_each_worker,
self._tracer)
(in_channel, out_channel, pack_rpc_func,
unpack_rpc_func) = self._dag.build()
self._dag.start()
......@@ -75,7 +92,9 @@ class DAGExecutor(object):
if self._tracer is not None:
self._tracer.start()
# generate id: data_id == request_id == log_id
# generate id
# data_id: Server Unique ID, automatically generated by the framework
# log_id: Trace one product request, can be empty, not unique.
base_counter = 0
gen_id_step = 1
if build_dag_each_worker:
......@@ -95,6 +114,15 @@ class DAGExecutor(object):
self._client_profile_value = "1"
def start(self):
"""
Starting one thread for receiving data from the last channel background.
Args:
None
Returns:
None
"""
self._recive_func = threading.Thread(
target=DAGExecutor._recive_out_channel_func, args=(self, ))
self._recive_func.daemon = True
......@@ -102,11 +130,30 @@ class DAGExecutor(object):
_LOGGER.debug("[DAG Executor] Start recive thread")
def stop(self):
"""
Stopping DAG
Args:
None
Returns:
None
"""
self._dag.stop()
self._dag.join()
_LOGGER.info("[DAG Executor] Stop")
def _get_next_data_id(self):
"""
Generate data_id incrementally and Uniquely
Args:
None
Returns:
data_id: uniq id
cond_v: condition variable
"""
data_id = self._id_generator.next()
cond_v = threading.Condition()
with self._cv_for_cv_pool:
......@@ -115,6 +162,15 @@ class DAGExecutor(object):
return data_id, cond_v
def _set_in_channel(self, in_channel):
"""
Set in_channel of DAG
Args:
in_channel: input channel of DAG
Returns:
None
"""
if not isinstance(in_channel, (ThreadChannel, ProcessChannel)):
_LOGGER.critical("[DAG Executor] Failed to set in_channel: "
"in_channel must be Channel type, but get {}".
......@@ -122,8 +178,18 @@ class DAGExecutor(object):
os._exit(-1)
in_channel.add_producer(self.name)
self._in_channel = in_channel
_LOGGER.info("[DAG] set in channel succ, name [{}]".format(self.name))
def _set_out_channel(self, out_channel):
"""
Set out_channel of DAG
Args:
out_channel: output channel of DAG
Returns:
None
"""
if not isinstance(out_channel, (ThreadChannel, ProcessChannel)):
_LOGGER.critical("[DAG Executor] Failed to set out_channel: "
"must be Channel type, but get {}".format(
......@@ -133,6 +199,17 @@ class DAGExecutor(object):
self._out_channel = out_channel
def _recive_out_channel_func(self):
"""
Receiving data from the output channel, and pushing data into
_fetch_buffer. Function _get_channeldata_from_fetch_buffer gets
data by retry time.
Args:
None
Returns:
None
"""
cv = None
while True:
try:
......@@ -142,14 +219,13 @@ class DAGExecutor(object):
with self._cv_for_cv_pool:
for data_id, cv in self._cv_pool.items():
closed_errror_data = ChannelData(
ecode=ChannelDataEcode.CLOSED_ERROR.value,
error_code=ChannelDataErrcode.CLOSED_ERROR.value,
error_info="dag closed.",
data_id=data_id)
with cv:
self._fetch_buffer[data_id] = closed_errror_data
cv.notify_all()
break
if len(channeldata_dict) != 1:
_LOGGER.critical(
"[DAG Executor] Failed to fetch result: out_channel "
......@@ -173,6 +249,16 @@ class DAGExecutor(object):
cond_v.notify_all()
def _get_channeldata_from_fetch_buffer(self, data_id, cond_v):
"""
Getting the channel data from _fetch_buffer.
Args:
data_id: search key
cond_v: conditional variable
Returns:
ready_data: one channel data processed
"""
ready_data = None
with cond_v:
......@@ -189,45 +275,82 @@ class DAGExecutor(object):
ready_data = self._fetch_buffer[data_id]
self._cv_pool.pop(data_id)
self._fetch_buffer.pop(data_id)
_LOGGER.debug("(logid={}) [resp thread] Got data".format(data_id))
_LOGGER.debug("(data_id={}) [resp thread] Got data".format(data_id))
return ready_data
def _pack_channeldata(self, rpc_request, data_id):
"""
Unpacking data from RPC request. and creating one channelData.
Args:
rpc_request: one RPC request
data_id: data id, unique
Returns:
ChannelData: one channel data to be processed
"""
dictdata = None
log_id = None
try:
dictdata = self._unpack_rpc_func(rpc_request)
dictdata, log_id, prod_errcode, prod_errinfo = self._unpack_rpc_func(
rpc_request)
except Exception as e:
_LOGGER.error(
"(logid={}) Failed to parse RPC request package: {}"
.format(data_id, e),
exc_info=True)
return ChannelData(
ecode=ChannelDataEcode.RPC_PACKAGE_ERROR.value,
error_code=ChannelDataErrcode.RPC_PACKAGE_ERROR.value,
error_info="rpc package error: {}".format(e),
data_id=data_id)
data_id=data_id,
log_id=log_id)
else:
# because unpack_rpc_func is rewritten by user, we need
# to look for client_profile_key field in rpc_request
# because unpack_rpc_func is rewritten by user, we need to look
# for product_errcode in returns, and client_profile_key field
# in rpc_request
if prod_errcode is not None:
# product errors occured
_LOGGER.error("unpack_rpc_func prod_errcode:{}".format(
prod_errcode))
return ChannelData(
error_code=ChannelDataErrcode.PRODUCT_ERROR.value,
error_info="",
prod_error_code=prod_errcode,
prod_error_info=prod_errinfo,
data_id=data_id,
log_id=log_id)
profile_value = None
for idx, key in enumerate(rpc_request.key):
if key == self._client_profile_key:
profile_value = rpc_request.value[idx]
break
profile_value = dictdata.get(self._client_profile_key)
client_need_profile = (profile_value == self._client_profile_value)
_LOGGER.debug("(logid={}) Need profile in client: {}".format(
data_id, client_need_profile))
return ChannelData(
datatype=ChannelDataType.DICT.value,
dictdata=dictdata,
data_id=data_id,
log_id=log_id,
client_need_profile=client_need_profile)
def call(self, rpc_request):
"""
DAGExcutor enterance function. There are 5 steps:
1._get_next_data_id: Generate an incremental ID
2._pack_channeldata: pack the channel data from request.
3.retry loop:
a. push channel_data into _in_channel
b. get_channeldata_from_fetch_buffer: get results.
4._pack_for_rpc_resp: pack RPC responses
5.profile: generte profile string and pack into response.
Args:
rpc_request: one RPC request
Returns:
rpc_resp: one RPC response
"""
if self._tracer is not None:
trace_buffer = self._tracer.data_buffer()
data_id, cond_v = self._get_next_data_id()
_LOGGER.info("(logid={}) Succ generate id".format(data_id))
start_call, end_call = None, None
if not self._is_thread_op:
......@@ -236,45 +359,64 @@ class DAGExecutor(object):
else:
start_call = self._profiler.record("call_{}#DAG_0".format(data_id))
_LOGGER.debug("(logid={}) Parsing RPC request package".format(data_id))
self._profiler.record("prepack_{}#{}_0".format(data_id, self.name))
req_channeldata = self._pack_channeldata(rpc_request, data_id)
self._profiler.record("prepack_{}#{}_1".format(data_id, self.name))
log_id = req_channeldata.log_id
_LOGGER.info("(data_id={} log_id={}) Succ Generate ID ".format(data_id,
log_id))
resp_channeldata = None
for i in range(self._retry):
_LOGGER.debug("(logid={}) Pushing data into Graph engine".format(
_LOGGER.debug("(data_id={}) Pushing data into Graph engine".format(
data_id))
try:
if req_channeldata is None:
_LOGGER.critical(
"(data_id={} log_id={}) req_channeldata is None"
.format(data_id, log_id))
if not isinstance(self._in_channel,
(ThreadChannel, ProcessChannel)):
_LOGGER.critical(
"(data_id={} log_id={})[DAG Executor] Failed to "
"set in_channel: in_channel must be Channel type, but get {}".
format(data_id, log_id, type(self._in_channel)))
self._in_channel.push(req_channeldata, self.name)
except ChannelStopError:
_LOGGER.debug("[DAG Executor] Stop")
_LOGGER.error("(data_id:{} log_id={})[DAG Executor] Stop".
format(data_id, log_id))
with self._cv_for_cv_pool:
self._cv_pool.pop(data_id)
return self._pack_for_rpc_resp(
ChannelData(
ecode=ChannelDataEcode.CLOSED_ERROR.value,
error_code=ChannelDataErrcode.CLOSED_ERROR.value,
error_info="dag closed.",
data_id=data_id))
_LOGGER.debug("(logid={}) Wait for Graph engine...".format(data_id))
_LOGGER.debug("(data_id={} log_id={}) Wait for Graph engine...".
format(data_id, log_id))
resp_channeldata = self._get_channeldata_from_fetch_buffer(data_id,
cond_v)
if resp_channeldata.ecode == ChannelDataEcode.OK.value:
_LOGGER.info("(logid={}) Succ predict".format(data_id))
if resp_channeldata.error_code == ChannelDataErrcode.OK.value:
_LOGGER.info("(data_id={} log_id={}) Succ predict".format(
data_id, log_id))
break
else:
_LOGGER.error("(logid={}) Failed to predict: {}"
.format(data_id, resp_channeldata.error_info))
if resp_channeldata.ecode != ChannelDataEcode.TIMEOUT.value:
_LOGGER.error("(data_id={} log_id={}) Failed to predict: {}"
.format(data_id, log_id,
resp_channeldata.error_info))
if resp_channeldata.error_code != ChannelDataErrcode.TIMEOUT.value:
break
if i + 1 < self._retry:
_LOGGER.warning("(logid={}) DAGExecutor retry({}/{})".format(
data_id, i + 1, self._retry))
_LOGGER.warning(
"(data_id={} log_id={}) DAGExecutor retry({}/{})"
.format(data_id, log_id, i + 1, self._retry))
_LOGGER.debug("(logid={}) Packing RPC response package".format(data_id))
_LOGGER.debug("(data_id={} log_id={}) Packing RPC response package"
.format(data_id, log_id))
self._profiler.record("postpack_{}#{}_0".format(data_id, self.name))
rpc_resp = self._pack_for_rpc_resp(resp_channeldata)
self._profiler.record("postpack_{}#{}_1".format(data_id, self.name))
......@@ -288,7 +430,8 @@ class DAGExecutor(object):
trace_buffer.put({
"name": "DAG",
"id": data_id,
"succ": resp_channeldata.ecode == ChannelDataEcode.OK.value,
"succ":
resp_channeldata.error_code == ChannelDataErrcode.OK.value,
"actions": {
"call_{}".format(data_id): end_call - start_call,
},
......@@ -309,6 +452,15 @@ class DAGExecutor(object):
return rpc_resp
def _pack_for_rpc_resp(self, channeldata):
"""
Packing one RPC response
Args:
channeldata: one channel data to be packed
Returns:
resp: one RPC response
"""
try:
return self._pack_rpc_func(channeldata)
except Exception as e:
......@@ -317,20 +469,23 @@ class DAGExecutor(object):
.format(channeldata.id, e),
exc_info=True)
resp = pipeline_service_pb2.Response()
resp.ecode = ChannelDataEcode.RPC_PACKAGE_ERROR.value
resp.error_info = "rpc package error: {}".format(e)
resp.err_no = ChannelDataErrcode.RPC_PACKAGE_ERROR.value
resp.err_msg = "rpc package error: {}".format(e)
return resp
class DAG(object):
"""
Directed Acyclic Graph(DAG) engine, builds one DAG topology.
"""
def __init__(self, request_name, response_op, use_profile, is_thread_op,
client_type, channel_size, build_dag_each_worker, tracer):
channel_size, build_dag_each_worker, tracer):
self._request_name = request_name
self._response_op = response_op
self._use_profile = use_profile
self._is_thread_op = is_thread_op
self._channel_size = channel_size
self._client_type = client_type
self._build_dag_each_worker = build_dag_each_worker
self._tracer = tracer
if not self._is_thread_op:
......@@ -339,6 +494,18 @@ class DAG(object):
@staticmethod
def get_use_ops(response_op):
"""
Starting from ResponseOp, recursively traverse the front OPs. Getting
all used ops and the post op list of each op (excluding ResponseOp)
Args:
response_op: ResponseOp
Returns:
used_ops: used ops, set
succ_ops_of_use_op: op and the next op list, dict.
"""
unique_names = set()
used_ops = set()
succ_ops_of_use_op = {} # {op_name: succ_ops}
......@@ -364,6 +531,15 @@ class DAG(object):
return used_ops, succ_ops_of_use_op
def _gen_channel(self, name_gen):
"""
Generate one ThreadChannel or ProcessChannel.
Args:
name_gen: channel name
Returns:
channel: one channel generated
"""
channel = None
if self._is_thread_op:
channel = ThreadChannel(
......@@ -375,11 +551,37 @@ class DAG(object):
return channel
def _gen_virtual_op(self, name_gen):
"""
Generate one virtual Op
Args:
name_gen: Op name
Returns:
vir_op: one virtual Op object.
"""
vir_op = VirtualOp(name=name_gen.next())
_LOGGER.debug("[DAG] Generate virtual_op: {}".format(vir_op.name))
return vir_op
def _topo_sort(self, used_ops, response_op, out_degree_ops):
"""
Topological sort of DAG, creates inverted multi-layers views.
Args:
used_ops: op used in DAG
response_op: response op
out_degree_ops: Next op list for each op, dict. the output of
get_use_ops()
Returns:
dag_views: the inverted hierarchical topology list. examples:
DAG :[A -> B -> C -> E]
\-> D /
dag_views: [[E], [C, D], [B], [A]]
last_op:the last op front of ResponseOp
"""
out_degree_num = {
name: len(ops)
for name, ops in out_degree_ops.items()
......@@ -423,6 +625,23 @@ class DAG(object):
return dag_views, last_op
def _build_dag(self, response_op):
"""
Building DAG, the most important function in class DAG. Core steps:
1.get_use_ops: Getting used ops, and out degree op list for each op.
2._topo_sort: Topological sort creates inverted multi-layers views.
3.create channels and virtual ops.
Args:
response_op: ResponseOp
Returns:
actual_ops: all OPs used in DAG, including virtual OPs
channels: all channels used in DAG
input_channel: the channel of first OP
output_channel: the channel of last OP
pack_func: pack_response_package function of response_op
unpack_func: unpack_request_package function of request_op
"""
if response_op is None:
_LOGGER.critical("Failed to build DAG: ResponseOp"
" has not been set.")
......@@ -548,6 +767,18 @@ class DAG(object):
return self._channels
def build(self):
"""
Interface for building one DAG outside.
Args:
None
Returns:
_input_channel: the channel of first OP
_output_channel: the channel of last OP
_pack_func: pack_response_package function of response_op
_unpack_func: unpack_request_package function of request_op
"""
(actual_ops, channels, input_channel, output_channel, pack_func,
unpack_func) = self._build_dag(self._response_op)
_LOGGER.info("[DAG] Succ build DAG")
......@@ -565,26 +796,52 @@ class DAG(object):
return self._input_channel, self._output_channel, self._pack_func, self._unpack_func
def start(self):
"""
Each OP starts a thread or process by _is_thread_op
Args:
None
Returns:
_threads_or_proces: threads or process list.
"""
self._threads_or_proces = []
for op in self._actual_ops:
op.use_profiler(self._use_profile)
op.set_tracer(self._tracer)
if self._is_thread_op:
self._threads_or_proces.extend(
op.start_with_thread(self._client_type))
self._threads_or_proces.extend(op.start_with_thread())
else:
self._threads_or_proces.extend(
op.start_with_process(self._client_type))
self._threads_or_proces.extend(op.start_with_process())
_LOGGER.info("[DAG] start")
# not join yet
return self._threads_or_proces
def join(self):
"""
All threads or processes join.
Args:
None
Returns:
None
"""
for x in self._threads_or_proces:
x.join()
if x is not None:
x.join()
def stop(self):
"""
Stopping and cleanning all channels.
Args:
None
Returns:
None
"""
for chl in self._channels:
chl.stop()
for op in self._actual_ops:
......
......@@ -19,22 +19,25 @@ option go_package = ".;pipeline_serving";
import "google/api/annotations.proto";
message Response {
repeated string key = 1;
repeated string value = 2;
int32 ecode = 3;
string error_info = 4;
int32 err_no = 1;
string err_msg = 2;
repeated string key = 3;
repeated string value = 4;
};
message Request {
repeated string key = 1;
repeated string value = 2;
string name = 3;
}
string method = 4;
int64 logid = 5;
string clientip = 6;
};
service PipelineService {
rpc inference(Request) returns (Response) {
option (google.api.http) = {
post : "/{name=*}/prediction"
post : "/{name=*}/{method=*}"
body : "*"
};
}
......
......@@ -25,7 +25,7 @@ import (
"github.com/grpc-ecosystem/grpc-gateway/runtime"
"google.golang.org/grpc"
gw "./proto"
gw "serving-gateway/proto"
)
//export run_proxy_server
......@@ -38,7 +38,8 @@ func run_proxy_server(grpc_port int, http_port int) error {
ctx, cancel := context.WithCancel(ctx)
defer cancel()
mux := runtime.NewServeMux()
//EmitDefaults=true, does not filter out the default inputs
mux := runtime.NewServeMux(runtime.WithMarshalerOption(runtime.MIMEWildcard, &runtime.JSONPb{OrigName: true, EmitDefaults: true}))
opts := []grpc.DialOption{grpc.WithInsecure()}
err := gw.RegisterPipelineServiceHandlerFromEndpoint(ctx, mux, *pipelineEndpoint, opts)
if err != nil {
......
......@@ -15,101 +15,203 @@
import os
import logging
import multiprocessing
try:
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server
PACKAGE_VERSION = "GPU"
except ImportError:
from paddle_serving_server import OpMaker, OpSeqMaker, Server
PACKAGE_VERSION = "CPU"
#from paddle_serving_server_gpu import OpMaker, OpSeqMaker
#from paddle_serving_server_gpu import Server as GpuServer
#from paddle_serving_server import Server as CpuServer
from . import util
#from paddle_serving_app.local_predict import LocalPredictor
_LOGGER = logging.getLogger(__name__)
_workdir_name_gen = util.NameGenerator("workdir_")
class LocalRpcServiceHandler(object):
class LocalServiceHandler(object):
"""
LocalServiceHandler is the processor of the local service, contains
three client types, brpc, grpc and local_predictor.If you use the
brpc or grpc, serveing startup ability is provided.If you use
local_predictor, local predict ability is provided by paddle_serving_app.
"""
def __init__(self,
model_config,
client_type='local_predictor',
workdir="",
thread_num=2,
devices="",
fetch_names=None,
mem_optim=True,
ir_optim=False,
available_port_generator=None):
available_port_generator=None,
use_trt=False,
use_profile=False):
"""
Initialization of localservicehandler
Args:
model_config: model config path
client_type: brpc, grpc and local_predictor[default]
workdir: work directory
thread_num: number of threads, concurrent quantity.
devices: gpu id list[gpu], "" default[cpu]
fetch_names: get fetch names out of LocalServiceHandler in
local_predictor mode. fetch_names_ is compatible for Client().
mem_optim: use memory/graphics memory optimization, True default.
ir_optim: use calculation chart optimization, False default.
available_port_generator: generate available ports
use_trt: use nvidia tensorRt engine, False default.
use_profile: use profiling, False default.
Returns:
None
"""
if available_port_generator is None:
available_port_generator = util.GetAvailablePortGenerator()
self._model_config = model_config
self._port_list = []
self._device_type = "cpu"
if devices == "":
# cpu
devices = [-1]
self._device_type = "cpu"
self._port_list.append(available_port_generator.next())
_LOGGER.info("Model({}) will be launch in cpu device. Port({})"
.format(model_config, self._port_list))
else:
# gpu
if PACKAGE_VERSION == "CPU":
raise ValueError(
"You are using the CPU version package("
"paddle-serving-server), unable to set devices")
self._device_type = "gpu"
devices = [int(x) for x in devices.split(",")]
for _ in devices:
self._port_list.append(available_port_generator.next())
_LOGGER.info("Model({}) will be launch in gpu device: {}. Port({})"
.format(model_config, devices, self._port_list))
self._client_type = client_type
self._workdir = workdir
self._devices = devices
self._thread_num = thread_num
self._mem_optim = mem_optim
self._ir_optim = ir_optim
self._local_predictor_client = None
self._rpc_service_list = []
self._server_pros = []
self._fetch_vars = None
self._use_trt = use_trt
self._use_profile = use_profile
self.fetch_names_ = fetch_names
def get_fetch_list(self):
return self._fetch_vars
return self.fetch_names_
def get_port_list(self):
return self._port_list
def get_client(self):
"""
Function get_client is only used for local predictor case, creates one
LocalPredictor object, and initializes the paddle predictor by function
load_model_config.
Args:
None
Returns:
_local_predictor_client
"""
from paddle_serving_app.local_predict import LocalPredictor
if self._local_predictor_client is None:
self._local_predictor_client = LocalPredictor()
use_gpu = False
if self._device_type == "gpu":
use_gpu = True
self._local_predictor_client.load_model_config(
model_path=self._model_config,
use_gpu=use_gpu,
gpu_id=self._devices[0],
use_profile=self._use_profile,
thread_num=self._thread_num,
mem_optim=self._mem_optim,
ir_optim=self._ir_optim,
use_trt=self._use_trt)
return self._local_predictor_client
def get_client_config(self):
return os.path.join(self._model_config, "serving_server_conf.prototxt")
def _prepare_one_server(self, workdir, port, gpuid, thread_num, mem_optim,
ir_optim):
device = "gpu"
if gpuid == -1:
device = "cpu"
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
"""
According to _device_type, generating one CpuServer or GpuServer, and
setting the model config amd startup params.
Args:
workdir: work directory
port: network port
gpuid: gpu id
thread_num: thread num
mem_optim: use memory/graphics memory optimization
ir_optim: use calculation chart optimization
Returns:
server: CpuServer/GpuServer
"""
if self._device_type == "cpu":
from paddle_serving_server import OpMaker, OpSeqMaker, Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
else:
#gpu
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server
op_maker = OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
if gpuid >= 0:
server.set_gpuid(gpuid)
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(thread_num)
server.set_memory_optimize(mem_optim)
server.set_ir_optimize(ir_optim)
server.load_model_config(self._model_config)
if gpuid >= 0:
server.set_gpuid(gpuid)
server.prepare_server(workdir=workdir, port=port, device=device)
if self._fetch_vars is None:
self._fetch_vars = server.get_fetch_list()
server.prepare_server(
workdir=workdir, port=port, device=self._device_type)
if self.fetch_names_ is None:
self.fetch_names_ = server.get_fetch_list()
return server
def _start_one_server(self, service_idx):
"""
Start one server
Args:
service_idx: server index
Returns:
None
"""
self._rpc_service_list[service_idx].run_server()
def prepare_server(self):
"""
Prepare all servers to be started, and append them into list.
"""
for i, device_id in enumerate(self._devices):
if self._workdir != "":
workdir = "{}_{}".format(self._workdir, i)
......@@ -125,6 +227,9 @@ class LocalRpcServiceHandler(object):
ir_optim=self._ir_optim))
def start_server(self):
"""
Start multiple processes and start one server in each process
"""
for i, service in enumerate(self._rpc_service_list):
p = multiprocessing.Process(
target=self._start_one_server, args=(i, ))
......
......@@ -24,6 +24,7 @@ import os
import sys
import collections
import numpy as np
import json
from numpy import *
if sys.version_info.major == 2:
import Queue
......@@ -33,12 +34,12 @@ else:
raise Exception("Error Python version")
from .proto import pipeline_service_pb2
from .channel import (ThreadChannel, ProcessChannel, ChannelDataEcode,
from .channel import (ThreadChannel, ProcessChannel, ChannelDataErrcode,
ChannelData, ChannelDataType, ChannelStopError,
ChannelTimeoutError)
ChannelTimeoutError, ProductErrCode)
from .util import NameGenerator
from .profiler import UnsafeTimeProfiler as TimeProfiler
from . import local_rpc_service_handler
from . import local_service_handler
_LOGGER = logging.getLogger(__name__)
_op_name_gen = NameGenerator("Op")
......@@ -51,12 +52,13 @@ class Op(object):
server_endpoints=None,
fetch_list=None,
client_config=None,
client_type=None,
concurrency=None,
timeout=None,
retry=None,
batch_size=None,
auto_batching_timeout=None,
local_rpc_service_handler=None):
local_service_handler=None):
# In __init__, all the parameters are just saved and Op is not initialized
if name is None:
name = _op_name_gen.next()
......@@ -64,10 +66,11 @@ class Op(object):
self.concurrency = concurrency # amount of concurrency
self.set_input_ops(input_ops)
self._local_rpc_service_handler = local_rpc_service_handler
self._local_service_handler = local_service_handler
self._server_endpoints = server_endpoints
self._fetch_names = fetch_list
self._client_config = client_config
self.client_type = client_type
self._timeout = timeout
self._retry = max(1, retry)
self._batch_size = batch_size
......@@ -86,6 +89,18 @@ class Op(object):
self._succ_close_op = False
def init_from_dict(self, conf):
"""
Initializing one Op from config.yaml. If server_endpoints exist,
which is remote RPC mode, otherwise it is local RPC mode. There
are three types of predictios in local RPC mode, brpc, grpc and
local_predictor.
Args:
conf: config.yaml
Returns:
None
"""
# init op
if self.concurrency is None:
self.concurrency = conf["concurrency"]
......@@ -116,56 +131,82 @@ class Op(object):
else:
self._auto_batching_timeout = self._auto_batching_timeout / 1000.0
self.model_config = None
self.workdir = None
self.thread_num = self.concurrency
self.devices = ""
self.mem_optim = False
self.ir_optim = False
if self._server_endpoints is None:
server_endpoints = conf.get("server_endpoints", [])
if len(server_endpoints) != 0:
# remote service
self.with_serving = True
self._server_endpoints = server_endpoints
self.client_type = conf["client_type"]
else:
if self._local_rpc_service_handler is None:
if self._local_service_handler is None:
local_service_conf = conf.get("local_service_conf")
_LOGGER.info("local_service_conf: {}".format(
local_service_conf))
model_config = local_service_conf.get("model_config")
_LOGGER.info("model_config: {}".format(model_config))
if model_config is None:
self.model_config = local_service_conf.get("model_config")
self.client_type = local_service_conf.get("client_type")
self.workdir = local_service_conf.get("workdir")
self.thread_num = local_service_conf.get("thread_num")
self.devices = local_service_conf.get("devices")
self.mem_optim = local_service_conf.get("mem_optim")
self.ir_optim = local_service_conf.get("ir_optim")
self._fetch_names = local_service_conf.get("fetch_list")
if self.model_config is None:
self.with_serving = False
else:
# local rpc service
self.with_serving = True
service_handler = local_rpc_service_handler.LocalRpcServiceHandler(
model_config=model_config,
workdir=local_service_conf["workdir"],
thread_num=local_service_conf["thread_num"],
devices=local_service_conf["devices"],
mem_optim=local_service_conf["mem_optim"],
ir_optim=local_service_conf["ir_optim"])
service_handler.prepare_server() # get fetch_list
serivce_ports = service_handler.get_port_list()
self._server_endpoints = [
"127.0.0.1:{}".format(p) for p in serivce_ports
]
if self._client_config is None:
self._client_config = service_handler.get_client_config(
)
if self._fetch_names is None:
self._fetch_names = service_handler.get_fetch_list()
self._local_rpc_service_handler = service_handler
if self.client_type == "brpc" or self.client_type == "grpc":
service_handler = local_service_handler.LocalServiceHandler(
model_config=self.model_config,
client_type=self.client_type,
workdir=self.workdir,
thread_num=self.thread_num,
devices=self.devices,
mem_optim=self.mem_optim,
ir_optim=self.ir_optim)
service_handler.prepare_server() # get fetch_list
serivce_ports = service_handler.get_port_list()
self._server_endpoints = [
"127.0.0.1:{}".format(p) for p in serivce_ports
]
if self._client_config is None:
self._client_config = service_handler.get_client_config(
)
if self._fetch_names is None:
self._fetch_names = service_handler.get_fetch_list(
)
elif self.client_type == "local_predictor":
service_handler = local_service_handler.LocalServiceHandler(
model_config=self.model_config,
client_type=self.client_type,
workdir=self.workdir,
thread_num=self.thread_num,
devices=self.devices,
fetch_names=self._fetch_names)
if self._client_config is None:
self._client_config = service_handler.get_client_config(
)
self._local_service_handler = service_handler
else:
self.with_serving = True
self._local_rpc_service_handler.prepare_server(
self._local_service_handler.prepare_server(
) # get fetch_list
serivce_ports = self._local_rpc_service_handler.get_port_list(
)
serivce_ports = self._local_service_handler.get_port_list()
self._server_endpoints = [
"127.0.0.1:{}".format(p) for p in serivce_ports
]
if self._client_config is None:
self._client_config = self._local_rpc_service_handler.get_client_config(
self._client_config = self._local_service_handler.get_client_config(
)
if self._fetch_names is None:
self._fetch_names = self._local_rpc_service_handler.get_fetch_list(
self._fetch_names = self._local_service_handler.get_fetch_list(
)
else:
self.with_serving = True
......@@ -188,17 +229,38 @@ class Op(object):
self._batch_size, self._auto_batching_timeout)))
def launch_local_rpc_service(self):
if self._local_rpc_service_handler is None:
"""
Launching multiple local rpc servers.
Args:
None
Returns:
None
"""
if self._local_service_handler is None:
_LOGGER.warning(
self._log("Failed to launch local rpc"
" service: local_rpc_service_handler is None."))
" service: local_service_handler is None."))
return
port = self._local_rpc_service_handler.get_port_list()
self._local_rpc_service_handler.start_server()
port = self._local_service_handler.get_port_list()
#if self._local_service_handler.client_type == "local_predictor":
# _LOGGER.info("Op({}) use local predictor.")
# return
self._local_service_handler.start_server()
_LOGGER.info("Op({}) use local rpc service at port: {}"
.format(self.name, port))
def use_default_auto_batching_config(self):
"""
Set the auto batching config default.
Args:
None
Returns:
None
"""
if self._batch_size != 1:
_LOGGER.warning("Op({}) reset batch_size=1 (original: {})"
.format(self.name, self._batch_size))
......@@ -215,28 +277,56 @@ class Op(object):
def set_tracer(self, tracer):
self._tracer = tracer
def init_client(self, client_type, client_config, server_endpoints,
fetch_names):
def init_client(self, client_config, server_endpoints):
"""
Initialize the client object. There are three types of clients, brpc,
grpc and local_predictor. In grpc or brpc mode, the client connects
endpoints.
Args:
client_config: client config info
server_endpoints: server IP/Port list.
Returns:
client: client object.
"""
if self.with_serving == False:
_LOGGER.info("Op({}) has no client (and it also do not "
"run the process function)".format(self.name))
return None
if client_type == 'brpc':
if self.client_type == 'brpc':
client = Client()
client.load_client_config(client_config)
elif client_type == 'grpc':
elif self.client_type == 'grpc':
client = MultiLangClient()
elif self.client_type == 'local_predictor':
if self.local_predictor is None:
raise ValueError("local predictor not yet created")
client = self.local_predictor
else:
raise ValueError("Failed to init client: unknow client "
"type {}".format(client_type))
client.connect(server_endpoints)
self._fetch_names = fetch_names
"type {}".format(self.client_type))
if self._fetch_names is None:
self._fetch_names = client.fetch_names_
_LOGGER.info("Op({}) has no fetch name set. So fetch all vars")
if self.client_type != "local_predictor":
client.connect(server_endpoints)
return client
def get_input_ops(self):
return self._input_ops
def set_input_ops(self, ops):
"""
Set input ops.Each op have many input ops, but only one input
channel.
Args:
ops: op list
Returns:
None.
"""
if not isinstance(ops, list):
ops = [] if ops is None else [ops]
self._input_ops = []
......@@ -249,6 +339,10 @@ class Op(object):
self._input_ops.append(op)
def add_input_channel(self, channel):
"""
Adding one input channel to the Op. Each op have many front op,
but, only one input channel.
"""
if not isinstance(channel, (ThreadChannel, ProcessChannel)):
_LOGGER.critical(
self._log("Failed to set input_channel: input "
......@@ -265,6 +359,16 @@ class Op(object):
return self._input
def add_output_channel(self, channel):
"""
Adding one output channel to the Op. Each op have many output channels,
But only one front channel.
Args:
channel: an output channel object.
Returns:
None
"""
if not isinstance(channel, (ThreadChannel, ProcessChannel)):
_LOGGER.critical(
self._log("Failed to add output_channel: output channel "
......@@ -279,7 +383,23 @@ class Op(object):
def _get_output_channels(self):
return self._outputs
def preprocess(self, input_dicts):
def preprocess(self, input_dicts, data_id=0, log_id=0):
"""
In preprocess stage, assembling data for process stage. users can
override this function for model feed features.
Args:
input_dicts: input data to be preprocessed
data_id: inner unique id, 0 default
log_id: global unique id for RTT, 0 default
Return:
input_dict: data for process stage
is_skip_process: skip process stage or not, False default
prod_errcode: None default, otherwise, product errores occured.
It is handled in the same way as exception.
prod_errinfo: "" default
"""
# multiple previous Op
if len(input_dicts) != 1:
_LOGGER.critical(
......@@ -289,44 +409,92 @@ class Op(object):
os._exit(-1)
(_, input_dict), = input_dicts.items()
return input_dict
def process(self, feed_batch, typical_logid):
return input_dict, False, None, ""
def process(self, feed_batch, typical_logid=0):
"""
In process stage, send requests to the inference server or predict locally.
users do not need to inherit this function
Args:
feed_batch: data to be fed to inference server
typical_logid: mark batch predicts, usually the first logid in batch,
0 default.
Returns:
call_result: predict result
"""
err, err_info = ChannelData.check_batch_npdata(feed_batch)
if err != 0:
_LOGGER.critical(
self._log("Failed to run process: {}. Please override "
"preprocess func.".format(err_info)))
os._exit(-1)
call_result = self.client.predict(
feed=feed_batch, fetch=self._fetch_names, log_id=typical_logid)
if self.client_type == "local_predictor":
call_result = self.client.predict(
feed=feed_batch[0],
fetch=self._fetch_names,
batch=True,
log_id=typical_logid)
else:
call_result = self.client.predict(
feed=feed_batch,
fetch=self._fetch_names,
batch=True,
log_id=typical_logid)
if isinstance(self.client, MultiLangClient):
if call_result is None or call_result["serving_status_code"] != 0:
return None
call_result.pop("serving_status_code")
return call_result
def postprocess(self, input_dict, fetch_dict):
return fetch_dict
def postprocess(self, input_dict, fetch_dict, log_id=0):
"""
In postprocess stage, assemble data for next op or output.
Args:
input_dict: data returned in preprocess stage.
fetch_dict: data returned in process stage.
log_id: logid, 0 default
Returns:
fetch_dict: return fetch_dict default
prod_errcode: None default, otherwise, product errores occured.
It is handled in the same way as exception.
prod_errinfo: "" default
"""
return fetch_dict, None, ""
def _parse_channeldata(self, channeldata_dict):
"""
Parse one channeldata
Args:
channeldata_dict : channel data to be parsed, dict type
Return:
data_id: created by dag._id_generator, unique
error_channeldata: error channeldata
parsed_data: get np/dict data from channeldata
client_need_profile: need profile info
profile_set: profile info
log_id: logid for tracing a request
"""
data_id, error_channeldata = None, None
client_need_profile, profile_set = False, set()
parsed_data = {}
key = list(channeldata_dict.keys())[0]
data_id = channeldata_dict[key].id
log_id = channeldata_dict[key].log_id
client_need_profile = channeldata_dict[key].client_need_profile
for name, data in channeldata_dict.items():
if data.ecode != ChannelDataEcode.OK.value:
if data.error_code != ChannelDataErrcode.OK.value:
error_channeldata = data
break
parsed_data[name] = data.parse()
if client_need_profile:
profile_set |= data.profile_data_set
return (data_id, error_channeldata, parsed_data, client_need_profile,
profile_set)
profile_set, log_id)
def _push_to_output_channels(self,
data,
......@@ -335,6 +503,20 @@ class Op(object):
profile_str=None,
client_need_profile=False,
profile_set=None):
"""
Push data to output channels, Do not run the later stage(preprocess,
process, postprocess)
Args:
data: channeldata, to be pushed
channels: output channels
name: op name
profile_str: one profile message
client_need_profile: False default
profile_set: profile message collections
Returns:
None
"""
if name is None:
name = self.name
......@@ -347,33 +529,61 @@ class Op(object):
for channel in channels:
channel.push(data, name)
def start_with_process(self, client_type):
def start_with_process(self):
"""
Each OP creates a process to run the main loop, initializes the CUDA
environment in each individual process.
Args:
None
Returns:
process array
"""
trace_buffer = None
if self._tracer is not None:
trace_buffer = self._tracer.data_buffer()
proces = []
process = []
for concurrency_idx in range(self.concurrency):
p = multiprocessing.Process(
target=self._run,
args=(concurrency_idx, self._get_input_channel(),
self._get_output_channels(), client_type, False,
trace_buffer))
self._get_output_channels(), False, trace_buffer,
self.model_config, self.workdir, self.thread_num,
self.devices, self.mem_optim, self.ir_optim))
p.daemon = True
p.start()
proces.append(p)
return proces
def start_with_thread(self, client_type):
process.append(p)
return process
def start_with_thread(self):
"""
Each OP creates a thread to run the main loop, initializes the CUDA
environment in the main thread.
Args:
None
Returns:
thread array
"""
trace_buffer = None
if self._tracer is not None:
trace_buffer = self._tracer.data_buffer()
#Init cuda env in main thread
if self.client_type == "local_predictor":
_LOGGER.info("Init cuda env in main thread")
self.local_predictor = self._local_service_handler.get_client()
threads = []
for concurrency_idx in range(self.concurrency):
t = threading.Thread(
target=self._run,
args=(concurrency_idx, self._get_input_channel(),
self._get_output_channels(), client_type, True,
trace_buffer))
self._get_output_channels(), True, trace_buffer,
self.model_config, self.workdir, self.thread_num,
self.devices, self.mem_optim, self.ir_optim))
# When a process exits, it attempts to terminate
# all of its daemonic child processes.
t.daemon = True
......@@ -384,52 +594,109 @@ class Op(object):
def init_op(self):
pass
def _run_preprocess(self, parsed_data_dict, op_info_prefix):
def _run_preprocess(self, parsed_data_dict, op_info_prefix, logid_dict):
"""
Run preprocess stage
Args:
parsed_data_dict: data to be pre-processed
op_info_prefix: input op info
logid_dict: logid dict
Returns:
preped_data_dict: data preprocessed, to be processed
err_channeldata_dict: when exceptions occurred, putting errors in it.
skip_process_dict: skip process stage or not
"""
_LOGGER.debug("{} Running preprocess".format(op_info_prefix))
preped_data_dict = collections.OrderedDict()
err_channeldata_dict = collections.OrderedDict()
skip_process_dict = {}
for data_id, parsed_data in parsed_data_dict.items():
preped_data, error_channeldata = None, None
is_skip_process = False
prod_errcode, prod_errinfo = None, None
log_id = logid_dict.get(data_id)
try:
preped_data = self.preprocess(parsed_data)
preped_data, is_skip_process, prod_errcode, prod_errinfo = self.preprocess(
parsed_data, data_id, logid_dict.get(data_id))
# Set skip_process_dict
if is_skip_process is True:
skip_process_dict[data_id] = True
except TypeError as e:
# Error type in channeldata.datatype
error_info = "(logid={}) {} Failed to preprocess: {}".format(
data_id, op_info_prefix, e)
error_info = "(data_id={} log_id={}) {} Failed to preprocess: {}".format(
data_id, log_id, op_info_prefix, e)
_LOGGER.error(error_info, exc_info=True)
error_channeldata = ChannelData(
ecode=ChannelDataEcode.TYPE_ERROR.value,
error_code=ChannelDataErrcode.TYPE_ERROR.value,
error_info=error_info,
data_id=data_id)
data_id=data_id,
log_id=log_id)
except Exception as e:
error_info = "(logid={}) {} Failed to preprocess: {}".format(
data_id, op_info_prefix, e)
error_info = "(data_id={} log_id={}) {} Failed to preprocess: {}".format(
data_id, log_id, op_info_prefix, e)
_LOGGER.error(error_info, exc_info=True)
error_channeldata = ChannelData(
ecode=ChannelDataEcode.UNKNOW.value,
error_code=ChannelDataErrcode.UNKNOW.value,
error_info=error_info,
data_id=data_id)
data_id=data_id,
log_id=log_id)
if prod_errcode is not None:
# product errors occured
error_channeldata = ChannelData(
error_code=ChannelDataErrcode.PRODUCT_ERROR.value,
error_info="",
prod_error_code=prod_errcode,
prod_error_info=prod_errinfo,
data_id=data_id,
log_id=log_id)
if error_channeldata is not None:
err_channeldata_dict[data_id] = error_channeldata
else:
preped_data_dict[data_id] = preped_data
_LOGGER.debug("{} Succ preprocess".format(op_info_prefix))
return preped_data_dict, err_channeldata_dict
def _run_process(self, preped_data_dict, op_info_prefix):
return preped_data_dict, err_channeldata_dict, skip_process_dict
def _run_process(self, preped_data_dict, op_info_prefix, skip_process_dict,
logid_dict):
"""
Run process stage
Args:
preped_data_dict: feed the data to be predicted by the model.
op_info_prefix: prefix op info
skip_process_dict: skip process stage or not
logid_dict: logid dict
Returns:
midped_data_dict: data midprocessed, to be post-processed
err_channeldata_dict: when exceptions occurred, putting errors in it
"""
_LOGGER.debug("{} Running process".format(op_info_prefix))
midped_data_dict = collections.OrderedDict()
err_channeldata_dict = collections.OrderedDict()
if self.with_serving:
data_ids = preped_data_dict.keys()
### if (batch_num == 1 && skip == True) ,then skip the process stage.
is_skip_process = False
data_ids = preped_data_dict.keys()
if len(data_ids) == 1 and skip_process_dict.get(data_ids[0]) == True:
is_skip_process = True
_LOGGER.info("(data_id={} log_id={}) skip process stage".format(
data_ids[0], logid_dict.get(data_ids[0])))
if self.with_serving is True and is_skip_process is False:
# use typical_logid to mark batch data
typical_logid = data_ids[0]
if len(data_ids) != 1:
for data_id in data_ids:
_LOGGER.info(
"(logid={}) {} During access to PaddleServingService,"
"(data_id={} logid={}) {} During access to PaddleServingService,"
" we selected logid={} (from batch: {}) as a "
"representative for logging.".format(
data_id, op_info_prefix, typical_logid, data_ids))
data_id,
logid_dict.get(data_id), op_info_prefix,
typical_logid, data_ids))
# combine samples to batch
one_input = preped_data_dict[data_ids[0]]
......@@ -449,64 +716,70 @@ class Op(object):
input_offset.append(offset)
else:
_LOGGER.critical(
"{} Failed to process: expect input type is dict(sample"
" input) or list(batch input), but get {}".format(
op_info_prefix, type(one_input)))
"(data_id={} log_id={}){} Failed to process: expect input type is dict(sample"
" input) or list(batch input), but get {}".format(data_ids[
0], typical_logid, op_info_prefix, type(one_input)))
os._exit(-1)
midped_batch = None
ecode = ChannelDataEcode.OK.value
error_code = ChannelDataErrcode.OK.value
if self._timeout <= 0:
try:
midped_batch = self.process(feed_batch, typical_logid)
except Exception as e:
ecode = ChannelDataEcode.UNKNOW.value
error_info = "(logid={}) {} Failed to process(batch: {}): {}".format(
typical_logid, op_info_prefix, data_ids, e)
error_code = ChannelDataErrcode.UNKNOW.value
error_info = "(data_id={} log_id={}) {} Failed to process(batch: {}): {}".format(
data_ids[0], typical_logid, op_info_prefix, data_ids, e)
_LOGGER.error(error_info, exc_info=True)
else:
# retry N times configed in yaml files.
for i in range(self._retry):
try:
# time out for each process
midped_batch = func_timeout.func_timeout(
self._timeout,
self.process,
args=(feed_batch, typical_logid))
except func_timeout.FunctionTimedOut as e:
if i + 1 >= self._retry:
ecode = ChannelDataEcode.TIMEOUT.value
error_info = "(logid={}) {} Failed to process(batch: {}): " \
error_code = ChannelDataErrcode.TIMEOUT.value
error_info = "(log_id={}) {} Failed to process(batch: {}): " \
"exceeded retry count.".format(
typical_logid, op_info_prefix, data_ids)
_LOGGER.error(error_info)
else:
_LOGGER.warning(
"(logid={}) {} Failed to process(batch: {}): timeout,"
"(log_id={}) {} Failed to process(batch: {}): timeout,"
" and retrying({}/{})...".format(
typical_logid, op_info_prefix, data_ids, i +
1, self._retry))
except Exception as e:
ecode = ChannelDataEcode.UNKNOW.value
error_info = "(logid={}) {} Failed to process(batch: {}): {}".format(
error_code = ChannelDataErrcode.UNKNOW.value
error_info = "(log_id={}) {} Failed to process(batch: {}): {}".format(
typical_logid, op_info_prefix, data_ids, e)
_LOGGER.error(error_info, exc_info=True)
break
else:
break
if ecode != ChannelDataEcode.OK.value:
if error_code != ChannelDataErrcode.OK.value:
for data_id in data_ids:
err_channeldata_dict[data_id] = ChannelData(
ecode=ecode, error_info=error_info, data_id=data_id)
error_code=error_code,
error_info=error_info,
data_id=data_id,
log_id=logid_dict.get(data_id))
elif midped_batch is None:
# op client return None
error_info = "(logid={}) {} Failed to predict, please check if " \
error_info = "(log_id={}) {} Failed to predict, please check if " \
"PaddleServingService is working properly.".format(
typical_logid, op_info_prefix)
_LOGGER.error(error_info)
for data_id in data_ids:
err_channeldata_dict[data_id] = ChannelData(
ecode=ChannelDataEcode.CLIENT_ERROR.value,
error_code=ChannelDataErrcode.CLIENT_ERROR.value,
error_info=error_info,
data_id=data_id)
data_id=data_id,
log_id=logid_dict.get(data_id))
else:
# transform np format to dict format
var_names = midped_batch.keys()
......@@ -515,7 +788,7 @@ class Op(object):
for name in var_names:
lod_offset_name = "{}.lod".format(name)
if lod_offset_name in var_names:
_LOGGER.debug("(logid={}) {} {} is LodTensor".format(
_LOGGER.debug("(log_id={}) {} {} is LodTensor".format(
typical_logid, op_info_prefix, name))
lod_var_names.add(name)
lod_offset_names.add(lod_offset_name)
......@@ -551,38 +824,67 @@ class Op(object):
return midped_data_dict, err_channeldata_dict
def _run_postprocess(self, parsed_data_dict, midped_data_dict,
op_info_prefix):
op_info_prefix, logid_dict):
"""
Run postprocess stage.
Args:
parsed_data_dict: data returned in preprocess stage
midped_data_dict: data returned in process stage
op_info_prefix: prefix op info
logid_dict: logid dict
Returns:
postped_data_dict: data postprocessed
err_channeldata_dict: when exceptions occurred, putting errors in it
"""
_LOGGER.debug("{} Running postprocess".format(op_info_prefix))
postped_data_dict = collections.OrderedDict()
err_channeldata_dict = collections.OrderedDict()
for data_id, midped_data in midped_data_dict.items():
log_id = logid_dict.get(data_id)
postped_data, err_channeldata = None, None
prod_errcode, prod_errinfo = None, None
try:
postped_data = self.postprocess(parsed_data_dict[data_id],
midped_data)
postped_data, prod_errcode, prod_errinfo = self.postprocess(
parsed_data_dict[data_id], midped_data,
logid_dict.get(data_id))
except Exception as e:
error_info = "(logid={}) {} Failed to postprocess: {}".format(
data_id, op_info_prefix, e)
error_info = "(data_id={} log_id={}) {} Failed to postprocess: {}".format(
data_id, log_id, op_info_prefix, e)
_LOGGER.error(error_info, exc_info=True)
err_channeldata = ChannelData(
ecode=ChannelDataEcode.UNKNOW.value,
error_code=ChannelDataErrcode.UNKNOW.value,
error_info=error_info,
data_id=data_id)
data_id=data_id,
log_id=log_id)
if prod_errcode is not None:
# product errors occured
err_channeldata = ChannelData(
error_code=ChannelDataErrcode.PRODUCT_ERROR.value,
error_info="",
prod_error_code=prod_errcode,
prod_error_info=prod_errinfo,
data_id=data_id,
log_id=log_id)
if err_channeldata is not None:
err_channeldata_dict[data_id] = err_channeldata
continue
else:
if not isinstance(postped_data, dict):
error_info = "(logid={}) {} Failed to postprocess: " \
error_info = "(log_id={} log_id={}) {} Failed to postprocess: " \
"output of postprocess funticon must be " \
"dict type, but get {}".format(
data_id, op_info_prefix,
data_id, log_id, op_info_prefix,
type(postped_data))
_LOGGER.error(error_info)
err_channeldata = ChannelData(
ecode=ChannelDataEcode.UNKNOW.value,
error_code=ChannelDataErrcode.UNKNOW.value,
error_info=error_info,
data_id=data_id)
data_id=data_id,
log_id=log_id)
err_channeldata_dict[data_id] = err_channeldata
continue
......@@ -592,18 +894,36 @@ class Op(object):
output_data = ChannelData(
ChannelDataType.CHANNEL_NPDATA.value,
npdata=postped_data,
data_id=data_id)
data_id=data_id,
log_id=log_id)
else:
output_data = ChannelData(
ChannelDataType.DICT.value,
dictdata=postped_data,
data_id=data_id)
data_id=data_id,
log_id=log_id)
postped_data_dict[data_id] = output_data
_LOGGER.debug("{} Succ postprocess".format(op_info_prefix))
return postped_data_dict, err_channeldata_dict
def _auto_batching_generator(self, input_channel, op_name, batch_size,
timeout, op_info_prefix):
"""
Merge batch_size requests for one prediction.Taking one piece of data
from the input channel each time until equals batch_size, or the waiting
time exceeds auto_batching_timeout.
Args:
input_channel: the input channel of Op
op_name: op name
batch_size: batch size, Less than worker_num
timeout: batch timeout, seconds, If timeout is None, and the quantity
taken from the front is less than batch_size, blocking occured.
op_info_prefix: op link info.
Returns:
None
"""
while True:
batch = []
while len(batch) == 0:
......@@ -624,6 +944,9 @@ class Op(object):
else:
channeldata_dict = input_channel.front(op_name)
batch.append(channeldata_dict)
_LOGGER.debug(
"_auto_batching_generator get {} channeldata from op:{} into batch, batch_size:{}".
format(idx, op_name, batch_size))
except ChannelTimeoutError:
_LOGGER.debug("{} Failed to generate batch: "
"timeout".format(op_info_prefix))
......@@ -633,38 +956,91 @@ class Op(object):
yield batch
def _parse_channeldata_batch(self, batch, output_channels):
"""
Parse channeldatas batch
Args:
batch: auto-batching batch datas
output_channels: output channels
Returns:
parsed_data_dict: parsed from channeldata in batch
need_profile_dict: need profile dict in batch
profile_dict: profile info dict in batch
logid_dict: trace each request in batch
"""
parsed_data_dict = collections.OrderedDict()
need_profile_dict = {}
profile_dict = {}
logid_dict = {}
for channeldata_dict in batch:
(data_id, error_channeldata, parsed_data,
client_need_profile, profile_set) = \
client_need_profile, profile_set, log_id) = \
self._parse_channeldata(channeldata_dict)
if error_channeldata is None:
parsed_data_dict[data_id] = parsed_data
need_profile_dict[data_id] = client_need_profile
profile_dict[data_id] = profile_set
logid_dict[data_id] = log_id
else:
# error data in predecessor Op
# (error_channeldata with profile info)
self._push_to_output_channels(error_channeldata,
output_channels)
return parsed_data_dict, need_profile_dict, profile_dict
def _run(self, concurrency_idx, input_channel, output_channels, client_type,
is_thread_op, trace_buffer):
return parsed_data_dict, need_profile_dict, profile_dict, logid_dict
def _run(self, concurrency_idx, input_channel, output_channels,
is_thread_op, trace_buffer, model_config, workdir, thread_num,
devices, mem_optim, ir_optim):
"""
_run() is the entry function of OP process / thread model.When client
type is local_predictor in process mode, the CUDA environment needs to
be initialized by LocalServiceHandler[child process], otherwise, Cuda
error(3), initialization error is occured. Preprocess, process and
postprocess are executed in the main loop. The preprocess and postprocess
function is usually rewrited by users. Trace data is recorded by trace_que.
Args:
concurrency_idx: thread/process index
input_channel: input channel, take the data to be processed
output_channels: output channel, store processed data
is_thread_op: False, It's process op; True, It's thread op
trace_buffer: store trace infomations
model_config: model config path
workdir: work directory
thread_num: number of threads, concurrent quantity
devices: gpu id list[gpu], "" default[cpu]
mem_optim: use memory/graphics memory optimization, True default.
ir_optim: use calculation chart optimization, False default.
Returns:
None
"""
op_info_prefix = "[{}|{}]".format(self.name, concurrency_idx)
tid = threading.current_thread().ident
# init op
# init ops
profiler = None
try:
profiler = self._initialize(is_thread_op, client_type,
concurrency_idx)
if is_thread_op == False and self.client_type == "local_predictor":
self.service_handler = local_service_handler.LocalServiceHandler(
model_config=model_config,
client_type="local_predictor",
workdir=workdir,
thread_num=thread_num,
devices=devices,
mem_optim=mem_optim,
ir_optim=ir_optim)
_LOGGER.info("Init cuda env in process {}".format(
concurrency_idx))
self.local_predictor = self.service_handler.get_client()
# check all ops initialized successfully.
profiler = self._initialize(is_thread_op, concurrency_idx)
except Exception as e:
_LOGGER.critical(
"{} Failed to init op: {}".format(op_info_prefix, e),
"{} failed to init op: {}".format(op_info_prefix, e),
exc_info=True)
os._exit(-1)
_LOGGER.info("{} Succ init".format(op_info_prefix))
......@@ -691,7 +1067,7 @@ class Op(object):
# parse channeldata batch
try:
parsed_data_dict, need_profile_dict, profile_dict \
parsed_data_dict, need_profile_dict, profile_dict, logid_dict\
= self._parse_channeldata_batch(
channeldata_dict_batch, output_channels)
except ChannelStopError:
......@@ -704,11 +1080,12 @@ class Op(object):
# preprecess
start = profiler.record("prep#{}_0".format(op_info_prefix))
preped_data_dict, err_channeldata_dict \
= self._run_preprocess(parsed_data_dict, op_info_prefix)
preped_data_dict, err_channeldata_dict, skip_process_dict \
= self._run_preprocess(parsed_data_dict, op_info_prefix, logid_dict)
end = profiler.record("prep#{}_1".format(op_info_prefix))
prep_time = end - start
try:
# put error requests into output channel, skip process and postprocess stage
for data_id, err_channeldata in err_channeldata_dict.items():
self._push_to_output_channels(
data=err_channeldata,
......@@ -725,7 +1102,7 @@ class Op(object):
# process
start = profiler.record("midp#{}_0".format(op_info_prefix))
midped_data_dict, err_channeldata_dict \
= self._run_process(preped_data_dict, op_info_prefix)
= self._run_process(preped_data_dict, op_info_prefix, skip_process_dict, logid_dict)
end = profiler.record("midp#{}_1".format(op_info_prefix))
midp_time = end - start
try:
......@@ -745,8 +1122,7 @@ class Op(object):
# postprocess
start = profiler.record("postp#{}_0".format(op_info_prefix))
postped_data_dict, err_channeldata_dict \
= self._run_postprocess(
parsed_data_dict, midped_data_dict, op_info_prefix)
= self._run_postprocess(parsed_data_dict, midped_data_dict, op_info_prefix, logid_dict)
end = profiler.record("postp#{}_1".format(op_info_prefix))
postp_time = end - start
try:
......@@ -801,16 +1177,28 @@ class Op(object):
except Queue.Full:
break
def _initialize(self, is_thread_op, client_type, concurrency_idx):
def _initialize(self, is_thread_op, concurrency_idx):
"""
Initialize one OP object in the target function of a thread or porcess.
Initialize the client object with _client_config and _server_endpoints.
Create a TimeProfiler per thread or process for recording profiler info.
Args:
is_thread_op: True, one op runs in one thread; False, one op runs
in one process.
concurrency_idx: process id, Thread mode does not use this param.
Returns:
TimeProfiler
"""
if is_thread_op:
with self._for_init_op_lock:
if not self._succ_init_op:
# for the threaded version of Op, each thread cannot get its concurrency_idx
self.concurrency_idx = None
# init client
self.client = self.init_client(
client_type, self._client_config,
self._server_endpoints, self._fetch_names)
self.client = self.init_client(self._client_config,
self._server_endpoints)
# user defined
self.init_op()
self._succ_init_op = True
......@@ -818,9 +1206,8 @@ class Op(object):
else:
self.concurrency_idx = concurrency_idx
# init client
self.client = self.init_client(client_type, self._client_config,
self._server_endpoints,
self._fetch_names)
self.client = self.init_client(self._client_config,
self._server_endpoints)
# user defined
self.init_op()
......@@ -843,9 +1230,17 @@ class Op(object):
class RequestOp(Op):
""" RequestOp do not run preprocess, process, postprocess. """
"""
RequestOp is a special Op, for unpacking one request package. If the
request needs one special unpackaging method, you need to inherit class
RequestOp and rewrite function unpack_request_package.Notice!!! Class
RequestOp does not run preprocess, process, postprocess.
"""
def __init__(self):
"""
Initialize the RequestOp
"""
# PipelineService.name = "@DAGExecutor"
super(RequestOp, self).__init__(name="@DAGExecutor", input_ops=[])
# init op
......@@ -856,7 +1251,25 @@ class RequestOp(Op):
os._exit(-1)
def unpack_request_package(self, request):
dictdata = {}
"""
Unpack request package by gateway.proto
Args:
request: HTTP body, JSON format
Returns:
dict_data: json fields in HTTP body
log_id: log_id
prod_errcode: None or ProductErrCode.SUCC.value default, otherwise,
product errores occured.It is handled in the same way
as exception.
prod_errinfo: "" default
"""
dict_data = {}
log_id = None
if request is None:
_LOGGER.critical("request is None")
raise ValueError("request is None")
for idx, key in enumerate(request.key):
data = request.value[idx]
try:
......@@ -865,14 +1278,27 @@ class RequestOp(Op):
data = evaled_data
except Exception as e:
pass
dictdata[key] = data
return dictdata
dict_data[key] = data
log_id = request.logid
_LOGGER.info("RequestOp unpack one request. log_id:{}, clientip:{} \
name:{}, method:{}".format(log_id, request.clientip, request.name,
request.method))
return dict_data, log_id, None, ""
class ResponseOp(Op):
""" ResponseOp do not run preprocess, process, postprocess. """
"""
ResponseOp is a special Op, for packing one response package. If the channeldata
needs a special packaging method, you need to inherit class ReponseOp and rewrite
pack_response_package function. Notice!!! Class ResponseOp does not run preprocess,
process, postprocess.
"""
def __init__(self, input_ops):
"""
Initialize the ResponseOp
"""
super(ResponseOp, self).__init__(
name="@DAGExecutor", input_ops=input_ops)
# init op
......@@ -884,9 +1310,21 @@ class ResponseOp(Op):
os._exit(-1)
def pack_response_package(self, channeldata):
"""
Getting channeldata from the last channel, packting the response
package serialized by protobuf.
Args:
channeldata: Type ChannelData
Returns:
resp: pipeline_service_pb2.Response()
"""
resp = pipeline_service_pb2.Response()
resp.ecode = channeldata.ecode
if resp.ecode == ChannelDataEcode.OK.value:
error_code = channeldata.error_code
error_info = ""
if error_code == ChannelDataErrcode.OK.value:
# Framework level errors
if channeldata.datatype == ChannelDataType.CHANNEL_NPDATA.value:
feed = channeldata.parse()
# ndarray to string:
......@@ -899,30 +1337,57 @@ class ResponseOp(Op):
feed = channeldata.parse()
for name, var in feed.items():
if not isinstance(var, str):
resp.ecode = ChannelDataEcode.TYPE_ERROR.value
resp.error_info = self._log(
error_code = ChannelDataErrcode.TYPE_ERROR.value
error_info = self._log(
"fetch var type must be str({}).".format(
type(var)))
_LOGGER.error("(logid={}) Failed to pack RPC "
"response package: {}".format(
channeldata.id, resp.error_info))
channeldata.id, resp.err_msg))
break
resp.value.append(var)
resp.key.append(name)
else:
resp.ecode = ChannelDataEcode.TYPE_ERROR.value
resp.error_info = self._log(
"error type({}) in datatype.".format(channeldata.datatype))
error_code = ChannelDataErrcode.TYPE_ERROR.value
error_info = self._log("error type({}) in datatype.".format(
channeldata.datatype))
_LOGGER.error("(logid={}) Failed to pack RPC response"
" package: {}".format(channeldata.id,
resp.error_info))
" package: {}".format(channeldata.id, error_info))
else:
resp.error_info = channeldata.error_info
# Product level errors
error_info = channeldata.error_info
if error_code == ChannelDataErrcode.PRODUCT_ERROR.value:
#rewrite error_code when product errors occured
error_code = channeldata.prod_error_code
error_info = channeldata.prod_error_info
# pack results
if error_code is None:
error_code = 0
resp.err_no = error_code
resp.err_msg = error_info
return resp
class VirtualOp(Op):
''' For connecting two channels. '''
"""
To connect 2 ops across levels in dag view, we create virtual ops
between non-virtual ops, and transfer data only. For examples,
the pred ops of F are D & E.In the process of building DAG, we will
create channels layer by layer according to dag views.Op F is not
in the next layer view of [B, E], so we will create a virtual OP
'V1' whose pred OP is E. And so on, we create two virtual op 'V2'
and 'V3', Finally, we find the non-virtual op F. we create 4 channels
among E, V1, V2, V3 and F, the producer of V1, V2, V3 and F is E.
DAG: [A -> B -> C -> D -> F]
\-> E ----------/
DAG view: [[A], [B, E], [C], [D], [F]]
BUILD DAG: [A -> B -> C -> D -> E -> F]
\-> E -> V1-> V2-> V3/
"""
def __init__(self, name, concurrency=1):
super(VirtualOp, self).__init__(
......@@ -930,9 +1395,27 @@ class VirtualOp(Op):
self._virtual_pred_ops = []
def add_virtual_pred_op(self, op):
"""
Add the front op of current vritual op.
Args:
op: one op object, may be a virtual op or not.
Returns:
None
"""
self._virtual_pred_ops.append(op)
def _actual_pred_op_names(self, op):
"""
Recursively find the front op which is a non-virtual op.
Args:
op: one op object
Returns:
names: the name of non-virtual pred ops.
"""
# can use disjoint-set, but it's not necessary
if not isinstance(op, VirtualOp):
return [op.name]
......@@ -942,6 +1425,15 @@ class VirtualOp(Op):
return names
def add_output_channel(self, channel):
"""
Adding the output channel of non-virtual pred ops.
Args:
channel: one channel.
Returns:
None.
"""
if not isinstance(channel, (ThreadChannel, ProcessChannel)):
_LOGGER.critical(
self._log("Failed to add output_channel: output_channel"
......@@ -955,6 +1447,20 @@ class VirtualOp(Op):
def _run(self, concurrency_idx, input_channel, output_channels, client_type,
is_thread_op):
"""
The target function _run() only transfers data between OPs in one thread
or process.
Args:
concurrency_idx: process id, not avaliable in thread mode.
input_channel: input channel
output_channels: output channels
client_type: no use
is_thread_op: True, thread mode; False, process mode
Returns:
None
"""
op_info_prefix = "[{}|{}]".format(self.name, concurrency_idx)
log = get_log_func(op_info_prefix)
tid = threading.current_thread().ident
......
......@@ -18,14 +18,20 @@ import numpy as np
from numpy import *
import logging
import functools
from .channel import ChannelDataEcode
import json
import socket
from .channel import ChannelDataErrcode
from .proto import pipeline_service_pb2
from .proto import pipeline_service_pb2_grpc
import six
_LOGGER = logging.getLogger(__name__)
class PipelineClient(object):
"""
PipelineClient provides the basic capabilities of the pipeline SDK
"""
def __init__(self):
self._channel = None
self._profile_key = "pipeline.profile"
......@@ -42,6 +48,26 @@ class PipelineClient(object):
def _pack_request_package(self, feed_dict, profile):
req = pipeline_service_pb2.Request()
logid = feed_dict.get("logid")
if logid is None:
req.logid = 0
else:
if six.PY2:
req.logid = long(logid)
elif six.PY3:
req.logid = int(log_id)
feed_dict.pop("logid")
clientip = feed_dict.get("clientip")
if clientip is None:
hostname = socket.gethostname()
ip = socket.gethostbyname(hostname)
req.clientip = ip
else:
req.clientip = clientip
feed_dict.pop("clientip")
np.set_printoptions(threshold=sys.maxsize)
for key, value in feed_dict.items():
req.key.append(key)
......@@ -60,29 +86,7 @@ class PipelineClient(object):
return req
def _unpack_response_package(self, resp, fetch):
if resp.ecode != 0:
return {
"ecode": resp.ecode,
"ecode_desc": ChannelDataEcode(resp.ecode),
"error_info": resp.error_info,
}
fetch_map = {"ecode": resp.ecode}
for idx, key in enumerate(resp.key):
if key == self._profile_key:
if resp.value[idx] != "":
sys.stderr.write(resp.value[idx])
continue
if fetch is not None and key not in fetch:
continue
data = resp.value[idx]
try:
evaled_data = eval(data)
if isinstance(evaled_data, np.ndarray):
data = evaled_data
except Exception as e:
pass
fetch_map[key] = data
return fetch_map
return resp
def predict(self, feed_dict, fetch=None, asyn=False, profile=False):
if not isinstance(feed_dict, dict):
......
......@@ -32,6 +32,10 @@ _LOGGER = logging.getLogger(__name__)
class PipelineServicer(pipeline_service_pb2_grpc.PipelineServiceServicer):
"""
Pipeline Servicer entrance.
"""
def __init__(self, name, response_op, dag_conf, worker_idx=-1):
super(PipelineServicer, self).__init__()
self._name = name
......@@ -42,10 +46,16 @@ class PipelineServicer(pipeline_service_pb2_grpc.PipelineServiceServicer):
_LOGGER.info("[PipelineServicer] succ init")
def inference(self, request, context):
_LOGGER.info("(log_id={}) inference request name:{} self.name:{}".
format(request.logid, request.name, self._name))
if request.name != "" and request.name != self._name:
_LOGGER.error("(log_id={}) name dismatch error. request.name:{},"
"server.name={}".format(request.logid, request.name,
self._name))
resp = pipeline_service_pb2.Response()
resp.ecode = channel.ChannelDataEcode.NO_SERVICE.value
resp.error_info = "Failed to inference: Service name error."
resp.err_no = channel.ChannelDataErrcode.NO_SERVICE.value
resp.err_msg = "Failed to inference: Service name error."
resp.result = ""
return resp
resp = self._dag_executor.call(request)
return resp
......@@ -53,7 +63,9 @@ class PipelineServicer(pipeline_service_pb2_grpc.PipelineServiceServicer):
@contextlib.contextmanager
def _reserve_port(port):
"""Find and reserve a port for all subprocesses to use."""
"""
Find and reserve a port for all subprocesses to use.
"""
sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT, 1)
if sock.getsockopt(socket.SOL_SOCKET, socket.SO_REUSEPORT) == 0:
......@@ -66,6 +78,10 @@ def _reserve_port(port):
class PipelineServer(object):
"""
Pipeline Server : grpc gateway + grpc server.
"""
def __init__(self, name=None):
self._name = name # for grpc-gateway path
self._rpc_port = None
......@@ -74,6 +90,16 @@ class PipelineServer(object):
self._proxy_server = None
def _grpc_gateway(self, grpc_port, http_port):
"""
Running a gateway server, linking libproxy_server.so
Args:
grpc_port: GRPC port
http_port: HTTP port
Returns:
None
"""
import os
from ctypes import cdll
from . import gateway
......@@ -83,6 +109,17 @@ class PipelineServer(object):
proxy_server.run_proxy_server(grpc_port, http_port)
def _run_grpc_gateway(self, grpc_port, http_port):
"""
Starting the GRPC gateway in a new process. Exposing one
available HTTP port outside, and reflecting the data to RPC port.
Args:
grpc_port: GRPC port
http_port: HTTP port
Returns:
None
"""
if http_port <= 0:
_LOGGER.info("Ignore grpc_gateway configuration.")
return
......@@ -99,6 +136,15 @@ class PipelineServer(object):
self._proxy_server.start()
def set_response_op(self, response_op):
"""
Set the response OP.
Args:
response_op: ResponseOp or its subclass object
Returns:
None
"""
if not isinstance(response_op, operator.ResponseOp):
raise Exception("Failed to set response_op: response_op "
"must be ResponseOp type.")
......@@ -109,6 +155,17 @@ class PipelineServer(object):
self._used_op, _ = dag.DAG.get_use_ops(self._response_op)
def prepare_server(self, yml_file=None, yml_dict=None):
"""
Reading configures from the yml file(config.yaml), and launching
local services.
Args:
yml_file: Reading configures from yaml files
yml_dict: Reading configures from yaml dict.
Returns:
None
"""
conf = ServerYamlConfChecker.load_server_yaml_conf(
yml_file=yml_file, yml_dict=yml_dict)
......@@ -158,6 +215,15 @@ class PipelineServer(object):
self._start_local_rpc_service()
def _init_ops(self, op_conf):
"""
Initializing all OPs from dicetory.
Args:
op_conf: the op configures in yaml dict.
Returns:
None.
"""
default_conf = {
"concurrency": 1,
"timeout": -1,
......@@ -187,12 +253,22 @@ class PipelineServer(object):
op.launch_local_rpc_service()
def run_server(self):
"""
If _build_dag_each_worker is True, Starting _worker_num processes and
running one GRPC server in each process. Otherwise, Staring one GRPC
server.
Args:
None
Returns:
None
"""
if self._build_dag_each_worker:
with _reserve_port(self._rpc_port) as port:
bind_address = 'localhost:{}'.format(port)
workers = []
for i in range(self._worker_num):
show_info = (i == 0)
worker = multiprocessing.Process(
target=self._run_server_func,
args=(bind_address, self._response_op, self._conf, i))
......@@ -220,6 +296,15 @@ class PipelineServer(object):
server.wait_for_termination()
def _run_server_func(self, bind_address, response_op, dag_conf, worker_idx):
"""
Running one GRPC server with PipelineServicer.
Args:
bind_address: binding IP/Port
response_op: ResponseOp or its subclass object
dag_conf: DAG config
worker_idx: Process index.
"""
options = [('grpc.so_reuseport', 1),
('grpc.max_send_message_length', 256 * 1024 * 1024),
('grpc.max_send_message_length', 256 * 1024 * 1024)]
......@@ -235,6 +320,10 @@ class PipelineServer(object):
class ServerYamlConfChecker(object):
"""
Checking validities of server yaml files.
"""
def __init__(self):
pass
......
......@@ -19,13 +19,16 @@ message Request {
repeated string key = 1;
repeated string value = 2;
optional string name = 3;
optional string method = 4;
optional int64 logid = 5;
optional string clientip = 6;
};
message Response {
repeated string key = 1;
repeated string value = 2;
required int32 ecode = 3;
optional string error_info = 4;
optional int32 err_no = 1;
optional string err_msg = 2;
repeated string key = 3;
repeated string value = 4;
};
service PipelineService {
......
......@@ -32,8 +32,8 @@ if '${PACK}' == 'ON':
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'sentencepiece', 'opencv-python<=4.2.0.32', 'pillow',
'shapely<=1.6.1', 'pyclipper'
'six >= 1.10.0', 'sentencepiece<=0.1.92', 'opencv-python<=4.2.0.32', 'pillow',
'pyclipper'
]
packages=['paddle_serving_app',
......
......@@ -28,19 +28,11 @@ import util
py_version = sys.version_info
def copy_lib():
if py_version[0] == 2:
lib_list = ['libpython2.7.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
elif py_version[1] == 5:
lib_list = ['libpython3.5m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
elif py_version[1] == 6:
lib_list = ['libpython3.6m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
elif py_version[1] == 7:
lib_list = ['libpython3.7m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
os.popen('mkdir -p paddle_serving_client/lib')
lib_list = ['${OPENSSL_CRYPTO_LIBRARY}', '${OPENSSL_SSL_LIBRARY}',
'${PYTHON_LIBRARY}']
for lib in lib_list:
r = os.popen('which {}'.format(lib))
text = r.read()
os.popen('cp {} ./paddle_serving_client/lib'.format(text.strip()))
os.popen('cp {} ./paddle_serving_client/lib'.format(lib))
max_version, mid_version, min_version = util.python_version()
......@@ -55,9 +47,6 @@ REQUIRED_PACKAGES = [
'grpcio-tools >= 1.28.1'
]
if not util.find_package("paddlepaddle") and not util.find_package("paddlepaddle-gpu"):
REQUIRED_PACKAGES.append("paddlepaddle")
packages=['paddle_serving_client',
'paddle_serving_client.proto',
......
......@@ -29,7 +29,7 @@ util.gen_pipeline_code("paddle_serving_server")
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app'
'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app', 'func_timeout', 'pyyaml'
]
packages=['paddle_serving_server',
......
......@@ -19,17 +19,19 @@ from __future__ import print_function
from setuptools import setup, Distribution, Extension
from setuptools import find_packages
from setuptools import setup
from paddle_serving_server_gpu.version import serving_server_version
from paddle_serving_server_gpu.version import serving_server_version, cuda_version
import util
max_version, mid_version, min_version = util.python_version()
if cuda_version != "trt":
cuda_version = "post" + cuda_version
max_version, mid_version, min_version = util.python_version()
# gen pipeline proto code
util.gen_pipeline_code("paddle_serving_server_gpu")
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app'
'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app', 'func_timeout', 'pyyaml'
]
packages=['paddle_serving_server_gpu',
......@@ -56,7 +58,7 @@ package_data={'paddle_serving_server_gpu': ['pipeline/gateway/libproxy_server.so
setup(
name='paddle-serving-server-gpu',
version=serving_server_version.replace('-', '') + '.post@CUDA_VERSION_MAJOR@',
version=serving_server_version.replace('-', '') + "." + cuda_version,
description=
('Paddle Serving Package for saved model with PaddlePaddle'),
url='https://github.com/PaddlePaddle/Serving',
......
......@@ -44,8 +44,8 @@ def gen_pipeline_code(package_name):
ret = os.system(
"cd {}/pipeline/gateway/proto/ && "
"../../../../../third_party/install/protobuf/bin/protoc -I. "
"-I$GOPATH/src "
"-I$GOPATH/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis "
"-I$GOPATH/pkg/mod "
"-I$GOPATH/pkg/mod/github.com/grpc-ecosystem/grpc-gateway\@v1.15.2/third_party/googleapis "
"--go_out=plugins=grpc:. "
"gateway.proto".format(package_name))
if ret != 0:
......@@ -54,14 +54,18 @@ def gen_pipeline_code(package_name):
ret = os.system(
"cd {}/pipeline/gateway/proto/ && "
"../../../../../third_party/install/protobuf/bin/protoc -I. "
"-I$GOPATH/src "
"-I$GOPATH/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis "
"-I$GOPATH/pkg/mod "
"-I$GOPATH/pkg/mod/github.com/grpc-ecosystem/grpc-gateway\@v1.15.2/third_party/googleapis "
"--grpc-gateway_out=logtostderr=true:. "
"gateway.proto".format(package_name))
if ret != 0:
exit(1)
# pipeline grpc-gateway shared-lib
ret = os.system("cd {}/pipeline/gateway/ && go mod init serving-gateway".
format(package_name))
ret = os.system("cd {}/pipeline/gateway/ && go mod vendor && go mod tidy".
format(package_name))
ret = os.system(
"cd {}/pipeline/gateway && "
"go build -buildmode=c-shared -o libproxy_server.so proxy_server.go".
......
sphinx==2.1.0
mistune
sphinx_rtd_theme
paddlepaddle>=1.6
paddlepaddle>=1.8.4
shapely
......@@ -41,6 +41,12 @@ RUN yum -y install wget && \
echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \
wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
tar zxf protobuf-all-3.11.2.tar.gz && \
cd protobuf-3.11.2 && \
./configure && make -j4 && make install && \
make clean && \
cd .. && rm -rf protobuf-* &&\
yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all && \
echo "export LANG=en_US.utf8" >> /root/.bashrc && \
......
......@@ -41,6 +41,12 @@ RUN yum -y install wget && \
echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \
wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
tar zxf protobuf-all-3.11.2.tar.gz && \
cd protobuf-3.11.2 && \
./configure && make -j4 && make install && \
make clean && \
cd .. && rm -rf protobuf-* && \
yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all && \
localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
......
......@@ -34,6 +34,13 @@ RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2
&& cd .. \
&& rm -rf patchelf-0.10*
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
tar zxf protobuf-all-3.11.2.tar.gz && \
cd protobuf-3.11.2 && \
./configure && make -j4 && make install && \
make clean && \
cd .. && rm -rf protobuf-*
RUN yum install -y python3 python3-devel
RUN yum -y update >/dev/null \
......
......@@ -5,7 +5,14 @@ RUN yum -y install wget >/dev/null \
&& yum -y install git openssl-devel curl-devel bzip2-devel python-devel \
&& yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false \
&& yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
&& yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
&& yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
tar zxf protobuf-all-3.11.2.tar.gz && \
cd protobuf-3.11.2 && \
./configure && make -j4 && make install && \
make clean && \
cd .. && rm -rf protobuf-*
RUN wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
&& tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
......@@ -32,3 +39,5 @@ RUN yum install -y python3 python3-devel \
RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
&& echo "export LANG=en_US.utf8" >> /root/.bashrc \
&& echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
FROM nvidia/cuda:10.1-cudnn7-devel-centos7
RUN export http_proxy="http://172.19.56.199:3128" \
&& export https_proxy="http://172.19.56.199:3128" \
&& yum -y install wget >/dev/null \
&& yum -y install gcc gcc-c++ make glibc-static which \
&& yum -y install git openssl-devel curl-devel bzip2-devel python-devel \
&& yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false \
&& yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
&& yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
RUN export http_proxy="http://172.19.56.199:3128" \
&& export https_proxy="http://172.19.56.199:3128" && \
wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
tar zxf protobuf-all-3.11.2.tar.gz && \
cd protobuf-3.11.2 && \
./configure && make -j4 && make install && \
make clean && \
cd .. && rm -rf protobuf-*
RUN export http_proxy="http://172.19.56.199:3128" \
&& export https_proxy="http://172.19.56.199:3128" && \
wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
&& tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
&& mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \
&& echo 'export PATH=/usr/local/cmake3.2.0/bin:$PATH' >> /root/.bashrc \
&& rm cmake-3.2.0-Linux-x86_64.tar.gz
RUN export http_proxy="http://172.19.56.199:3128" \
&& export https_proxy="http://172.19.56.199:3128" && \
wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
&& tar xzf go1.14.linux-amd64.tar.gz \
&& mv go /usr/local/go \
&& echo 'export GOROOT=/usr/local/go' >> /root/.bashrc \
&& echo 'export PATH=/usr/local/go/bin:$PATH' >> /root/.bashrc \
&& rm go1.14.linux-amd64.tar.gz
RUN export http_proxy="http://172.19.56.199:3128" \
&& export https_proxy="http://172.19.56.199:3128" && \
yum -y install python-devel sqlite-devel \
&& curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
&& python get-pip.py >/dev/null \
&& rm get-pip.py
RUN export http_proxy="http://172.19.56.199:3128" \
&& export https_proxy="http://172.19.56.199:3128" && \
yum install -y python3 python3-devel \
&& yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
&& yum clean all
RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
&& echo "export LANG=en_US.utf8" >> /root/.bashrc \
&& echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
RUN wget https://paddle-serving.bj.bcebos.com/tools/TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
&& tar -xzf TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
&& mv TensorRT-6.0.1.5 /usr/local/ \
&& rm TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
&& echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/TensorRT-6.0.1.5/lib/' >> /root/.bashrc
......@@ -6,6 +6,13 @@ RUN yum -y install wget >/dev/null \
&& yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
&& yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
tar zxf protobuf-all-3.11.2.tar.gz && \
cd protobuf-3.11.2 && \
./configure && make -j4 && make install && \
make clean && \
cd .. && rm -rf protobuf-*
RUN wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
&& tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
&& mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \
......
......@@ -18,14 +18,20 @@ function init() {
export PYTHONROOT=/usr
cd Serving
export SERVING_WORKDIR=$PWD
$PYTHONROOT/bin/python -m pip install -r python/requirements.txt
$PYTHONROOT/bin/python -m pip install paddlepaddle
export GOPATH=$HOME/go
export PATH=$PATH:$GOPATH/bin
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
go get -u github.com/golang/protobuf/protoc-gen-go
go get -u google.golang.org/grpc
go env -w GO111MODULE=on
go env -w GOPROXY=https://goproxy.cn,direct
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
go get -u google.golang.org/grpc@v1.33.0
}
function check_cmd() {
......@@ -605,7 +611,7 @@ function python_test_grpc_impl() {
# test load server config and client config in Server side
cd criteo_ctr_with_cube # pwd: /Serving/python/examples/grpc_impl_example/criteo_ctr_with_cube
<<COMMENT #comment for compile bug, todo fix conflict between grpc-gateway and cube-agent
check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz > /dev/null"
check_cmd "tar xf ctr_cube_unittest.tar.gz"
check_cmd "mv models/ctr_client_conf ./"
......@@ -626,9 +632,11 @@ function python_test_grpc_impl() {
echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.67"
exit 1
fi
COMMENT
echo "grpc impl test success"
kill_server_process
ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
#ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
cd .. # pwd: /Serving/python/examples/grpc_impl_example
;;
......@@ -665,6 +673,7 @@ function python_test_grpc_impl() {
cd .. # pwd: /Serving/python/examples/grpc_impl_example
# test load server config and client config in Server side
<<COMMENT #comment for compile bug, todo fix conflict between grpc-gateway and cube-agent
cd criteo_ctr_with_cube # pwd: /Serving/python/examples/grpc_impl_example/criteo_ctr_with_cube
check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz"
......@@ -689,10 +698,11 @@ function python_test_grpc_impl() {
echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.67"
exit 1
fi
COMMENT
echo "grpc impl test success"
kill_server_process
ps -ef | grep "test_server_gpu" | grep -v serving_build | grep -v grep | awk '{print $2}' | xargs kill
ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
#ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
cd .. # pwd: /Serving/python/examples/grpc_impl_example
;;
*)
......@@ -829,8 +839,8 @@ EOF
kill_process_by_port 18080
# test: process servicer & thread op
pip uninstall grpcio -y
pip install grpcio --no-binary=grpcio
#pip uninstall grpcio -y
#pip install grpcio --no-binary=grpcio
cat << EOF > config.yml
rpc_port: 18080
worker_num: 4
......@@ -944,7 +954,7 @@ function python_run_test() {
local TYPE=$1 # pwd: /Serving
cd python/examples # pwd: /Serving/python/examples
python_test_fit_a_line $TYPE # pwd: /Serving/python/examples
python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
#python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
python_test_bert $TYPE # pwd: /Serving/python/examples
python_test_imdb $TYPE # pwd: /Serving/python/examples
python_test_lac $TYPE # pwd: /Serving/python/examples
......@@ -953,7 +963,7 @@ function python_run_test() {
python_test_yolov4 $TYPE # pwd: /Serving/python/examples
python_test_grpc_impl $TYPE # pwd: /Serving/python/examples
python_test_resnet50 $TYPE # pwd: /Serving/python/examples
python_test_pipeline $TYPE # pwd: /Serving/python/examples
#python_test_pipeline $TYPE # pwd: /Serving/python/examples
echo "test python $TYPE part finished as expected."
cd ../.. # pwd: /Serving
}
......@@ -1098,7 +1108,7 @@ function main() {
build_app $TYPE # pwd: /Serving
java_run_test $TYPE # pwd: /Serving
python_run_test $TYPE # pwd: /Serving
monitor_test $TYPE # pwd: /Serving
#monitor_test $TYPE # pwd: /Serving
echo "serving $TYPE part finished as expected."
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册