diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index a19400bfda735e4205551c2caaba0e78fafc6ff1..c72a5cac52ccf1c03a0c132083e3ac43c83fb868 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -143,7 +143,6 @@ function(grpc_protobuf_generate_python SRCS) set(${SRCS} ${${SRCS}} PARENT_SCOPE) endfunction() - # Print and set the protobuf library information, # finish this cmake process and exit from this file. macro(PROMPT_PROTOBUF_LIB) diff --git a/core/configure/CMakeLists.txt b/core/configure/CMakeLists.txt index c3b0be5142896f87868cdd7c13686b87f03c573a..9d9487dc9e2513388b70d03e5ac1d875079d95f4 100644 --- a/core/configure/CMakeLists.txt +++ b/core/configure/CMakeLists.txt @@ -86,6 +86,7 @@ add_custom_command(TARGET general_model_config_py_proto POST_BUILD COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto." WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) + add_custom_command(TARGET multi_lang_general_model_service_py_proto POST_BUILD COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto diff --git a/core/configure/proto/multi_lang_general_model_service.proto b/core/configure/proto/multi_lang_general_model_service.proto index 6e1764b23b3e6f7d9eb9a33925bcd83cfb1810bb..2a8a8bc1532c19aa02a1998aa751aa7ba9d41570 100644 --- a/core/configure/proto/multi_lang_general_model_service.proto +++ b/core/configure/proto/multi_lang_general_model_service.proto @@ -28,16 +28,17 @@ message FeedInst { repeated Tensor tensor_array = 1; }; message FetchInst { repeated Tensor tensor_array = 1; }; -message Request { +message InferenceRequest { repeated FeedInst insts = 1; repeated string feed_var_names = 2; repeated string fetch_var_names = 3; required bool is_python = 4 [ default = false ]; }; -message Response { +message InferenceResponse { repeated ModelOutput outputs = 1; optional string tag = 2; + required int32 err_code = 3; }; message ModelOutput { @@ -45,6 +46,17 @@ message ModelOutput { optional string engine_name = 2; } +message SetTimeoutRequest { required int32 timeout_ms = 1; } + +message SimpleResponse { required int32 err_code = 1; } + +message GetClientConfigRequest {} + +message GetClientConfigResponse { required string client_config_str = 1; } + service MultiLangGeneralModelService { - rpc inference(Request) returns (Response) {} + rpc Inference(InferenceRequest) returns (InferenceResponse) {} + rpc SetTimeout(SetTimeoutRequest) returns (SimpleResponse) {} + rpc GetClientConfig(GetClientConfigRequest) + returns (GetClientConfigResponse) {} }; diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h index b379188854c30587d24962bc827aa099c3a39183..b5d27df5edbaf9278ecb8614e282d104347206f8 100644 --- a/core/general-client/include/general_model.h +++ b/core/general-client/include/general_model.h @@ -49,6 +49,8 @@ class ModelRes { res._int64_value_map.end()); _float_value_map.insert(res._float_value_map.begin(), res._float_value_map.end()); + _int32_value_map.insert(res._int32_value_map.begin(), + res._int32_value_map.end()); _shape_map.insert(res._shape_map.begin(), res._shape_map.end()); _lod_map.insert(res._lod_map.begin(), res._lod_map.end()); } @@ -60,6 +62,9 @@ class ModelRes { _float_value_map.insert( 
std::make_move_iterator(std::begin(res._float_value_map)), std::make_move_iterator(std::end(res._float_value_map))); + _int32_value_map.insert( + std::make_move_iterator(std::begin(res._int32_value_map)), + std::make_move_iterator(std::end(res._int32_value_map))); _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)), std::make_move_iterator(std::end(res._shape_map))); _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)), @@ -78,6 +83,12 @@ class ModelRes { std::vector&& get_float_by_name_with_rv(const std::string& name) { return std::move(_float_value_map[name]); } + const std::vector& get_int32_by_name(const std::string& name) { + return _int32_value_map[name]; + } + std::vector&& get_int32_by_name_with_rv(const std::string& name) { + return std::move(_int32_value_map[name]); + } const std::vector& get_shape_by_name(const std::string& name) { return _shape_map[name]; } @@ -103,6 +114,9 @@ class ModelRes { _float_value_map.insert( std::make_move_iterator(std::begin(res._float_value_map)), std::make_move_iterator(std::end(res._float_value_map))); + _int32_value_map.insert( + std::make_move_iterator(std::begin(res._int32_value_map)), + std::make_move_iterator(std::end(res._int32_value_map))); _shape_map.insert(std::make_move_iterator(std::begin(res._shape_map)), std::make_move_iterator(std::end(res._shape_map))); _lod_map.insert(std::make_move_iterator(std::begin(res._lod_map)), @@ -115,6 +129,7 @@ class ModelRes { std::string _engine_name; std::map> _int64_value_map; std::map> _float_value_map; + std::map> _int32_value_map; std::map> _shape_map; std::map> _lod_map; }; @@ -145,6 +160,14 @@ class PredictorRes { const std::string& name) { return std::move(_models[model_idx].get_float_by_name_with_rv(name)); } + const std::vector& get_int32_by_name(const int model_idx, + const std::string& name) { + return _models[model_idx].get_int32_by_name(name); + } + std::vector&& get_int32_by_name_with_rv(const int model_idx, + const std::string& name) { + return std::move(_models[model_idx].get_int32_by_name_with_rv(name)); + } const std::vector& get_shape_by_name(const int model_idx, const std::string& name) { return _models[model_idx].get_shape_by_name(name); diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp index 613abf9233610d170bce4386798662f78887edf7..9f709c71045577f7b043777a7ad1528a0e2ccc28 100644 --- a/core/general-client/src/general_model.cpp +++ b/core/general-client/src/general_model.cpp @@ -207,17 +207,28 @@ int PredictorClient::batch_predict( for (auto &name : int_feed_name) { int idx = _feed_name_to_idx[name]; Tensor *tensor = tensor_vec[idx]; - VLOG(2) << "prepare int feed " << name << " shape size " - << int_shape[vec_idx].size(); + if (_type[idx] == 0) { + VLOG(2) << "prepare int64 feed " << name << " shape size " + << int_shape[vec_idx].size(); + VLOG(3) << "feed var name " << name << " index " << vec_idx + << "first data " << int_feed[vec_idx][0]; + for (uint32_t j = 0; j < int_feed[vec_idx].size(); ++j) { + tensor->add_int64_data(int_feed[vec_idx][j]); + } + } else if (_type[idx] == 2) { + VLOG(2) << "prepare int32 feed " << name << " shape size " + << int_shape[vec_idx].size(); + VLOG(3) << "feed var name " << name << " index " << vec_idx + << "first data " << int32_t(int_feed[vec_idx][0]); + for (uint32_t j = 0; j < int_feed[vec_idx].size(); ++j) { + tensor->add_int_data(int32_t(int_feed[vec_idx][j])); + } + } + for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) { 
tensor->add_shape(int_shape[vec_idx][j]); } - tensor->set_elem_type(0); - VLOG(3) << "feed var name " << name << " index " << vec_idx - << "first data " << int_feed[vec_idx][0]; - for (uint32_t j = 0; j < int_feed[vec_idx].size(); ++j) { - tensor->add_int64_data(int_feed[vec_idx][j]); - } + tensor->set_elem_type(_type[idx]); vec_idx++; } @@ -284,18 +295,25 @@ int PredictorClient::batch_predict( for (auto &name : fetch_name) { // int idx = _fetch_name_to_idx[name]; if (_fetch_name_to_type[name] == 0) { - VLOG(2) << "ferch var " << name << "type int"; + VLOG(2) << "ferch var " << name << "type int64"; int size = output.insts(0).tensor_array(idx).int64_data_size(); model._int64_value_map[name] = std::vector( output.insts(0).tensor_array(idx).int64_data().begin(), output.insts(0).tensor_array(idx).int64_data().begin() + size); - } else { + } else if (_fetch_name_to_type[name] == 1) { VLOG(2) << "fetch var " << name << "type float"; int size = output.insts(0).tensor_array(idx).float_data_size(); model._float_value_map[name] = std::vector( output.insts(0).tensor_array(idx).float_data().begin(), output.insts(0).tensor_array(idx).float_data().begin() + size); + } else if (_fetch_name_to_type[name] == 2) { + VLOG(2) << "fetch var " << name << "type int32"; + int size = output.insts(0).tensor_array(idx).int_data_size(); + model._int32_value_map[name] = std::vector( + output.insts(0).tensor_array(idx).int_data().begin(), + output.insts(0).tensor_array(idx).int_data().begin() + size); } + idx += 1; } predict_res_batch.add_model_res(std::move(model)); @@ -442,12 +460,19 @@ int PredictorClient::numpy_predict( for (auto &name : int_feed_name) { int idx = _feed_name_to_idx[name]; Tensor *tensor = tensor_vec[idx]; - VLOG(2) << "prepare int feed " << name << " shape size " - << int_shape[vec_idx].size(); + for (uint32_t j = 0; j < int_shape[vec_idx].size(); ++j) { tensor->add_shape(int_shape[vec_idx][j]); } - tensor->set_elem_type(0); + tensor->set_elem_type(_type[idx]); + + if (_type[idx] == 0) { + VLOG(2) << "prepare int feed " << name << " shape size " + << int_shape[vec_idx].size(); + } else { + VLOG(2) << "prepare int32 feed " << name << " shape size " + << int_shape[vec_idx].size(); + } const int int_shape_size = int_shape[vec_idx].size(); switch (int_shape_size) { @@ -457,7 +482,11 @@ int PredictorClient::numpy_predict( for (ssize_t j = 0; j < int_array.shape(1); j++) { for (ssize_t k = 0; k < int_array.shape(2); k++) { for (ssize_t l = 0; k < int_array.shape(3); l++) { - tensor->add_int64_data(int_array(i, j, k, l)); + if (_type[idx] == 0) { + tensor->add_int64_data(int_array(i, j, k, l)); + } else { + tensor->add_int_data(int_array(i, j, k, l)); + } } } } @@ -469,7 +498,11 @@ int PredictorClient::numpy_predict( for (ssize_t i = 0; i < int_array.shape(0); i++) { for (ssize_t j = 0; j < int_array.shape(1); j++) { for (ssize_t k = 0; k < int_array.shape(2); k++) { - tensor->add_int64_data(int_array(i, j, k)); + if (_type[idx] == 0) { + tensor->add_int64_data(int_array(i, j, k)); + } else { + tensor->add_int_data(int_array(i, j, k)); + } } } } @@ -479,7 +512,11 @@ int PredictorClient::numpy_predict( auto int_array = int_feed[vec_idx].unchecked<2>(); for (ssize_t i = 0; i < int_array.shape(0); i++) { for (ssize_t j = 0; j < int_array.shape(1); j++) { - tensor->add_int64_data(int_array(i, j)); + if (_type[idx] == 0) { + tensor->add_int64_data(int_array(i, j)); + } else { + tensor->add_int_data(int_array(i, j)); + } } } break; @@ -487,7 +524,11 @@ int PredictorClient::numpy_predict( case 1: { auto 
int_array = int_feed[vec_idx].unchecked<1>(); for (ssize_t i = 0; i < int_array.shape(0); i++) { - tensor->add_int64_data(int_array(i)); + if (_type[idx] == 0) { + tensor->add_int64_data(int_array(i)); + } else { + tensor->add_int_data(int_array(i)); + } } break; } @@ -557,17 +598,23 @@ int PredictorClient::numpy_predict( for (auto &name : fetch_name) { // int idx = _fetch_name_to_idx[name]; if (_fetch_name_to_type[name] == 0) { - VLOG(2) << "ferch var " << name << "type int"; + VLOG(2) << "ferch var " << name << "type int64"; int size = output.insts(0).tensor_array(idx).int64_data_size(); model._int64_value_map[name] = std::vector( output.insts(0).tensor_array(idx).int64_data().begin(), output.insts(0).tensor_array(idx).int64_data().begin() + size); - } else { + } else if (_fetch_name_to_type[name] == 1) { VLOG(2) << "fetch var " << name << "type float"; int size = output.insts(0).tensor_array(idx).float_data_size(); model._float_value_map[name] = std::vector( output.insts(0).tensor_array(idx).float_data().begin(), output.insts(0).tensor_array(idx).float_data().begin() + size); + } else if (_fetch_name_to_type[name] == 2) { + VLOG(2) << "fetch var " << name << "type int32"; + int size = output.insts(0).tensor_array(idx).int_data_size(); + model._int32_value_map[name] = std::vector( + output.insts(0).tensor_array(idx).int_data().begin(), + output.insts(0).tensor_array(idx).int_data().begin() + size); } idx += 1; } @@ -601,7 +648,6 @@ int PredictorClient::numpy_predict( _api.thrd_clear(); return 0; } - } // namespace general_model } // namespace paddle_serving } // namespace baidu diff --git a/core/general-server/op/general_reader_op.cpp b/core/general-server/op/general_reader_op.cpp index 7d48949b22d0ace289ab3b9214f092819f5476e0..380f861606a7719a33407dd946c5ac476629fdb7 100644 --- a/core/general-server/op/general_reader_op.cpp +++ b/core/general-server/op/general_reader_op.cpp @@ -126,9 +126,12 @@ int GeneralReaderOp::inference() { if (elem_type[i] == 0) { // int64 elem_size[i] = sizeof(int64_t); lod_tensor.dtype = paddle::PaddleDType::INT64; - } else { + } else if (elem_type[i] == 1) { elem_size[i] = sizeof(float); lod_tensor.dtype = paddle::PaddleDType::FLOAT32; + } else if (elem_type[i] == 2) { + elem_size[i] = sizeof(int32_t); + lod_tensor.dtype = paddle::PaddleDType::INT32; } if (model_config->_is_lod_feed[i]) { @@ -159,8 +162,10 @@ int GeneralReaderOp::inference() { int data_len = 0; if (tensor.int64_data_size() > 0) { data_len = tensor.int64_data_size(); - } else { + } else if (tensor.float_data_size() > 0) { data_len = tensor.float_data_size(); + } else if (tensor.int_data_size() > 0) { + data_len = tensor.int_data_size(); } VLOG(2) << "tensor size for var[" << i << "]: " << data_len; tensor_size += data_len; @@ -198,6 +203,8 @@ int GeneralReaderOp::inference() { for (int i = 0; i < var_num; ++i) { if (elem_type[i] == 0) { int64_t *dst_ptr = static_cast(out->at(i).data.data()); + VLOG(2) << "first element data in var[" << i << "] is " + << req->insts(0).tensor_array(i).int64_data(0); int offset = 0; for (int j = 0; j < batch_size; ++j) { int elem_num = req->insts(j).tensor_array(i).int64_data_size(); @@ -210,8 +217,10 @@ int GeneralReaderOp::inference() { offset += capacity[i]; } } - } else { + } else if (elem_type[i] == 1) { float *dst_ptr = static_cast(out->at(i).data.data()); + VLOG(2) << "first element data in var[" << i << "] is " + << req->insts(0).tensor_array(i).float_data(0); int offset = 0; for (int j = 0; j < batch_size; ++j) { int elem_num = 
req->insts(j).tensor_array(i).float_data_size(); @@ -224,6 +233,22 @@ int GeneralReaderOp::inference() { offset += capacity[i]; } } + } else if (elem_type[i] == 2) { + int32_t *dst_ptr = static_cast(out->at(i).data.data()); + VLOG(2) << "first element data in var[" << i << "] is " + << req->insts(0).tensor_array(i).int_data(0); + int offset = 0; + for (int j = 0; j < batch_size; ++j) { + int elem_num = req->insts(j).tensor_array(i).int_data_size(); + for (int k = 0; k < elem_num; ++k) { + dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k); + } + if (out->at(i).lod.size() == 1) { + offset = out->at(i).lod[0][j + 1]; + } else { + offset += capacity[i]; + } + } } } diff --git a/core/general-server/op/general_response_op.cpp b/core/general-server/op/general_response_op.cpp index 5667a174d9bb6e134e58de72524c60839dc82356..935ef85d847cc17c2d5b76255de0936f0e08a890 100644 --- a/core/general-server/op/general_response_op.cpp +++ b/core/general-server/op/general_response_op.cpp @@ -91,7 +91,6 @@ int GeneralResponseOp::inference() { for (auto &idx : fetch_index) { Tensor *tensor = fetch_inst->add_tensor_array(); - tensor->set_elem_type(1); if (model_config->_is_lod_fetch[idx]) { VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx] << " is lod_tensor"; @@ -116,7 +115,7 @@ int GeneralResponseOp::inference() { cap *= in->at(idx).shape[j]; } if (in->at(idx).dtype == paddle::PaddleDType::INT64) { - VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx] + VLOG(2) << "Prepare int64 var [" << model_config->_fetch_name[idx] << "]."; int64_t *data_ptr = static_cast(in->at(idx).data.data()); if (model_config->_is_lod_fetch[idx]) { @@ -157,6 +156,27 @@ int GeneralResponseOp::inference() { } VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready"; var_idx++; + } else if (in->at(idx).dtype == paddle::PaddleDType::INT32) { + VLOG(2) << "Prepare int32 var [" << model_config->_fetch_name[idx] + << "]."; + int32_t *data_ptr = static_cast(in->at(idx).data.data()); + if (model_config->_is_lod_fetch[idx]) { + FetchInst *fetch_p = output->mutable_insts(0); + for (int j = 0; j < in->at(idx).lod[0].size(); ++j) { + fetch_p->mutable_tensor_array(var_idx)->add_lod( + in->at(idx).lod[0][j]); + } + for (int j = 0; j < cap; ++j) { + fetch_p->mutable_tensor_array(var_idx)->add_int_data(data_ptr[j]); + } + } else { + FetchInst *fetch_p = output->mutable_insts(0); + for (int j = 0; j < cap; ++j) { + fetch_p->mutable_tensor_array(var_idx)->add_int_data(data_ptr[j]); + } + } + VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready"; + var_idx++; } } } diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h index e8c0ff47d86f081516a35576655f843a28b0591b..51cfb95a8d56d4261b9dab99df5216c5e6c79733 100644 --- a/core/predictor/framework/infer.h +++ b/core/predictor/framework/infer.h @@ -603,13 +603,13 @@ class VersionedInferEngine : public InferEngine { LOG(ERROR) << "Failed generate engine with type:" << engine_type; return -1; } - VLOG(2) << "FLGS_logtostderr " << FLAGS_logtostderr; + VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr; int tmp = FLAGS_logtostderr; if (engine->proc_initialize(conf, version) != 0) { LOG(ERROR) << "Failed initialize engine, type:" << engine_type; return -1; } - VLOG(2) << "FLGS_logtostderr " << FLAGS_logtostderr; + VLOG(2) << "FLAGS_logtostderr " << FLAGS_logtostderr; FLAGS_logtostderr = tmp; auto r = _versions.insert(std::make_pair(engine->version(), engine)); if (!r.second) { diff --git 
a/core/predictor/tools/seq_generator.cpp b/core/predictor/tools/seq_generator.cpp index 135e25d6dd7ce44fa04f510f7d521b42998bc955..eb7e7ed7f9a609e0c21be9a2c3d686dd7d9a1abd 100644 --- a/core/predictor/tools/seq_generator.cpp +++ b/core/predictor/tools/seq_generator.cpp @@ -233,7 +233,7 @@ int compress_parameter_parallel(const char *file1, greedy_search( emb_table + k * emb_size, xmin, xmax, loss, emb_size, bits); // 得出 loss 最小的时候的 scale - float scale = (xmax - xmin) * (pow2bits - 1); + float scale = (xmax - xmin) / (pow2bits - 1); char *min_ptr = tensor_temp; char *max_ptr = tensor_temp + sizeof(float); memcpy(min_ptr, &xmin, sizeof(float)); diff --git a/doc/GRPC_IMPL_CN.md b/doc/GRPC_IMPL_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..7b10907caec98ae5754126a7ec54096cc4cd48af --- /dev/null +++ b/doc/GRPC_IMPL_CN.md @@ -0,0 +1,52 @@ +# gRPC接口 + +gRPC 接口实现形式类似 Web Service: + +![](grpc_impl.png) + +## 与bRPC接口对比 + +1. gRPC Server 端 `load_model_config` 函数添加 `client_config_path` 参数: + + ```python + def load_model_config(self, server_config_paths, client_config_path=None) + ``` + + 在一些例子中 bRPC Server 端与 bRPC Client 端的配置文件可能是不同的(如 cube local 例子中,Client 端的数据先交给 cube,经过 cube 处理后再交给预测库),所以 gRPC Server 端需要获取 gRPC Client 端的配置;同时为了取消 gRPC Client 端手动加载配置文件的过程,所以设计 gRPC Server 端同时加载两个配置文件。`client_config_path` 默认为 `/serving_server_conf.prototxt`。 + +2. gRPC Client 端取消 `load_client_config` 步骤: + + 在 `connect` 步骤通过 RPC 获取相应的 prototxt(从任意一个 endpoint 获取即可)。 + +3. gRPC Client 需要通过 RPC 方式设置 timeout 时间(调用形式与 bRPC Client保持一致) + + 因为 bRPC Client 在 `connect` 后无法更改 timeout 时间,所以当 gRPC Server 收到变更 timeout 的调用请求时会重新创建 bRPC Client 实例以变更 bRPC Client timeout时间,同时 gRPC Client 会设置 gRPC 的 deadline 时间。 + + **注意,设置 timeout 接口和 Inference 接口不能同时调用(非线程安全),出于性能考虑暂时不加锁。** + +4. gRPC Client 端 `predict` 函数添加 `asyn` 和 `is_python` 参数: + + ```python + def predict(self, feed, fetch, need_variant_tag=False, asyn=False, is_python=True) + ``` + + 其中,`asyn` 为异步调用选项。当 `asyn=True` 时为异步调用,返回 `MultiLangPredictFuture` 对象,通过 `MultiLangPredictFuture.result()` 阻塞获取预测值;当 `asyn=Fasle` 为同步调用。 + + `is_python` 为 proto 格式选项。当 `is_python=True` 时,基于 numpy bytes 格式进行数据传输,目前只适用于 Python;当 `is_python=False` 时,以普通数据格式传输,更加通用。使用 numpy bytes 格式传输耗时比普通数据格式小很多(详见 [#654](https://github.com/PaddlePaddle/Serving/pull/654))。 + +5. 异常处理:当 gRPC Server 端的 bRPC Client 预测失败(返回 `None`)时,gRPC Client 端同样返回None。其他 gRPC 异常会在 Client 内部捕获,并在返回的 fetch_map 中添加一个 "status_code" 字段来区分是否预测正常(参考 timeout 样例)。 + +6. 由于 gRPC 只支持 pick_first 和 round_robin 负载均衡策略,ABTEST 特性还未打齐。 + +7. 经测试,gRPC 版本可以在 Windows、macOS 平台使用。 + +8. 
计划支持的客户端语言: + + - [x] Python + - [ ] Java + - [ ] Go + - [ ] JavaScript + +## Python 端的一些例子 + +详见 `python/examples/grpc_impl_example` 下的示例文件。 diff --git a/doc/grpc_impl.png b/doc/grpc_impl.png new file mode 100644 index 0000000000000000000000000000000000000000..05b1a67e815efae5f4b7b81758444bff48cfe59d Binary files /dev/null and b/doc/grpc_impl.png differ diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 07699da458ab62ad1a5b9ece83547799d08f8cf7..098453a2da2411f5bb83cbdd248898e8879a3922 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,4 +1,7 @@ if (CLIENT) + file(INSTALL pipeline DESTINATION paddle_serving_client) + execute_process(COMMAND ${PYTHON_EXECUTABLE} run_codegen.py + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/paddle_serving_client/pipeline/proto) file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py) set(PY_FILES ${SERVING_CLIENT_PY_FILES}) SET(PACKAGE_NAME "serving_client") @@ -7,8 +10,14 @@ endif() if (SERVER) if (NOT WITH_GPU) + file(INSTALL pipeline DESTINATION paddle_serving_server) + execute_process(COMMAND ${PYTHON_EXECUTABLE} run_codegen.py + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/paddle_serving_server/pipeline/proto) file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py) else() + file(INSTALL pipeline DESTINATION paddle_serving_server_gpu) + execute_process(COMMAND ${PYTHON_EXECUTABLE} run_codegen.py + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/paddle_serving_server_gpu/pipeline/proto) file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server_gpu/*.py) endif() set(PY_FILES ${SERVING_SERVER_PY_FILES}) diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/README_CN.md b/python/examples/grpc_impl_example/criteo_ctr_with_cube/README_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..07fc1acc18903256c49d77e2af8e9c2d74b21c16 --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/README_CN.md @@ -0,0 +1,40 @@ +## 带稀疏参数索引服务的CTR预测服务 + +该样例是为了展示gRPC Server 端 `load_model_config` 函数,在这个例子中,bRPC Server 端与 bRPC Client 端的配置文件是不同的(bPRC Client 端的数据先交给 cube,经过 cube 处理后再交给预测库) + +### 获取样例数据 +``` +sh get_data.sh +``` + +### 下载模型和稀疏参数序列文件 +``` +wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz +tar xf ctr_cube_unittest.tar.gz +mv models/ctr_client_conf ./ +mv models/ctr_serving_model_kv ./ +mv models/data ./cube/ +``` +执行脚本后会在当前目录有ctr_server_model_kv和ctr_client_config文件夹。 + +### 启动稀疏参数索引服务 +``` +wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz +tar xf cube_app.tar.gz +mv cube_app/cube* ./cube/ +sh cube_prepare.sh & +``` + +此处,模型当中的稀疏参数会被存放在稀疏参数索引服务Cube当中,关于稀疏参数索引服务Cube的介绍,请阅读[稀疏参数索引服务Cube单机版使用指南](../../../doc/CUBE_LOCAL_CN.md) + +### 启动RPC预测服务,服务端线程数为4(可在test_server.py配置) + +``` +python test_server.py ctr_serving_model_kv ctr_client_conf/serving_client_conf.prototxt +``` + +### 执行预测 + +``` +python test_client.py ./raw_data +``` diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/args.py b/python/examples/grpc_impl_example/criteo_ctr_with_cube/args.py new file mode 100755 index 0000000000000000000000000000000000000000..30124d4ebd9cd27cdb4580e654a8a47c55b178bf --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/args.py @@ -0,0 +1,105 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing +import argparse + + +def parse_args(): + parser = argparse.ArgumentParser(description="PaddlePaddle CTR example") + parser.add_argument( + '--train_data_path', + type=str, + default='./data/raw/train.txt', + help="The path of training dataset") + parser.add_argument( + '--sparse_only', + type=bool, + default=False, + help="Whether we use sparse features only") + parser.add_argument( + '--test_data_path', + type=str, + default='./data/raw/valid.txt', + help="The path of testing dataset") + parser.add_argument( + '--batch_size', + type=int, + default=1000, + help="The size of mini-batch (default:1000)") + parser.add_argument( + '--embedding_size', + type=int, + default=10, + help="The size for embedding layer (default:10)") + parser.add_argument( + '--num_passes', + type=int, + default=10, + help="The number of passes to train (default: 10)") + parser.add_argument( + '--model_output_dir', + type=str, + default='models', + help='The path for model to store (default: models)') + parser.add_argument( + '--sparse_feature_dim', + type=int, + default=1000001, + help='sparse feature hashing space for index processing') + parser.add_argument( + '--is_local', + type=int, + default=1, + help='Local train or distributed train (default: 1)') + parser.add_argument( + '--cloud_train', + type=int, + default=0, + help='Local train or distributed train on paddlecloud (default: 0)') + parser.add_argument( + '--async_mode', + action='store_true', + default=False, + help='Whether start pserver in async mode to support ASGD') + parser.add_argument( + '--no_split_var', + action='store_true', + default=False, + help='Whether split variables into blocks when update_method is pserver') + parser.add_argument( + '--role', + type=str, + default='pserver', # trainer or pserver + help='The path for model to store (default: models)') + parser.add_argument( + '--endpoints', + type=str, + default='127.0.0.1:6000', + help='The pserver endpoints, like: 127.0.0.1:6000,127.0.0.1:6001') + parser.add_argument( + '--current_endpoint', + type=str, + default='127.0.0.1:6000', + help='The path for model to store (default: 127.0.0.1:6000)') + parser.add_argument( + '--trainer_id', + type=int, + default=0, + help='The path for model to store (default: models)') + parser.add_argument( + '--trainers', + type=int, + default=1, + help='The num of trianers, (default: 1)') + return parser.parse_args() diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/clean.sh b/python/examples/grpc_impl_example/criteo_ctr_with_cube/clean.sh new file mode 100755 index 0000000000000000000000000000000000000000..99a4819802178f1910c5fced7d4c5a39c3037e4a --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/clean.sh @@ -0,0 +1,4 @@ +ps -ef | grep cube | awk {'print $2'} | xargs kill -9 +rm -rf cube/cube_data cube/data cube/log* cube/nohup* cube/output/ cube/donefile cube/input cube/monitor cube/cube-builder.INFO +ps -ef | grep test | awk {'print $2'} | xargs kill -9 +ps -ef | grep serving | awk {'print $2'} | xargs kill -9 diff --git 
a/python/examples/grpc_impl_example/criteo_ctr_with_cube/criteo.py b/python/examples/grpc_impl_example/criteo_ctr_with_cube/criteo.py new file mode 100755 index 0000000000000000000000000000000000000000..f37eb1d2c1d8df6975ec0c28923c6e17c0272290 --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/criteo.py @@ -0,0 +1,81 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + + +class CriteoDataset(object): + def setup(self, sparse_feature_dim): + self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + self.cont_max_ = [ + 20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50 + ] + self.cont_diff_ = [ + 20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50 + ] + self.hash_dim_ = sparse_feature_dim + # here, training data are lines with line_index < train_idx_ + self.train_idx_ = 41256555 + self.continuous_range_ = range(1, 14) + self.categorical_range_ = range(14, 40) + + def _process_line(self, line): + features = line.rstrip('\n').split('\t') + dense_feature = [] + sparse_feature = [] + for idx in self.continuous_range_: + if features[idx] == '': + dense_feature.append(0.0) + else: + dense_feature.append((float(features[idx]) - self.cont_min_[idx - 1]) / \ + self.cont_diff_[idx - 1]) + for idx in self.categorical_range_: + sparse_feature.append( + [hash(str(idx) + features[idx]) % self.hash_dim_]) + + return dense_feature, sparse_feature, [int(features[0])] + + def infer_reader(self, filelist, batch, buf_size): + def local_iter(): + for fname in filelist: + with open(fname.strip(), "r") as fin: + for line in fin: + dense_feature, sparse_feature, label = self._process_line( + line) + #yield dense_feature, sparse_feature, label + yield [dense_feature] + sparse_feature + [label] + + import paddle + batch_iter = paddle.batch( + paddle.reader.shuffle( + local_iter, buf_size=buf_size), + batch_size=batch) + return batch_iter + + def generate_sample(self, line): + def data_iter(): + dense_feature, sparse_feature, label = self._process_line(line) + feature_name = ["dense_input"] + for idx in self.categorical_range_: + feature_name.append("C" + str(idx - 13)) + feature_name.append("label") + yield zip(feature_name, [dense_feature] + sparse_feature + [label]) + + return data_iter + + +if __name__ == "__main__": + criteo_dataset = CriteoDataset() + criteo_dataset.setup(int(sys.argv[1])) + criteo_dataset.run_from_stdin() diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/criteo_reader.py b/python/examples/grpc_impl_example/criteo_ctr_with_cube/criteo_reader.py new file mode 100755 index 0000000000000000000000000000000000000000..2a80af78a9c2033bf246f703ca70a817ab786af3 --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/criteo_reader.py @@ -0,0 +1,83 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +import sys +import paddle.fluid.incubate.data_generator as dg + + +class CriteoDataset(dg.MultiSlotDataGenerator): + def setup(self, sparse_feature_dim): + self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] + self.cont_max_ = [ + 20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50 + ] + self.cont_diff_ = [ + 20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50 + ] + self.hash_dim_ = sparse_feature_dim + # here, training data are lines with line_index < train_idx_ + self.train_idx_ = 41256555 + self.continuous_range_ = range(1, 14) + self.categorical_range_ = range(14, 40) + + def _process_line(self, line): + features = line.rstrip('\n').split('\t') + dense_feature = [] + sparse_feature = [] + for idx in self.continuous_range_: + if features[idx] == '': + dense_feature.append(0.0) + else: + dense_feature.append((float(features[idx]) - self.cont_min_[idx - 1]) / \ + self.cont_diff_[idx - 1]) + for idx in self.categorical_range_: + sparse_feature.append( + [hash(str(idx) + features[idx]) % self.hash_dim_]) + + return dense_feature, sparse_feature, [int(features[0])] + + def infer_reader(self, filelist, batch, buf_size): + def local_iter(): + for fname in filelist: + with open(fname.strip(), "r") as fin: + for line in fin: + dense_feature, sparse_feature, label = self._process_line( + line) + #yield dense_feature, sparse_feature, label + yield [dense_feature] + sparse_feature + [label] + + import paddle + batch_iter = paddle.batch( + paddle.reader.shuffle( + local_iter, buf_size=buf_size), + batch_size=batch) + return batch_iter + + def generate_sample(self, line): + def data_iter(): + dense_feature, sparse_feature, label = self._process_line(line) + feature_name = ["dense_input"] + for idx in self.categorical_range_: + feature_name.append("C" + str(idx - 13)) + feature_name.append("label") + yield zip(feature_name, [dense_feature] + sparse_feature + [label]) + + return data_iter + + +if __name__ == "__main__": + criteo_dataset = CriteoDataset() + criteo_dataset.setup(int(sys.argv[1])) + criteo_dataset.run_from_stdin() diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube/conf/cube.conf b/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube/conf/cube.conf new file mode 100755 index 0000000000000000000000000000000000000000..b70f6e34247e410f9b80054010338d3c8f452ec6 --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube/conf/cube.conf @@ -0,0 +1,13 @@ +[{ + "dict_name": "test_dict", + "shard": 1, + "dup": 1, + "timeout": 200, + "retry": 3, + "backup_request": 100, + "type": "ipport_list", + "load_balancer": "rr", + "nodes": [{ + "ipport_list": "list://127.0.0.1:8027" + }] +}] diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube/conf/gflags.conf b/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube/conf/gflags.conf new file mode 100755 index 0000000000000000000000000000000000000000..21c7bddebd8f22b91d0ba26a6121007f96a4380b --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube/conf/gflags.conf @@ -0,0 +1,4 @@ +--port=8027 
+--dict_split=1 +--in_mem=true +--log_dir=./log/ diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube/keys b/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube/keys new file mode 100755 index 0000000000000000000000000000000000000000..f00c965d8307308469e537302baa73048488f162 --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube/keys @@ -0,0 +1,10 @@ +1 +2 +3 +4 +5 +6 +7 +8 +9 +10 diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube_prepare.sh b/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube_prepare.sh new file mode 100755 index 0000000000000000000000000000000000000000..1417254a54e2194ab3a0194f2ec970f480787acd --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube_prepare.sh @@ -0,0 +1,22 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing +#! /bin/bash + +mkdir -p cube_model +mkdir -p cube/data +./seq_generator ctr_serving_model/SparseFeatFactors ./cube_model/feature +./cube/cube-builder -dict_name=test_dict -job_mode=base -last_version=0 -cur_version=0 -depend_version=0 -input_path=./cube_model -output_path=${PWD}/cube/data -shard_num=1 -only_build=false +mv ./cube/data/0_0/test_dict_part0/* ./cube/data/ +cd cube && ./cube diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube_quant_prepare.sh b/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube_quant_prepare.sh new file mode 100755 index 0000000000000000000000000000000000000000..0db6575ab307fb81cdd0336a20bb9a8ec30d446d --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/cube_quant_prepare.sh @@ -0,0 +1,22 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing +#! 
/bin/bash + +mkdir -p cube_model +mkdir -p cube/data +./seq_generator ctr_serving_model/SparseFeatFactors ./cube_model/feature 8 +./cube/cube-builder -dict_name=test_dict -job_mode=base -last_version=0 -cur_version=0 -depend_version=0 -input_path=./cube_model -output_path=${PWD}/cube/data -shard_num=1 -only_build=false +mv ./cube/data/0_0/test_dict_part0/* ./cube/data/ +cd cube && ./cube diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/get_data.sh b/python/examples/grpc_impl_example/criteo_ctr_with_cube/get_data.sh new file mode 100755 index 0000000000000000000000000000000000000000..1f244b3a4aa81488bb493825576ba30c4b3bba22 --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/get_data.sh @@ -0,0 +1,2 @@ +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/data/ctr_prediction/ctr_data.tar.gz +tar -zxvf ctr_data.tar.gz diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/local_train.py b/python/examples/grpc_impl_example/criteo_ctr_with_cube/local_train.py new file mode 100755 index 0000000000000000000000000000000000000000..d4a1bc930924e348048f7ac3e5c46381d9b6441b --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/local_train.py @@ -0,0 +1,100 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
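+#
+# local_train.py: trains the Criteo CTR DNN locally with an InMemoryDataset fed
+# through criteo_reader.py, then exports two serving bundles via
+# paddle_serving_client.io.save_model: a dense model (ctr_serving_model /
+# ctr_client_conf) and a sparse-lookup variant (ctr_serving_model_kv /
+# ctr_client_conf_kv) intended for use with the cube sparse parameter service.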
+# pylint: disable=doc-string-missing + +from __future__ import print_function + +from args import parse_args +import os +import paddle.fluid as fluid +import sys +from network_conf import dnn_model + +dense_feature_dim = 13 + + +def train(): + args = parse_args() + sparse_only = args.sparse_only + if not os.path.isdir(args.model_output_dir): + os.mkdir(args.model_output_dir) + dense_input = fluid.layers.data( + name="dense_input", shape=[dense_feature_dim], dtype='float32') + sparse_input_ids = [ + fluid.layers.data( + name="C" + str(i), shape=[1], lod_level=1, dtype="int64") + for i in range(1, 27) + ] + label = fluid.layers.data(name='label', shape=[1], dtype='int64') + + #nn_input = None if sparse_only else dense_input + nn_input = dense_input + predict_y, loss, auc_var, batch_auc_var, infer_vars = dnn_model( + nn_input, sparse_input_ids, label, args.embedding_size, + args.sparse_feature_dim) + + optimizer = fluid.optimizer.SGD(learning_rate=1e-4) + optimizer.minimize(loss) + + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(fluid.default_startup_program()) + dataset = fluid.DatasetFactory().create_dataset("InMemoryDataset") + dataset.set_use_var([dense_input] + sparse_input_ids + [label]) + + python_executable = "python" + pipe_command = "{} criteo_reader.py {}".format(python_executable, + args.sparse_feature_dim) + + dataset.set_pipe_command(pipe_command) + dataset.set_batch_size(128) + thread_num = 10 + dataset.set_thread(thread_num) + + whole_filelist = [ + "raw_data/part-%d" % x for x in range(len(os.listdir("raw_data"))) + ] + + print(whole_filelist) + dataset.set_filelist(whole_filelist[:100]) + dataset.load_into_memory() + fluid.layers.Print(auc_var) + epochs = 1 + for i in range(epochs): + exe.train_from_dataset( + program=fluid.default_main_program(), dataset=dataset, debug=True) + print("epoch {} finished".format(i)) + + import paddle_serving_client.io as server_io + feed_var_dict = {} + feed_var_dict['dense_input'] = dense_input + for i, sparse in enumerate(sparse_input_ids): + feed_var_dict["embedding_{}.tmp_0".format(i)] = sparse + fetch_var_dict = {"prob": predict_y} + + feed_kv_dict = {} + feed_kv_dict['dense_input'] = dense_input + for i, emb in enumerate(infer_vars): + feed_kv_dict["embedding_{}.tmp_0".format(i)] = emb + fetch_var_dict = {"prob": predict_y} + + server_io.save_model("ctr_serving_model", "ctr_client_conf", feed_var_dict, + fetch_var_dict, fluid.default_main_program()) + + server_io.save_model("ctr_serving_model_kv", "ctr_client_conf_kv", + feed_kv_dict, fetch_var_dict, + fluid.default_main_program()) + + +if __name__ == '__main__': + train() diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/network_conf.py b/python/examples/grpc_impl_example/criteo_ctr_with_cube/network_conf.py new file mode 100755 index 0000000000000000000000000000000000000000..2975533a72ad21d6dd5896446fd06c1f9bdfe8b4 --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/network_conf.py @@ -0,0 +1,77 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +import paddle.fluid as fluid +import math + + +def dnn_model(dense_input, sparse_inputs, label, embedding_size, + sparse_feature_dim): + def embedding_layer(input): + emb = fluid.layers.embedding( + input=input, + is_sparse=True, + is_distributed=False, + size=[sparse_feature_dim, embedding_size], + param_attr=fluid.ParamAttr( + name="SparseFeatFactors", + initializer=fluid.initializer.Uniform())) + x = fluid.layers.sequence_pool(input=emb, pool_type='sum') + return emb, x + + def mlp_input_tensor(emb_sums, dense_tensor): + #if isinstance(dense_tensor, fluid.Variable): + # return fluid.layers.concat(emb_sums, axis=1) + #else: + return fluid.layers.concat(emb_sums + [dense_tensor], axis=1) + + def mlp(mlp_input): + fc1 = fluid.layers.fc(input=mlp_input, + size=400, + act='relu', + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Normal( + scale=1 / math.sqrt(mlp_input.shape[1])))) + fc2 = fluid.layers.fc(input=fc1, + size=400, + act='relu', + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Normal( + scale=1 / math.sqrt(fc1.shape[1])))) + fc3 = fluid.layers.fc(input=fc2, + size=400, + act='relu', + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Normal( + scale=1 / math.sqrt(fc2.shape[1])))) + pre = fluid.layers.fc(input=fc3, + size=2, + act='softmax', + param_attr=fluid.ParamAttr( + initializer=fluid.initializer.Normal( + scale=1 / math.sqrt(fc3.shape[1])))) + return pre + + emb_pair_sums = list(map(embedding_layer, sparse_inputs)) + emb_sums = [x[1] for x in emb_pair_sums] + infer_vars = [x[0] for x in emb_pair_sums] + mlp_in = mlp_input_tensor(emb_sums, dense_input) + predict = mlp(mlp_in) + cost = fluid.layers.cross_entropy(input=predict, label=label) + avg_cost = fluid.layers.reduce_sum(cost) + accuracy = fluid.layers.accuracy(input=predict, label=label) + auc_var, batch_auc_var, auc_states = \ + fluid.layers.auc(input=predict, label=label, num_thresholds=2 ** 12, slide_steps=20) + return predict, avg_cost, auc_var, batch_auc_var, infer_vars diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_client.py b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_client.py new file mode 100755 index 0000000000000000000000000000000000000000..f82c1a21c153594e0be192506af5318c24a4e99a --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_client.py @@ -0,0 +1,49 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
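+#
+# test_client.py: connects a MultiLangClient (gRPC) to the CTR service on
+# 127.0.0.1:9292, feeds "dense_input" plus the 26 sparse embedding slots
+# parsed from raw Criteo data, keeps only responses whose
+# "serving_status_code" is 0, and finally prints the AUC and elapsed time.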
+# pylint: disable=doc-string-missing + +from paddle_serving_client import MultiLangClient as Client +import sys +import os +import criteo as criteo +import time +from paddle_serving_client.metric import auc +import grpc + +client = Client() +client.connect(["127.0.0.1:9292"]) + +batch = 1 +buf_size = 100 +dataset = criteo.CriteoDataset() +dataset.setup(1000001) +test_filelists = ["{}/part-0".format(sys.argv[1])] +reader = dataset.infer_reader(test_filelists, batch, buf_size) +label_list = [] +prob_list = [] +start = time.time() +for ei in range(10000): + data = reader().next() + feed_dict = {} + feed_dict['dense_input'] = data[0][0] + for i in range(1, 27): + feed_dict["embedding_{}.tmp_0".format(i - 1)] = data[0][i] + fetch_map = client.predict(feed=feed_dict, fetch=["prob"]) + if fetch_map["serving_status_code"] == 0: + prob_list.append(fetch_map['prob'][0][1]) + label_list.append(data[0][-1][0]) + +print(auc(label_list, prob_list)) +end = time.time() +print(end - start) diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server.py b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server.py new file mode 100755 index 0000000000000000000000000000000000000000..361d5a59becb7c110907f66d8b651e05e7eb418e --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server.py @@ -0,0 +1,37 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +import os +import sys +from paddle_serving_server import OpMaker +from paddle_serving_server import OpSeqMaker +from paddle_serving_server import MultiLangServer as Server + +op_maker = OpMaker() +read_op = op_maker.create('general_reader') +general_dist_kv_infer_op = op_maker.create('general_dist_kv_infer') +response_op = op_maker.create('general_response') + +op_seq_maker = OpSeqMaker() +op_seq_maker.add_op(read_op) +op_seq_maker.add_op(general_dist_kv_infer_op) +op_seq_maker.add_op(response_op) + +server = Server() +server.set_op_sequence(op_seq_maker.get_op_sequence()) +server.set_num_threads(4) +server.load_model_config(sys.argv[1], sys.argv[2]) +server.prepare_server(workdir="work_dir1", port=9292, device="cpu") +server.run_server() diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server_gpu.py b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server_gpu.py new file mode 100755 index 0000000000000000000000000000000000000000..38e1bf82118f6af7cfe7b467003332a5328b2979 --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server_gpu.py @@ -0,0 +1,37 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +import os +import sys +from paddle_serving_server_gpu import OpMaker +from paddle_serving_server_gpu import OpSeqMaker +from paddle_serving_server_gpu import MultiLangServer as Server + +op_maker = OpMaker() +read_op = op_maker.create('general_reader') +general_dist_kv_infer_op = op_maker.create('general_dist_kv_infer') +response_op = op_maker.create('general_response') + +op_seq_maker = OpSeqMaker() +op_seq_maker.add_op(read_op) +op_seq_maker.add_op(general_dist_kv_infer_op) +op_seq_maker.add_op(response_op) + +server = Server() +server.set_op_sequence(op_seq_maker.get_op_sequence()) +server.set_num_threads(4) +server.load_model_config(sys.argv[1], sys.argv[2]) +server.prepare_server(workdir="work_dir1", port=9292, device="cpu") +server.run_server() diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server_quant.py b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server_quant.py new file mode 100755 index 0000000000000000000000000000000000000000..feca75b077d737a614bdfd955b7bf0d82ed08529 --- /dev/null +++ b/python/examples/grpc_impl_example/criteo_ctr_with_cube/test_server_quant.py @@ -0,0 +1,37 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
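+#
+# test_server_quant.py: identical pipeline to test_server.py except that the
+# infer op is 'general_dist_kv_quant_infer', which serves the quantized sparse
+# parameters produced by cube_quant_prepare.sh.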
+# pylint: disable=doc-string-missing + +import os +import sys +from paddle_serving_server import OpMaker +from paddle_serving_server import OpSeqMaker +from paddle_serving_server import MultiLangServer as Server + +op_maker = OpMaker() +read_op = op_maker.create('general_reader') +general_dist_kv_infer_op = op_maker.create('general_dist_kv_quant_infer') +response_op = op_maker.create('general_response') + +op_seq_maker = OpSeqMaker() +op_seq_maker.add_op(read_op) +op_seq_maker.add_op(general_dist_kv_infer_op) +op_seq_maker.add_op(response_op) + +server = Server() +server.set_op_sequence(op_seq_maker.get_op_sequence()) +server.set_num_threads(4) +server.load_model_config(sys.argv[1], sys.argv[2]) +server.prepare_server(workdir="work_dir1", port=9292, device="cpu") +server.run_server() diff --git a/python/examples/grpc_impl_example/fit_a_line/README_CN.md b/python/examples/grpc_impl_example/fit_a_line/README_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..93e0d1cf7262d620df18570401ed39db67f839ef --- /dev/null +++ b/python/examples/grpc_impl_example/fit_a_line/README_CN.md @@ -0,0 +1,57 @@ +# 线性回归预测服务示例 + +## 获取数据 + +```shell +sh get_data.sh +``` + +## 开启 gRPC 服务端 + +``` shell +python test_server.py uci_housing_model/ +``` + +也可以通过下面的一行代码开启默认 gRPC 服务: + +```shell +python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang +``` + +## 客户端预测 + +### 同步预测 + +``` shell +python test_sync_client.py +``` + +### 异步预测 + +``` shell +python test_asyn_client.py +``` + +### Batch 预测 + +``` shell +python test_batch_client.py +``` + +### 通用 pb 预测 + +``` shell +python test_general_pb_client.py +``` + +### 预测超时 + +``` shell +python test_timeout_client.py +``` + +### List 输入 + +``` shell +python test_list_input_client.py +``` diff --git a/python/examples/grpc_impl_example/fit_a_line/get_data.sh b/python/examples/grpc_impl_example/fit_a_line/get_data.sh new file mode 100644 index 0000000000000000000000000000000000000000..84a3966a0ef323cef4b146d8e9489c70a7a8ae35 --- /dev/null +++ b/python/examples/grpc_impl_example/fit_a_line/get_data.sh @@ -0,0 +1,2 @@ +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz +tar -xzf uci_housing.tar.gz diff --git a/python/examples/fit_a_line/test_multilang_client.py b/python/examples/grpc_impl_example/fit_a_line/test_asyn_client.py similarity index 58% rename from python/examples/fit_a_line/test_multilang_client.py rename to python/examples/grpc_impl_example/fit_a_line/test_asyn_client.py index f85814a4b24693c269c192b23f0f5ab1c7d566a6..b01a9372585bae42abca213fe8fb8a55505dfe57 100644 --- a/python/examples/fit_a_line/test_multilang_client.py +++ b/python/examples/grpc_impl_example/fit_a_line/test_asyn_client.py @@ -13,38 +13,39 @@ # limitations under the License. 
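+#
+# Asynchronous gRPC client example: predict(..., asyn=True) returns a future;
+# the done-callback reads the fetch map via future.result(), catches
+# grpc.RpcError on failure, and the main thread spins until every submitted
+# request has completed.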
# pylint: disable=doc-string-missing -from paddle_serving_client import MultiLangClient +from paddle_serving_client import MultiLangClient as Client import functools -import sys import time import threading +import grpc -client = MultiLangClient() -client.load_client_config(sys.argv[1]) +client = Client() client.connect(["127.0.0.1:9393"]) -import paddle -test_reader = paddle.batch( - paddle.reader.shuffle( - paddle.dataset.uci_housing.test(), buf_size=500), - batch_size=1) - complete_task_count = [0] lock = threading.Lock() -def call_back(call_future, data): - fetch_map = call_future.result() - print("{} {}".format(fetch_map["price"][0], data[0][1][0])) - with lock: - complete_task_count[0] += 1 +def call_back(call_future): + try: + fetch_map = call_future.result() + print(fetch_map) + except grpc.RpcError as e: + print(e.code()) + finally: + with lock: + complete_task_count[0] += 1 +x = [ + 0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, + 0.4919, 0.1856, 0.0795, -0.0332 +] task_count = 0 -for data in test_reader(): - future = client.predict(feed={"x": data[0][0]}, fetch=["price"], asyn=True) +for i in range(3): + future = client.predict(feed={"x": x}, fetch=["price"], asyn=True) task_count += 1 - future.add_done_callback(functools.partial(call_back, data=data)) + future.add_done_callback(functools.partial(call_back)) while complete_task_count[0] != task_count: time.sleep(0.1) diff --git a/python/examples/grpc_impl_example/fit_a_line/test_batch_client.py b/python/examples/grpc_impl_example/fit_a_line/test_batch_client.py new file mode 100644 index 0000000000000000000000000000000000000000..0630a0a960e5e40a7507454feb57418c8cfbdc68 --- /dev/null +++ b/python/examples/grpc_impl_example/fit_a_line/test_batch_client.py @@ -0,0 +1,32 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing +from paddle_serving_client import MultiLangClient as Client + +client = Client() +client.connect(["127.0.0.1:9393"]) + +batch_size = 2 +x = [ + 0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, + 0.4919, 0.1856, 0.0795, -0.0332 +] + +for i in range(3): + batch_feed = [{"x": x} for j in range(batch_size)] + fetch_map = client.predict(feed=batch_feed, fetch=["price"]) + if fetch_map["serving_status_code"] == 0: + print(fetch_map) + else: + print(fetch_map["serving_status_code"]) diff --git a/python/examples/grpc_impl_example/fit_a_line/test_general_pb_client.py b/python/examples/grpc_impl_example/fit_a_line/test_general_pb_client.py new file mode 100644 index 0000000000000000000000000000000000000000..b2744906b0dcd321f86a1b8117a78307e24578e5 --- /dev/null +++ b/python/examples/grpc_impl_example/fit_a_line/test_general_pb_client.py @@ -0,0 +1,30 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +from paddle_serving_client import MultiLangClient as Client + +client = Client() +client.connect(["127.0.0.1:9393"]) + +x = [ + 0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, + 0.4919, 0.1856, 0.0795, -0.0332 +] +for i in range(3): + fetch_map = client.predict(feed={"x": x}, fetch=["price"], is_python=False) + if fetch_map["serving_status_code"] == 0: + print(fetch_map) + else: + print(fetch_map["serving_status_code"]) diff --git a/python/examples/grpc_impl_example/fit_a_line/test_numpy_input_client.py b/python/examples/grpc_impl_example/fit_a_line/test_numpy_input_client.py new file mode 100644 index 0000000000000000000000000000000000000000..e98c1e87bb48613e4226cf5378063aec7c5b4093 --- /dev/null +++ b/python/examples/grpc_impl_example/fit_a_line/test_numpy_input_client.py @@ -0,0 +1,31 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# pylint: disable=doc-string-missing + +from paddle_serving_client import MultiLangClient as Client +import numpy as np + +client = Client() +client.connect(["127.0.0.1:9393"]) + +x = [ + 0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, + 0.4919, 0.1856, 0.0795, -0.0332 +] +for i in range(3): + fetch_map = client.predict(feed={"x": np.array(x)}, fetch=["price"]) + if fetch_map["serving_status_code"] == 0: + print(fetch_map) + else: + print(fetch_map["serving_status_code"]) diff --git a/python/examples/fit_a_line/test_multilang_server.py b/python/examples/grpc_impl_example/fit_a_line/test_server.py similarity index 94% rename from python/examples/fit_a_line/test_multilang_server.py rename to python/examples/grpc_impl_example/fit_a_line/test_server.py index 23eb938f0ee1bf6b195509816dea5221bbfa9218..6acc7bfe2e6d00621f32f1f7f437691fc15d20fc 100644 --- a/python/examples/fit_a_line/test_multilang_server.py +++ b/python/examples/grpc_impl_example/fit_a_line/test_server.py @@ -17,7 +17,7 @@ import os import sys from paddle_serving_server import OpMaker from paddle_serving_server import OpSeqMaker -from paddle_serving_server import MultiLangServer +from paddle_serving_server import MultiLangServer as Server op_maker = OpMaker() read_op = op_maker.create('general_reader') @@ -29,7 +29,7 @@ op_seq_maker.add_op(read_op) op_seq_maker.add_op(general_infer_op) op_seq_maker.add_op(response_op) -server = MultiLangServer() +server = Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.load_model_config(sys.argv[1]) server.prepare_server(workdir="work_dir1", port=9393, device="cpu") diff --git a/python/examples/grpc_impl_example/fit_a_line/test_server_gpu.py b/python/examples/grpc_impl_example/fit_a_line/test_server_gpu.py new file mode 100644 index 0000000000000000000000000000000000000000..1547ee445f4f8ceebe58e6f9e4f05b92520911eb --- /dev/null +++ b/python/examples/grpc_impl_example/fit_a_line/test_server_gpu.py @@ -0,0 +1,37 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# pylint: disable=doc-string-missing + +import os +import sys +from paddle_serving_server_gpu import OpMaker +from paddle_serving_server_gpu import OpSeqMaker +from paddle_serving_server_gpu import MultiLangServer as Server + +op_maker = OpMaker() +read_op = op_maker.create('general_reader') +general_infer_op = op_maker.create('general_infer') +response_op = op_maker.create('general_response') + +op_seq_maker = OpSeqMaker() +op_seq_maker.add_op(read_op) +op_seq_maker.add_op(general_infer_op) +op_seq_maker.add_op(response_op) + +server = Server() +server.set_op_sequence(op_seq_maker.get_op_sequence()) +server.load_model_config(sys.argv[1]) +server.set_gpuid(0) +server.prepare_server(workdir="work_dir1", port=9393, device="cpu") +server.run_server() diff --git a/python/examples/grpc_impl_example/fit_a_line/test_sync_client.py b/python/examples/grpc_impl_example/fit_a_line/test_sync_client.py new file mode 100644 index 0000000000000000000000000000000000000000..89530dc2f2a33ef44b2dbde52975634f4b4d8295 --- /dev/null +++ b/python/examples/grpc_impl_example/fit_a_line/test_sync_client.py @@ -0,0 +1,30 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +from paddle_serving_client import MultiLangClient as Client + +client = Client() +client.connect(["127.0.0.1:9393"]) + +x = [ + 0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, + 0.4919, 0.1856, 0.0795, -0.0332 +] +for i in range(3): + fetch_map = client.predict(feed={"x": x}, fetch=["price"]) + if fetch_map["serving_status_code"] == 0: + print(fetch_map) + else: + print(fetch_map["serving_status_code"]) diff --git a/python/examples/grpc_impl_example/fit_a_line/test_timeout_client.py b/python/examples/grpc_impl_example/fit_a_line/test_timeout_client.py new file mode 100644 index 0000000000000000000000000000000000000000..f90fab38533aabf3daa7627ee0b79c56892444dd --- /dev/null +++ b/python/examples/grpc_impl_example/fit_a_line/test_timeout_client.py @@ -0,0 +1,34 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# pylint: disable=doc-string-missing + +from paddle_serving_client import MultiLangClient as Client +import grpc + +client = Client() +client.connect(["127.0.0.1:9393"]) +client.set_rpc_timeout_ms(1) + +x = [ + 0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, + 0.4919, 0.1856, 0.0795, -0.0332 +] +for i in range(3): + fetch_map = client.predict(feed={"x": x}, fetch=["price"]) + if fetch_map["serving_status_code"] == 0: + print(fetch_map) + elif fetch_map["serving_status_code"] == grpc.StatusCode.DEADLINE_EXCEEDED: + print('timeout') + else: + print(fetch_map["serving_status_code"]) diff --git a/python/examples/imdb/test_client.py b/python/examples/imdb/test_client.py index cbdc6fe56e0f1078ad32c0d15f4e30a1a59f581b..b903a59983fb0df87adfa4fa38b7eb2b80fb4ebb 100644 --- a/python/examples/imdb/test_client.py +++ b/python/examples/imdb/test_client.py @@ -29,6 +29,6 @@ imdb_dataset.load_resource(sys.argv[2]) for line in sys.stdin: word_ids, label = imdb_dataset.get_words_and_label(line) feed = {"words": word_ids} - fetch = ["acc", "cost", "prediction"] + fetch = ["prediction"] fetch_map = client.predict(feed=feed, fetch=fetch) print("{} {}".format(fetch_map["prediction"][0], label[0])) diff --git a/python/examples/imdb/test_ensemble_client.py b/python/examples/imdb/test_ensemble_client.py index 6cafb3389fff5a25103bcb2b3a867b73b35b9e8e..eb1e29ddd6d5a02854e4859a35474306c1c4d073 100644 --- a/python/examples/imdb/test_ensemble_client.py +++ b/python/examples/imdb/test_ensemble_client.py @@ -32,11 +32,7 @@ for i in range(3): line = 'i am very sad | 0' word_ids, label = imdb_dataset.get_words_and_label(line) feed = {"words": word_ids} - fetch = ["acc", "cost", "prediction"] + fetch = ["prediction"] fetch_maps = client.predict(feed=feed, fetch=fetch) - if len(fetch_maps) == 1: - print("step: {}, res: {}".format(i, fetch_maps['prediction'][0][1])) - else: - for model, fetch_map in fetch_maps.items(): - print("step: {}, model: {}, res: {}".format(i, model, fetch_map[ - 'prediction'][0][1])) + for model, fetch_map in fetch_maps.items(): + print("step: {}, model: {}, res: {}".format(i, model, fetch_map)) diff --git a/python/examples/imdb/test_multilang_ensemble_client.py b/python/examples/imdb/test_multilang_ensemble_client.py new file mode 100644 index 0000000000000000000000000000000000000000..6686d4c8c38d6a17cb9c5701abf7d76773031772 --- /dev/null +++ b/python/examples/imdb/test_multilang_ensemble_client.py @@ -0,0 +1,37 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +from paddle_serving_client import MultiLangClient +from imdb_reader import IMDBDataset + +client = MultiLangClient() +# If you have more than one model, make sure that the input +# and output of more than one model are the same. 
+client.connect(["127.0.0.1:9393"]) + +# you can define any english sentence or dataset here +# This example reuses imdb reader in training, you +# can define your own data preprocessing easily. +imdb_dataset = IMDBDataset() +imdb_dataset.load_resource('imdb.vocab') + +for i in range(3): + line = 'i am very sad | 0' + word_ids, label = imdb_dataset.get_words_and_label(line) + feed = {"words": word_ids} + fetch = ["prediction"] + fetch_maps = client.predict(feed=feed, fetch=fetch) + for model, fetch_map in fetch_maps.items(): + print("step: {}, model: {}, res: {}".format(i, model, fetch_map)) diff --git a/python/examples/imdb/test_multilang_ensemble_server.py b/python/examples/imdb/test_multilang_ensemble_server.py new file mode 100644 index 0000000000000000000000000000000000000000..053aa06f0219de231415ba178135782334e56c1f --- /dev/null +++ b/python/examples/imdb/test_multilang_ensemble_server.py @@ -0,0 +1,40 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing + +from paddle_serving_server import OpMaker +from paddle_serving_server import OpGraphMaker +from paddle_serving_server import MultiLangServer + +op_maker = OpMaker() +read_op = op_maker.create('general_reader') +cnn_infer_op = op_maker.create( + 'general_infer', engine_name='cnn', inputs=[read_op]) +bow_infer_op = op_maker.create( + 'general_infer', engine_name='bow', inputs=[read_op]) +response_op = op_maker.create( + 'general_response', inputs=[cnn_infer_op, bow_infer_op]) + +op_graph_maker = OpGraphMaker() +op_graph_maker.add_op(read_op) +op_graph_maker.add_op(cnn_infer_op) +op_graph_maker.add_op(bow_infer_op) +op_graph_maker.add_op(response_op) + +server = MultiLangServer() +server.set_op_graph(op_graph_maker.get_op_graph()) +model_config = {cnn_infer_op: 'imdb_cnn_model', bow_infer_op: 'imdb_bow_model'} +server.load_model_config(model_config) +server.prepare_server(workdir="work_dir1", port=9393, device="cpu") +server.run_server() diff --git a/python/examples/pipeline/imdb_model_ensemble/config.yml b/python/examples/pipeline/imdb_model_ensemble/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..5ad37f846524922c2e262691b18eff953b19a105 --- /dev/null +++ b/python/examples/pipeline/imdb_model_ensemble/config.yml @@ -0,0 +1,6 @@ +use_multithread: true +client_type: brpc +retry: 1 +profile: false +prot: 8080 +worker_num: 2 diff --git a/python/examples/pipeline/imdb_model_ensemble/get_data.sh b/python/examples/pipeline/imdb_model_ensemble/get_data.sh new file mode 100644 index 0000000000000000000000000000000000000000..81d8d5d3b018f133c41e211d1501cf3cd9a3d8a4 --- /dev/null +++ b/python/examples/pipeline/imdb_model_ensemble/get_data.sh @@ -0,0 +1,4 @@ +wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz +tar -zxvf text_classification_data.tar.gz +tar -zxvf imdb_model.tar.gz diff 
--git a/python/examples/pipeline/imdb_model_ensemble/test_pipeline_client.py b/python/examples/pipeline/imdb_model_ensemble/test_pipeline_client.py new file mode 100644 index 0000000000000000000000000000000000000000..011f0593f4e00e72bd1eda6e394712df0e765050 --- /dev/null +++ b/python/examples/pipeline/imdb_model_ensemble/test_pipeline_client.py @@ -0,0 +1,30 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from paddle_serving_client.pipeline import PipelineClient +import numpy as np +from line_profiler import LineProfiler + +client = PipelineClient() +client.connect('localhost:8080') + +lp = LineProfiler() +lp_wrapper = lp(client.predict) + +words = 'i am very sad | 0' + +for i in range(1): + fetch_map = lp_wrapper(feed_dict={"words": words}, fetch=["prediction"]) + print(fetch_map) + +#lp.print_stats() diff --git a/python/examples/pipeline/imdb_model_ensemble/test_pipeline_server.py b/python/examples/pipeline/imdb_model_ensemble/test_pipeline_server.py new file mode 100644 index 0000000000000000000000000000000000000000..bbd37de578c4a75822d21a695bcc0d4356b76ab7 --- /dev/null +++ b/python/examples/pipeline/imdb_model_ensemble/test_pipeline_server.py @@ -0,0 +1,110 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# pylint: disable=doc-string-missing + +from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp +from paddle_serving_server.pipeline import PipelineServer +from paddle_serving_server.pipeline.proto import pipeline_service_pb2 +from paddle_serving_server.pipeline.channel import ChannelDataEcode +import numpy as np +import logging +from paddle_serving_app.reader import IMDBDataset + +_LOGGER = logging.getLogger(__name__) + +logging.basicConfig( + format='%(asctime)s %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + datefmt='%Y-%m-%d %H:%M', + level=logging.DEBUG) + + +class ImdbRequestOp(RequestOp): + def load_user_resources(self): + self.imdb_dataset = IMDBDataset() + self.imdb_dataset.load_resource('imdb.vocab') + + def unpack_request_package(self, request): + dictdata = {} + for idx, key in enumerate(request.key): + if key != "words": + continue + words = request.value[idx] + word_ids, _ = self.imdb_dataset.get_words_and_label(words) + dictdata[key] = np.array(word_ids) + return dictdata + + +class CombineOp(Op): + def preprocess(self, input_data): + combined_prediction = 0 + for op_name, data in input_data.items(): + _LOGGER.info("{}: {}".format(op_name, data["prediction"])) + combined_prediction += data["prediction"] + data = {"prediction": combined_prediction / 2} + return data + + +class ImdbResponseOp(ResponseOp): + # Here ImdbResponseOp is consistent with the default ResponseOp implementation + def pack_response_package(self, channeldata): + resp = pipeline_service_pb2.Response() + resp.ecode = channeldata.ecode + if resp.ecode == ChannelDataEcode.OK.value: + feed = channeldata.parse() + # ndarray to string + for name, var in feed.items(): + resp.value.append(var.__repr__()) + resp.key.append(name) + else: + resp.error_info = channeldata.error_info + return resp + + +read_op = ImdbRequestOp() +bow_op = Op(name="bow", + input_ops=[read_op], + server_endpoints=["127.0.0.1:9393"], + fetch_list=["prediction"], + client_config="imdb_bow_client_conf/serving_client_conf.prototxt", + concurrency=1, + timeout=-1, + retry=1) +cnn_op = Op(name="cnn", + input_ops=[read_op], + server_endpoints=["127.0.0.1:9292"], + fetch_list=["prediction"], + client_config="imdb_cnn_client_conf/serving_client_conf.prototxt", + concurrency=1, + timeout=-1, + retry=1) +combine_op = CombineOp( + name="combine", + input_ops=[bow_op, cnn_op], + concurrency=1, + timeout=-1, + retry=1) + +# fetch output of bow_op +# response_op = ImdbResponseOp(input_ops=[bow_op]) + +# fetch output of combine_op +response_op = ImdbResponseOp(input_ops=[combine_op]) + +# use default ResponseOp implementation +# response_op = ResponseOp(input_ops=[combine_op]) + +server = PipelineServer() +server.set_response_op(response_op) +server.prepare_server('config.yml') +server.run_server() diff --git a/python/examples/util/timeline_trace.py b/python/examples/util/timeline_trace.py index 144c21cb4458cf8f73fa9e198617b735970897bd..b5cfb519c64bd5108cd3afb790659670a316eb0e 100644 --- a/python/examples/util/timeline_trace.py +++ b/python/examples/util/timeline_trace.py @@ -16,10 +16,16 @@ def prase(pid_str, time_str, counter): if len(name_list) == 2: name = name_list[0] else: - name = name_list[0] + "_" + name_list[1] + name = "_".join(name_list[:-1]) + name_list = name.split("#") + if len(name_list) > 1: + tid = name_list[-1] + name = "#".join(name_list[:-1]) + else: + tid = 0 event_dict = {} event_dict["name"] = name - event_dict["tid"] = 0 + event_dict["tid"] = tid event_dict["pid"] = pid event_dict["ts"] = ts 
event_dict["ph"] = ph diff --git a/python/examples/yolov4/000000570688.jpg b/python/examples/yolov4/000000570688.jpg new file mode 100644 index 0000000000000000000000000000000000000000..cb304bd56c4010c08611a30dcca58ea9140cea54 Binary files /dev/null and b/python/examples/yolov4/000000570688.jpg differ diff --git a/python/examples/yolov4/README.md b/python/examples/yolov4/README.md new file mode 100644 index 0000000000000000000000000000000000000000..08e16026d79ef7e93df732359f2c17609d4a2d0d --- /dev/null +++ b/python/examples/yolov4/README.md @@ -0,0 +1,23 @@ +# Yolov4 Detection Service + +([简体中文](README_CN.md)|English) + +## Get Model + +``` +python -m paddle_serving_app.package --get_model yolov4 +tar -xzvf yolov4.tar.gz +``` + +## Start RPC Service + +``` +python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 +``` + +## Prediction + +``` +python test_client.py 000000570688.jpg +``` +After the prediction is completed, a json file to save the prediction result and a picture with the detection result box will be generated in the `./outpu folder. diff --git a/python/examples/yolov4/README_CN.md b/python/examples/yolov4/README_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..a4eed96b08619d4602cbd012a676a9adb6e08a63 --- /dev/null +++ b/python/examples/yolov4/README_CN.md @@ -0,0 +1,24 @@ +# Yolov4 检测服务 + +(简体中文|[English](README.md)) + +## 获取模型 + +``` +python -m paddle_serving_app.package --get_model yolov4 +tar -xzvf yolov4.tar.gz +``` + +## 启动RPC服务 + +``` +python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 +``` + +## 预测 + +``` +python test_client.py 000000570688.jpg +``` + +预测完成会在`./output`文件夹下生成保存预测结果的json文件以及标出检测结果框的图片。 diff --git a/python/examples/yolov4/label_list.txt b/python/examples/yolov4/label_list.txt new file mode 100644 index 0000000000000000000000000000000000000000..941cb4e1392266f6a6c09b1fdc5f79503b2e5df6 --- /dev/null +++ b/python/examples/yolov4/label_list.txt @@ -0,0 +1,80 @@ +person +bicycle +car +motorcycle +airplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +couch +potted plant +bed +dining table +toilet +tv +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/python/examples/yolov4/test_client.py b/python/examples/yolov4/test_client.py new file mode 100644 index 0000000000000000000000000000000000000000..92dcd06552ca1fdd3f2d54060e9de501f052e349 --- /dev/null +++ b/python/examples/yolov4/test_client.py @@ -0,0 +1,41 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import numpy as np +from paddle_serving_client import Client +from paddle_serving_app.reader import * +import cv2 +preprocess = Sequential([ + File2Image(), BGR2RGB(), Resize( + (608, 608), interpolation=cv2.INTER_LINEAR), Div(255.0), Transpose( + (2, 0, 1)) +]) + +postprocess = RCNNPostprocess("label_list.txt", "output", [608, 608]) +client = Client() + +client.load_client_config("yolov4_client/serving_client_conf.prototxt") +client.connect(['127.0.0.1:9393']) + +im = preprocess(sys.argv[1]) +print(im.shape) +fetch_map = client.predict( + feed={ + "image": im, + "im_size": np.array(list(im.shape[1:])), + }, + fetch=["save_infer_model/scale_0.tmp_0"]) +fetch_map["image"] = sys.argv[1] +postprocess(fetch_map) diff --git a/python/paddle_serving_app/models/model_list.py b/python/paddle_serving_app/models/model_list.py index 0c26a59f6f0537b9c910f21062938d4720d4f9f4..79b3f91bd6584d17ddbc4124584cf40bd586b965 100644 --- a/python/paddle_serving_app/models/model_list.py +++ b/python/paddle_serving_app/models/model_list.py @@ -24,7 +24,7 @@ class ServingModels(object): "SentimentAnalysis"] = ["senta_bilstm", "senta_bow", "senta_cnn"] self.model_dict["SemanticRepresentation"] = ["ernie"] self.model_dict["ChineseWordSegmentation"] = ["lac"] - self.model_dict["ObjectDetection"] = ["faster_rcnn"] + self.model_dict["ObjectDetection"] = ["faster_rcnn", "yolov4"] self.model_dict["ImageSegmentation"] = [ "unet", "deeplabv3", "deeplabv3+cityscapes" ] diff --git a/python/paddle_serving_app/reader/image_reader.py b/python/paddle_serving_app/reader/image_reader.py index dc029bf0409179f1d392ce05d007565cd3007085..a44ca5de84da2bafce9b4cea37fb88095debabc6 100644 --- a/python/paddle_serving_app/reader/image_reader.py +++ b/python/paddle_serving_app/reader/image_reader.py @@ -280,10 +280,11 @@ class SegPostprocess(object): class RCNNPostprocess(object): - def __init__(self, label_file, output_dir): + def __init__(self, label_file, output_dir, resize_shape=None): self.output_dir = output_dir self.label_file = label_file self.label_list = [] + self.resize_shape = resize_shape with open(label_file) as fin: for line in fin: self.label_list.append(line.strip()) @@ -378,6 +379,13 @@ class RCNNPostprocess(object): xmax = xmin + w ymax = ymin + h + img_w, img_h = image.size + if self.resize_shape is not None: + xmin = xmin * img_w / self.resize_shape[0] + xmax = xmax * img_w / self.resize_shape[0] + ymin = ymin * img_h / self.resize_shape[1] + ymax = ymax * img_h / self.resize_shape[1] + color = tuple(color_list[catid]) # draw bbox diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index 63f827167de6417a15097d0ea2c7834e7fbf2d20..455bcf62cd039dde69736ec514892856eabd3088 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -28,8 +28,11 @@ sys.path.append( os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto')) from .proto import multi_lang_general_model_service_pb2_grpc -int_type = 0 -float_type = 1 +int64_type = 0 +float32_type = 1 +int32_type = 2 +int_type = set([int64_type, int32_type]) +float_type = set([float32_type]) class _NOPProfiler(object): @@ -279,7 +282,7 @@ class Client(object): raise ValueError("Wrong feed name: {}.".format(key)) #if not isinstance(feed_i[key], np.ndarray): self.shape_check(feed_i, key) - if self.feed_types_[key] == int_type: + if self.feed_types_[key] in int_type: if i == 
0: int_feed_names.append(key) if isinstance(feed_i[key], np.ndarray): @@ -292,7 +295,7 @@ class Client(object): else: int_slot.append(feed_i[key]) self.all_numpy_input = False - elif self.feed_types_[key] == float_type: + elif self.feed_types_[key] in float_type: if i == 0: float_feed_names.append(key) if isinstance(feed_i[key], np.ndarray): @@ -339,7 +342,7 @@ class Client(object): result_map = {} # result map needs to be a numpy array for i, name in enumerate(fetch_names): - if self.fetch_names_to_type_[name] == int_type: + if self.fetch_names_to_type_[name] == int64_type: # result_map[name] will be py::array(numpy array) result_map[name] = result_batch_handle.get_int64_by_name( mi, name) @@ -348,7 +351,7 @@ class Client(object): if name in self.lod_tensor_set: result_map["{}.lod".format( name)] = result_batch_handle.get_lod(mi, name) - elif self.fetch_names_to_type_[name] == float_type: + elif self.fetch_names_to_type_[name] == float32_type: result_map[name] = result_batch_handle.get_float_by_name( mi, name) shape = result_batch_handle.get_shape(mi, name) @@ -356,6 +359,16 @@ class Client(object): if name in self.lod_tensor_set: result_map["{}.lod".format( name)] = result_batch_handle.get_lod(mi, name) + + elif self.fetch_names_to_type_[name] == int32_type: + # result_map[name] will be py::array(numpy array) + result_map[name] = result_batch_handle.get_int32_by_name( + mi, name) + shape = result_batch_handle.get_shape(mi, name) + result_map[name].shape = shape + if name in self.lod_tensor_set: + result_map["{}.lod".format( + name)] = result_batch_handle.get_lod(mi, name) multi_result_map.append(result_map) ret = None if len(model_engine_names) == 1: @@ -384,22 +397,41 @@ class Client(object): class MultiLangClient(object): def __init__(self): self.channel_ = None + self.stub_ = None + self.rpc_timeout_s_ = 2 - def load_client_config(self, path): - if not isinstance(path, str): - raise Exception("GClient only supports multi-model temporarily") - self._parse_model_config(path) + def add_variant(self, tag, cluster, variant_weight): + # TODO + raise Exception("cannot support ABtest yet") + + def set_rpc_timeout_ms(self, rpc_timeout): + if self.stub_ is None: + raise Exception("set timeout must be set after connect.") + if not isinstance(rpc_timeout, int): + # for bclient + raise ValueError("rpc_timeout must be int type.") + self.rpc_timeout_s_ = rpc_timeout / 1000.0 + timeout_req = multi_lang_general_model_service_pb2.SetTimeoutRequest() + timeout_req.timeout_ms = rpc_timeout + resp = self.stub_.SetTimeout(timeout_req) + return resp.err_code == 0 - def connect(self, endpoint): + def connect(self, endpoints): # https://github.com/tensorflow/serving/issues/1382 options = [('grpc.max_receive_message_length', 512 * 1024 * 1024), ('grpc.max_send_message_length', 512 * 1024 * 1024), - ('grpc.max_receive_message_length', 512 * 1024 * 1024)] - - self.channel_ = grpc.insecure_channel( - endpoint[0], options=options) #TODO + ('grpc.lb_policy_name', 'round_robin')] + # TODO: weight round robin + g_endpoint = 'ipv4:{}'.format(','.join(endpoints)) + self.channel_ = grpc.insecure_channel(g_endpoint, options=options) self.stub_ = multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelServiceStub( self.channel_) + # get client model config + get_client_config_req = multi_lang_general_model_service_pb2.GetClientConfigRequest( + ) + resp = self.stub_.GetClientConfig(get_client_config_req) + model_config_str = resp.client_config_str + self._parse_model_config(model_config_str) def _flatten_list(self, 
nested_list): for item in nested_list: @@ -409,11 +441,10 @@ class MultiLangClient(object): else: yield item - def _parse_model_config(self, model_config_path): + def _parse_model_config(self, model_config_str): model_conf = m_config.GeneralModelConfig() - f = open(model_config_path, 'r') - model_conf = google.protobuf.text_format.Merge( - str(f.read()), model_conf) + model_conf = google.protobuf.text_format.Merge(model_config_str, + model_conf) self.feed_names_ = [var.alias_name for var in model_conf.feed_var] self.feed_types_ = {} self.feed_shapes_ = {} @@ -434,8 +465,8 @@ class MultiLangClient(object): if var.is_lod_tensor: self.lod_tensor_set_.add(var.alias_name) - def _pack_feed_data(self, feed, fetch, is_python): - req = multi_lang_general_model_service_pb2.Request() + def _pack_inference_request(self, feed, fetch, is_python): + req = multi_lang_general_model_service_pb2.InferenceRequest() req.fetch_var_names.extend(fetch) req.is_python = is_python feed_batch = None @@ -460,26 +491,50 @@ class MultiLangClient(object): data = np.array(var, dtype="int64") elif v_type == 1: # float32 data = np.array(var, dtype="float32") + elif v_type == 2: # int32 + data = np.array(var, dtype="int32") else: - raise Exception("error type.") - else: + raise Exception("error tensor value type.") + elif isinstance(var, np.ndarray): data = var - if var.dtype == "float64": - data = data.astype("float32") + if v_type == 0: + if data.dtype != 'int64': + data = data.astype("int64") + elif v_type == 1: + if data.dtype != 'float32': + data = data.astype("float32") + elif v_type == 2: + if data.dtype != 'int32': + data = data.astype("int32") + else: + raise Exception("error tensor value type.") + else: + raise Exception("var must be list or ndarray.") tensor.data = data.tobytes() else: - if v_type == 0: # int64 - if isinstance(var, np.ndarray): - tensor.int64_data.extend(var.reshape(-1).tolist()) + if isinstance(var, np.ndarray): + if v_type == 0: # int64 + tensor.int64_data.extend( + var.reshape(-1).astype("int64").tolist()) + elif v_type == 1: + tensor.float_data.extend( + var.reshape(-1).astype('float32').tolist()) + elif v_type == 2: + tensor.int32_data.extend( + var.reshape(-1).astype('int32').tolist()) else: + raise Exception("error tensor value type.") + elif isinstance(var, list): + if v_type == 0: tensor.int64_data.extend(self._flatten_list(var)) - elif v_type == 1: # float32 - if isinstance(var, np.ndarray): - tensor.float_data.extend(var.reshape(-1).tolist()) - else: + elif v_type == 1: tensor.float_data.extend(self._flatten_list(var)) + elif v_type == 2: + tensor.int32_data.extend(self._flatten_list(var)) + else: + raise Exception("error tensor value type.") else: - raise Exception("error type.") + raise Exception("var must be list or ndarray.") if isinstance(var, np.ndarray): tensor.shape.extend(list(var.shape)) else: @@ -488,37 +543,52 @@ class MultiLangClient(object): req.insts.append(inst) return req - def _unpack_resp(self, resp, fetch, is_python, need_variant_tag): - result_map = {} - inst = resp.outputs[0].insts[0] + def _unpack_inference_response(self, resp, fetch, is_python, + need_variant_tag): + if resp.err_code != 0: + return None tag = resp.tag - for i, name in enumerate(fetch): - var = inst.tensor_array[i] - v_type = self.fetch_types_[name] - if is_python: - if v_type == 0: # int64 - result_map[name] = np.frombuffer(var.data, dtype="int64") - elif v_type == 1: # float32 - result_map[name] = np.frombuffer(var.data, dtype="float32") - else: - raise Exception("error type.") - else: - if 
v_type == 0: # int64 - result_map[name] = np.array( - list(var.int64_data), dtype="int64") - elif v_type == 1: # float32 - result_map[name] = np.array( - list(var.float_data), dtype="float32") + multi_result_map = {} + for model_result in resp.outputs: + inst = model_result.insts[0] + result_map = {} + for i, name in enumerate(fetch): + var = inst.tensor_array[i] + v_type = self.fetch_types_[name] + if is_python: + if v_type == 0: # int64 + result_map[name] = np.frombuffer( + var.data, dtype="int64") + elif v_type == 1: # float32 + result_map[name] = np.frombuffer( + var.data, dtype="float32") + else: + raise Exception("error type.") else: - raise Exception("error type.") - result_map[name].shape = list(var.shape) - if name in self.lod_tensor_set_: - result_map["{}.lod".format(name)] = np.array(list(var.lod)) - return result_map if not need_variant_tag else [result_map, tag] + if v_type == 0: # int64 + result_map[name] = np.array( + list(var.int64_data), dtype="int64") + elif v_type == 1: # float32 + result_map[name] = np.array( + list(var.float_data), dtype="float32") + else: + raise Exception("error type.") + result_map[name].shape = list(var.shape) + if name in self.lod_tensor_set_: + result_map["{}.lod".format(name)] = np.array(list(var.lod)) + multi_result_map[model_result.engine_name] = result_map + ret = None + if len(resp.outputs) == 1: + ret = list(multi_result_map.values())[0] + else: + ret = multi_result_map + ret["serving_status_code"] = 0 + return ret if not need_variant_tag else [ret, tag] def _done_callback_func(self, fetch, is_python, need_variant_tag): def unpack_resp(resp): - return self._unpack_resp(resp, fetch, is_python, need_variant_tag) + return self._unpack_inference_response(resp, fetch, is_python, + need_variant_tag) return unpack_resp @@ -531,16 +601,20 @@ class MultiLangClient(object): need_variant_tag=False, asyn=False, is_python=True): - req = self._pack_feed_data(feed, fetch, is_python=is_python) + req = self._pack_inference_request(feed, fetch, is_python=is_python) if not asyn: - resp = self.stub_.inference(req) - return self._unpack_resp( - resp, - fetch, - is_python=is_python, - need_variant_tag=need_variant_tag) + try: + resp = self.stub_.Inference(req, timeout=self.rpc_timeout_s_) + return self._unpack_inference_response( + resp, + fetch, + is_python=is_python, + need_variant_tag=need_variant_tag) + except grpc.RpcError as e: + return {"serving_status_code": e.code()} else: - call_future = self.stub_.inference.future(req) + call_future = self.stub_.Inference.future( + req, timeout=self.rpc_timeout_s_) return MultiLangPredictFuture( call_future, self._done_callback_func( @@ -555,7 +629,10 @@ class MultiLangPredictFuture(object): self.callback_func_ = callback_func def result(self): - resp = self.call_future_.result() + try: + resp = self.call_future_.result() + except grpc.RpcError as e: + return {"serving_status_code": e.code()} return self.callback_func_(resp) def add_done_callback(self, fn): diff --git a/python/paddle_serving_client/io/__init__.py b/python/paddle_serving_client/io/__init__.py index 20d29e2bdfe0d2753d2f23cda028d76a3b13c699..69e185be3d2e4d1a579a29d30b59341bfb8666ed 100644 --- a/python/paddle_serving_client/io/__init__.py +++ b/python/paddle_serving_client/io/__init__.py @@ -48,16 +48,18 @@ def save_model(server_model_folder, config = model_conf.GeneralModelConfig() + #int64 = 0; float32 = 1; int32 = 2; for key in feed_var_dict: feed_var = model_conf.FeedVar() feed_var.alias_name = key feed_var.name = feed_var_dict[key].name 
feed_var.is_lod_tensor = feed_var_dict[key].lod_level >= 1 - if feed_var_dict[key].dtype == core.VarDesc.VarType.INT32 or \ - feed_var_dict[key].dtype == core.VarDesc.VarType.INT64: + if feed_var_dict[key].dtype == core.VarDesc.VarType.INT64: feed_var.feed_type = 0 if feed_var_dict[key].dtype == core.VarDesc.VarType.FP32: feed_var.feed_type = 1 + if feed_var_dict[key].dtype == core.VarDesc.VarType.INT32: + feed_var.feed_type = 2 if feed_var.is_lod_tensor: feed_var.shape.extend([-1]) else: @@ -73,13 +75,12 @@ def save_model(server_model_folder, fetch_var.alias_name = key fetch_var.name = fetch_var_dict[key].name fetch_var.is_lod_tensor = fetch_var_dict[key].lod_level >= 1 - if fetch_var_dict[key].dtype == core.VarDesc.VarType.INT32 or \ - fetch_var_dict[key].dtype == core.VarDesc.VarType.INT64: + if fetch_var_dict[key].dtype == core.VarDesc.VarType.INT64: fetch_var.fetch_type = 0 - if fetch_var_dict[key].dtype == core.VarDesc.VarType.FP32: fetch_var.fetch_type = 1 - + if fetch_var_dict[key].dtype == core.VarDesc.VarType.INT32: + fetch_var.fetch_type = 2 if fetch_var.is_lod_tensor: fetch_var.shape.extend([-1]) else: diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py index 3a5c07011ace961fdfb61ebf3217ab1aab375e82..875e275c759d9fb1a9ccb6632816418a75a93aec 100644 --- a/python/paddle_serving_server/__init__.py +++ b/python/paddle_serving_server/__init__.py @@ -231,6 +231,7 @@ class Server(object): self.infer_service_conf.services.extend([infer_service]) def _prepare_resource(self, workdir): + self.workdir = workdir if self.resource_conf == None: with open("{}/{}".format(workdir, self.general_model_config_fn), "w") as fout: @@ -328,10 +329,10 @@ class Server(object): os.chdir(self.module_path) need_download = False device_version = self.get_device_version() - floder_name = device_version + serving_server_version - tar_name = floder_name + ".tar.gz" + folder_name = device_version + serving_server_version + tar_name = folder_name + ".tar.gz" bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name - self.server_path = os.path.join(self.module_path, floder_name) + self.server_path = os.path.join(self.module_path, folder_name) #acquire lock version_file = open("{}/version.py".format(self.module_path), "r") @@ -357,7 +358,7 @@ class Server(object): os.remove(exe_path) raise SystemExit( 'Decompressing failed, please check your permission of {} or disk space left.'. - foemat(self.module_path)) + format(self.module_path)) finally: os.remove(tar_name) #release lock @@ -374,11 +375,11 @@ class Server(object): os.system("touch {}/fluid_time_file".format(workdir)) if not self.port_is_available(port): - raise SystemExit("Prot {} is already used".format(port)) + raise SystemExit("Port {} is already used".format(port)) + self.set_port(port) self._prepare_resource(workdir) self._prepare_engine(self.model_config_paths, device) self._prepare_infer_service(port) - self.port = port self.workdir = workdir infer_service_fn = "{}/{}".format(workdir, self.infer_service_fn) @@ -440,22 +441,29 @@ class Server(object): os.system(command) -class MultiLangServerService( - multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelService): - def __init__(self, model_config_path, endpoints): +class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. 
+ MultiLangGeneralModelServiceServicer): + def __init__(self, model_config_path, is_multi_model, endpoints): + self.is_multi_model_ = is_multi_model + self.model_config_path_ = model_config_path + self.endpoints_ = endpoints + with open(self.model_config_path_) as f: + self.model_config_str_ = str(f.read()) + self._parse_model_config(self.model_config_str_) + self._init_bclient(self.model_config_path_, self.endpoints_) + + def _init_bclient(self, model_config_path, endpoints, timeout_ms=None): from paddle_serving_client import Client - self._parse_model_config(model_config_path) self.bclient_ = Client() - self.bclient_.load_client_config( - "{}/serving_server_conf.prototxt".format(model_config_path)) + if timeout_ms is not None: + self.bclient_.set_rpc_timeout_ms(timeout_ms) + self.bclient_.load_client_config(model_config_path) self.bclient_.connect(endpoints) - def _parse_model_config(self, model_config_path): + def _parse_model_config(self, model_config_str): model_conf = m_config.GeneralModelConfig() - f = open("{}/serving_server_conf.prototxt".format(model_config_path), - 'r') - model_conf = google.protobuf.text_format.Merge( - str(f.read()), model_conf) + model_conf = google.protobuf.text_format.Merge(model_config_str, + model_conf) self.feed_names_ = [var.alias_name for var in model_conf.feed_var] self.feed_types_ = {} self.feed_shapes_ = {} @@ -480,7 +488,7 @@ class MultiLangServerService( else: yield item - def _unpack_request(self, request): + def _unpack_inference_request(self, request): feed_names = list(request.feed_var_names) fetch_names = list(request.fetch_var_names) is_python = request.is_python @@ -492,10 +500,12 @@ class MultiLangServerService( v_type = self.feed_types_[name] data = None if is_python: - if v_type == 0: + if v_type == 0: # int64 data = np.frombuffer(var.data, dtype="int64") - elif v_type == 1: + elif v_type == 1: # float32 data = np.frombuffer(var.data, dtype="float32") + elif v_type == 2: # int32 + data = np.frombuffer(var.data, dtype="int32") else: raise Exception("error type.") else: @@ -503,6 +513,8 @@ class MultiLangServerService( data = np.array(list(var.int64_data), dtype="int64") elif v_type == 1: # float32 data = np.array(list(var.float_data), dtype="float32") + elif v_type == 2: # int32 + data = np.array(list(var.int32_data), dtype="int32") else: raise Exception("error type.") data.shape = list(feed_inst.tensor_array[idx].shape) @@ -510,55 +522,132 @@ class MultiLangServerService( feed_batch.append(feed_dict) return feed_batch, fetch_names, is_python - def _pack_resp_package(self, result, fetch_names, is_python, tag): - resp = multi_lang_general_model_service_pb2.Response() - # Only one model is supported temporarily - model_output = multi_lang_general_model_service_pb2.ModelOutput() - inst = multi_lang_general_model_service_pb2.FetchInst() - for idx, name in enumerate(fetch_names): - tensor = multi_lang_general_model_service_pb2.Tensor() - v_type = self.fetch_types_[name] - if is_python: - tensor.data = result[name].tobytes() - else: - if v_type == 0: # int64 - tensor.int64_data.extend(result[name].reshape(-1).tolist()) - elif v_type == 1: # float32 - tensor.float_data.extend(result[name].reshape(-1).tolist()) - else: - raise Exception("error type.") - tensor.shape.extend(list(result[name].shape)) - if name in self.lod_tensor_set_: - tensor.lod.extend(result["{}.lod".format(name)].tolist()) - inst.tensor_array.append(tensor) - model_output.insts.append(inst) - resp.outputs.append(model_output) + def _pack_inference_response(self, ret, 
fetch_names, is_python): + resp = multi_lang_general_model_service_pb2.InferenceResponse() + if ret is None: + resp.err_code = 1 + return resp + results, tag = ret resp.tag = tag + resp.err_code = 0 + if not self.is_multi_model_: + results = {'general_infer_0': results} + for model_name, model_result in results.items(): + model_output = multi_lang_general_model_service_pb2.ModelOutput() + inst = multi_lang_general_model_service_pb2.FetchInst() + for idx, name in enumerate(fetch_names): + tensor = multi_lang_general_model_service_pb2.Tensor() + v_type = self.fetch_types_[name] + if is_python: + tensor.data = model_result[name].tobytes() + else: + if v_type == 0: # int64 + tensor.int64_data.extend(model_result[name].reshape(-1) + .tolist()) + elif v_type == 1: # float32 + tensor.float_data.extend(model_result[name].reshape(-1) + .tolist()) + elif v_type == 2: # int32 + tensor.int32_data.extend(model_result[name].reshape(-1) + .tolist()) + else: + raise Exception("error type.") + tensor.shape.extend(list(model_result[name].shape)) + if name in self.lod_tensor_set_: + tensor.lod.extend(model_result["{}.lod".format(name)] + .tolist()) + inst.tensor_array.append(tensor) + model_output.insts.append(inst) + model_output.engine_name = model_name + resp.outputs.append(model_output) + return resp + + def SetTimeout(self, request, context): + # This process and the Inference process cannot be operated at the same time. + # For performance reasons, do not add thread lock temporarily. + timeout_ms = request.timeout_ms + self._init_bclient(self.model_config_path_, self.endpoints_, timeout_ms) + resp = multi_lang_general_model_service_pb2.SimpleResponse() + resp.err_code = 0 return resp - def inference(self, request, context): - feed_dict, fetch_names, is_python = self._unpack_request(request) - data, tag = self.bclient_.predict( + def Inference(self, request, context): + feed_dict, fetch_names, is_python = self._unpack_inference_request( + request) + ret = self.bclient_.predict( feed=feed_dict, fetch=fetch_names, need_variant_tag=True) - return self._pack_resp_package(data, fetch_names, is_python, tag) + return self._pack_inference_response(ret, fetch_names, is_python) + + def GetClientConfig(self, request, context): + resp = multi_lang_general_model_service_pb2.GetClientConfigResponse() + resp.client_config_str = self.model_config_str_ + return resp class MultiLangServer(object): - def __init__(self, worker_num=2): + def __init__(self): self.bserver_ = Server() - self.worker_num_ = worker_num + self.worker_num_ = 4 + self.body_size_ = 64 * 1024 * 1024 + self.concurrency_ = 100000 + self.is_multi_model_ = False # for model ensemble + + def set_max_concurrency(self, concurrency): + self.concurrency_ = concurrency + self.bserver_.set_max_concurrency(concurrency) + + def set_num_threads(self, threads): + self.worker_num_ = threads + self.bserver_.set_num_threads(threads) + + def set_max_body_size(self, body_size): + self.bserver_.set_max_body_size(body_size) + if body_size >= self.body_size_: + self.body_size_ = body_size + else: + print( + "max_body_size is less than default value, will use default value in service."
+ ) + + def set_port(self, port): + self.gport_ = port + + def set_reload_interval(self, interval): + self.bserver_.set_reload_interval(interval) def set_op_sequence(self, op_seq): self.bserver_.set_op_sequence(op_seq) - def load_model_config(self, model_config_path): - if not isinstance(model_config_path, str): - raise Exception( - "MultiLangServer only supports multi-model temporarily") - self.bserver_.load_model_config(model_config_path) - self.model_config_path_ = model_config_path + def set_op_graph(self, op_graph): + self.bserver_.set_op_graph(op_graph) + + def set_memory_optimize(self, flag=False): + self.bserver_.set_memory_optimize(flag) + + def set_ir_optimize(self, flag=False): + self.bserver_.set_ir_optimize(flag) + + def set_op_sequence(self, op_seq): + self.bserver_.set_op_sequence(op_seq) + + def use_mkl(self, flag): + self.bserver_.use_mkl(flag) + + def load_model_config(self, server_config_paths, client_config_path=None): + self.bserver_.load_model_config(server_config_paths) + if client_config_path is None: + if isinstance(server_config_paths, dict): + self.is_multi_model_ = True + client_config_path = '{}/serving_server_conf.prototxt'.format( + list(server_config_paths.items())[0][1]) + else: + client_config_path = '{}/serving_server_conf.prototxt'.format( + server_config_paths) + self.bclient_config_path_ = client_config_path def prepare_server(self, workdir=None, port=9292, device="cpu"): + if not self._port_is_available(port): + raise SystemExit("Port {} is already used".format(port)) default_port = 12000 self.port_list_ = [] for i in range(1000): @@ -568,7 +657,7 @@ class MultiLangServer(object): break self.bserver_.prepare_server( workdir=workdir, port=self.port_list_[0], device=device) - self.gport_ = port + self.set_port(port) def _launch_brpc_service(self, bserver): bserver.run_server() @@ -583,12 +672,16 @@ class MultiLangServer(object): p_bserver = Process( target=self._launch_brpc_service, args=(self.bserver_, )) p_bserver.start() + options = [('grpc.max_send_message_length', self.body_size_), + ('grpc.max_receive_message_length', self.body_size_)] server = grpc.server( - futures.ThreadPoolExecutor(max_workers=self.worker_num_)) + futures.ThreadPoolExecutor(max_workers=self.worker_num_), + options=options, + maximum_concurrent_rpcs=self.concurrency_) multi_lang_general_model_service_pb2_grpc.add_MultiLangGeneralModelServiceServicer_to_server( - MultiLangServerService(self.model_config_path_, - ["0.0.0.0:{}".format(self.port_list_[0])]), - server) + MultiLangServerServiceServicer( + self.bclient_config_path_, self.is_multi_model_, + ["0.0.0.0:{}".format(self.port_list_[0])]), server) server.add_insecure_port('[::]:{}'.format(self.gport_)) server.start() p_bserver.join() diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index e75240dfafd436e5557a8f11396029e6be5868fe..009a6ce00af2290b64716e211429385d09189831 100644 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -53,6 +53,11 @@ def parse_args(): # pylint: disable=doc-string-missing type=int, default=512 * 1024 * 1024, help="Limit sizes of messages") + parser.add_argument( + "--use_multilang", + default=False, + action="store_true", + help="Use Multi-language-service") return parser.parse_args() @@ -67,6 +72,7 @@ def start_standard_model(): # pylint: disable=doc-string-missing ir_optim = args.ir_optim max_body_size = args.max_body_size use_mkl = args.use_mkl + use_multilang = args.use_multilang if model == "": print("You must
specify your serving model") @@ -83,7 +89,11 @@ def start_standard_model(): # pylint: disable=doc-string-missing op_seq_maker.add_op(general_infer_op) op_seq_maker.add_op(general_response_op) - server = serving.Server() + server = None + if use_multilang: + server = serving.MultiLangServer() + else: + server = serving.Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(thread_num) server.set_memory_optimize(mem_optim) diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py index b3fcc1b880fcbffa1da884e4b68350c1870997c1..ea43c8cb18731f60d905ccbe4bada605709d9c11 100755 --- a/python/paddle_serving_server/web_service.py +++ b/python/paddle_serving_server/web_service.py @@ -85,9 +85,9 @@ class WebService(object): fetch_map = self.client.predict(feed=feed, fetch=fetch) for key in fetch_map: fetch_map[key] = fetch_map[key].tolist() - fetch_map = self.postprocess( + result = self.postprocess( feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map) - result = {"result": fetch_map} + result = {"result": result} except ValueError: result = {"result": "Request Value Error"} return result diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py index 44733b154096255c3ce06e1be29d50d3e662269a..26288f6ae65ce823a57ee201130d40ff6510c4a5 100644 --- a/python/paddle_serving_server_gpu/__init__.py +++ b/python/paddle_serving_server_gpu/__init__.py @@ -68,6 +68,11 @@ def serve_args(): type=int, default=512 * 1024 * 1024, help="Limit sizes of messages") + parser.add_argument( + "--use_multilang", + default=False, + action="store_true", + help="Use Multi-language-service") return parser.parse_args() @@ -410,7 +415,7 @@ class Server(object): os.system("touch {}/fluid_time_file".format(workdir)) if not self.port_is_available(port): - raise SystemExit("Prot {} is already used".format(port)) + raise SystemExit("Port {} is already used".format(port)) self.set_port(port) self._prepare_resource(workdir) @@ -484,22 +489,29 @@ class Server(object): os.system(command) -class MultiLangServerService( - multi_lang_general_model_service_pb2_grpc.MultiLangGeneralModelService): - def __init__(self, model_config_path, endpoints): +class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc. 
+ MultiLangGeneralModelServiceServicer): + def __init__(self, model_config_path, is_multi_model, endpoints): + self.is_multi_model_ = is_multi_model + self.model_config_path_ = model_config_path + self.endpoints_ = endpoints + with open(self.model_config_path_) as f: + self.model_config_str_ = str(f.read()) + self._parse_model_config(self.model_config_str_) + self._init_bclient(self.model_config_path_, self.endpoints_) + + def _init_bclient(self, model_config_path, endpoints, timeout_ms=None): from paddle_serving_client import Client - self._parse_model_config(model_config_path) self.bclient_ = Client() - self.bclient_.load_client_config( - "{}/serving_server_conf.prototxt".format(model_config_path)) + if timeout_ms is not None: + self.bclient_.set_rpc_timeout_ms(timeout_ms) + self.bclient_.load_client_config(model_config_path) self.bclient_.connect(endpoints) - def _parse_model_config(self, model_config_path): + def _parse_model_config(self, model_config_str): model_conf = m_config.GeneralModelConfig() - f = open("{}/serving_server_conf.prototxt".format(model_config_path), - 'r') - model_conf = google.protobuf.text_format.Merge( - str(f.read()), model_conf) + model_conf = google.protobuf.text_format.Merge(model_config_str, + model_conf) self.feed_names_ = [var.alias_name for var in model_conf.feed_var] self.feed_types_ = {} self.feed_shapes_ = {} @@ -524,7 +536,7 @@ class MultiLangServerService( else: yield item - def _unpack_request(self, request): + def _unpack_inference_request(self, request): feed_names = list(request.feed_var_names) fetch_names = list(request.fetch_var_names) is_python = request.is_python @@ -540,6 +552,8 @@ class MultiLangServerService( data = np.frombuffer(var.data, dtype="int64") elif v_type == 1: data = np.frombuffer(var.data, dtype="float32") + elif v_type == 2: + data = np.frombuffer(var.data, dtype="int32") else: raise Exception("error type.") else: @@ -547,6 +561,8 @@ class MultiLangServerService( data = np.array(list(var.int64_data), dtype="int64") elif v_type == 1: # float32 data = np.array(list(var.float_data), dtype="float32") + elif v_type == 2: + data = np.array(list(var.int32_data), dtype="int32") else: raise Exception("error type.") data.shape = list(feed_inst.tensor_array[idx].shape) @@ -554,55 +570,129 @@ class MultiLangServerService( feed_batch.append(feed_dict) return feed_batch, fetch_names, is_python - def _pack_resp_package(self, result, fetch_names, is_python, tag): - resp = multi_lang_general_model_service_pb2.Response() - # Only one model is supported temporarily - model_output = multi_lang_general_model_service_pb2.ModelOutput() - inst = multi_lang_general_model_service_pb2.FetchInst() - for idx, name in enumerate(fetch_names): - tensor = multi_lang_general_model_service_pb2.Tensor() - v_type = self.fetch_types_[name] - if is_python: - tensor.data = result[name].tobytes() - else: - if v_type == 0: # int64 - tensor.int64_data.extend(result[name].reshape(-1).tolist()) - elif v_type == 1: # float32 - tensor.float_data.extend(result[name].reshape(-1).tolist()) - else: - raise Exception("error type.") - tensor.shape.extend(list(result[name].shape)) - if name in self.lod_tensor_set_: - tensor.lod.extend(result["{}.lod".format(name)].tolist()) - inst.tensor_array.append(tensor) - model_output.insts.append(inst) - resp.outputs.append(model_output) + def _pack_inference_response(self, ret, fetch_names, is_python): + resp = multi_lang_general_model_service_pb2.InferenceResponse() + if ret is None: + resp.err_code = 1 + return resp + results, tag = 
ret resp.tag = tag + resp.err_code = 0 + if not self.is_multi_model_: + results = {'general_infer_0': results} + for model_name, model_result in results.items(): + model_output = multi_lang_general_model_service_pb2.ModelOutput() + inst = multi_lang_general_model_service_pb2.FetchInst() + for idx, name in enumerate(fetch_names): + tensor = multi_lang_general_model_service_pb2.Tensor() + v_type = self.fetch_types_[name] + if is_python: + tensor.data = model_result[name].tobytes() + else: + if v_type == 0: # int64 + tensor.int64_data.extend(model_result[name].reshape(-1) + .tolist()) + elif v_type == 1: # float32 + tensor.float_data.extend(model_result[name].reshape(-1) + .tolist()) + elif v_type == 2: # int32 + tensor.int32_data.extend(model_result[name].reshape(-1) + .tolist()) + else: + raise Exception("error type.") + tensor.shape.extend(list(model_result[name].shape)) + if name in self.lod_tensor_set_: + tensor.lod.extend(model_result["{}.lod".format(name)] + .tolist()) + inst.tensor_array.append(tensor) + model_output.insts.append(inst) + model_output.engine_name = model_name + resp.outputs.append(model_output) + return resp + + def SetTimeout(self, request, context): + # This porcess and Inference process cannot be operate at the same time. + # For performance reasons, do not add thread lock temporarily. + timeout_ms = request.timeout_ms + self._init_bclient(self.model_config_path_, self.endpoints_, timeout_ms) + resp = multi_lang_general_model_service_pb2.SimpleResponse() + resp.err_code = 0 return resp - def inference(self, request, context): - feed_dict, fetch_names, is_python = self._unpack_request(request) - data, tag = self.bclient_.predict( + def Inference(self, request, context): + feed_dict, fetch_names, is_python = self._unpack_inference_request( + request) + ret = self.bclient_.predict( feed=feed_dict, fetch=fetch_names, need_variant_tag=True) - return self._pack_resp_package(data, fetch_names, is_python, tag) + return self._pack_inference_response(ret, fetch_names, is_python) + + def GetClientConfig(self, request, context): + resp = multi_lang_general_model_service_pb2.GetClientConfigResponse() + resp.client_config_str = self.model_config_str_ + return resp class MultiLangServer(object): - def __init__(self, worker_num=2): + def __init__(self): self.bserver_ = Server() - self.worker_num_ = worker_num + self.worker_num_ = 4 + self.body_size_ = 64 * 1024 * 1024 + self.concurrency_ = 100000 + self.is_multi_model_ = False # for model ensemble + + def set_max_concurrency(self, concurrency): + self.concurrency_ = concurrency + self.bserver_.set_max_concurrency(concurrency) + + def set_num_threads(self, threads): + self.worker_num_ = threads + self.bserver_.set_num_threads(threads) + + def set_max_body_size(self, body_size): + self.bserver_.set_max_body_size(body_size) + if body_size >= self.body_size_: + self.body_size_ = body_size + else: + print( + "max_body_size is less than default value, will use default value in service." 
+ ) + + def set_port(self, port): + self.gport_ = port + + def set_reload_interval(self, interval): + self.bserver_.set_reload_interval(interval) def set_op_sequence(self, op_seq): self.bserver_.set_op_sequence(op_seq) - def load_model_config(self, model_config_path): - if not isinstance(model_config_path, str): - raise Exception( - "MultiLangServer only supports multi-model temporarily") - self.bserver_.load_model_config(model_config_path) - self.model_config_path_ = model_config_path + def set_op_graph(self, op_graph): + self.bserver_.set_op_graph(op_graph) + + def set_memory_optimize(self, flag=False): + self.bserver_.set_memory_optimize(flag) + + def set_ir_optimize(self, flag=False): + self.bserver_.set_ir_optimize(flag) + + def set_gpuid(self, gpuid=0): + self.bserver_.set_gpuid(gpuid) + + def load_model_config(self, server_config_paths, client_config_path=None): + self.bserver_.load_model_config(server_config_paths) + if client_config_path is None: + if isinstance(server_config_paths, dict): + self.is_multi_model_ = True + client_config_path = '{}/serving_server_conf.prototxt'.format( + list(server_config_paths.items())[0][1]) + else: + client_config_path = '{}/serving_server_conf.prototxt'.format( + server_config_paths) + self.bclient_config_path_ = client_config_path def prepare_server(self, workdir=None, port=9292, device="cpu"): + if not self._port_is_available(port): + raise SystemExit("Prot {} is already used".format(port)) default_port = 12000 self.port_list_ = [] for i in range(1000): @@ -612,7 +702,7 @@ class MultiLangServer(object): break self.bserver_.prepare_server( workdir=workdir, port=self.port_list_[0], device=device) - self.gport_ = port + self.set_port(port) def _launch_brpc_service(self, bserver): bserver.run_server() @@ -627,12 +717,16 @@ class MultiLangServer(object): p_bserver = Process( target=self._launch_brpc_service, args=(self.bserver_, )) p_bserver.start() + options = [('grpc.max_send_message_length', self.body_size_), + ('grpc.max_receive_message_length', self.body_size_)] server = grpc.server( - futures.ThreadPoolExecutor(max_workers=self.worker_num_)) + futures.ThreadPoolExecutor(max_workers=self.worker_num_), + options=options, + maximum_concurrent_rpcs=self.concurrency_) multi_lang_general_model_service_pb2_grpc.add_MultiLangGeneralModelServiceServicer_to_server( - MultiLangServerService(self.model_config_path_, - ["0.0.0.0:{}".format(self.port_list_[0])]), - server) + MultiLangServerServiceServicer( + self.bclient_config_path_, self.is_multi_model_, + ["0.0.0.0:{}".format(self.port_list_[0])]), server) server.add_insecure_port('[::]:{}'.format(self.gport_)) server.start() p_bserver.join() diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py index 309896a876bda5fc9b1baceb089242baa6d77dc5..e26b32c2699d09b714b2658cafad0ae8c5138071 100644 --- a/python/paddle_serving_server_gpu/serve.py +++ b/python/paddle_serving_server_gpu/serve.py @@ -37,6 +37,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss mem_optim = args.mem_optim ir_optim = args.ir_optim max_body_size = args.max_body_size + use_multilang = args.use_multilang workdir = "{}_{}".format(args.workdir, gpuid) if model == "": @@ -54,7 +55,10 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss op_seq_maker.add_op(general_infer_op) op_seq_maker.add_op(general_response_op) - server = serving.Server() + if use_multilang: + server = serving.MultiLangServer() + else: + server = serving.Server() 
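For context, a minimal sketch of what the new --use_multilang path looks like from user code. This is an illustration only: it assumes the usual OpMaker/OpSeqMaker flow shown above and a run_server() entry point on MultiLangServer mirroring Server; the op name 'general_reader', the model directory, and the port are placeholders.

import paddle_serving_server_gpu as serving

op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')        # assumed feed op name
infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')

op_seq_maker = serving.OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(infer_op)
op_seq_maker.add_op(response_op)

# MultiLangServer wraps the brpc Server with a gRPC front end; most setters
# (num_threads, memory_optimize, gpuid, ...) are forwarded to the inner Server.
server = serving.MultiLangServer()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(4)
server.load_model_config("serving_server_model")   # placeholder model directory
server.prepare_server(workdir="workdir", port=9393, device="cpu")
server.run_server()                                 # assumed entry point, as with Server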
server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(thread_num) server.set_memory_optimize(mem_optim) diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py index 76721de8a005dfb23fbe2427671446889aa72af1..0eff9c72df84b30ded7dbc7c2e82a96bbd591162 100644 --- a/python/paddle_serving_server_gpu/web_service.py +++ b/python/paddle_serving_server_gpu/web_service.py @@ -50,12 +50,12 @@ class WebService(object): general_infer_op = op_maker.create('general_infer') general_response_op = op_maker.create('general_response') - op_seq_maker = serving.OpSeqMaker() + op_seq_maker = OpSeqMaker() op_seq_maker.add_op(read_op) op_seq_maker.add_op(general_infer_op) op_seq_maker.add_op(general_response_op) - server = serving.Server() + server = Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(thread_num) @@ -171,7 +171,7 @@ class WebService(object): processes=1) def get_app_instance(self): - return app_instance + return self.app_instance def preprocess(self, feed=[], fetch=[]): return feed, fetch diff --git a/python/pipeline/__init__.py b/python/pipeline/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..61913a81f2aef513d9dd4d321b51e59ecf67f6a4 --- /dev/null +++ b/python/pipeline/__init__.py @@ -0,0 +1,17 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .operator import Op, RequestOp, ResponseOp +from .pipeline_server import PipelineServer +from .pipeline_client import PipelineClient diff --git a/python/pipeline/channel.py b/python/pipeline/channel.py index a0eed6da107c0955be0d0bbcdda2967402b84b68..ce55b187e66ae02916d04a57732391de01f4ece5 100644 --- a/python/pipeline/channel.py +++ b/python/pipeline/channel.py @@ -12,3 +12,634 @@ # See the License for the specific language governing permissions and # limitations under the License. # pylint: disable=doc-string-missing +import threading +import multiprocessing +import multiprocessing.queues +import sys +if sys.version_info.major == 2: + import Queue +elif sys.version_info.major == 3: + import queue as Queue +else: + raise Exception("Error Python version") +import numpy as np +import logging +import enum +import copy + +_LOGGER = logging.getLogger(__name__) + + +class ChannelDataEcode(enum.Enum): + OK = 0 + TIMEOUT = 1 + NOT_IMPLEMENTED = 2 + TYPE_ERROR = 3 + RPC_PACKAGE_ERROR = 4 + CLIENT_ERROR = 5 + UNKNOW = 6 + + +class ChannelDataType(enum.Enum): + DICT = 0 + CHANNEL_NPDATA = 1 + ERROR = 2 + + +class ChannelData(object): + def __init__(self, + datatype=None, + npdata=None, + dictdata=None, + data_id=None, + ecode=None, + error_info=None): + ''' + There are several ways to use it: + + 1. ChannelData(ChannelDataType.CHANNEL_NPDATA.value, npdata, data_id) + 2. ChannelData(ChannelDataType.DICT.value, dictdata, data_id) + 3. 
ChannelData(ecode, error_info, data_id) + + Protobufs are not pickle-able: + https://stackoverflow.com/questions/55344376/how-to-import-protobuf-module + ''' + if ecode is not None: + if data_id is None or error_info is None: + raise ValueError("data_id and error_info cannot be None") + datatype = ChannelDataType.ERROR.value + else: + if datatype == ChannelDataType.CHANNEL_NPDATA.value: + ecode, error_info = ChannelData.check_npdata(npdata) + if ecode != ChannelDataEcode.OK.value: + datatype = ChannelDataType.ERROR.value + _LOGGER.error(error_info) + elif datatype == ChannelDataType.DICT.value: + ecode, error_info = ChannelData.check_dictdata(dictdata) + if ecode != ChannelDataEcode.OK.value: + datatype = ChannelDataType.ERROR.value + _LOGGER.error(error_info) + else: + raise ValueError("datatype not match") + self.datatype = datatype + self.npdata = npdata + self.dictdata = dictdata + self.id = data_id + self.ecode = ecode + self.error_info = error_info + + @staticmethod + def check_dictdata(dictdata): + ecode = ChannelDataEcode.OK.value + error_info = None + if not isinstance(dictdata, dict): + ecode = ChannelDataEcode.TYPE_ERROR.value + error_info = "the value of data must " \ + "be dict, but get {}.".format(type(dictdata)) + return ecode, error_info + + @staticmethod + def check_npdata(npdata): + ecode = ChannelDataEcode.OK.value + error_info = None + for _, value in npdata.items(): + if not isinstance(value, np.ndarray): + ecode = ChannelDataEcode.TYPE_ERROR.value + error_info = "the value of data must " \ + "be np.ndarray, but get {}.".format(type(value)) + break + return ecode, error_info + + def parse(self): + feed = None + if self.datatype == ChannelDataType.CHANNEL_NPDATA.value: + # return narray + feed = self.npdata + elif self.datatype == ChannelDataType.DICT.value: + # return dict + feed = self.dictdata + else: + raise TypeError("Error type({}) in datatype.".format(self.datatype)) + return feed + + def __str__(self): + return "type[{}], ecode[{}], id[{}]".format( + ChannelDataType(self.datatype).name, self.ecode, self.id) + + +class ProcessChannel(multiprocessing.queues.Queue): + """ + (Process version) The channel used for communication between Ops. + + 1. Support multiple different Op feed data (multiple producer) + Different types of data will be packaged through the data ID + 2. Support multiple different Op fetch data (multiple consumer) + Only when all types of Ops get the data of the same ID, + the data will be poped; The Op of the same type will not + get the data of the same ID. + 3. (TODO) Timeout and BatchSize are not fully supported. + + Note: + 1. The ID of the data in the channel must be different. + 2. The function add_producer() and add_consumer() are not thread safe, + and can only be called during initialization. + + There are two buffers and one queue in Channel: + + op_A \ / op_D + op_B - a. input_buf -> b. queue -> c. output_buf - op_E + op_C / \ op_F + + a. In input_buf, the input of multiple predecessor Ops is packed by data ID. + b. The packed data will be stored in queue. + c. In order to support multiple successor Ops to retrieve data, output_buf + maintains the data obtained from queue. 
+ """ + + def __init__(self, manager, name=None, maxsize=0, timeout=None): + # https://stackoverflow.com/questions/39496554/cannot-subclass-multiprocessing-queue-in-python-3-5/ + if sys.version_info.major == 2: + super(ProcessChannel, self).__init__(maxsize=maxsize) + elif sys.version_info.major == 3: + super(ProcessChannel, self).__init__( + maxsize=maxsize, ctx=multiprocessing.get_context()) + else: + raise Exception("Error Python version") + self._maxsize = maxsize + self._timeout = timeout + self.name = name + self._stop = False + + self._cv = multiprocessing.Condition() + + self._producers = [] + self._pushed_producer_count = manager.dict() # {data_id: count} + self._input_buf = manager.dict() # {data_id: {op_name: data}} + + self._reset_max_cursor = 1000000000000000000 + self._consumer_cursors = manager.dict() # {op_name: cursor} + self._cursor_count = manager.dict() # {cursor: count} + self._base_cursor = manager.Value('i', 0) + self._output_buf = manager.list() + + def get_producers(self): + return self._producers + + def get_consumers(self): + return self._consumer_cursors.keys() + + def _log(self, info_str): + return "[{}] {}".format(self.name, info_str) + + def debug(self): + return self._log("p: {}, c: {}".format(self.get_producers(), + self.get_consumers())) + + def add_producer(self, op_name): + """ not thread safe, and can only be called during initialization. """ + if op_name in self._producers: + raise ValueError( + self._log("producer({}) is already in channel".format(op_name))) + self._producers.append(op_name) + + def add_consumer(self, op_name): + """ not thread safe, and can only be called during initialization. """ + if op_name in self._consumer_cursors: + raise ValueError( + self._log("consumer({}) is already in channel".format(op_name))) + self._consumer_cursors[op_name] = 0 + + if self._cursor_count.get(0) is None: + self._cursor_count[0] = 0 + self._cursor_count[0] += 1 + + def push(self, channeldata, op_name=None): + _LOGGER.debug( + self._log("{} try to push data: {}".format(op_name, + channeldata.__str__()))) + if len(self._producers) == 0: + raise Exception( + self._log( + "expected number of producers to be greater than 0, but the it is 0." 
+ )) + elif len(self._producers) == 1: + with self._cv: + while self._stop is False: + try: + self.put({op_name: channeldata}, timeout=0) + break + except Queue.Full: + self._cv.wait() + _LOGGER.debug( + self._log("{} channel size: {}".format(op_name, + self.qsize()))) + self._cv.notify_all() + _LOGGER.debug(self._log("{} notify all".format(op_name))) + _LOGGER.debug(self._log("{} push data succ!".format(op_name))) + return True + elif op_name is None: + raise Exception( + self._log( + "There are multiple producers, so op_name cannot be None.")) + + producer_num = len(self._producers) + data_id = channeldata.id + put_data = None + with self._cv: + _LOGGER.debug(self._log("{} get lock".format(op_name))) + if data_id not in self._input_buf: + self._input_buf[data_id] = { + name: None + for name in self._producers + } + self._pushed_producer_count[data_id] = 0 + # see: https://docs.python.org/3.6/library/multiprocessing.html?highlight=multiprocess#proxy-objects + # self._input_buf[data_id][op_name] = channeldata + tmp_input_buf = self._input_buf[data_id] + tmp_input_buf[op_name] = channeldata + self._input_buf[data_id] = tmp_input_buf + + if self._pushed_producer_count[data_id] + 1 == producer_num: + put_data = self._input_buf[data_id] + self._input_buf.pop(data_id) + self._pushed_producer_count.pop(data_id) + else: + self._pushed_producer_count[data_id] += 1 + + if put_data is None: + _LOGGER.debug( + self._log("{} push data succ, but not push to queue.". + format(op_name))) + else: + while self._stop is False: + try: + _LOGGER.debug( + self._log("{} push data succ: {}".format( + op_name, put_data.__str__()))) + self.put(put_data, timeout=0) + break + except Queue.Empty: + self._cv.wait() + + _LOGGER.debug( + self._log("multi | {} push data succ!".format(op_name))) + self._cv.notify_all() + return True + + def front(self, op_name=None): + _LOGGER.debug(self._log("{} try to get data...".format(op_name))) + if len(self._consumer_cursors) == 0: + raise Exception( + self._log( + "expected number of consumers to be greater than 0, but the it is 0." + )) + elif len(self._consumer_cursors) == 1: + resp = None + with self._cv: + while self._stop is False and resp is None: + try: + _LOGGER.debug( + self._log("{} try to get(with channel empty: {})". + format(op_name, self.empty()))) + # For queue multiprocess: after putting an object on + # an empty queue there may be an infinitessimal delay + # before the queue's :meth:`~Queue.empty` + # see more: + # - https://bugs.python.org/issue18277 + # - https://hg.python.org/cpython/rev/860fc6a2bd21 + resp = self.get(timeout=1e-3) + break + except Queue.Empty: + _LOGGER.debug( + self._log( + "{} wait for empty queue(with channel empty: {})". + format(op_name, self.empty()))) + self._cv.wait() + _LOGGER.debug( + self._log("{} get data succ: {}".format(op_name, resp.__str__( + )))) + return resp + elif op_name is None: + raise Exception( + self._log( + "There are multiple consumers, so op_name cannot be None.")) + + # In output_buf, different Ops (according to op_name) have different + # cursors. In addition, there is a base_cursor. 
Their difference is + # the data_idx to be taken by the corresponding Op at the current + # time: data_idx = consumer_cursor - base_cursor + # + # base_cursor consumer_B_cursor (data_idx: 3) + # | | + # output_buf: | data0 | data1 | data2 | data3 | + # | + # consumer_A_cursor (data_idx: 0) + with self._cv: + # When the data required by the current Op is not in output_buf, + # it is necessary to obtain a data from queue and add it to output_buf. + while self._stop is False and self._consumer_cursors[ + op_name] - self._base_cursor.value >= len(self._output_buf): + _LOGGER.debug( + self._log( + "({}) B self._consumer_cursors: {}, self._base_cursor: {}, len(self._output_buf): {}". + format(op_name, self._consumer_cursors, + self._base_cursor.value, len(self._output_buf)))) + try: + _LOGGER.debug( + self._log("{} try to get(with channel size: {})".format( + op_name, self.qsize()))) + # For queue multiprocess: after putting an object on + # an empty queue there may be an infinitessimal delay + # before the queue's :meth:`~Queue.empty` + # see more: + # - https://bugs.python.org/issue18277 + # - https://hg.python.org/cpython/rev/860fc6a2bd21 + channeldata = self.get(timeout=1e-3) + self._output_buf.append(channeldata) + break + except Queue.Empty: + _LOGGER.debug( + self._log( + "{} wait for empty queue(with channel size: {})". + format(op_name, self.qsize()))) + self._cv.wait() + + consumer_cursor = self._consumer_cursors[op_name] + base_cursor = self._base_cursor.value + data_idx = consumer_cursor - base_cursor + resp = self._output_buf[data_idx] + _LOGGER.debug(self._log("{} get data: {}".format(op_name, resp))) + + self._cursor_count[consumer_cursor] -= 1 + if consumer_cursor == base_cursor and self._cursor_count[ + consumer_cursor] == 0: + # When all the different Ops get the data that data_idx points + # to, pop the data from output_buf. + self._cursor_count.pop(consumer_cursor) + self._output_buf.pop(0) + self._base_cursor.value += 1 + # to avoid cursor overflow + if self._base_cursor.value >= self._reset_max_cursor: + self._base_cursor.value -= self._reset_max_cursor + for name in self._consumer_cursors.keys(): + self._consumer_cursors[name] -= self._reset_max_cursor + cursor_count_tmp = { + cursor - self._reset_max_cursor: count + for cursor, count in self._cursor_count.copy().items() + } + self._cursor_count.clear() + for cursor, count in cursor_count_tmp.items(): + self._cursor_count[cursor] = count + + self._consumer_cursors[op_name] += 1 + new_consumer_cursor = self._consumer_cursors[op_name] + if self._cursor_count.get(new_consumer_cursor) is None: + self._cursor_count[new_consumer_cursor] = 0 + self._cursor_count[new_consumer_cursor] += 1 + + _LOGGER.debug( + self._log( + "({}) A self._consumer_cursors: {}, self._base_cursor: {}, len(self._output_buf): {}". + format(op_name, self._consumer_cursors, + self._base_cursor.value, len(self._output_buf)))) + _LOGGER.debug(self._log("{} notify all".format(op_name))) + self._cv.notify_all() + + _LOGGER.debug(self._log("multi | {} get data succ!".format(op_name))) + return resp # reference, read only + + def stop(self): + #TODO + self.close() + self._stop = True + self._cv.notify_all() + + +class ThreadChannel(Queue.Queue): + """ + (Thread version)The channel used for communication between Ops. + + 1. Support multiple different Op feed data (multiple producer) + Different types of data will be packaged through the data ID + 2. 
Support multiple different Op fetch data (multiple consumer) + Only when all types of Ops get the data of the same ID, + the data will be poped; The Op of the same type will not + get the data of the same ID. + 3. (TODO) Timeout and BatchSize are not fully supported. + + Note: + 1. The ID of the data in the channel must be different. + 2. The function add_producer() and add_consumer() are not thread safe, + and can only be called during initialization. + + There are two buffers and one queue in Channel: + + op_A \ / op_D + op_B - a. input_buf -> b. queue -> c. output_buf - op_E + op_C / \ op_F + + a. In input_buf, the input of multiple predecessor Ops is packed by data ID. + b. The packed data will be stored in queue. + c. In order to support multiple successor Ops to retrieve data, output_buf + maintains the data obtained from queue. + """ + + def __init__(self, name=None, maxsize=-1, timeout=None): + Queue.Queue.__init__(self, maxsize=maxsize) + self._maxsize = maxsize + self._timeout = timeout + self.name = name + self._stop = False + + self._cv = threading.Condition() + + self._producers = [] + self._pushed_producer_count = {} # {data_id: count} + self._input_buf = {} # {data_id: {op_name: data}} + + self._reset_max_cursor = 1000000000000000000 + self._consumer_cursors = {} # {op_name: idx} + self._cursor_count = {} # {cursor: count} + self._base_cursor = 0 + self._output_buf = [] + + def get_producers(self): + return self._producers + + def get_consumers(self): + return self._consumer_cursors.keys() + + def _log(self, info_str): + return "[{}] {}".format(self.name, info_str) + + def debug(self): + return self._log("p: {}, c: {}".format(self.get_producers(), + self.get_consumers())) + + def add_producer(self, op_name): + """ not thread safe, and can only be called during initialization. """ + if op_name in self._producers: + raise ValueError( + self._log("producer({}) is already in channel".format(op_name))) + self._producers.append(op_name) + + def add_consumer(self, op_name): + """ not thread safe, and can only be called during initialization. """ + if op_name in self._consumer_cursors: + raise ValueError( + self._log("consumer({}) is already in channel".format(op_name))) + self._consumer_cursors[op_name] = 0 + + if self._cursor_count.get(0) is None: + self._cursor_count[0] = 0 + self._cursor_count[0] += 1 + + def push(self, channeldata, op_name=None): + _LOGGER.debug( + self._log("{} try to push data: {}".format(op_name, + channeldata.__str__()))) + if len(self._producers) == 0: + raise Exception( + self._log( + "expected number of producers to be greater than 0, but the it is 0." 
+ )) + elif len(self._producers) == 1: + with self._cv: + while self._stop is False: + try: + self.put({op_name: channeldata}, timeout=0) + break + except Queue.Full: + self._cv.wait() + self._cv.notify_all() + _LOGGER.debug(self._log("{} push data succ!".format(op_name))) + return True + elif op_name is None: + raise Exception( + self._log( + "There are multiple producers, so op_name cannot be None.")) + + producer_num = len(self._producers) + data_id = channeldata.id + put_data = None + with self._cv: + _LOGGER.debug(self._log("{} get lock".format(op_name))) + if data_id not in self._input_buf: + self._input_buf[data_id] = { + name: None + for name in self._producers + } + self._pushed_producer_count[data_id] = 0 + self._input_buf[data_id][op_name] = channeldata + if self._pushed_producer_count[data_id] + 1 == producer_num: + put_data = self._input_buf[data_id] + self._input_buf.pop(data_id) + self._pushed_producer_count.pop(data_id) + else: + self._pushed_producer_count[data_id] += 1 + + if put_data is None: + _LOGGER.debug( + self._log("{} push data succ, but not push to queue.". + format(op_name))) + else: + while self._stop is False: + try: + self.put(put_data, timeout=0) + break + except Queue.Empty: + self._cv.wait() + + _LOGGER.debug( + self._log("multi | {} push data succ!".format(op_name))) + self._cv.notify_all() + return True + + def front(self, op_name=None): + _LOGGER.debug(self._log("{} try to get data".format(op_name))) + if len(self._consumer_cursors) == 0: + raise Exception( + self._log( + "expected number of consumers to be greater than 0, but the it is 0." + )) + elif len(self._consumer_cursors) == 1: + resp = None + with self._cv: + while self._stop is False and resp is None: + try: + resp = self.get(timeout=0) + break + except Queue.Empty: + self._cv.wait() + _LOGGER.debug( + self._log("{} get data succ: {}".format(op_name, resp.__str__( + )))) + return resp + elif op_name is None: + raise Exception( + self._log( + "There are multiple consumers, so op_name cannot be None.")) + + # In output_buf, different Ops (according to op_name) have different + # cursors. In addition, there is a base_cursor. Their difference is + # the data_idx to be taken by the corresponding Op at the current + # time: data_idx = consumer_cursor - base_cursor + # + # base_cursor consumer_B_cursor (data_idx: 3) + # | | + # output_buf: | data0 | data1 | data2 | data3 | + # | + # consumer_A_cursor (data_idx: 0) + with self._cv: + # When the data required by the current Op is not in output_buf, + # it is necessary to obtain a data from queue and add it to output_buf. + while self._stop is False and self._consumer_cursors[ + op_name] - self._base_cursor >= len(self._output_buf): + try: + channeldata = self.get(timeout=0) + self._output_buf.append(channeldata) + break + except Queue.Empty: + self._cv.wait() + + consumer_cursor = self._consumer_cursors[op_name] + base_cursor = self._base_cursor + data_idx = consumer_cursor - base_cursor + resp = self._output_buf[data_idx] + _LOGGER.debug(self._log("{} get data: {}".format(op_name, resp))) + + self._cursor_count[consumer_cursor] -= 1 + if consumer_cursor == base_cursor and self._cursor_count[ + consumer_cursor] == 0: + # When all the different Ops get the data that data_idx points + # to, pop the data from output_buf. 
+ self._cursor_count.pop(consumer_cursor) + self._output_buf.pop(0) + self._base_cursor += 1 + # to avoid cursor overflow + if self._base_cursor >= self._reset_max_cursor: + self._base_cursor -= self._reset_max_cursor + for name in self._consumer_cursors: + self._consumer_cursors[name] -= self._reset_max_cursor + self._cursor_count = { + cursor - self._reset_max_cursor: count + for cursor, count in self._cursor_count.items() + } + + self._consumer_cursors[op_name] += 1 + new_consumer_cursor = self._consumer_cursors[op_name] + if self._cursor_count.get(new_consumer_cursor) is None: + self._cursor_count[new_consumer_cursor] = 0 + self._cursor_count[new_consumer_cursor] += 1 + + self._cv.notify_all() + + _LOGGER.debug(self._log("multi | {} get data succ!".format(op_name))) + # return resp # reference, read only + return copy.deepcopy(resp) + + def stop(self): + #TODO + self.close() + self._stop = True + self._cv.notify_all() diff --git a/python/pipeline/operator.py b/python/pipeline/operator.py index a0eed6da107c0955be0d0bbcdda2967402b84b68..d82cac888298f83a1c8412f742adbf7de3932471 100644 --- a/python/pipeline/operator.py +++ b/python/pipeline/operator.py @@ -12,3 +12,473 @@ # See the License for the specific language governing permissions and # limitations under the License. # pylint: disable=doc-string-missing + +import threading +import multiprocessing +from paddle_serving_client import MultiLangClient, Client +from concurrent import futures +import logging +import func_timeout +from numpy import * + +from .proto import pipeline_service_pb2 +from .channel import ThreadChannel, ProcessChannel, ChannelDataEcode, ChannelData, ChannelDataType +from .util import NameGenerator + +_LOGGER = logging.getLogger(__name__) +_op_name_gen = NameGenerator("Op") + + +class Op(object): + def __init__(self, + name=None, + input_ops=[], + server_endpoints=[], + fetch_list=[], + client_config=None, + concurrency=1, + timeout=-1, + retry=1): + if name is None: + name = _op_name_gen.next() + self._is_run = False + self.name = name # to identify the type of OP, it must be globally unique + self.concurrency = concurrency # amount of concurrency + self.set_input_ops(input_ops) + + self._server_endpoints = server_endpoints + self.with_serving = False + if len(self._server_endpoints) != 0: + self.with_serving = True + self._client_config = client_config + self._fetch_names = fetch_list + + self._timeout = timeout + self._retry = max(1, retry) + self._input = None + self._outputs = [] + self._profiler = None + + def init_profiler(self, profiler): + self._profiler = profiler + + def _profiler_record(self, string): + if self._profiler is None: + return + self._profiler.record(string) + + def init_client(self, client_type, client_config, server_endpoints, + fetch_names): + if self.with_serving == False: + _LOGGER.debug("{} no client".format(self.name)) + return + _LOGGER.debug("{} client_config: {}".format(self.name, client_config)) + _LOGGER.debug("{} fetch_names: {}".format(self.name, fetch_names)) + if client_type == 'brpc': + self._client = Client() + self._client.load_client_config(client_config) + elif client_type == 'grpc': + self._client = MultiLangClient() + else: + raise ValueError("unknow client type: {}".format(client_type)) + self._client.connect(server_endpoints) + self._fetch_names = fetch_names + + def _get_input_channel(self): + return self._input + + def get_input_ops(self): + return self._input_ops + + def set_input_ops(self, ops): + if not isinstance(ops, list): + ops = [] if ops is None else [ops] + 
self._input_ops = [] + for op in ops: + if not isinstance(op, Op): + raise TypeError( + self._log('input op must be Op type, not {}'.format( + type(op)))) + self._input_ops.append(op) + + def add_input_channel(self, channel): + if not isinstance(channel, (ThreadChannel, ProcessChannel)): + raise TypeError( + self._log('input channel must be Channel type, not {}'.format( + type(channel)))) + channel.add_consumer(self.name) + self._input = channel + + def _get_output_channels(self): + return self._outputs + + def add_output_channel(self, channel): + if not isinstance(channel, (ThreadChannel, ProcessChannel)): + raise TypeError( + self._log('output channel must be Channel type, not {}'.format( + type(channel)))) + channel.add_producer(self.name) + self._outputs.append(channel) + + def preprocess(self, input_dicts): + # multiple previous Op + if len(input_dicts) != 1: + raise NotImplementedError( + 'this Op has multiple previous inputs. Please override this func.' + ) + + (_, input_dict), = input_dicts.items() + return input_dict + + def process(self, feed_dict): + err, err_info = ChannelData.check_npdata(feed_dict) + if err != 0: + raise NotImplementedError( + "{} Please override preprocess func.".format(err_info)) + _LOGGER.debug(self._log('feed_dict: {}'.format(feed_dict))) + _LOGGER.debug(self._log('fetch: {}'.format(self._fetch_names))) + call_result = self._client.predict( + feed=feed_dict, fetch=self._fetch_names) + _LOGGER.debug(self._log("get call_result")) + return call_result + + def postprocess(self, fetch_dict): + return fetch_dict + + def stop(self): + self._is_run = False + + def _parse_channeldata(self, channeldata_dict): + data_id, error_channeldata = None, None + parsed_data = {} + + key = list(channeldata_dict.keys())[0] + data_id = channeldata_dict[key].id + + for name, data in channeldata_dict.items(): + if data.ecode != ChannelDataEcode.OK.value: + error_channeldata = data + break + parsed_data[name] = data.parse() + return data_id, error_channeldata, parsed_data + + def _push_to_output_channels(self, data, channels, name=None): + if name is None: + name = self.name + for channel in channels: + channel.push(data, name) + + def start_with_process(self, client_type): + proces = [] + for concurrency_idx in range(self.concurrency): + p = multiprocessing.Process( + target=self._run, + args=(concurrency_idx, self._get_input_channel(), + self._get_output_channels(), client_type)) + p.start() + proces.append(p) + return proces + + def start_with_thread(self, client_type): + threads = [] + for concurrency_idx in range(self.concurrency): + t = threading.Thread( + target=self._run, + args=(concurrency_idx, self._get_input_channel(), + self._get_output_channels(), client_type)) + t.start() + threads.append(t) + return threads + + def load_user_resources(self): + pass + + def _run_preprocess(self, parsed_data, data_id, log_func): + preped_data, error_channeldata = None, None + try: + preped_data = self.preprocess(parsed_data) + except NotImplementedError as e: + # preprocess function not implemented + error_info = log_func(e) + _LOGGER.error(error_info) + error_channeldata = ChannelData( + ecode=ChannelDataEcode.NOT_IMPLEMENTED.value, + error_info=error_info, + data_id=data_id) + except TypeError as e: + # Error type in channeldata.datatype + error_info = log_func(e) + _LOGGER.error(error_info) + error_channeldata = ChannelData( + ecode=ChannelDataEcode.TYPE_ERROR.value, + error_info=error_info, + data_id=data_id) + except Exception as e: + error_info = log_func(e) + 
_LOGGER.error(error_info) + error_channeldata = ChannelData( + ecode=ChannelDataEcode.UNKNOW.value, + error_info=error_info, + data_id=data_id) + return preped_data, error_channeldata + + def _run_process(self, preped_data, data_id, log_func): + midped_data, error_channeldata = None, None + if self.with_serving: + ecode = ChannelDataEcode.OK.value + if self._timeout <= 0: + try: + midped_data = self.process(preped_data) + except Exception as e: + ecode = ChannelDataEcode.UNKNOW.value + error_info = log_func(e) + _LOGGER.error(error_info) + else: + for i in range(self._retry): + try: + midped_data = func_timeout.func_timeout( + self._timeout, self.process, args=(preped_data, )) + except func_timeout.FunctionTimedOut as e: + if i + 1 >= self._retry: + ecode = ChannelDataEcode.TIMEOUT.value + error_info = log_func(e) + _LOGGER.error(error_info) + else: + _LOGGER.warn( + log_func("timeout, retry({})".format(i + 1))) + except Exception as e: + ecode = ChannelDataEcode.UNKNOW.value + error_info = log_func(e) + _LOGGER.error(error_info) + break + else: + break + if ecode != ChannelDataEcode.OK.value: + error_channeldata = ChannelData( + ecode=ecode, error_info=error_info, data_id=data_id) + elif midped_data is None: + # op client return None + error_channeldata = ChannelData( + ecode=ChannelDataEcode.CLIENT_ERROR.value, + error_info=log_func( + "predict failed. pls check the server side."), + data_id=data_id) + else: + midped_data = preped_data + return midped_data, error_channeldata + + def _run_postprocess(self, midped_data, data_id, log_func): + output_data, error_channeldata = None, None + try: + postped_data = self.postprocess(midped_data) + except Exception as e: + error_info = log_func(e) + _LOGGER.error(error_info) + error_channeldata = ChannelData( + ecode=ChannelDataEcode.UNKNOW.value, + error_info=error_info, + data_id=data_id) + return output_data, error_channeldata + + if not isinstance(postped_data, dict): + error_info = log_func("output of postprocess funticon must be " \ + "dict type, but get {}".format(type(postped_data))) + _LOGGER.error(error_info) + error_channeldata = ChannelData( + ecode=ChannelDataEcode.UNKNOW.value, + error_info=error_info, + data_id=data_id) + return output_data, error_channeldata + + err, _ = ChannelData.check_npdata(postped_data) + if err == 0: + output_data = ChannelData( + ChannelDataType.CHANNEL_NPDATA.value, + npdata=postped_data, + data_id=data_id) + else: + output_data = ChannelData( + ChannelDataType.DICT.value, + dictdata=postped_data, + data_id=data_id) + return output_data, error_channeldata + + def _run(self, concurrency_idx, input_channel, output_channels, + client_type): + def get_log_func(op_info_prefix): + def log_func(info_str): + return "{} {}".format(op_info_prefix, info_str) + + return log_func + + op_info_prefix = "[{}|{}]".format(self.name, concurrency_idx) + log = get_log_func(op_info_prefix) + tid = threading.current_thread().ident + + # create client based on client_type + self.init_client(client_type, self._client_config, + self._server_endpoints, self._fetch_names) + + # load user resources + self.load_user_resources() + + self._is_run = True + while self._is_run: + self._profiler_record("{}-get#{}_0".format(op_info_prefix, tid)) + channeldata_dict = input_channel.front(self.name) + self._profiler_record("{}-get#{}_1".format(op_info_prefix, tid)) + _LOGGER.debug(log("input_data: {}".format(channeldata_dict))) + + data_id, error_channeldata, parsed_data = self._parse_channeldata( + channeldata_dict) + # error data in predecessor 
Op + if error_channeldata is not None: + self._push_to_output_channels(error_channeldata, + output_channels) + continue + + # preprecess + self._profiler_record("{}-prep#{}_0".format(op_info_prefix, tid)) + preped_data, error_channeldata = self._run_preprocess(parsed_data, + data_id, log) + self._profiler_record("{}-prep#{}_1".format(op_info_prefix, tid)) + if error_channeldata is not None: + self._push_to_output_channels(error_channeldata, + output_channels) + continue + + # process + self._profiler_record("{}-midp#{}_0".format(op_info_prefix, tid)) + midped_data, error_channeldata = self._run_process(preped_data, + data_id, log) + self._profiler_record("{}-midp#{}_1".format(op_info_prefix, tid)) + if error_channeldata is not None: + self._push_to_output_channels(error_channeldata, + output_channels) + continue + + # postprocess + self._profiler_record("{}-postp#{}_0".format(op_info_prefix, tid)) + output_data, error_channeldata = self._run_postprocess(midped_data, + data_id, log) + self._profiler_record("{}-postp#{}_1".format(op_info_prefix, tid)) + if error_channeldata is not None: + self._push_to_output_channels(error_channeldata, + output_channels) + continue + + # push data to channel (if run succ) + self._profiler_record("{}-push#{}_0".format(op_info_prefix, tid)) + self._push_to_output_channels(output_data, output_channels) + self._profiler_record("{}-push#{}_1".format(op_info_prefix, tid)) + + def _log(self, info): + return "{} {}".format(self.name, info) + + +class RequestOp(Op): + """ RequestOp do not run preprocess, process, postprocess. """ + + def __init__(self, concurrency=1): + # PipelineService.name = "#G" + super(RequestOp, self).__init__( + name="#G", input_ops=[], concurrency=concurrency) + # load user resources + self.load_user_resources() + + def unpack_request_package(self, request): + dictdata = {} + for idx, key in enumerate(request.key): + data = request.value[idx] + try: + data = eval(data) + except Exception as e: + pass + dictdata[key] = data + return dictdata + + +class ResponseOp(Op): + """ ResponseOp do not run preprocess, process, postprocess. """ + + def __init__(self, input_ops, concurrency=1): + super(ResponseOp, self).__init__( + name="#R", input_ops=input_ops, concurrency=concurrency) + # load user resources + self.load_user_resources() + + def pack_response_package(self, channeldata): + resp = pipeline_service_pb2.Response() + resp.ecode = channeldata.ecode + if resp.ecode == ChannelDataEcode.OK.value: + if channeldata.datatype == ChannelDataType.CHANNEL_NPDATA.value: + feed = channeldata.parse() + # ndarray to string: + # https://stackoverflow.com/questions/30167538/convert-a-numpy-ndarray-to-stringor-bytes-and-convert-it-back-to-numpy-ndarray + for name, var in feed.items(): + resp.value.append(var.__repr__()) + resp.key.append(name) + elif channeldata.datatype == ChannelDataType.DICT.value: + feed = channeldata.parse() + for name, var in feed.items(): + if not isinstance(var, str): + resp.ecode = ChannelDataEcode.TYPE_ERROR.value + resp.error_info = self._log( + "fetch var type must be str({}).".format( + type(var))) + break + resp.value.append(var) + resp.key.append(name) + else: + resp.ecode = ChannelDataEcode.TYPE_ERROR.value + resp.error_info = self._log( + "Error type({}) in datatype.".format(channeldata.datatype)) + _LOGGER.error(resp.error_info) + else: + resp.error_info = channeldata.error_info + return resp + + +class VirtualOp(Op): + ''' For connecting two channels. 
''' + + def __init__(self, name, concurrency=1): + super(VirtualOp, self).__init__( + name=name, input_ops=None, concurrency=concurrency) + self._virtual_pred_ops = [] + + def add_virtual_pred_op(self, op): + self._virtual_pred_ops.append(op) + + def add_output_channel(self, channel): + if not isinstance(channel, (ThreadChannel, ProcessChannel)): + raise TypeError( + self._log('output channel must be Channel type, not {}'.format( + type(channel)))) + for op in self._virtual_pred_ops: + channel.add_producer(op.name) + self._outputs.append(channel) + + def _run(self, concurrency_idx, input_channel, output_channels, + client_type): + def get_log_func(op_info_prefix): + def log_func(info_str): + return "{} {}".format(op_info_prefix, info_str) + + return log_func + + op_info_prefix = "[{}|{}]".format(self.name, concurrency_idx) + log = get_log_func(op_info_prefix) + tid = threading.current_thread().ident + + self._is_run = True + while self._is_run: + self._profiler_record("{}-get#{}_0".format(op_info_prefix, tid)) + channeldata_dict = input_channel.front(self.name) + self._profiler_record("{}-get#{}_1".format(op_info_prefix, tid)) + + self._profiler_record("{}-push#{}_0".format(op_info_prefix, tid)) + for name, data in channeldata_dict.items(): + self._push_to_output_channels( + data, channels=output_channels, name=name) + self._profiler_record("{}-push#{}_1".format(op_info_prefix, tid)) diff --git a/python/pipeline/pipeline_client.py b/python/pipeline/pipeline_client.py new file mode 100644 index 0000000000000000000000000000000000000000..4ad05b5a953d5084ffda360c0a1ac561463898a4 --- /dev/null +++ b/python/pipeline/pipeline_client.py @@ -0,0 +1,90 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# pylint: disable=doc-string-missing +import grpc +import numpy as np +from numpy import * +import logging +import functools +from .proto import pipeline_service_pb2 +from .proto import pipeline_service_pb2_grpc + +_LOGGER = logging.getLogger(__name__) + + +class PipelineClient(object): + def __init__(self): + self._channel = None + + def connect(self, endpoint): + self._channel = grpc.insecure_channel(endpoint) + self._stub = pipeline_service_pb2_grpc.PipelineServiceStub( + self._channel) + + def _pack_request_package(self, feed_dict): + req = pipeline_service_pb2.Request() + for key, value in feed_dict.items(): + req.key.append(key) + if isinstance(value, np.ndarray): + req.value.append(value.__repr__()) + elif isinstance(value, str): + req.value.append(value) + elif isinstance(value, list): + req.value.append(np.array(value).__repr__()) + else: + raise TypeError("only str and np.ndarray type is supported: {}". 
+ format(type(value))) + return req + + def _unpack_response_package(self, resp, fetch): + if resp.ecode != 0: + return {"ecode": resp.ecode, "error_info": resp.error_info} + fetch_map = {"ecode": resp.ecode} + for idx, key in enumerate(resp.key): + if key not in fetch: + continue + data = resp.value[idx] + try: + data = eval(data) + except Exception as e: + pass + fetch_map[key] = data + return fetch_map + + def predict(self, feed_dict, fetch, asyn=False): + if not isinstance(feed_dict, dict): + raise TypeError( + "feed must be dict type with format: {name: value}.") + if not isinstance(fetch, list): + raise TypeError("fetch must be list type with format: [name].") + req = self._pack_request_package(feed_dict) + if not asyn: + resp = self._stub.inference(req) + return self._unpack_response_package(resp) + else: + call_future = self._stub.inference.future(req) + return PipelinePredictFuture( + call_future, + functools.partial( + self._unpack_response_package, fetch=fetch)) + + +class PipelinePredictFuture(object): + def __init__(self, call_future, callback_func): + self.call_future_ = call_future + self.callback_func_ = callback_func + + def result(self): + resp = self.call_future_.result() + return self.callback_func_(resp) diff --git a/python/pipeline/pipeline_server.py b/python/pipeline/pipeline_server.py index a0eed6da107c0955be0d0bbcdda2967402b84b68..55289eeca42e02bb979d4a21791fdde44e0aff02 100644 --- a/python/pipeline/pipeline_server.py +++ b/python/pipeline/pipeline_server.py @@ -12,3 +12,440 @@ # See the License for the specific language governing permissions and # limitations under the License. # pylint: disable=doc-string-missing +import threading +import multiprocessing +import multiprocessing.queues +import sys +if sys.version_info.major == 2: + import Queue +elif sys.version_info.major == 3: + import queue as Queue +else: + raise Exception("Error Python version") +import os +from paddle_serving_client import MultiLangClient, Client +from concurrent import futures +import numpy as np +import grpc +import logging +import random +import time +import func_timeout +import enum +import collections +import copy +import socket +from contextlib import closing +import yaml + +from .proto import pipeline_service_pb2 +from .proto import pipeline_service_pb2_grpc +from .operator import Op, RequestOp, ResponseOp, VirtualOp +from .channel import ThreadChannel, ProcessChannel, ChannelData, ChannelDataEcode, ChannelDataType +from .profiler import TimeProfiler +from .util import NameGenerator + +_LOGGER = logging.getLogger(__name__) +_profiler = TimeProfiler() + + +class PipelineService(pipeline_service_pb2_grpc.PipelineServiceServicer): + def __init__(self, in_channel, out_channel, unpack_func, pack_func, + retry=2): + super(PipelineService, self).__init__() + self.name = "#G" + self.set_in_channel(in_channel) + self.set_out_channel(out_channel) + _LOGGER.debug(self._log(in_channel.debug())) + _LOGGER.debug(self._log(out_channel.debug())) + #TODO: + # multi-lock for different clients + # diffenert lock for server and client + self._id_lock = threading.Lock() + self._cv = threading.Condition() + self._globel_resp_dict = {} + self._id_counter = 0 + self._reset_max_id = 1000000000000000000 + self._retry = retry + self._is_run = True + self._pack_func = pack_func + self._unpack_func = unpack_func + self._recive_func = threading.Thread( + target=PipelineService._recive_out_channel_func, args=(self, )) + self._recive_func.start() + + def _log(self, info_str): + return "[{}] {}".format(self.name, 
info_str) + + def set_in_channel(self, in_channel): + if not isinstance(in_channel, (ThreadChannel, ProcessChannel)): + raise TypeError( + self._log('in_channel must be Channel type, but get {}'.format( + type(in_channel)))) + in_channel.add_producer(self.name) + self._in_channel = in_channel + + def set_out_channel(self, out_channel): + if not isinstance(out_channel, (ThreadChannel, ProcessChannel)): + raise TypeError( + self._log('out_channel must be Channel type, but get {}'.format( + type(out_channel)))) + out_channel.add_consumer(self.name) + self._out_channel = out_channel + + def stop(self): + self._is_run = False + + def _recive_out_channel_func(self): + while self._is_run: + channeldata_dict = self._out_channel.front(self.name) + if len(channeldata_dict) != 1: + raise Exception("out_channel cannot have multiple input ops") + (_, channeldata), = channeldata_dict.items() + if not isinstance(channeldata, ChannelData): + raise TypeError( + self._log('data must be ChannelData type, but get {}'. + format(type(channeldata)))) + with self._cv: + data_id = channeldata.id + self._globel_resp_dict[data_id] = channeldata + self._cv.notify_all() + + def _get_next_id(self): + with self._id_lock: + if self._id_counter >= self._reset_max_id: + self._id_counter -= self._reset_max_id + self._id_counter += 1 + return self._id_counter - 1 + + def _get_data_in_globel_resp_dict(self, data_id): + resp = None + with self._cv: + while data_id not in self._globel_resp_dict: + self._cv.wait() + resp = self._globel_resp_dict.pop(data_id) + self._cv.notify_all() + return resp + + def _pack_data_for_infer(self, request): + _LOGGER.debug(self._log('start inferce')) + data_id = self._get_next_id() + dictdata = None + try: + dictdata = self._unpack_func(request) + except Exception as e: + return ChannelData( + ecode=ChannelDataEcode.RPC_PACKAGE_ERROR.value, + error_info="rpc package error: {}".format(e), + data_id=data_id), data_id + else: + return ChannelData( + datatype=ChannelDataType.DICT.value, + dictdata=dictdata, + data_id=data_id), data_id + + def _pack_data_for_resp(self, channeldata): + _LOGGER.debug(self._log('get channeldata')) + return self._pack_func(channeldata) + + def inference(self, request, context): + _profiler.record("{}-prepack_0".format(self.name)) + data, data_id = self._pack_data_for_infer(request) + _profiler.record("{}-prepack_1".format(self.name)) + + resp_channeldata = None + for i in range(self._retry): + _LOGGER.debug(self._log('push data')) + _profiler.record("{}-push_0".format(self.name)) + self._in_channel.push(data, self.name) + _profiler.record("{}-push_1".format(self.name)) + + _LOGGER.debug(self._log('wait for infer')) + _profiler.record("{}-fetch_0".format(self.name)) + resp_channeldata = self._get_data_in_globel_resp_dict(data_id) + _profiler.record("{}-fetch_1".format(self.name)) + + if resp_channeldata.ecode == ChannelDataEcode.OK.value: + break + if i + 1 < self._retry: + _LOGGER.warn("retry({}): {}".format( + i + 1, resp_channeldata.error_info)) + + _profiler.record("{}-postpack_0".format(self.name)) + resp = self._pack_data_for_resp(resp_channeldata) + _profiler.record("{}-postpack_1".format(self.name)) + _profiler.print_profile() + return resp + + +class PipelineServer(object): + def __init__(self): + self._channels = [] + self._actual_ops = [] + self._port = None + self._worker_num = None + self._in_channel = None + self._out_channel = None + self._response_op = None + self._pack_func = None + self._unpack_func = None + + def add_channel(self, channel): + 
self._channels.append(channel) + + def gen_desc(self): + _LOGGER.info('here will generate desc for PAAS') + pass + + def set_response_op(self, response_op): + if not isinstance(response_op, Op): + raise Exception("response_op must be Op type.") + if len(response_op.get_input_ops()) != 1: + raise Exception("response_op can only have one previous op.") + self._response_op = response_op + + def _topo_sort(self, response_op): + if response_op is None: + raise Exception("response_op has not been set.") + + def get_use_ops(root): + # root: response_op + unique_names = set() + use_ops = set() + succ_ops_of_use_op = {} # {op_name: succ_ops} + que = Queue.Queue() + que.put(root) + #use_ops.add(root) + #unique_names.add(root.name) + while que.qsize() != 0: + op = que.get() + for pred_op in op.get_input_ops(): + if pred_op.name not in succ_ops_of_use_op: + succ_ops_of_use_op[pred_op.name] = [] + if op != root: + succ_ops_of_use_op[pred_op.name].append(op) + if pred_op not in use_ops: + que.put(pred_op) + use_ops.add(pred_op) + # check the name of op is globally unique + if pred_op.name in unique_names: + raise Exception("the name of Op must be unique: {}". + format(pred_op.name)) + unique_names.add(pred_op.name) + return use_ops, succ_ops_of_use_op + + use_ops, out_degree_ops = get_use_ops(response_op) + if len(use_ops) <= 1: + raise Exception( + "Besides RequestOp and ResponseOp, there should be at least one Op in DAG." + ) + + name2op = {op.name: op for op in use_ops} + out_degree_num = { + name: len(ops) + for name, ops in out_degree_ops.items() + } + que_idx = 0 # scroll queue + ques = [Queue.Queue() for _ in range(2)] + zero_indegree_num = 0 + for op in use_ops: + if len(op.get_input_ops()) == 0: + zero_indegree_num += 1 + if zero_indegree_num != 1: + raise Exception("DAG contains multiple input Ops") + last_op = response_op.get_input_ops()[0] + ques[que_idx].put(last_op) + + # topo sort to get dag_views + dag_views = [] + sorted_op_num = 0 + while True: + que = ques[que_idx] + next_que = ques[(que_idx + 1) % 2] + dag_view = [] + while que.qsize() != 0: + op = que.get() + dag_view.append(op) + sorted_op_num += 1 + for pred_op in op.get_input_ops(): + out_degree_num[pred_op.name] -= 1 + if out_degree_num[pred_op.name] == 0: + next_que.put(pred_op) + dag_views.append(dag_view) + if next_que.qsize() == 0: + break + que_idx = (que_idx + 1) % 2 + if sorted_op_num < len(use_ops): + raise Exception("not legal DAG") + + # create channels and virtual ops + def gen_channel(name_gen): + channel = None + if self._use_multithread: + channel = ThreadChannel(name=name_gen.next()) + else: + channel = ProcessChannel(self._manager, name=name_gen.next()) + return channel + + def gen_virtual_op(name_gen): + return VirtualOp(name=name_gen.next()) + + virtual_op_name_gen = NameGenerator("vir") + channel_name_gen = NameGenerator("chl") + virtual_ops = [] + channels = [] + input_channel = None + actual_view = None + dag_views = list(reversed(dag_views)) + for v_idx, view in enumerate(dag_views): + if v_idx + 1 >= len(dag_views): + break + next_view = dag_views[v_idx + 1] + if actual_view is None: + actual_view = view + actual_next_view = [] + pred_op_of_next_view_op = {} + for op in actual_view: + # find actual succ op in next view and create virtual op + for succ_op in out_degree_ops[op.name]: + if succ_op in next_view: + if succ_op not in actual_next_view: + actual_next_view.append(succ_op) + if succ_op.name not in pred_op_of_next_view_op: + pred_op_of_next_view_op[succ_op.name] = [] + 
pred_op_of_next_view_op[succ_op.name].append(op) + else: + # create virtual op + virtual_op = gen_virtual_op(virtual_op_name_gen) + virtual_ops.append(virtual_op) + out_degree_ops[virtual_op.name] = [succ_op] + actual_next_view.append(virtual_op) + pred_op_of_next_view_op[virtual_op.name] = [op] + virtual_op.add_virtual_pred_op(op) + actual_view = actual_next_view + # create channel + processed_op = set() + for o_idx, op in enumerate(actual_next_view): + if op.name in processed_op: + continue + channel = gen_channel(channel_name_gen) + channels.append(channel) + _LOGGER.debug("{} => {}".format(channel.name, op.name)) + op.add_input_channel(channel) + pred_ops = pred_op_of_next_view_op[op.name] + if v_idx == 0: + input_channel = channel + else: + # if pred_op is virtual op, it will use ancestors as producers to channel + for pred_op in pred_ops: + _LOGGER.debug("{} => {}".format(pred_op.name, + channel.name)) + pred_op.add_output_channel(channel) + processed_op.add(op.name) + # find same input op to combine channel + for other_op in actual_next_view[o_idx + 1:]: + if other_op.name in processed_op: + continue + other_pred_ops = pred_op_of_next_view_op[other_op.name] + if len(other_pred_ops) != len(pred_ops): + continue + same_flag = True + for pred_op in pred_ops: + if pred_op not in other_pred_ops: + same_flag = False + break + if same_flag: + _LOGGER.debug("{} => {}".format(channel.name, + other_op.name)) + other_op.add_input_channel(channel) + processed_op.add(other_op.name) + output_channel = gen_channel(channel_name_gen) + channels.append(output_channel) + last_op.add_output_channel(output_channel) + + pack_func, unpack_func = None, None + pack_func = self._response_op.pack_response_package + self._actual_ops = virtual_ops + for op in use_ops: + if len(op.get_input_ops()) == 0: + unpack_func = op.unpack_request_package + continue + self._actual_ops.append(op) + self._channels = channels + for c in channels: + _LOGGER.debug(c.debug()) + return input_channel, output_channel, pack_func, unpack_func + + def _port_is_available(self, port): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.settimeout(2) + result = sock.connect_ex(('0.0.0.0', port)) + return result != 0 + + def prepare_server(self, yml_file): + with open(yml_file) as f: + yml_config = yaml.load(f.read()) + self._port = yml_config.get('port', 8080) + if not self._port_is_available(self._port): + raise SystemExit("Prot {} is already used".format(self._port)) + self._worker_num = yml_config.get('worker_num', 2) + + self._retry = yml_config.get('retry', 1) + self._client_type = yml_config.get('client_type', 'brpc') + self._use_multithread = yml_config.get('use_multithread', True) + profile = yml_config.get('profile', False) + + if not self._use_multithread: + self._manager = multiprocessing.Manager() + if profile: + raise Exception( + "profile cannot be used in multiprocess version temporarily") + _profiler.enable(profile) + + input_channel, output_channel, self._pack_func, self._unpack_func = self._topo_sort( + self._response_op) + self._in_channel = input_channel + self._out_channel = output_channel + for op in self._actual_ops: + if op.with_serving: + self.prepare_serving(op) + self.gen_desc() + + def _run_ops(self): + threads_or_proces = [] + for op in self._actual_ops: + op.init_profiler(_profiler) + if self._use_multithread: + threads_or_proces.extend( + op.start_with_thread(self._client_type)) + else: + threads_or_proces.extend( + op.start_with_process(self._client_type)) + return 
threads_or_proces + + def _stop_all(self, service): + service.stop() + for op in self._actual_ops: + op.stop() + for chl in self._channels: + chl.stop() + + def run_server(self): + op_threads_or_proces = self._run_ops() + service = PipelineService(self._in_channel, self._out_channel, + self._unpack_func, self._pack_func, + self._retry) + server = grpc.server( + futures.ThreadPoolExecutor(max_workers=self._worker_num)) + pipeline_service_pb2_grpc.add_PipelineServiceServicer_to_server(service, + server) + server.add_insecure_port('[::]:{}'.format(self._port)) + server.start() + server.wait_for_termination() + self._stop_all() # TODO + for x in op_threads_or_proces: + x.join() + + def prepare_serving(self, op): + # run a server (not in PyServing) + _LOGGER.info("run a server (not in PyServing)") diff --git a/python/pipeline/profiler.py b/python/pipeline/profiler.py index a0eed6da107c0955be0d0bbcdda2967402b84b68..146203f7c184b506bb8fd70dadac1d89166a2de9 100644 --- a/python/pipeline/profiler.py +++ b/python/pipeline/profiler.py @@ -12,3 +12,54 @@ # See the License for the specific language governing permissions and # limitations under the License. # pylint: disable=doc-string-missing + +import os +import sys +import logging +if sys.version_info.major == 2: + import Queue +elif sys.version_info.major == 3: + import queue as Queue +else: + raise Exception("Error Python version") +import time + +_LOGGER = logging.getLogger(__name__) + + +class TimeProfiler(object): + def __init__(self): + self._pid = os.getpid() + self._print_head = 'PROFILE\tpid:{}\t'.format(self._pid) + self._time_record = Queue.Queue() + self._enable = False + + def enable(self, enable): + self._enable = enable + + def record(self, name_with_tag): + if self._enable is False: + return + name_with_tag = name_with_tag.split("_") + tag = name_with_tag[-1] + name = '_'.join(name_with_tag[:-1]) + self._time_record.put((name, tag, int(round(time.time() * 1000000)))) + + def print_profile(self): + if self._enable is False: + return + print_str = self._print_head + tmp = {} + while not self._time_record.empty(): + name, tag, timestamp = self._time_record.get() + if name in tmp: + ptag, ptimestamp = tmp.pop(name) + print_str += "{}_{}:{} ".format(name, ptag, ptimestamp) + print_str += "{}_{}:{} ".format(name, tag, timestamp) + else: + tmp[name] = (tag, timestamp) + print_str += "\n" + sys.stderr.write(print_str) + for name, item in tmp.items(): + tag, timestamp = item + self._time_record.put((name, tag, timestamp)) diff --git a/python/pipeline/proto/__init__.py b/python/pipeline/proto/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..abf198b97e6e818e1fbe59006f98492640bcee54 --- /dev/null +++ b/python/pipeline/proto/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
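Note on the TimeProfiler added in python/pipeline/profiler.py above: record() expects a name with a trailing tag separated by an underscore, and print_profile() pairs the two timestamps that share the same name. A minimal usage sketch follows; the import path is an assumption based on the package layout introduced by this patch.

from paddle_serving_server.pipeline.profiler import TimeProfiler  # assumed install path
import time

profiler = TimeProfiler()
profiler.enable(True)
profiler.record("midp_0")   # name "midp", start tag "0"
time.sleep(0.01)            # the work being profiled
profiler.record("midp_1")   # same name, end tag "1"
profiler.print_profile()    # writes "PROFILE pid:<pid> midp_0:<us> midp_1:<us>" to stderr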
diff --git a/python/pipeline/proto/pipeline_service.proto b/python/pipeline/proto/pipeline_service.proto new file mode 100644 index 0000000000000000000000000000000000000000..a920d5618ce36a191390d5140bee0a42c7394a6b --- /dev/null +++ b/python/pipeline/proto/pipeline_service.proto @@ -0,0 +1,32 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto2"; +package baidu.paddle_serving.pipeline_serving; + +message Request { + repeated string key = 1; + repeated string value = 2; +}; + +message Response { + repeated string key = 1; + repeated string value = 2; + required int32 ecode = 3; + optional string error_info = 4; +}; + +service PipelineService { + rpc inference(Request) returns (Response) {} +}; diff --git a/python/pipeline/proto/run_codegen.py b/python/pipeline/proto/run_codegen.py new file mode 100644 index 0000000000000000000000000000000000000000..217c60bbe74b1345519935b5f6609b085f410541 --- /dev/null +++ b/python/pipeline/proto/run_codegen.py @@ -0,0 +1,37 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Copyright 2015 gRPC authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Runs protoc with the gRPC plugin to generate messages and gRPC stubs.""" + +from grpc_tools import protoc + +protoc.main(( + '', + '-I.', + '--python_out=.', + '--grpc_python_out=.', + 'pipeline_service.proto', )) diff --git a/python/pipeline/util.py b/python/pipeline/util.py new file mode 100644 index 0000000000000000000000000000000000000000..a24c1a057ca4bbb5cf33f2402559ce3d14f3e6b9 --- /dev/null +++ b/python/pipeline/util.py @@ -0,0 +1,25 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + + +class NameGenerator(object): + def __init__(self, prefix): + self._idx = -1 + self._prefix = prefix + + def next(self): + self._idx += 1 + return "{}{}".format(self._prefix, self._idx) diff --git a/python/requirements.txt b/python/requirements.txt index 4b61fa6a4f89d88338cd868134f510d179bc45b6..ba953d94c733b0e256e9cf4ecbda1691bc15c4cf 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,3 +1,4 @@ numpy>=1.12, <=1.16.4 ; python_version<"3.5" grpcio-tools>=1.28.1 grpcio>=1.28.1 +func-timeout>=4.3.5 diff --git a/python/setup.py.client.in b/python/setup.py.client.in index 601cfc81f0971cf1fa480b1daaed70eb6c696494..4613d2db7747d06ab706bc96181f612985630811 100644 --- a/python/setup.py.client.in +++ b/python/setup.py.client.in @@ -65,11 +65,14 @@ REQUIRED_PACKAGES = [ if not find_package("paddlepaddle") and not find_package("paddlepaddle-gpu"): REQUIRED_PACKAGES.append("paddlepaddle") + packages=['paddle_serving_client', 'paddle_serving_client.proto', 'paddle_serving_client.io', - 'paddle_serving_client.metric', - 'paddle_serving_client.utils',] + 'paddle_serving_client.metric', + 'paddle_serving_client.utils', + 'paddle_serving_client.pipeline', + 'paddle_serving_client.pipeline.proto'] package_data={'paddle_serving_client': ['serving_client.so','lib/*'],} package_dir={'paddle_serving_client': '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client', @@ -77,10 +80,14 @@ package_dir={'paddle_serving_client': '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/proto', 'paddle_serving_client.io': '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/io', - 'paddle_serving_client.metric': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/metric', - 'paddle_serving_client.utils': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/utils',} + 'paddle_serving_client.metric': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/metric', + 'paddle_serving_client.utils': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/utils', + 'paddle_serving_client.pipeline': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/pipeline', + 'paddle_serving_client.pipeline.proto': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_client/pipeline/proto'} setup( name='paddle-serving-client', diff --git a/python/setup.py.server.in b/python/setup.py.server.in index efa9a50bb8a31fc81b97dec0243316cdc9cd8af6..967b98da42f77ac593d723ce64fe6533abe40bb0 100644 --- a/python/setup.py.server.in +++ b/python/setup.py.server.in @@ -42,12 +42,18 @@ REQUIRED_PACKAGES = [ ] packages=['paddle_serving_server', - 'paddle_serving_server.proto'] + 'paddle_serving_server.proto', + 'paddle_serving_server.pipeline', + 'paddle_serving_server.pipeline.proto'] package_dir={'paddle_serving_server': '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server', 'paddle_serving_server.proto': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto'} + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto', + 'paddle_serving_server.pipeline': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline', + 
'paddle_serving_server.pipeline.proto': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/pipeline/proto'} setup( name='paddle-serving-server', diff --git a/python/setup.py.server_gpu.in b/python/setup.py.server_gpu.in index 06b51c1c404590ed1db141f273bdc35f26c13176..29098b7c8d23a3f53abd084b68871c66512e8b4f 100644 --- a/python/setup.py.server_gpu.in +++ b/python/setup.py.server_gpu.in @@ -43,12 +43,18 @@ REQUIRED_PACKAGES = [ packages=['paddle_serving_server_gpu', - 'paddle_serving_server_gpu.proto'] + 'paddle_serving_server_gpu.proto', + 'paddle_serving_server_gpu.pipeline', + 'paddle_serving_server_gpu.pipeline.proto'] package_dir={'paddle_serving_server_gpu': '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu', 'paddle_serving_server_gpu.proto': - '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto'} + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto', + 'paddle_serving_server_gpu.pipeline': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline', + 'paddle_serving_server_gpu.pipeline.proto': + '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/pipeline/proto'} setup( name='paddle-serving-server-gpu', diff --git a/tools/Dockerfile b/tools/Dockerfile index 3c701725400350247153f828410d06cec69856f5..dd18a773562bd078771d7df44123ac530764af93 100644 --- a/tools/Dockerfile +++ b/tools/Dockerfile @@ -7,8 +7,9 @@ RUN yum -y install wget && \ yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false && \ yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false && \ yum -y install python3 python3-devel && \ - yum clean all && \ - curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + yum clean all + +RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ python get-pip.py && rm get-pip.py && \ localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \ echo "export LANG=en_US.utf8" >> /root/.bashrc diff --git a/tools/Dockerfile.devel b/tools/Dockerfile.devel index e4bcd33534cb9e887f49fcba5029619aaa1dea4c..dc00384e39bb742400fee74663a551cf44019d61 100644 --- a/tools/Dockerfile.devel +++ b/tools/Dockerfile.devel @@ -1,26 +1,31 @@ FROM centos:7.3.1611 -RUN yum -y install wget >/dev/null \ - && yum -y install gcc gcc-c++ make glibc-static which >/dev/null \ - && yum -y install git openssl-devel curl-devel bzip2-devel python-devel >/dev/null \ - && wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \ +RUN yum -y install wget \ + && yum -y install gcc gcc-c++ make glibc-static which \ + && yum -y install git openssl-devel curl-devel bzip2-devel python-devel + +RUN wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \ && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \ && mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \ && echo 'export PATH=/usr/local/cmake3.2.0/bin:$PATH' >> /root/.bashrc \ - && rm cmake-3.2.0-Linux-x86_64.tar.gz \ - && wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ + && rm cmake-3.2.0-Linux-x86_64.tar.gz + +RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ && tar xzf go1.14.linux-amd64.tar.gz \ && mv go /usr/local/go \ && echo 'export GOROOT=/usr/local/go' >> /root/.bashrc \ && echo 'export PATH=/usr/local/go/bin:$PATH' >> /root/.bashrc \ - && rm go1.14.linux-amd64.tar.gz \ - && yum -y install python-devel sqlite-devel >/dev/null \ + && rm go1.14.linux-amd64.tar.gz + +RUN yum -y install python-devel sqlite-devel \ && curl https://bootstrap.pypa.io/get-pip.py -o 
get-pip.py >/dev/null \ && python get-pip.py >/dev/null \ && pip install google protobuf setuptools wheel flask >/dev/null \ - && rm get-pip.py \ - && yum install -y python3 python3-devel \ + && rm get-pip.py + +RUN yum install -y python3 python3-devel \ && pip3 install google protobuf setuptools wheel flask \ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\ - && yum clean all \ - && localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ + && yum clean all + +RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ && echo "export LANG=en_US.utf8" >> /root/.bashrc diff --git a/tools/Dockerfile.gpu b/tools/Dockerfile.gpu index 2f38a3a3cd1c8987d34a81259ec9ad6ba67156a7..adb8e73f86a8fa436de3844a60f08ab22df0177e 100644 --- a/tools/Dockerfile.gpu +++ b/tools/Dockerfile.gpu @@ -8,10 +8,12 @@ RUN yum -y install wget && \ yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false && \ yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false && \ yum -y install python3 python3-devel && \ - yum clean all && \ - curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ - python get-pip.py && rm get-pip.py && \ - ln -s /usr/local/cuda-9.0/lib64/libcublas.so.9.0 /usr/local/cuda-9.0/lib64/libcublas.so && \ + yum clean all + +RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py && \ + python get-pip.py && rm get-pip.py + +RUN ln -s /usr/local/cuda-9.0/lib64/libcublas.so.9.0 /usr/local/cuda-9.0/lib64/libcublas.so && \ echo 'export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH' >> /root/.bashrc && \ ln -s /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so.7 /usr/local/cuda-9.0/targets/x86_64-linux/lib/libcudnn.so && \ echo 'export LD_LIBRARY_PATH=/usr/local/cuda-9.0/targets/x86_64-linux/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \ diff --git a/tools/Dockerfile.gpu.devel b/tools/Dockerfile.gpu.devel index 057201cefa1f8de7a105ea9b7f93e7ca9e342777..583b0566edf85f56c5fcc6f9f36dce6430ba7941 100644 --- a/tools/Dockerfile.gpu.devel +++ b/tools/Dockerfile.gpu.devel @@ -1,26 +1,31 @@ FROM nvidia/cuda:9.0-cudnn7-devel-centos7 - RUN yum -y install wget >/dev/null \ - && yum -y install gcc gcc-c++ make glibc-static which >/dev/null \ - && yum -y install git openssl-devel curl-devel bzip2-devel python-devel >/dev/null \ - && wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \ + && yum -y install gcc gcc-c++ make glibc-static which \ + && yum -y install git openssl-devel curl-devel bzip2-devel python-devel + +RUN wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \ && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \ && mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \ && echo 'export PATH=/usr/local/cmake3.2.0/bin:$PATH' >> /root/.bashrc \ - && rm cmake-3.2.0-Linux-x86_64.tar.gz \ - && wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ + && rm cmake-3.2.0-Linux-x86_64.tar.gz + +RUN wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \ && tar xzf go1.14.linux-amd64.tar.gz \ && mv go /usr/local/go \ && echo 'export GOROOT=/usr/local/go' >> /root/.bashrc \ && echo 'export PATH=/usr/local/go/bin:$PATH' >> /root/.bashrc \ - && rm go1.14.linux-amd64.tar.gz \ - && yum -y install python-devel sqlite-devel >/dev/null \ + && rm go1.14.linux-amd64.tar.gz + +RUN yum -y install python-devel sqlite-devel \ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \ && python get-pip.py >/dev/null \ && pip install google protobuf setuptools wheel flask >/dev/null \ 
- && rm get-pip.py \ - && yum install -y python3 python3-devel \ + && rm get-pip.py + +RUN yum install -y python3 python3-devel \ && pip3 install google protobuf setuptools wheel flask \ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\ - && yum clean all \ + && yum clean all + +RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \ && echo "export LANG=en_US.utf8" >> /root/.bashrc diff --git a/tools/serving_build.sh b/tools/serving_build.sh index 989e48ead9864e717e573f7f0800a1afba2e934a..097123165988fb266f7c4a3a0da603ade6d98be1 100644 --- a/tools/serving_build.sh +++ b/tools/serving_build.sh @@ -134,6 +134,7 @@ function build_server() { function kill_server_process() { ps -ef | grep "serving" | grep -v serving_build | grep -v grep | awk '{print $2}' | xargs kill + sleep 1 } function python_test_fit_a_line() { @@ -246,6 +247,7 @@ function python_run_criteo_ctr_with_cube() { echo "criteo_ctr_with_cube inference auc test success" kill_server_process ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill + sleep 1 ;; GPU) check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz" @@ -261,6 +263,8 @@ function python_run_criteo_ctr_with_cube() { check_cmd "mkdir work_dir1 && cp cube/conf/cube.conf ./work_dir1/" python test_server_gpu.py ctr_serving_model_kv & sleep 5 + # for warm up + python test_client.py ctr_client_conf/serving_client_conf.prototxt ./ut_data > /dev/null || true check_cmd "python test_client.py ctr_client_conf/serving_client_conf.prototxt ./ut_data >score" tail -n 2 score | awk 'NR==1' AUC=$(tail -n 2 score | awk 'NR==1') @@ -273,6 +277,7 @@ function python_run_criteo_ctr_with_cube() { echo "criteo_ctr_with_cube inference auc test success" kill_server_process ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill + sleep 1 ;; *) echo "error type" @@ -484,6 +489,7 @@ function python_test_lac() { setproxy # recover proxy state kill_server_process ps -ef | grep "lac_web_service" | grep -v grep | awk '{print $2}' | xargs kill + sleep 1 echo "lac CPU HTTP inference pass" ;; GPU) @@ -499,6 +505,178 @@ function python_test_lac() { cd .. 
} +function python_test_grpc_impl() { + # pwd: /Serving/python/examples + cd grpc_impl_example # pwd: /Serving/python/examples/grpc_impl_example + local TYPE=$1 + export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving + unsetproxy + case $TYPE in + CPU) + # test general case + cd fit_a_line # pwd: /Serving/python/examples/grpc_impl_example/fit_a_line + sh get_data.sh + + # one line command start + check_cmd "python -m paddle_serving_server.serve --model uci_housing_model --port 9393 --thread 4 --use_multilang > /dev/null &" + sleep 5 # wait for the server to start + check_cmd "python test_sync_client.py > /dev/null" + check_cmd "python test_asyn_client.py > /dev/null" + check_cmd "python test_general_pb_client.py > /dev/null" + check_cmd "python test_numpy_input_client.py > /dev/null" + check_cmd "python test_batch_client.py > /dev/null" + check_cmd "python test_timeout_client.py > /dev/null" + kill_server_process + + check_cmd "python test_server.py uci_housing_model > /dev/null &" + sleep 5 # wait for the server to start + check_cmd "python test_sync_client.py > /dev/null" + check_cmd "python test_asyn_client.py > /dev/null" + check_cmd "python test_general_pb_client.py > /dev/null" + check_cmd "python test_numpy_input_client.py > /dev/null" + check_cmd "python test_batch_client.py > /dev/null" + check_cmd "python test_timeout_client.py > /dev/null" + kill_server_process + + cd .. # pwd: /Serving/python/examples/grpc_impl_example + + # test load server config and client config in Server side + cd criteo_ctr_with_cube # pwd: /Serving/python/examples/grpc_impl_example/criteo_ctr_with_cube + + check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz" + check_cmd "tar xf ctr_cube_unittest.tar.gz" + check_cmd "mv models/ctr_client_conf ./" + check_cmd "mv models/ctr_serving_model_kv ./" + check_cmd "mv models/data ./cube/" + check_cmd "mv models/ut_data ./" + cp ../../../../build-server-$TYPE/output/bin/cube* ./cube/ + sh cube_prepare.sh & + check_cmd "mkdir work_dir1 && cp cube/conf/cube.conf ./work_dir1/" + python test_server.py ctr_serving_model_kv ctr_client_conf/serving_client_conf.prototxt & + sleep 5 + check_cmd "python test_client.py ./ut_data >score" + tail -n 2 score | awk 'NR==1' + AUC=$(tail -n 2 score | awk 'NR==1') + VAR2="0.67" #TODO: temporarily relax the threshold to 0.67 + RES=$( echo "$AUC>$VAR2" | bc ) + if [[ $RES -eq 0 ]]; then + echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.67" + exit 1 + fi + echo "grpc impl test success" + kill_server_process + ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill + + cd .. 
# pwd: /Serving/python/examples/grpc_impl_example + ;; + GPU) + export CUDA_VISIBLE_DEVICES=0 + # test general case + cd fit_a_line # pwd: /Serving/python/examples/grpc_impl_example/fit_a_line + sh get_data.sh + + # one line command start + check_cmd "python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9393 --thread 4 --gpu_ids 0 --use_multilang > /dev/null &" + sleep 5 # wait for the server to start + check_cmd "python test_sync_client.py > /dev/null" + check_cmd "python test_asyn_client.py > /dev/null" + check_cmd "python test_general_pb_client.py > /dev/null" + check_cmd "python test_numpy_input_client.py > /dev/null" + check_cmd "python test_batch_client.py > /dev/null" + check_cmd "python test_timeout_client.py > /dev/null" + kill_server_process + + check_cmd "python test_server_gpu.py uci_housing_model > /dev/null &" + sleep 5 # wait for the server to start + check_cmd "python test_sync_client.py > /dev/null" + check_cmd "python test_asyn_client.py > /dev/null" + check_cmd "python test_general_pb_client.py > /dev/null" + check_cmd "python test_numpy_input_client.py > /dev/null" + check_cmd "python test_batch_client.py > /dev/null" + check_cmd "python test_timeout_client.py > /dev/null" + kill_server_process + ps -ef | grep "test_server_gpu" | grep -v serving_build | grep -v grep | awk '{print $2}' | xargs kill + + cd .. # pwd: /Serving/python/examples/grpc_impl_example + + # test load server config and client config in Server side + cd criteo_ctr_with_cube # pwd: /Serving/python/examples/grpc_impl_example/criteo_ctr_with_cube + + check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz" + check_cmd "tar xf ctr_cube_unittest.tar.gz" + check_cmd "mv models/ctr_client_conf ./" + check_cmd "mv models/ctr_serving_model_kv ./" + check_cmd "mv models/data ./cube/" + check_cmd "mv models/ut_data ./" + cp ../../../../build-server-$TYPE/output/bin/cube* ./cube/ + sh cube_prepare.sh & + check_cmd "mkdir work_dir1 && cp cube/conf/cube.conf ./work_dir1/" + python test_server_gpu.py ctr_serving_model_kv ctr_client_conf/serving_client_conf.prototxt & + sleep 5 + # for warm up + python test_client.py ./ut_data &> /dev/null || true + check_cmd "python test_client.py ./ut_data >score" + tail -n 2 score | awk 'NR==1' + AUC=$(tail -n 2 score | awk 'NR==1') + VAR2="0.67" #TODO: temporarily relax the threshold to 0.67 + RES=$( echo "$AUC>$VAR2" | bc ) + if [[ $RES -eq 0 ]]; then + echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.67" + exit 1 + fi + echo "grpc impl test success" + kill_server_process + ps -ef | grep "test_server_gpu" | grep -v serving_build | grep -v grep | awk '{print $2}' | xargs kill + ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill + cd .. # pwd: /Serving/python/examples/grpc_impl_example + ;; + *) + echo "error type" + exit 1 + ;; + esac + echo "test grpc impl $TYPE part finished as expected." + setproxy + unset SERVING_BIN + cd .. 
# pwd: /Serving/python/examples +} + + +function python_test_yolov4(){ + #pwd:/ Serving/python/examples + local TYPE=$1 + export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving + cd yolov4 + case $TYPE in + CPU) + python -m paddle_serving_app.package --get_model yolov4 + tar -xzvf yolov4.tar.gz + check_cmd "python -m paddle_serving_server.serve --model yolov4_model/ --port 9393 &" + sleep 5 + check_cmd "python test_client.py 000000570688.jpg" + echo "yolov4 CPU RPC inference pass" + kill_server_process + ;; + GPU) + python -m paddle_serving_app.package --get_model yolov4 + tar -xzvf yolov4.tar.gz + check_cmd "python -m paddle_serving_server_gpu.serve --model yolov4_model/ --port 9393 --gpu_ids 0 &" + sleep 5 + check_cmd "python test_client.py 000000570688.jpg" + echo "yolov4 GPU RPC inference pass" + kill_server_process + ;; + *) + echo "error type" + exit 1 + ;; + esac + echo "test yolov4 $TYPE finished as expected." + unset SERVING_BIN + cd .. +} + + function python_run_test() { # Using the compiled binary local TYPE=$1 # pwd: /Serving @@ -510,6 +688,8 @@ function python_run_test() { python_test_lac $TYPE # pwd: /Serving/python/examples python_test_multi_process $TYPE # pwd: /Serving/python/examples python_test_multi_fetch $TYPE # pwd: /Serving/python/examples + python_test_yolov4 $TYPE # pwd: /Serving/python/examples + python_test_grpc_impl $TYPE # pwd: /Serving/python/examples echo "test python $TYPE part finished as expected." cd ../.. # pwd: /Serving } @@ -768,3 +948,4 @@ function main() { } main $@ +exit 0
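For reference, a minimal client sketch for the PipelineService defined in python/pipeline/proto/pipeline_service.proto above. It assumes the stubs generated by run_codegen.py are importable as paddle_serving_server.pipeline.proto.pipeline_service_pb2 / pipeline_service_pb2_grpc, that the server listens on the default port 8080 used by prepare_server(), and that ecode 0 denotes success; the key/value payload shown is purely illustrative and not part of this patch.

import grpc
from paddle_serving_server.pipeline.proto import pipeline_service_pb2
from paddle_serving_server.pipeline.proto import pipeline_service_pb2_grpc

channel = grpc.insecure_channel("127.0.0.1:8080")
stub = pipeline_service_pb2_grpc.PipelineServiceStub(channel)

req = pipeline_service_pb2.Request()
req.key.append("words")                # feed variable name (example only)
req.value.append("i am very happy")    # its serialized value (example only)
resp = stub.inference(req)             # rpc name is lower-case "inference" in the proto
if resp.ecode == 0:
    print(dict(zip(resp.key, resp.value)))
else:
    print("error: {}".format(resp.error_info))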