diff --git a/README.md b/README.md index 9f1c60178da79aa2b8b331b2c69f4355ba36a4e3..7af47c0e1cbd96878b71d933f455879d3299ad01 100755 --- a/README.md +++ b/README.md @@ -40,13 +40,20 @@ The goal of Paddle Serving is to provide high-performance, flexible and easy-to- - Support service monitoring, provide prometheus-based performance statistics and port access -

-<h2 align="center">Tutorial</h2>
+<h2 align="center">Tutorial and Papers</h2>

-- AIStudio tutorial(Chinese) : [Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/2538249) +- AIStudio tutorial(Chinese) : [Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/3946013) +- AIStudio OCR practice(Chinese) : [基于PaddleServing的OCR服务化部署实战](https://aistudio.baidu.com/aistudio/projectdetail/3630726) - Video tutorial(Chinese) : [深度学习服务化部署-以互联网应用为例](https://aistudio.baidu.com/aistudio/course/introduce/19084) - Edge AI solution(Chinese) : [基于Paddle Serving&百度智能边缘BIE的边缘AI解决方案](https://mp.weixin.qq.com/s/j0EVlQXaZ7qmoz9Fv96Yrw) +- Paper : [JiZhi: A Fast and Cost-Effective Model-As-A-Service System for +Web-Scale Online Inference at Baidu](https://arxiv.org/pdf/2106.01674.pdf) +- Paper : [ERNIE 3.0 TITAN: EXPLORING LARGER-SCALE KNOWLEDGE +ENHANCED PRE-TRAINING FOR LANGUAGE UNDERSTANDING +AND GENERATION](https://arxiv.org/pdf/2112.12731.pdf) +

@@ -90,8 +97,6 @@ The first step is to call the model save interface to generate a model parameter - [Analyze and optimize performance](doc/Python_Pipeline/Performance_Tuning_EN.md) - [TensorRT dynamic Shape](doc/TensorRT_Dynamic_Shape_EN.md) - [Benchmark(Chinese)](doc/Python_Pipeline/Benchmark_CN.md) - - Our Paper: [JiZhi: A Fast and Cost-Effective Model-As-A-Service System for -Web-Scale Online Inference at Baidu](https://arxiv.org/pdf/2106.01674.pdf) - Client SDK - [Python SDK(Chinese)](doc/C++_Serving/Introduction_CN.md#42-多语言多协议Client) - [JAVA SDK](doc/Java_SDK_EN.md) diff --git a/README_CN.md b/README_CN.md index 65bf8b4686318c68103a00f8e32dd98169cddbb1..cc9679caf752b48b8f84a03faa418dcc69b323ce 100755 --- a/README_CN.md +++ b/README_CN.md @@ -39,11 +39,18 @@ Paddle Serving依托深度学习框架PaddlePaddle旨在帮助深度学习开发 - 支持服务监控,提供基于普罗米修斯的性能数据统计及端口访问 -

-<h2 align="center">教程</h2>
-
-- AIStudio教程-[Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/2538249)
-- 视频教程-[深度学习服务化部署-以互联网应用为例](https://aistudio.baidu.com/aistudio/course/introduce/19084)
-- 边缘AI解决方案-[基于Paddle Serving&百度智能边缘BIE的边缘AI解决方案](https://mp.weixin.qq.com/s/j0EVlQXaZ7qmoz9Fv96Yrw)
+<h2 align="center">教程与论文</h2>

+ +- AIStudio 使用教程 : [Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/3946013) +- AIStudio OCR实战 : [基于PaddleServing的OCR服务化部署实战](https://aistudio.baidu.com/aistudio/projectdetail/3630726) +- 视频教程 : [深度学习服务化部署-以互联网应用为例](https://aistudio.baidu.com/aistudio/course/introduce/19084) +- 边缘AI 解决方案 : [基于Paddle Serving&百度智能边缘BIE的边缘AI解决方案](https://mp.weixin.qq.com/s/j0EVlQXaZ7qmoz9Fv96Yrw) + +- 论文 : [JiZhi: A Fast and Cost-Effective Model-As-A-Service System for +Web-Scale Online Inference at Baidu](https://arxiv.org/pdf/2106.01674.pdf) +- 论文 : [ERNIE 3.0 TITAN: EXPLORING LARGER-SCALE KNOWLEDGE +ENHANCED PRE-TRAINING FOR LANGUAGE UNDERSTANDING +AND GENERATION](https://arxiv.org/pdf/2112.12731.pdf)

diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake index ad8df0f1844e902a632f7c7df390ec3c2a783345..c9d4a583944072dcddc25f6d8544f47b110acab5 100644 --- a/cmake/paddlepaddle.cmake +++ b/cmake/paddlepaddle.cmake @@ -30,7 +30,7 @@ message( "WITH_GPU = ${WITH_GPU}") # Paddle Version should be one of: # latest: latest develop build # version number like 1.5.2 -SET(PADDLE_VERSION "2.2.2") +SET(PADDLE_VERSION "2.3.0") if (WITH_GPU) message("CUDA: ${CUDA_VERSION}, CUDNN_MAJOR_VERSION: ${CUDNN_MAJOR_VERSION}") # cuda 11.0 is not supported, 11.2 would be added. @@ -53,6 +53,7 @@ else() set(WITH_TRT OFF) endif() if (WITH_GPU) + SET(PADDLE_VERSION "2.3.0-no-ort") SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/GPU/${CUDA_SUFFIX}") elseif (WITH_LITE) message("cpu arch: ${CMAKE_SYSTEM_PROCESSOR}") @@ -85,6 +86,7 @@ elseif (WITH_ASCEND_CL) endif() else() if (WITH_AVX) + SET(PADDLE_VERSION "2.3.0-no-ort") if (WITH_MKLML) SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}/cxx_c/Linux/CPU/gcc8.2_avx_mkl") else() @@ -100,7 +102,7 @@ endif() if(WITH_LITE) if (WITH_XPU) - SET(PADDLE_LIB_PATH "https://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/paddle_inference_install_dir.tar.gz ") + SET(PADDLE_LIB_PATH "https://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/paddle_inference_install_dir.tar.gz ") elseif (WITH_ASCEND_CL) SET(PADDLE_LIB_PATH "http://paddle-serving.bj.bcebos.com/inferlib/${PADDLE_LIB_VERSION}/paddle_inference_install_dir.tgz ") endif() @@ -113,7 +115,7 @@ else() endif() MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}") -if (WITH_GPU OR WITH_MKLML) +if ((WITH_GPU OR WITH_MKLML) AND NOT WITH_JETSON) if (WITH_TRT) ExternalProject_Add( "extern_paddle" @@ -171,14 +173,27 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib) SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib") LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib) +#SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib") +#LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib) + +#SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib") +#LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib) + if (NOT WITH_MKLML) ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a) endif() +#ADD_LIBRARY(paddle2onnx STATIC IMPORTED GLOBAL) +#SET_PROPERTY(TARGET paddle2onnx PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/paddle2onnx/lib/libpaddle2onnx.so) + +#ADD_LIBRARY(onnxruntime STATIC IMPORTED GLOBAL) +#SET_PROPERTY(TARGET onnxruntime PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/onnxruntime/lib/libonnxruntime.so.1.10.0) + ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL) SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a) -if (WITH_ASCEND_CL) + +if (WITH_ASCEND_CL OR WITH_XPU) SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.so) endif() diff --git a/core/configure/proto/general_model_service.proto b/core/configure/proto/general_model_service.proto index c2deab2f69ea6f6ca5e77354ec955bf679f9a3d6..b4f1ce0cfbb1f407a217e1a72440b55489e8a1c3 100644 --- a/core/configure/proto/general_model_service.proto 
+++ b/core/configure/proto/general_model_service.proto @@ -90,11 +90,12 @@ message Request { message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; + bool profile_server = 3; + uint64 log_id = 4; // Error code - int32 err_no = 3; - + int32 err_no = 5; // Error messages - string err_msg = 4; + string err_msg = 6; }; message ModelOutput { diff --git a/core/configure/proto/server_configure.proto b/core/configure/proto/server_configure.proto old mode 100755 new mode 100644 index c974f010737a8836d5de83d737ee0f9b9519462f..4f49aa3c959a63afbb623853bcc1f0c14cbd52e3 --- a/core/configure/proto/server_configure.proto +++ b/core/configure/proto/server_configure.proto @@ -49,6 +49,17 @@ message EngineDesc { optional bool gpu_multi_stream = 20; optional bool use_ascend_cl = 21; + /* + * "gpu_memory_mb": allocate gpu memory by config.EnableUseGpu() + * "cpu_math_thread_num": set thread numbers of cpu math by config.SetCpuMathLibraryNumThreads() + * "trt_workspace_size": set TensorRT workspace size by config.EnableTensorRtEngine(), 1 << 25 default + * "trt_use_static": If true, save the optimization information of the TRT serialized to the disk, and load from the disk. + */ + optional int32 gpu_memory_mb = 22 [default = 100]; + optional int32 cpu_math_thread_num = 23 [default = 1]; + optional int32 trt_workspace_size = 24 [default = 33554432]; + optional bool trt_use_static = 25 [default = false]; + /* * "runtime_thread_num": n == 0 means don`t use Asynchronous task scheduling * mode. @@ -65,6 +76,28 @@ message EngineDesc { optional int32 batch_infer_size = 31 [ default = 32 ]; optional bool enable_overrun = 32 [ default = false ]; optional bool allow_split_request = 33 [ default = true ]; + optional int32 min_subgraph_size = 34 [ default = 3 ]; + map min_input_shape = 35; + map max_input_shape = 36; + map opt_input_shape = 37; + + /* + * Distributed inference params + * "enable_dist_model": enable distributed model, false default. + * "carrier_id": mark carrier + * "dist_cfg_file": file name of distributed configure. + * "dist_nranks": number of distributed nodes. + * "dist_endpoints": all endpoints(ip:port) of distributed nodes. + * "dist_subgraph_index": distributed subgraph index, auto increment from 0. + * It is + * used to select the endpoint of the current shard in distribute model. 
+ */ + optional bool enable_dist_model = 40 [ default = false ]; + optional string dist_carrier_id = 41 [ default = "inference" ]; + optional string dist_cfg_file = 42; + optional int32 dist_nranks = 43 [ default = 0 ]; + repeated string dist_endpoints = 44; + optional int32 dist_subgraph_index = 45 [ default = 0 ]; }; // model_toolkit conf @@ -96,7 +129,8 @@ message DAGNodeDependency { message DAGNode { required string name = 1; required string type = 2; - repeated DAGNodeDependency dependencies = 3; + repeated string address = 3; + repeated DAGNodeDependency dependencies = 4; }; // workflow entry diff --git a/core/general-server/op/general_detection_op.cpp b/core/general-server/op/general_detection_op.cpp index b62a2d2544e12d493033cf1bb8e6606d72f614d3..6a4fe15f2de0bbe930bde850022c1dc3c34f59d3 100644 --- a/core/general-server/op/general_detection_op.cpp +++ b/core/general-server/op/general_detection_op.cpp @@ -244,7 +244,7 @@ int GeneralDetectionOp::inference() { databuf_char_out = reinterpret_cast(databuf_data_out); paddle::PaddleBuf paddleBuf(databuf_char_out, databuf_size_out); paddle::PaddleTensor tensor_out; - tensor_out.name = "image"; + tensor_out.name = "x"; tensor_out.dtype = paddle::PaddleDType::FLOAT32; tensor_out.shape = output_shape; tensor_out.data = paddleBuf; diff --git a/core/general-server/op/general_dist_kv_infer_op.cpp b/core/general-server/op/general_dist_kv_infer_op.cpp index 238d4cac3a085ef188f427c8cc3669b7617443d7..957379b594e7dc18516b1a55ec042b2ec9921cc5 100644 --- a/core/general-server/op/general_dist_kv_infer_op.cpp +++ b/core/general-server/op/general_dist_kv_infer_op.cpp @@ -40,7 +40,7 @@ using baidu::paddle_serving::predictor::PaddleGeneralModelConfig; using baidu::paddle_serving::predictor::CubeCache; // DistKV Infer Op: seek cube and then call paddle inference -// op seq: general_reader-> dist_kv_infer -> general_response +// op seq: GeneralReaderOp-> dist_kv_infer -> general_response int GeneralDistKVInferOp::inference() { VLOG(2) << "Going to run inference"; const std::vector pre_node_names = pre_names(); @@ -186,9 +186,9 @@ int GeneralDistKVInferOp::inference() { if (values.size() != keys.size() || values[0].buff.size() == 0) { LOG(ERROR) << "cube value return null"; } - size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float); + size_t EMBEDDING_SIZE = values[0].buff.size() / sizeof(float); // size_t EMBEDDING_SIZE = (values[0].buff.size() - 10) / sizeof(float); - //size_t EMBEDDING_SIZE = 9; + // size_t EMBEDDING_SIZE = 9; TensorVector sparse_out; sparse_out.resize(sparse_count); TensorVector dense_out; @@ -241,7 +241,7 @@ int GeneralDistKVInferOp::inference() { // The data generated by pslib has 10 bytes of information to be filtered // out - memcpy(data_ptr, cur_val->buff.data(), cur_val->buff.size() ); + memcpy(data_ptr, cur_val->buff.data(), cur_val->buff.size()); // VLOG(3) << keys[cube_val_idx] << ":" << data_ptr[0] << ", " << // data_ptr[1] << ", " < 0) { VLOG(2) << "(logid=" << log_id << ") var[" << i << "] is lod_tensor"; - paddleTensor.lod.resize(1); + int lod_index = -1; for (int k = 0; k < tensor.lod_size(); ++k) { - paddleTensor.lod[0].push_back(tensor.lod(k)); + if (tensor.lod(k) == 0) { + lod_index++; + paddleTensor.lod.resize(lod_index + 1); + } + paddleTensor.lod[lod_index].push_back(tensor.lod(k)); + VLOG(2) << "(logid=" << log_id << ") lod[" << lod_index + << "]=" << tensor.lod(k); } } @@ -191,7 +197,7 @@ int GeneralReaderOp::inference() { VLOG(2) << "(logid=" << log_id << ") var[" << i << "] has lod_tensor and len=" << 
out->at(i).lod[0].back(); } - void* dst_ptr = out->at(i).data.data(); + void *dst_ptr = out->at(i).data.data(); if (!dst_ptr) { LOG(ERROR) << "dst_ptr is nullptr"; return -1; diff --git a/core/general-server/op/general_remote_op.cpp b/core/general-server/op/general_remote_op.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2e77067ddad9190d58b741361efff9f1e704f9b0 --- /dev/null +++ b/core/general-server/op/general_remote_op.cpp @@ -0,0 +1,126 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "core/general-server/op/general_remote_op.h" +#include +#include +#include "core/util/include/timer.h" + +// paddle inference 2.1 support: FLOAT32, INT64, INT32, UINT8, INT8 +// will support: FLOAT16 +#define BRPC_MAX_BODY_SIZE 2 * 1024 * 1024 * 1024 +const std::string LODABALANCE = ""; + +namespace baidu { +namespace paddle_serving { +namespace serving { + +using baidu::paddle_serving::Timer; +using baidu::paddle_serving::predictor::general_model::Tensor; +using baidu::paddle_serving::predictor::general_model::Request; +using baidu::paddle_serving::predictor::general_model::Response; + +brpc::Channel BRPCStub::brpc_channels[MAX_MP_NUM]; + +brpc::ChannelOptions BRPCStub::options; +std::atomic BRPCStub::inited(0); + +int GeneralRemoteOp::inference() { + LOG(INFO) << "Enter GeneralRemoteOp:inference()"; + int expected = 0; + std::vector op_address = address(); + if (BRPCStub::inited.compare_exchange_strong(expected, 1)) { + BRPCStub::options.protocol = "baidu_std"; + BRPCStub::options.connection_type = "short"; + BRPCStub::options.timeout_ms = 80000 /*milliseconds*/; + BRPCStub::options.max_retry = 100; + brpc::fLU64::FLAGS_max_body_size = BRPC_MAX_BODY_SIZE; + + LOG(ERROR) << "address size: " << op_address.size(); + for (int i = 0; i < op_address.size(); ++i) { + LOG(INFO) << i + 1 << " address is " << op_address[i].c_str(); + BRPCStub::brpc_channels[i].Init( + op_address[i].c_str(), LODABALANCE.c_str(), &BRPCStub::options); + } + + BRPCStub::inited++; + } + while (BRPCStub::inited < 2) { + } + + Timer timeline; + int64_t start = timeline.TimeStampUS(); + timeline.Start(); + VLOG(2) << "Going to run Remote inference"; + + Request* req = (Request*)(get_request_message()); + Response* res = mutable_data(); + uint64_t log_id = req->log_id(); + + brpc::Controller brpc_controllers[MAX_MP_NUM]; + brpc::CallId brpc_callids[MAX_MP_NUM]; + Response brpc_response_tmp; + + size_t i = 0; + // Init BRPC controllers, callids and stubs + for (i = 0; i < op_address.size(); ++i) { + brpc_controllers[i].set_log_id(log_id); + brpc_callids[i] = brpc_controllers[i].call_id(); + } + for (i = 0; i < op_address.size(); ++i) { + baidu::paddle_serving::predictor::general_model::GeneralModelService_Stub + stub(&BRPCStub::brpc_channels[i]); + LOG(INFO) << "Sended 1 request to Slave Sever " << i; + if (0 == i) { + stub.inference(&brpc_controllers[i], req, res, brpc::DoNothing()); + continue; + } + stub.inference( + 
&brpc_controllers[i], req, &brpc_response_tmp, brpc::DoNothing()); + } + + LOG(INFO) << "All request are sended, waiting for all responses."; + + // Wait RPC done. + for (i = 0; i < op_address.size(); ++i) { + brpc::Join(brpc_callids[i]); + } + + // Print RPC Results + for (i = 0; i < op_address.size(); ++i) { + LOG(INFO) << "brpc_controller_" << i + << " status:" << brpc_controllers[i].Failed(); + if (!brpc_controllers[i].Failed()) { + LOG(INFO) << "Received response from " + << brpc_controllers[i].remote_side() + << " Latency=" << brpc_controllers[i].latency_us() << "us"; + } else { + LOG(ERROR) << brpc_controllers[i].ErrorText(); + } + } + LOG(INFO) << "All brpc remote stubs joined done."; + + res->set_log_id(log_id); + res->set_profile_server(req->profile_server()); + int64_t end = timeline.TimeStampUS(); + res->add_profile_time(start); + res->add_profile_time(end); + + return 0; +} + +DEFINE_OP(GeneralRemoteOp); +} // namespace serving +} // namespace paddle_serving +} // namespace baidu diff --git a/core/general-server/op/general_remote_op.h b/core/general-server/op/general_remote_op.h new file mode 100644 index 0000000000000000000000000000000000000000..94bfcb9f671866432c572ea67ccbdaf48344fcea --- /dev/null +++ b/core/general-server/op/general_remote_op.h @@ -0,0 +1,58 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +#include "core/general-server/general_model_service.pb.h" + +#include "core/sdk-cpp/builtin_format.pb.h" +#include "core/sdk-cpp/general_model_service.pb.h" +#include "core/sdk-cpp/include/common.h" +#include "core/sdk-cpp/include/predictor_sdk.h" + +#define MAX_MP_NUM 16 + +namespace baidu { +namespace paddle_serving { +namespace serving { + +using baidu::paddle_serving::predictor::general_model::Request; +using baidu::paddle_serving::predictor::general_model::Response; + +class GeneralRemoteOp + : public baidu::paddle_serving::predictor::OpWithChannel< + baidu::paddle_serving::predictor::general_model::Response> { + public: + DECLARE_OP(GeneralRemoteOp); + int inference(); +}; + +class BRPCStub { + public: + static brpc::Channel brpc_channels[MAX_MP_NUM]; + static brpc::ChannelOptions options; + static std::atomic inited; +}; + +} // namespace serving +} // namespace paddle_serving +} // namespace baidu diff --git a/core/general-server/proto/general_model_service.proto b/core/general-server/proto/general_model_service.proto index 4b6282637ca6ea0617096a18bbbc3268067906bc..a5adeeb95b59f65f5d009bdafb7e034a631a4e6f 100755 --- a/core/general-server/proto/general_model_service.proto +++ b/core/general-server/proto/general_model_service.proto @@ -92,11 +92,13 @@ message Request { message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; - // Error code - int32 err_no = 3; + bool profile_server = 3; + uint64 log_id = 4; + // Error code + int32 err_no = 5; // Error messages - string err_msg = 4; + string err_msg = 6; }; message ModelOutput { diff --git a/core/predictor/common/constant.cpp b/core/predictor/common/constant.cpp index b0acb886950face9383518cb7da227137a9c14be..5df873017ef2406e96e0b1316c4c5062d4208552 100644 --- a/core/predictor/common/constant.cpp +++ b/core/predictor/common/constant.cpp @@ -20,7 +20,7 @@ namespace predictor { DEFINE_bool(use_parallel_infer_service, false, ""); DEFINE_int32(el_log_level, 16, ""); -DEFINE_int32(idle_timeout_s, 16, ""); +DEFINE_int32(idle_timeout_s, 80, ""); DEFINE_int32(port, 8010, ""); DEFINE_string(workflow_path, "./conf", ""); DEFINE_string(workflow_file, "workflow.prototxt", ""); diff --git a/core/predictor/framework/bsf-inl.h b/core/predictor/framework/bsf-inl.h old mode 100755 new mode 100644 index abdba1f2e5bc9710e19804fa3f4d4c0fbce50abd..f1885ae357c910f1590502b4aec7e4a6de0289f3 --- a/core/predictor/framework/bsf-inl.h +++ b/core/predictor/framework/bsf-inl.h @@ -275,6 +275,7 @@ bool TaskExecutor::move_task_to_batch( } TaskT* previous_task = nullptr; + int padding_task_count = 0; while (!_task_queue.empty()) { TaskT* task = _task_queue.front(); @@ -327,6 +328,7 @@ bool TaskExecutor::move_task_to_batch( if (batchTask.padding(task) != 2) { break; } + ++padding_task_count; size_t rem = batchTask.append_task(task); previous_task = task; if (task->rem <= 0) { @@ -334,7 +336,12 @@ bool TaskExecutor::move_task_to_batch( } if (rem <= 0) break; } - LOG(INFO) << "Number of tasks remaining in _task_queue is" + + if (padding_task_count > 1) { + LOG(INFO) << "Hit auto padding, merge " << padding_task_count + << " tasks into 1 batch."; + } + LOG(INFO) << "Number of tasks remaining in _task_queue is " << _task_queue.size(); return true; } diff --git a/core/predictor/framework/cache.cpp b/core/predictor/framework/cache.cpp index 8715b85a66eccb71469bca294de8d8488cb59288..e5fe730535add74c82bf3ce34ec6c6e9a3a62c34 100644 --- 
a/core/predictor/framework/cache.cpp +++ b/core/predictor/framework/cache.cpp @@ -55,7 +55,7 @@ int CubeCache::reload_data(const std::string& cache_path) { // loading data from cache files if (stat(cache_path.c_str(), &st) < 0 || !S_ISDIR(st.st_mode)) { - LOG(ERROR) << "invalid cache path " << cache_path; + LOG(WARNING) << "No cube cache directory " << cache_path << " provided, ignore it"; return -1; } if ((dp = opendir(cache_path.c_str())) == nullptr) { diff --git a/core/predictor/framework/dag.cpp b/core/predictor/framework/dag.cpp index c45952f8fb8f3b6d48c2e1295d6a43d45ad185e5..629e3b095414d5c030f057e8835f9858d1acd894 100644 --- a/core/predictor/framework/dag.cpp +++ b/core/predictor/framework/dag.cpp @@ -129,6 +129,10 @@ int Dag::init(const configure::Workflow& conf, const std::string& name) { node->id = i + 1; // 0 is reserved for begginer-op node->name = conf.nodes(i).name(); node->type = conf.nodes(i).type(); + for (int add_index = 0; add_index < conf.nodes(i).address_size(); + ++add_index) { + node->address.push_back(conf.nodes(i).address(add_index)); + } uint32_t depend_size = conf.nodes(i).dependencies_size(); for (uint32_t j = 0; j < depend_size; j++) { const configure::DAGNodeDependency& depend = @@ -159,7 +163,8 @@ int Dag::init(const configure::Workflow& conf, const std::string& name) { for (uint32_t nid = 0; nid < _index_nodes.size(); nid++) { DagNode* node = _index_nodes[nid]; LOG(INFO) << "OP-" << node->id << "-" << node->name << "-" << node->type - << " depends: " << node->depends.size(); + << " depends: " << node->depends.size() + << " address: " << node->address.size(); boost::unordered_map::iterator it; for (it = node->depends.begin(); it != node->depends.end(); it++) { diff --git a/core/predictor/framework/dag.h b/core/predictor/framework/dag.h index 1145c9a9a564c432a00e8be69dbed23b32f0bb7f..c072c5e31951679f678fb8fbc11fc64374df4b38 100644 --- a/core/predictor/framework/dag.h +++ b/core/predictor/framework/dag.h @@ -29,6 +29,7 @@ struct DagNode { std::string name; // opname std::string full_name; // workflow_stageindex_opname std::string type; + std::vector address; void* conf; boost::unordered_map depends; }; diff --git a/core/predictor/framework/dag_view.cpp b/core/predictor/framework/dag_view.cpp index 64383514f604688a085097a7ec0043c91f25a9b4..16d2c647d27b4b24b6db3ad0f48f5a93ce2787a0 100644 --- a/core/predictor/framework/dag_view.cpp +++ b/core/predictor/framework/dag_view.cpp @@ -90,6 +90,7 @@ int DagView::init(Dag* dag, node->name, node->type, node->conf, + node->address, log_id) != 0) { LOG(WARNING) << "(logid=" << log_id << ") Failed init op, type:" << node->type; diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h index 5c5ef8730dd82a1ea721600b24f27a7fd1e46594..171b3bfa9a985472aa77ed2c449789b58f68b629 100644 --- a/core/predictor/framework/infer.h +++ b/core/predictor/framework/infer.h @@ -32,7 +32,8 @@ #include "core/predictor/framework/memory.h" #include "core/predictor/framework/predictor_metric.h" #include "paddle_inference_api.h" // NOLINT -#include "experimental/float16.h" +//#include "experimental/float16.h" +#include "experimental/phi/common/float16.h" namespace baidu { namespace paddle_serving { namespace predictor { @@ -548,9 +549,9 @@ class FluidInferEngine : public CloneDBReloadableInferEngine { int8_t* data = static_cast(origin_data); lod_tensor_in->CopyFromCpu(data); } else if ((*tensorVector_in_pointer)[i].dtype == - paddle::PaddleDType::FLOAT16) { - paddle::platform::float16* data = - static_cast(origin_data); + 
paddle::PaddleDType::FLOAT16) { + phi::dtype::float16* data = + static_cast(origin_data); lod_tensor_in->CopyFromCpu(data); } else { LOG(ERROR) << "Inference not support type[" @@ -646,14 +647,14 @@ class FluidInferEngine : public CloneDBReloadableInferEngine { lod_tensor_out->CopyToCpu(data_out); databuf_char = reinterpret_cast(data_out); } else if (dataType == paddle::PaddleDType::FLOAT16) { - databuf_size = out_num * sizeof(paddle::platform::float16); + databuf_size = out_num * sizeof(phi::dtype::float16); databuf_data = MempoolWrapper::instance().malloc(databuf_size); if (!databuf_data) { LOG(ERROR) << "Malloc failed, size: " << databuf_size; return -1; } - paddle::platform::float16* data_out = - reinterpret_cast(databuf_data); + phi::dtype::float16* data_out = + reinterpret_cast(databuf_data); lod_tensor_out->CopyToCpu(data_out); databuf_char = reinterpret_cast(data_out); } diff --git a/core/predictor/framework/server.cpp b/core/predictor/framework/server.cpp index 8ced6f1e9936059ada169633e21690d13bc48ae3..03ff34d9536a01e78e0946e7f31c479ab4f6eb7e 100644 --- a/core/predictor/framework/server.cpp +++ b/core/predictor/framework/server.cpp @@ -96,6 +96,10 @@ int ServerManager::start_and_wait() { LOG(ERROR) << "Failed to start Paddle Inference Server"; return -1; } + + std::cout << "C++ Serving service started successfully!" << std::endl; + LOG(INFO) << "C++ Serving service started successfully!"; + _server.RunUntilAskedToQuit(); ServerManager::stop_reloader(); diff --git a/core/predictor/op/op.cpp b/core/predictor/op/op.cpp index 33dba2b506543ed1103cb0b456f5f054969f17fa..a7848ede2ea7ea151ecdf6ed1ebb201a7c12b4b7 100644 --- a/core/predictor/op/op.cpp +++ b/core/predictor/op/op.cpp @@ -36,12 +36,14 @@ int Op::init(Bus* bus, const std::string& name, const std::string& type, void* conf, + const std::vector& address, const uint64_t log_id) { _bus = bus; _dag = dag; _id = id; _name = name; _type = type; + _address = address; set_config(conf); _timer = butil::get_object(); @@ -110,11 +112,13 @@ int Op::process(const uint64_t log_id, bool debug) { return ERR_INTERNAL_FAILURE; } + /* if (_has_calc) { LOG(INFO) << "(logid=" << log_id << ") Op: " << _name << " already processed before"; return ERR_OK; } + */ // 1. dependency inference /* @@ -147,8 +151,10 @@ int Op::process(const uint64_t log_id, bool debug) { } // 3. share output to bus - Channel* channel = mutable_channel(); - channel->share_to_bus(_bus, log_id); + if (!_has_calc) { + Channel* channel = mutable_channel(); + channel->share_to_bus(_bus, log_id); + } // 4. 
mark has calculated _has_calc = true; diff --git a/core/predictor/op/op.h b/core/predictor/op/op.h index ea700cce164805d04ddd10b72311f068245e2f10..f14d74b49b5184ad5262a3b160f096704af69304 100644 --- a/core/predictor/op/op.h +++ b/core/predictor/op/op.h @@ -114,6 +114,7 @@ class Op { const std::string& name, const std::string& type, void* conf, + const std::vector& address, const uint64_t log_id); int deinit(); @@ -135,6 +136,8 @@ class Op { const std::string& full_name() const { return _full_name; } + const std::vector& address() const { return _address; } + const std::vector& pre_names() const { return _pre_node_names; } void set_full_name(const std::string full_name) { _full_name = full_name; } @@ -206,6 +209,7 @@ class Op { std::string _name; std::string _full_name; // service_workflow_stageindex_opname std::string _type; + std::vector _address; bool _has_calc; bool _has_init; TimerFlow* _timer; diff --git a/core/sdk-cpp/proto/general_model_service.proto b/core/sdk-cpp/proto/general_model_service.proto index 5340f4226e12b0b99147bc2972928b7d7c733057..5c17f955fe63a82f52f54b3c394ab1b9324608cb 100755 --- a/core/sdk-cpp/proto/general_model_service.proto +++ b/core/sdk-cpp/proto/general_model_service.proto @@ -92,11 +92,13 @@ message Request { message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; - // Error code - int32 err_no = 3; + bool profile_server = 3; + uint64 log_id = 4; + // Error code + int32 err_no = 5; // Error messages - string err_msg = 4; + string err_msg = 6; }; message ModelOutput { diff --git a/core/sdk-cpp/proto/load_general_model_service.proto b/core/sdk-cpp/proto/load_general_model_service.proto index c58f79ecd6b00e82bd959d24b20ffaa653360d45..da731589c11695bb808bb9fab6ee60d12d67a69f 100644 --- a/core/sdk-cpp/proto/load_general_model_service.proto +++ b/core/sdk-cpp/proto/load_general_model_service.proto @@ -21,6 +21,7 @@ option cc_generic_services = true; message RequestAndResponse { required int32 a = 1; required float b = 2; + required uint64 log_id = 3 [ default = 0 ]; }; service LoadGeneralModelService { diff --git a/doc/C++_Serving/Model_Ensemble_CN.md b/doc/C++_Serving/Model_Ensemble_CN.md index 0590fb6fe1f4b51aff327c4aab9e82cc7382c12b..4bf5612e84adc704425dd72fa416b2b5dae04698 100755 --- a/doc/C++_Serving/Model_Ensemble_CN.md +++ b/doc/C++_Serving/Model_Ensemble_CN.md @@ -45,13 +45,13 @@ from paddle_serving_server import OpGraphMaker from paddle_serving_server import Server op_maker = OpMaker() -read_op = op_maker.create('general_reader') +read_op = op_maker.create('GeneralReaderOp') cnn_infer_op = op_maker.create( - 'general_infer', engine_name='cnn', inputs=[read_op]) + 'GeneralInferOp', engine_name='cnn', inputs=[read_op]) bow_infer_op = op_maker.create( - 'general_infer', engine_name='bow', inputs=[read_op]) + 'GeneralInferOp', engine_name='bow', inputs=[read_op]) response_op = op_maker.create( - 'general_response', inputs=[cnn_infer_op, bow_infer_op]) + 'GeneralResponseOp', inputs=[cnn_infer_op, bow_infer_op]) op_graph_maker = OpGraphMaker() op_graph_maker.add_op(read_op) diff --git a/doc/C++_Serving/Model_Ensemble_EN.md b/doc/C++_Serving/Model_Ensemble_EN.md index 071e7773106d3658818811d84b77bb9f91880e4e..0d069a8dc82873223f3b4b14756e078da81b0134 100755 --- a/doc/C++_Serving/Model_Ensemble_EN.md +++ b/doc/C++_Serving/Model_Ensemble_EN.md @@ -45,13 +45,13 @@ from paddle_serving_server import OpGraphMaker from paddle_serving_server import Server op_maker = OpMaker() -read_op = op_maker.create('general_reader') +read_op = 
op_maker.create('GeneralReaderOp') cnn_infer_op = op_maker.create( - 'general_infer', engine_name='cnn', inputs=[read_op]) + 'GeneralInferOp', engine_name='cnn', inputs=[read_op]) bow_infer_op = op_maker.create( - 'general_infer', engine_name='bow', inputs=[read_op]) + 'GeneralInferOp', engine_name='bow', inputs=[read_op]) response_op = op_maker.create( - 'general_response', inputs=[cnn_infer_op, bow_infer_op]) + 'GeneralResponseOp', inputs=[cnn_infer_op, bow_infer_op]) op_graph_maker = OpGraphMaker() op_graph_maker.add_op(read_op) diff --git a/doc/C++_Serving/OP_CN.md b/doc/C++_Serving/OP_CN.md index 4b541956b22534961cd7cb51c45600de3ffde163..16e5cc5182acc46c92e86a6d81556cf57b2cce04 100755 --- a/doc/C++_Serving/OP_CN.md +++ b/doc/C++_Serving/OP_CN.md @@ -138,18 +138,21 @@ DEFINE_OP(GeneralInferOp); ``` python -self.op_dict = { - "general_infer": "GeneralInferOp", - "general_reader": "GeneralReaderOp", - "general_response": "GeneralResponseOp", - "general_text_reader": "GeneralTextReaderOp", - "general_text_response": "GeneralTextResponseOp", - "general_single_kv": "GeneralSingleKVOp", - "general_dist_kv": "GeneralDistKVOp" - } +self.op_list = [ + "GeneralInferOp", + "GeneralReaderOp", + "GeneralResponseOp", + "GeneralTextReaderOp", + "GeneralTextResponseOp", + "GeneralSingleKVOp", + "GeneralDistKVInferOp", + "GeneralDistKVOp", + "GeneralCopyOp", + "GeneralDetectionOp", + ] ``` -在`python/paddle_serving_server/server.py`文件中仅添加`需要加载模型,执行推理预测的自定义的C++OP类的类名`。例如`general_reader`由于只是做一些简单的数据处理而不加载模型调用预测,故在👆的代码中需要添加,而不添加在👇的代码中。 +在`python/paddle_serving_server/server.py`文件中仅添加`需要加载模型,执行推理预测的自定义的C++OP类的类名`。例如`GeneralReaderOp`由于只是做一些简单的数据处理而不加载模型调用预测,故在👆的代码中需要添加,而不添加在👇的代码中。 ``` python default_engine_types = [ 'GeneralInferOp', diff --git a/doc/C++_Serving/OP_EN.md b/doc/C++_Serving/OP_EN.md index f3c7a480c6d1cdba06417022ac211421fb18e6fe..03ad4d2be5ebcc50937e21b416eb97c44a467556 100755 --- a/doc/C++_Serving/OP_EN.md +++ b/doc/C++_Serving/OP_EN.md @@ -136,20 +136,23 @@ After you have defined a C++ operator on server side for Paddle Serving, the las ``` python -self.op_dict = { - "general_infer": "GeneralInferOp", - "general_reader": "GeneralReaderOp", - "general_response": "GeneralResponseOp", - "general_text_reader": "GeneralTextReaderOp", - "general_text_response": "GeneralTextResponseOp", - "general_single_kv": "GeneralSingleKVOp", - "general_dist_kv": "GeneralDistKVOp" - } +self.op_list = [ + "GeneralInferOp", + "GeneralReaderOp", + "GeneralResponseOp", + "GeneralTextReaderOp", + "GeneralTextResponseOp", + "GeneralSingleKVOp", + "GeneralDistKVInferOp", + "GeneralDistKVOp", + "GeneralCopyOp", + "GeneralDetectionOp", + ] ``` In `python/paddle_serving_server/server.py` file, only the class name of the C++ OP class that needs to load the model and execute prediction is added. -For example, `general_reader`, need to be added in the 👆 code, but not in the 👇 code. Because it only does some simple data processing without loading the model and call prediction. +For example, `GeneralReaderOp`, need to be added in the 👆 code, but not in the 👇 code. Because it only does some simple data processing without loading the model and call prediction. 
``` python default_engine_types = [ 'GeneralInferOp', diff --git a/doc/Latest_Packages_CN.md b/doc/Latest_Packages_CN.md index 31b6f39c828d4b7ae74b311623ddad4cac8897a5..e70bfe11c007b52f08acda5a274c5be70670fdce 100644 --- a/doc/Latest_Packages_CN.md +++ b/doc/Latest_Packages_CN.md @@ -32,7 +32,7 @@ | Python3.6 | [paddle_serving_client-0.0.0-cp36-none-any.whl](https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.0.0-cp36-none-any.whl) | [paddle_serving_client-0.8.3-cp36-none-any.whl](https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.8.3-cp36-none-any.whl) | | Python3.7 | [paddle_serving_client-0.0.0-cp37-none-any.whl](https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.0.0-cp37-none-any.whl) | [paddle_serving_client-0.8.3-cp37-none-any.whl](https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.8.3-cp37-none-any.whl) | | Python3.8 | [paddle_serving_client-0.0.0-cp38-none-any.whl](https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.0.0-cp38-none-any.whl) | [paddle_serving_client-0.8.3-cp38-none-any.whl](https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.8.3-cp38-none-any.whl) | -| Python3.9 | [paddle_serving_client-0.0.0-cp39-none-any.whl](https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.0.0-cp39-none-any.whl) | [paddle_serving_client-0.8.3-cp39-none-any.whl](https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.8.3-cp38-none-any.whl) | +| Python3.9 | [paddle_serving_client-0.0.0-cp39-none-any.whl](https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.0.0-cp39-none-any.whl) | [paddle_serving_client-0.8.3-cp39-none-any.whl](https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-0.8.3-cp39-none-any.whl) | ## paddle-serving-app Wheel包 diff --git a/doc/Offical_Docs/1-2_Benchmark.md b/doc/Offical_Docs/1-2_Benchmark.md index 10577387f4ea0d8ec9598722fc6ebf46812c01f2..b3efa05fb2efe942e325434417eaaad6b171de57 100644 --- a/doc/Offical_Docs/1-2_Benchmark.md +++ b/doc/Offical_Docs/1-2_Benchmark.md @@ -13,11 +13,12 @@ **二.测试方法** - 请求数量递增:不断增加 client 数量,指标稳定后统计 client 的耗时信息 -- 竞品对比:C++ Serving(蓝色) 与 Tenserflow Serving(灰色)都是 C++ 实现,且同为业界主流 Serving 框架 -- 吞吐性能(QPS):折线图,数值越大表示每秒钟处理的请求数量越大,性能就越好 -- 平均处理时延(ms):柱状图,数值越大表示单个请求处理时间越长,性能就越差 - 同步模式:网络线程同步处理,保证显存占用相同的情况下,开启最大线程数 - 异步模式:异步线程处理方式,保证显存占用相同,最大批量为32,异步线程数为2 +- 性能对比: + - 竞品选择:C++ Serving(蓝色) 与 Tenserflow Serving(灰色)都是 C++ 实现,且同为业界主流 Serving 框架 + - 吞吐性能(QPS):折线图,数值越大表示每秒钟处理的请求数量越大,性能就越好 + - 平均处理时延(ms):柱状图,数值越大表示单个请求处理时间越长,性能就越差 **三.同步模式** diff --git a/doc/Offical_Docs/10-0_Terminology.md b/doc/Offical_Docs/10-0_Terminology.md new file mode 100644 index 0000000000000000000000000000000000000000..329002b0ab2174989e0d5ef885451e225f04cd1a --- /dev/null +++ b/doc/Offical_Docs/10-0_Terminology.md @@ -0,0 +1 @@ +# 名词术语解释 diff --git a/doc/Offical_Docs/11-0_Contributors.md b/doc/Offical_Docs/11-0_Contributors.md new file mode 100644 index 0000000000000000000000000000000000000000..905da97939f11ac1bf707a6dd0aeaae58cd29e6a --- /dev/null +++ b/doc/Offical_Docs/11-0_Contributors.md @@ -0,0 +1,97 @@ +# 开发者贡献 + +- [贡献代码流程](#1) + - [创建个人仓库](#1.1) + - [本地克隆仓库和分支](#1.2) + - [提交代码](#1.3) + - [通过 CI 验证](#1.4) + - [Code Review](#1.5) + - [代码合入](#1.6) +- [致谢开发者](#2) + + + +## 贡献代码流程 + +Paddle Serving 使用 Git 分支模式。通常,按以下步骤贡献代码: + + + +**一.创建个人仓库** + +Paddle Serving 社区一直在快速发展,每个人都写到官方回购中是没有意义的。所以,请先 `fork` 出个人仓库,并提交 `Pull Requests`。`fork` 个人仓库,只需前往 
[Serving](https://github.com/PaddlePaddle/Serving) 页面并单击右上角 ["Fork"](https://github.com/PaddlePaddle/Serving/fork)。 + + + +**二.本地克隆仓库和分支** + +创建个人仓库后,`clone` 个人仓库到本地计算机,默认创建本地 `develop` 分支。 +```bash +git clone https://github.com/your-github-account/Serving +``` + + + +**三.提交代码** + +本地修改代码并验证后,准备提交代码。在提交代码前请安装 [`pre-commit`](http://pre-commit.com/)、cpplint 和 pylint。 +```bash +pip3 install pre-commit +pre-commit install + +pip3 install cpplint pylint +``` +在提交代码时,会进行代码格式检查和修正,待所有检查都通过后,方可提交。 +```shell + $ git commit + CRLF end-lines remover...............................(no files to check)Skipped + yapf.....................................................................Passed + Check for added large files..............................................Passed + Check for merge conflicts................................................Passed + Check for broken symlinks................................................Passed + Detect Private Key...................................(no files to check)Skipped + Fix End of Files.........................................................Passed + clang-format.............................................................Passed + cpplint..................................................................Passed + pylint...................................................................Passed + copyright_checker........................................................Passed + [my-cool-stuff c703c041] add test file + 1 file changed, 0 insertions(+), 0 deletions(-) + create mode 100644 233 +``` + +运行代码提交命令,提交到个人仓库,再通过 Github 页面创建一个 `pull request` 提交到 Paddel Serving 主仓库。 +```bash +git push origin develop +``` + + + +**四.通过 CI 验证** + +所有提交到 Paddle Serving 主仓库的 `pull request` 都会运行 `py36`、`py38`、`py39`的所有 CI 测试用例。全部通过后才能合入。 + + + +**五.Code Review** + +所有提交的代码要经过管理员的评审,至少通过2人评审后方可合入。 + + + +**六.代码合入** + +待通过全部 CI 验证,并且完成 Code Review 和修改后,由仓库管理员合入代码。 + + + +## 致谢开发者 + +- 感谢 [@loveululu](https://github.com/loveululu) 提供 Cube python API +- 感谢 [@EtachGu](https://github.com/EtachGu) 更新 docker 使用命令 +- 感谢 [@BeyondYourself](https://github.com/BeyondYourself) 提供grpc教程,更新FAQ教程,整理文件目录。 +- 感谢 [@mcl-stone](https://github.com/mcl-stone) 提供faster rcnn benchmark脚本 +- 感谢 [@cg82616424](https://github.com/cg82616424) 提供unet benchmark脚本和修改部分注释错误 +- 感谢 [@cuicheng01](https://github.com/cuicheng01) 提供PaddleClas的11个模型 +- 感谢 [@Jiaqi Liu](https://github.com/LiuChiachi) 新增list[str]类型输入的预测支持 +- 感谢 [@Bin Lu](https://github.com/Intsigstephon) 提供PP-Shitu C++模型示例 diff --git a/doc/Offical_Docs/11_Lookup_CN.md b/doc/Offical_Docs/11_Lookup_CN.md deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/doc/Offical_Docs/12-0_FAQ_CN.md b/doc/Offical_Docs/12-0_FAQ_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..59455a77c5998741e2882749bb1e72892fd85ab6 --- /dev/null +++ b/doc/Offical_Docs/12-0_FAQ_CN.md @@ -0,0 +1,328 @@ +# 常见问题与解答 + +常见问题解答分为8大类问题: +- [版本升级问题](#1) +- [基础知识](#2) +- [安装问题](#3) +- [编译问题](#4) +- [环境问题](#5) +- [部署问题](#6) +- [预测问题](#7) +- [日志排查](#8) + + + +## 版本升级问题 + +#### Q: 从 `v0.6.x` 升级到 `v0.7.0` 版本时,运行 Python Pipeline 程序时报错信息如下: +``` +Failed to predict: (data_id=1 log_id=0) [det|0] Failed to postprocess: postprocess() takes 4 positional arguments but 5 were given +``` +**A:** 在服务端程序(例如 web_service.py)的postprocess函数定义中增加参数data_id,改为 def postprocess(self, input_dicts, fetch_dict, **data_id**, log_id) 即可。 + + + +## 基础知识 + +#### Q: Paddle Serving 、Paddle Inference、PaddleHub Serving 三者的区别及联系? 
+ +**A:** Paddle Serving 是远程服务,即发起预测的设备(手机、浏览器、客户端等)与实际预测的硬件不在一起。 paddle inference 是一个 library,适合嵌入到一个大系统中保证预测效率,Paddle Serving 调用 paddle inference 做远程服务。paddlehub serving 可以认为是一个示例,都会使用 Paddle Serving 作为统一预测服务入口。如果在 web 端交互,一般是调用远程服务的形式,可以使用 Paddle Serving 的 web service 搭建。 + +#### Q: Paddle Serving 支持哪些数据类型? + +**A:** 在 protobuf 定义中 `feed_type` 和 `fetch_type` 编号与数据类型对应如下,完整信息可参考[保存用于 Serving 部署的模型参数](./5-1_Save_Model_Params_CN.md) + +| 类型 | 类型值 | +|------|------| +| int64 | 0 | +| float32 |1 | +| int32 | 2 | +| float64 | 3 | +| int16 | 4 | +| float16 | 5 | +| bfloat16 | 6 | +| uint8 | 7 | +| int8 | 8 | +| bool | 9 | +| complex64 | 10 +| complex128 | 11 | + +#### Q: Paddle Serving 是否支持 Windows 和 Linux 原生环境部署? + +**A:** 安装 `Linux Docker`,在 Docker 中部署 Paddle Serving,参考[安装指南](./2-0_Index_CN.md) + +#### Q: Paddle Serving 如何修改消息大小限制 + +**A:** Server 和 Client 通过修改 `FLAGS_max_body_size` 参数来扩大数据量限制,单位为字节,默认为64MB + +#### Q: Paddle Serving 客户端目前支持哪些开发语言? + +**A:** 提供 Python、C++ 和 Java SDK + +#### Q: Paddle Serving 支持哪些网络协议? + +**A:** C++ Serving 同时支持 HTTP、gRPC 和 bRPC 协议。其中 HTTP 协议既支持 HTTP + Json 格式,同时支持 HTTP + proto 格式。完整信息请阅读[C++ Serving 通讯协议](./6-2_Cpp_Serving_Protocols_CN.md);Python Pipeline 支持 HTTP 和 gRPC 协议,更多信息请阅读[Python Pipeline 框架设计](./6-2_Cpp_Serving_Protocols_CN.md) + + + +## 安装问题 + +#### Q: `pip install` 安装 `python wheel` 过程中,报错信息如何修复? + +``` +Collecting opencv-python + Getting requirements to build wheel ... error + ERROR: Command errored out with exit status 1: + command: /home/work/Python-2.7.17/build/bin/python /home/work/Python-2.7.17/build/lib/python2.7/site-packages/pip/_vendor/pep517/_in_process.py get_requires_for_build_wheel /tmp/tmpLiweA9 + cwd: /tmp/pip-install-_w6AUI/opencv-python + Complete output (22 lines): + Traceback (most recent call last): + File "setup.py", line 99, in main + % {"ext": re.escape(sysconfig.get_config_var("EXT_SUFFIX"))} + File "/home/work/Python-2.7.17/build/lib/python2.7/re.py", line 210, in escape + s = list(pattern) + TypeError: 'NoneType' object is not iterable +``` + +**A:** 指定 `opencv-python` 安装版本4.2.0.32,运行 `pip3 install opencv-python==4.2.0.32` + +#### Q: pip3 install wheel包过程报错,详细信息如下: + +``` + Complete output from command python setup.py egg_info: + Found cython-generated files... 
+ error in grpcio setup command: 'install_requires' must be a string or list of strings containing valid project/version requirement specifiers; Expected ',' or end-of-list in futures>=2.2.0; python_version<'3.2' at ; python_version<'3.2' + + ---------------------------------------- +Command "python setup.py egg_info" failed with error code 1 in /tmp/pip-install-taoxz02y/grpcio/ +``` + +**A:** 需要升级 pip3 版本,再重新执行安装命令。 + +``` +pip3 install --upgrade pip +pip3 install --upgrade setuptools +``` + +#### Q: 运行过程中出现 `No module named xxx` 错误,信息如下: + +``` +Traceback (most recent call last): + File "../../deploy/serving/test_client.py", line 18, in + from paddle_serving_app.reader import * + File "/usr/local/python2.7.15/lib/python2.7/site-packages/paddle_serving_app/reader/__init__.py", line 15, in + from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, Base64ToImage + File "/usr/local/python2.7.15/lib/python2.7/site-packages/paddle_serving_app/reader/image_reader.py", line 24, in + from shapely.geometry import Polygon +ImportError: No module named shapely.geometry +``` + +**A:** 有2种方法,第一种通过 pip3 安装shapely,第二种通过 pip3 安装所有依赖组件[requirements.txt](https://github.com/PaddlePaddle/Serving/blob/develop/python/requirements.txt)。 + +``` +方法1: +pip3 install shapely==1.7.0 + +方法2: +pip3 install -r python/requirements.txt +``` + + + +## 编译问题 + +#### Q: 如何使用自己编译的 Paddle Serving 进行预测? + +**A:** 编译 Paddle Serving 请阅读[编译 Serving](https://github.com/PaddlePaddle/Serving/blob/v0.8.3/doc/Compile_CN.md)。 + +#### Q: 使用 Java 客户端,mvn compile 过程出现 "No compiler is provided in this environment. Perhaps you are running on a JRE rather than a JDK?" 错误 + +**A:** 没有安装 JDK,或者 `JAVA_HOME` 路径配置错误(正确配置是 JDK 路径,常见错误配置成 JRE 路径,例如正确路径参考 `JAVA_HOME="/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.262.b10-0.el7_8.x86_64/"`)。Java JDK 安装参考 https://segmentfault.com/a/1190000015389941。 + +#### Q: 编译过程报错 /usr/local/bin/ld: cannot find -lbz2 +``` +/usr/local/bin/ld: cannot find -lbz2 +collect2: error: ld returned 1 exit status +core/general-server/CMakeFiles/serving.dir/build.make:276: recipe for target 'core/general-server/serving' failed +make[2]: *** [core/general-server/serving] Error 1 +CMakeFiles/Makefile2:1181: recipe for target 'core/general-server/CMakeFiles/serving.dir/all' failed +make[1]: *** [core/general-server/CMakeFiles/serving.dir/all] Error 2 +Makefile:129: recipe for target 'all' failed +make: *** [all] Error 2 +``` + +**A:** Ubuntu 系统运行命令安装 libbz2: `apt install libbz2-dev`, + + + +## 环境问题 + +#### Q:程序运行出现 `CXXABI` 相关错误。 + +错误原因是编译 Python 使用的 GCC 版本和编译 Serving 的 GCC 版本不一致。对于 Docker 用户,推荐使用[Docker容器](https://github.com/PaddlePaddle/Serving/blob/develop/doc/Docker_Images_CN.md),由于 Docker 容器内的 Python 版本与 Serving 在发布前都做过适配,这样就不会出现类似的错误。 + +推荐使用 GCC 8.2 预编译包 [Python3.6](https://paddle-serving.bj.bcebos.com/others/Python3.6.10-gcc82.tar) 。下载解压后,需要将对应的目录设置为 `PYTHONROOT`,并设置 `PATH` 和 `LD_LIBRARY_PATH`。 + +```bash +export PYTHONROOT=/path/of/python # 对应解压后的Python目录 +export PATH=$PYTHONROOT/bin:$PATH +export LD_LIBRARY_PATH=$PYTHONROOT/lib:$LD_LIBRARY_PATH +``` + +#### Q:遇到 `libstdc++.so.6` 的版本不够的问题 + +触发该问题的原因在于,编译 Paddle Serving 相关可执行程序和动态库,所采用的是 GCC 8.2(Cuda 9.0 和 10.0 的 Server 可执行程序受限 CUDA 兼容性采用 GCC 4.8编译)。Python 在调用的过程中,有可能链接到了其他 GCC 版本的 `libstdc++.so`。 需要做的就是受限确保所在环境具备 GCC 8.2,其次将 GCC8.2 的`libstdc++.so.*`拷贝到某个目录例如`/home/libstdcpp` 下。最后 `export LD_LIBRARY_PATH=/home/libstdcpp:$LD_LIBRARY_PATH` 即可。 + +#### Q: 遇到 `OPENSSL_1.0.1EC` 符号找不到的问题。 + +目前 Serving 的可执行程序和客户端动态库需要链接 `1.0.2k` 版本的 `openssl` 
动态库。如果环境当中没有,可以执行 + +```bash +wget https://paddle-serving.bj.bcebos.com/others/centos_ssl.tar && \ + tar xf centos_ssl.tar && rm -rf centos_ssl.tar && \ + mv libcrypto.so.1.0.2k /usr/lib/libcrypto.so.1.0.2k && mv libssl.so.1.0.2k /usr/lib/libssl.so.1.0.2k && \ + ln -sf /usr/lib/libcrypto.so.1.0.2k /usr/lib/libcrypto.so.10 && \ + ln -sf /usr/lib/libssl.so.1.0.2k /usr/lib/libssl.so.10 && \ + ln -sf /usr/lib/libcrypto.so.10 /usr/lib/libcrypto.so && \ + ln -sf /usr/lib/libssl.so.10 /usr/lib/libssl.so +``` + +其中 `/usr/lib` 可以换成其他目录,并确保该目录在 `LD_LIBRARY_PATH` 下。 + +### GPU相关环境问题 + +#### Q:需要做哪些检查确保 Serving 可以运行在 GPU 环境 + +**注:如果是使用 Serving 提供的镜像不需要做下列检查,如果是其他开发环境可以参考以下指导。** + +首先需要确保`nvidia-smi`可用,其次需要确保所需的动态库so文件在`LD_LIBRARY_PATH`所在的目录(包括系统lib库)。 + +(1)CUDA 显卡驱动:文件名通常为 `libcuda.so.$DRIVER_VERSION` 例如驱动版本为440.10.15,文件名就是 `libcuda.so.440.10.15`。 + +(2)CUDA 和 cuDNN 动态库:文件名通常为 `libcudart.so.$CUDA_VERSION`,和 `libcudnn.so.$CUDNN_VERSION`。例如 CUDA9 就是 `libcudart.so.9.0`,Cudnn7就是 `libcudnn.so.7`。CUDA 和 cuDNN 与 Serving 的版本匹配参见[Serving所有镜像列表](Docker_Images_CN.md#%E9%99%84%E5%BD%95%E6%89%80%E6%9C%89%E9%95%9C%E5%83%8F%E5%88%97%E8%A1%A8). + + (3) CUDA 10.1及更高版本需要 TensorRT。安装 TensorRT 相关文件的脚本参考 [install_trt.sh](../tools/dockerfiles/build_scripts/install_trt.sh). + + + +## 部署问题 + +#### Q: GPU 环境运行 Serving 报错,GPU count is: 0。 + +``` +terminate called after throwing an instance of 'paddle::platform::EnforceNotMet' +what(): +-------------------------------------------- +C++ Call Stacks (More useful to developers): +-------------------------------------------- +0 std::string paddle::platform::GetTraceBackString(std::string const&, char const*, int) +1 paddle::platform::SetDeviceId(int) +2 paddle::AnalysisConfig::fraction_of_gpu_memory_for_pool() const +3 std::unique_ptr > paddle::CreatePaddlePredictor(paddle::AnalysisConfig const&) +4 std::unique_ptr > paddle::CreatePaddlePredictor(paddle::AnalysisConfig const&) +---------------------- +Error Message Summary: +---------------------- +InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0. +[Hint: Expected id < GetCUDADeviceCount(), but received id:0 >= GetCUDADeviceCount():0.] at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/platform/gpu_info.cc:211) +``` + +**A:** 原因是 `libcuda.so` 没有链接成功。首先在机器上找到 `libcuda.so`,使用 `ldd` 命令检查 libnvidia 版本与 nvidia-smi 中版本是否一致(libnvidia-fatbinaryloader.so.418.39,与NVIDIA-SMI 418.39 Driver Version: 418.39),然后用 export 导出 `libcuda.so` 的路径即可(例如 libcuda.so 在 /usr/lib64/,export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib64/) + +#### Q: 遇到 GPU not found, please check your environment or use cpu version by "pip install paddle_serving_server" + +**A:** 检查环境中是否有N卡:`ls /dev/ | grep nvidia` + +#### Q: Paddle Serving 支持哪些镜像环境? + +**A:** 支持 CentOS 和 Ubuntu 环境镜像 ,完整列表查阅[这里](https://github.com/PaddlePaddle/Serving/blob/develop/doc/Docker_Images_CN.md) + +#### Q: Paddle Serving 是否支持本地离线安装 + +**A:** 支持离线部署,需要把一些相关的[依赖包](https://github.com/PaddlePaddle/Serving/blob/develop/doc/Compile_CN.md) 提前准备安装好 + +#### Q: Docker 中启动 Server IP地址 127.0.0.1 与 0.0.0.0 差异 +**A:** 必须将容器的主进程设置为绑定到特殊的 `0.0.0.0` 表示“所有接口”地址,否则它将无法从容器外部访问。在 Docker 中 `127.0.0.1` 仅代表“这个容器”,而不是“这台机器”。如果您从容器建立到 `127.0.0.1` 的出站连接,它将返回到同一个容器;如果您将服务器绑定到 `127.0.0.1`,接收不到来自外部的连接。 + + + +## 预测问题 + +#### Q: 使用 GPU 第一次预测时特别慢,如何调整 RPC 服务的等待时间避免超时? 
+ +**A:** GPU 第一次预测需要初始化。使用 `set_rpc_timeout_ms` 设置更长的等待时间,单位为毫秒,默认时间为20秒。 + +示例: + +``` +from paddle_serving_client import Client + +client = Client() +client.load_client_config(sys.argv[1]) +client.set_rpc_timeout_ms(100000) +client.connect(["127.0.0.1:9393"]) +``` +#### Q: 执行 GPU 预测时遇到 `ExternalError: Cudnn error, CUDNN_STATUS_BAD_PARAM at (../batch_norm_op.cu:198)`错误 + +**A:** 将 cuDNN 的 lib64路径添加到 `LD_LIBRARY_PATH`,安装自 `pypi` 的 Paddle Serving 中 `post9` 版本使用的是 `cuDNN 7.3,post10` 使用的是 `cuDNN 7.5。如果是使用自己编译的 Paddle Serving,可以在 `log/serving.INFO` 日志文件中查看对应的 cuDNN 版本。 + +#### Q: 执行 GPU 预测时遇到 `Error: Failed to find dynamic library: libcublas.so` + +**A:** 将 CUDA 的 lib64路径添加到 `LD_LIBRARY_PATH`, post9 版本的 Paddle Serving 使用的是 `cuda 9.0,post10` 版本使用的 `cuda 10.0`。 + +#### Q: Client 的 `fetch var`变量名如何设置 + +**A:** 通过[保存用于 Serving 部署的模型参数](https://github.com/PaddlePaddle/Serving/blob/v0.8.3/doc/Save_EN.md) 生成配置文件 `serving_server_conf.prototxt`,获取需要的变量名。 + +#### Q: 如何使用多语言客户端 + +**A:** 多语言客户端要与多语言服务端配套使用。当前版本下(0.8.3) + +#### Q: 如何在 Windows 下使用 Paddle Serving + +**A:** 在 Windows 上可以运行多语言 RPC 客户端,或使用 HTTP 方式访问。 + +#### Q: 报错信息 `libnvinfer.so: cannot open shared object file: No such file or directory)` + + **A:** 没有安装 TensorRT,安装 TensorRT 请参考链接: https://blog.csdn.net/hesongzefairy/article/details/105343525 + + + +## 日志排查 + +#### Q: 部署和预测中的日志信息在哪里查看? + +**A:** Server 的日志分为两部分,一部分打印到标准输出,一部分打印到启动服务时的目录下的 `log/serving.INFO` 文件中。 +Client 的日志直接打印到标准输出。 +通过在部署服务之前 'export GLOG_v=3'可以输出更为详细的日志信息。 + +#### Q: C++ Serving 启动成功后,日志文件在哪里,在哪里设置日志级别? + +**A:** C++ Serving 服务的所有日志在程序运行的当前目录的`log/`目录下,分为 serving.INFO、serving.WARNING 和 serving.ERROR 文件。 +1)警告是 `glog` 组件打印的,告知 `glog` 初始化之前日志打印在 STDERR; +2)一般采用 `GLOG_v` 方式启动服务同时设置日志级别。 + +例如: +``` +GLOG_v=2 python -m paddle_serving_server.serve --model xxx_conf/ --port 9999 +``` + +#### Q: Python Pipeline 启动成功后,日志文件在哪里,在哪里设置日志级别? 
+ +**A:** Python Pipeline 服务的日志信息请阅读[Python Pipeline 设计](./7-1_Python_Pipeline_Design_CN.md) 第三节服务日志。 + +#### Q: (GLOG_v=2下)Server 日志一切正常,但 Client 始终得不到正确的预测结果 + +**A:** 可能是配置文件有问题,检查下配置文件(is_load_tensor,fetch_type等有没有问题) + +#### Q: 如何给 Server 传递 Logid + +**A:** Logid 默认为0,Client 通过在 predict 函数中指定 log_id 参数 + +#### Q: C++ Serving 出现问题如何调试和定位 + +**A:** 推荐您使用 GDB 进行定位和调试,如果您使用 Serving 的 Docker,在启动容器时候,需要加上 `docker run --privileged `参数,开启特权模式,这样才能在 docker 容器中使用 GDB 定位和调试 +如果 C++ Serving 出现 `core dump`,一般会生成 core 文件,若没有,运行 `ulimit -c unlimited`命令开启core dump。 +使用 GDB 调试 core 文件的方法为:`gdb <可执行文件> `,进入后输入 `bt` 指令显示栈信息。 + +注意:可执行文件路径是 C++ bin 文件的路径,而不是 python 命令,一般为类似下面的这种 `/usr/local/lib/python3.6/site-packages/paddle_serving_server/serving-gpu-102-0.7.0/serving` diff --git a/doc/Offical_Docs/3-0_QuickStart_Int_CN.md b/doc/Offical_Docs/3-0_QuickStart_Int_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..827982e0c8c0ffbab3ffbfae70f403602c79632e --- /dev/null +++ b/doc/Offical_Docs/3-0_QuickStart_Int_CN.md @@ -0,0 +1,9 @@ +# 快速开始案例部署 + +您可以通过以下 Paddle Serving 快速开始案例,分别了解到 C++ Serving 与 Python Pipeline 2种框架的部署方法。 +- [使用 C++ Serving 部署 Resnet50 模型案例](./3-1_QuickStart_Cpp_Resnet_CN.md) +- [使用 Python Pipeline 部署 OCR 模型案例](./3-2_QuickStart_Pipeline_OCR_CN.md) + +通过阅读以下内容掌握 Paddle Serving 基础功能以及2种框架特性和使用指南: +- [进阶 C++ Serving 介绍](./doc/Offical_Docs/6-0_C++_Serving_Advanced_Introduction_CN.md) +- [进阶 Python Pipeline 介绍](./7-0_Python_Pipeline_Int_CN.md) diff --git a/doc/Offical_Docs/3-1_QuickStart_Cpp_Resnet_CN.md b/doc/Offical_Docs/3-1_QuickStart_Cpp_Resnet_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..115df510d69642a1f31c24203439c20d4e8d5a87 --- /dev/null +++ b/doc/Offical_Docs/3-1_QuickStart_Cpp_Resnet_CN.md @@ -0,0 +1,111 @@ +# C++ Serving 快速部署案例 + +- [模型介绍](#1) +- [部署步骤](#2) + - [2.1 保存模型](#2.1) + - [2.2 保存 Serving 部署的模型参数](#2.2) + - [2.3 启动服务](#2.3) + - [2.4 启动客户端](#2.4) + + + +## 模型介绍 +残差网络(ResNet)于2015年被提出,摘得 ImageNet 榜单5项第一,成绩大幅领先第二名,是 CNN 图像史上的一个里程碑。 + +从经验上看,网络结构层数越多,有利于复杂特征的提取,从理论上讲会取得更好的结果。但是,随着网络层数的增加,准确率会趋于饱和甚至会下降,称为退化问题(Degradation problem)。其根本原因是深层网络出现梯度消失或者梯度爆炸的问题。残差网络利用短路机制加入了残差单元,解决了退化问题。 + +ResNet 网络是参考了 VGG19 网络,加入残差单元,ResNet50 有50层网络。 + + + +## 部署步骤 + +前提条件是你已完成[环境安装](./2-0_Index_CN.md)步骤,并已验证环境安装成功,此处不在赘述。 + +克隆 Serving 仓库后,进入 `examples/C++/PaddleClas/resnet_50_vd` 目录下,已提供程序、配置和性能测试脚本。 +``` +git clone https://github.com/PaddlePaddle/Serving +``` + +按以下5个步骤操作即可实现模型部署。 +- 一.获取模型 +- 二.保存 Serving 部署的模型参数 +- 三.启动服务 +- 四.启动客户端 + + + +**一.获取模型** + +下载 `ResNet50_vd` 的 推理模型,更多模型信息请阅读[ImageNet 预训练模型库](https://github.com/PaddlePaddle/PaddleClas/blob/8fa820f5c81edb1e7a2b222306a307bc27bff90f/docs/zh_CN/algorithm_introduction/ImageNet_models.md) +``` +wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_vd_infer.tar && tar xf ResNet50_vd_infer.tar + +``` + + + +**二.保存 Serving 部署的模型参数** + +用 `paddle_serving_client` 把下载的推理模型转换成易于 Serving 部署的模型格式,完整信息请参考 [保存用于 Serving 部署的模型参数](./5-1_Save_Model_Params_CN.md)。 + +``` +python3 -m paddle_serving_client.convert --dirname ./ResNet50_vd_infer/ \ + --model_filename inference.pdmodel \ + --params_filename inference.pdiparams \ + --serving_server ./ResNet50_vd_serving/ \ + --serving_client ./ResNet50_vd_client/ +``` + +保存参数后,会在当前文件夹多出 `ResNet50_vd_serving` 和 `ResNet50_vd_client` 的文件夹,分别用户服务端和客户端。 +``` +├── daisy.jpg +├── http_client.py +├── imagenet.label +├── ResNet50_vd_client +│   ├── serving_client_conf.prototxt +│   └── serving_client_conf.stream.prototxt +├── 
ResNet50_vd_infer +│   ├── inference.pdiparams +│   ├── inference.pdiparams.info +│   └── inference.pdmodel +├── ResNet50_vd_serving +│   ├── fluid_time_file +│   ├── inference.pdiparams +│   ├── inference.pdmodel +│   ├── serving_server_conf.prototxt +│   └── serving_server_conf.stream.prototxt +├── rpc_client.py +``` + + + +**三.启动服务** + +C++ Serving 服务可以指定一个网络端口同时接收 HTTP、gRPC 和 bRPC 请求。命令参数 `--model` 指定模型路径,`--gpu_ids` 指定 GPU 卡,`--port` 指定端口。 + +``` +python3 -m paddle_serving_server.serve --model ResNet50_vd_serving --gpu_ids 0 --port 9394 +``` + + + +**四.启动客户端** + +HTTP 客户端程序 `http_client.py` 创建请求参数,向服务端发起 HTTP 请求。 + +``` +python3 http_client.py +``` + +RPC 客户端程序 `rpc_client.py` 创建请求参数,向服务端发起 gRPC 请求。 + +``` +python3 rpc_client.py +``` + +成功运行后,模型预测的结果会打印如下: + +``` +prediction: daisy, probability: 0.9341399073600769 +``` diff --git a/doc/Offical_Docs/3-2_QuickStart_Pipeline_OCR_CN.md b/doc/Offical_Docs/3-2_QuickStart_Pipeline_OCR_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..d7772a2be1126aec77c3ddda8de91ea14e24f995 --- /dev/null +++ b/doc/Offical_Docs/3-2_QuickStart_Pipeline_OCR_CN.md @@ -0,0 +1,293 @@ +# Python Pipeline 快速部署案例 + +- [模型介绍](#1) +- [部署步骤](#2) + - [获取模型与保存模型参数](#2.1) + - [保存 Serving 部署的模型参数](#2.2) + - [下载测试数据集(可选)](#2.3) + - [修改配置文件(可选)](#2.4) + - [代码与配置信息绑定](#2.5) + - [启动服务与验证](#2.6) + + +Python Pipeline 框架使用 Python 语言开发,是一套端到端多模型组合服务编程框架,旨在降低编程门槛,提高资源使用率(尤其是GPU设备),提升整体服务的预估效率。详细设计参考[ Python Pipeline 设计与使用]() + + + +## 模型介绍 + +OCR 技术一般指光学字符识别。 OCR(Optical Character Recognition,光学字符识别)是指电子设备(例如扫描仪或数码相机)检查纸上打印的字符,通过检测暗、亮的模式确定其形状,然后用字符识别方法将形状翻译成计算机文字的过程。 + +[PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) 是百度飞桨 OCR 模型套件库,旨在打造一套丰富、领先、且实用的 OCR 工具库,助力开发者训练出更好的模型,并应用落地。具有 PP-OCR 系列高质量预训练模型,准确的识别效果;支持中英文数字组合识别、竖排文本识别、长文本识别;支持多语言识别:韩语、日语、德语、法语等约80种语言等特性。 + +PaddleOCR 提供的 PP-OCR 系列模型覆盖轻量级服务端、轻量级移动端和通用服务端3种场景。 + +| 模型介绍 | 模型大小 | 模型名称 | 推荐场景 | +| ------- | ------ | ----- | ----- | +| 中英文超轻量模型 | 13.0M | ch_PP-OCRv2_xx | 服务器端 或 移动端 | +| 中英文超轻量移动端模型 | 9.4M | ch_ppocr_mobile_v2.0_xx | 移动端| +| 中英文通用服务端模型 | 143.4M | ch_ppocr_server_v2.0_xx | 服务器端 | + + + +## 部署步骤 + +前提条件是你已完成[环境安装]()步骤,并已验证环境安装成功,此处不在赘述。 + +在克隆 Serving 代码后,进入 examples/Pipeline/PaddleOCR/ocr 目录下,包括程序、配置和性能测试脚本。 +``` +git clone https://github.com/PaddlePaddle/Serving +``` +通过6个步骤操作即可实现 OCR 示例部署。 +- 一.获取模型 +- 二.保存 Serving 部署的模型参数 +- 三.下载测试数据集(可选) +- 四.修改 `config.yml` 配置(可选) +- 五.代码与配置信息绑定 +- 六.启动服务与验证 + + + +**一.获取模型与保存模型参数** + +本章节选用中英文超轻量模型 ch_PP-OCRv2_xx 制作部署案例,模型体积小,效果很好,属于性价比很高的选择。 + +``` +python3 -m paddle_serving_app.package --get_model ocr_rec +tar -xzvf ocr_rec.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_det +tar -xzvf ocr_det.tar.gz +``` + + + +**二.保存 Serving 部署的模型参数** + +为了节省大家的时间,已将预训练模型使用[保存用于 Serving 部署的模型参数](./5-1_Save_Model_Params_CN.md)方法打包成压缩包,下载并解压即可使用。如你自训练的模型需经过保存模型服务化参数步骤才能服务化部署。 + + + +**三.下载测试数据集(可选)** + +下载测试图片集,如使用自有测试数据集,可忽略此步骤。 +``` +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/ocr/test_imgs.tar +tar xf test_imgs.tar +``` + + + +**四.修改配置文件(可选)** + +修改配置文件 `config.yml` 设置服务、图、OP 级别属性。如果使用默认配置,此步骤可忽略。 + +由于配置项较多,仅重点介绍部分核心选项的使用,完整配置选项说明可参考[ 配置说明]() +``` +#rpc端口, rpc_port和http_port不允许同时为空。当rpc_port为空且http_port不为空时,会自动将rpc_port设置为http_port+1 +rpc_port: 18090 + +#http端口, rpc_port和http_port不允许同时为空。当rpc_port可用且http_port为空时,不自动生成http_port +http_port: 9999 + +#worker_num, 最大并发数。当build_dag_each_worker=True时, 框架会创建worker_num个进程,每个进程内构建grpcSever和DAG +##当build_dag_each_worker=False时,框架会设置主线程grpc线程池的max_workers=worker_num +worker_num: 20 + 
+#build_dag_each_worker, False,框架在进程内创建一条DAG;True,框架会每个进程内创建多个独立的DAG +build_dag_each_worker: false + +#有向无环图级别的选项 +dag: + #op资源类型, True, 为线程模型;False,为进程模型 + is_thread_op: False + + #重试次数 + retry: 1 + + #使用性能分析, True,生成Timeline性能数据,对性能有一定影响;False为不使用 + use_profile: false + + # 统计各个阶段耗时、Channel在 PipelineServingLogs/pipeline.tracer + tracer: + #每次记录的间隔,单位:秒 + interval_s: 10 + +#模型或可独立控制并发的处理函数级别选项 +op: + det: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 6 + + #当op配置没有server_endpoints时,从local_service_conf读取本地服务配置 + local_service_conf: + #client类型,包括brpc, grpc和local_predictor.local_predictor不启动Serving服务,进程内预测 + client_type: local_predictor + + #det模型路径 + model_config: ocr_det_model + + #Fetch结果列表,以client_config中fetch_var的alias_name为准 + fetch_list: ["save_infer_model/scale_0.tmp_1"] + + # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu + device_type: 0 + + #计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 + devices: "" + + #use_mkldnn + #use_mkldnn: True + + #thread_num + thread_num: 2 + + #ir_optim + ir_optim: True + + #开启tensorrt后,进行优化的子图包含的最少节点数 + #min_subgraph_size: 13 + rec: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 3 + + #超时时间, 单位ms + timeout: -1 + + #Serving交互重试次数,默认不重试 + retry: 1 + + #当op配置没有server_endpoints时,从local_service_conf读取本地服务配置 + local_service_conf: + + #client类型,包括brpc, grpc和local_predictor。local_predictor不启动Serving服务,进程内预测 + client_type: local_predictor + + #rec模型路径 + model_config: ocr_rec_model + + #Fetch结果列表,以client_config中fetch_var的alias_name为准 + fetch_list: ["save_infer_model/scale_0.tmp_1"] + + # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu + device_type: 0 + + #计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 + devices: "" + + #use_mkldnn,仅当 CPU 推理时使用 + #use_mkldnn: True + + #thread_num,为 CPU 推理时,数学计算线程数,开大可降低处理时长 + thread_num: 2 + + #ir_optim,IR 优化,开启 TensorRT 时,必须开启 ir_optim + ir_optim: True + + #开启tensorrt后,进行优化的子图包含的最少节点数 + #min_subgraph_size: 3 +``` + + + +**五.代码与配置信息绑定** + +第四步,实现代码和配置文件 Config.yml 绑定,以及设置多模型组合关系。具体包括: + +1. 重写模型前后处理: + +每个 op (模型或函数) 处理和 图结构 定义在 web_service.py 程序中,本案例实现了 `DetOp` 和 `RecOp` 2个 OP +``` +# DetOp 对应配置文件 Config.yml中 det op +class DetOp(Op): + def init_op(self): + def preprocess(self, input_dicts, data_id, log_id): + def postprocess(self, input_dicts, fetch_dict, data_id, log_id): + +# RecOp 对应配置文件 Config.yml中 rec op +class RecOp(Op): + def init_op(self): + def preprocess(self, input_dicts, data_id, log_id): + def postprocess(self, input_dicts, fetch_dict, data_id, log_id): + +``` + +2. 构建多模型组合关系 + +继承父类 `WebService` 派生出 `OcrService` 类,通过重写 `get_pipeline_response()` 接口,实例化 `DetOp` 和 `RecOp` ,`name` 字段与 config.yml 中 op 名称一致; input_ops 是前置 OP 列表实现 多模型组合的图结构。 + +``` +class OcrService(WebService): + def get_pipeline_response(self, read_op): + det_op = DetOp(name="det", input_ops=[read_op]) + rec_op = RecOp(name="rec", input_ops=[det_op]) + return rec_op +``` + +3. 绑定代码与配置文件 + +通过构造函数 `OcrService(name="ocr")` 设置请求 URL 中 name 字段;通过 `prepare_pipeline_config()` 接口绑定配置文件 `config.yml`;通过 `run_service()` 接口启动服务。 + +``` +ocr_service = OcrService(name="ocr") +ocr_service.prepare_pipeline_config("config.yml") +ocr_service.run_service() +``` + + + +**六.启动服务与验证** + +启动服务前,可看到程序路径下所有文件路径如下: +``` +. 
+├── 7.jpg +├── benchmark.py +├── benchmark.sh +├── config.yml +├── imgs +│   └── ggg.png +├── ocr_det_client +│   ├── serving_client_conf.prototxt +│   └── serving_client_conf.stream.prototxt +├── ocr_det_model +│   ├── inference.pdiparams +│   ├── inference.pdmodel +│   ├── serving_server_conf.prototxt +│   └── serving_server_conf.stream.prototxt +├── ocr_rec_client +│   ├── serving_client_conf.prototxt +│   └── serving_client_conf.stream.prototxt +├── ocr_rec_model +│   ├── inference.pdiparams +│   ├── inference.pdmodel +│   ├── serving_server_conf.prototxt +│   └── serving_server_conf.stream.prototxt +├── pipeline_http_client.py +├── pipeline_rpc_client.py +├── ppocr_keys_v1.txt +├── ProcessInfo.json +├── README_CN.md +├── README.md +└── web_service.py +``` + +运行服务程序 `web_service.py` 启动服务端,接收客户端请求,采用图执行引擎执行推理预测。 +``` +# Run Server +python3 web_service.py &>log.txt & +``` + +客户端程序 `pipeline_http_client.py` 注册服务端地址,并发送客户端请求。 + +启动客户端前,要确认 URL://{ip}:{port}/{name}/{method} 。本项目中 {name} 即是 web_service.py 中 OcrService name 参数 "ocr"。 {method} 默认为 "prediction" + +``` +# Run Client +python3 pipeline_http_client.py +``` + +模型效果: + +

+ +

diff --git a/doc/Offical_Docs/4-0_ModelZoo_CN.md b/doc/Offical_Docs/4-0_ModelZoo_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..80f4b31e81f1d205af1fd2ad260733f2c60b34f5 --- /dev/null +++ b/doc/Offical_Docs/4-0_ModelZoo_CN.md @@ -0,0 +1,163 @@ +# 模型库 + +- [模型分类](#1) + - [1.1 图像分类与识别](#1.1) + - [1.2 文本类](#1.2) + - [1.3 推荐系统](#1.3) + - [1.4 人脸识别](#1.4) + - [1.5 目标检测](#1.5) + - [1.6 文字识别](#1.6) + - [1.7 图像分割](#1.7) + - [1.8 关键点检测](#1.8) + - [1.9 视频理解](#1.9) +- [模型示例库](#2) + +Paddle Serving 已实现9个类别,共计46个模型的服务化部署示例。 + + + +## 模型分类 + + + +**一.图像分类与识别** + +模型部署示例请参阅下表: +| 场景| 模型 | 类型 | 示例使用的框架 | 下载 | +| --- | --- | --- | --- | ---- | +| 图像识别 |pp_shitu | PaddleClas | [C++ Serving](../examples/C++/PaddleClas/pp_shitu) | [.tar.gz](https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/rec/models/inference/serving/pp_shitu.tar.gz) | +| 图像分类 | resnet_v2_50_imagenet | PaddleClas | [C++ Serving](../examples/C++/PaddleClas/resnet_v2_50)
[Pipeline Serving](../examples/Pipeline/PaddleClas/ResNet_V2_50) | [.tar.gz](https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageClassification/resnet_v2_50_imagenet.tar.gz) |
+| 图像分类 |mobilenet_v2_imagenet | PaddleClas | [C++ Serving](../examples/C++/PaddleClas/mobilenet) | [.tar.gz](https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageClassification/mobilenet_v2_imagenet.tar.gz) |
+| 图像分类 |resnet50_vd | PaddleClas | [C++ Serving](../examples/C++/PaddleClas/imagenet)<br>
[Pipeline Serving](../examples/Pipeline/PaddleClas/ResNet50_vd) | [.tar.gz](https://paddle-serving.bj.bcebos.com/model/ResNet50_vd.tar) | +| 图像分类 |ResNet50_vd_KL | PaddleClas | [Pipeline Serving](../examples/Pipeline/PaddleClas/ResNet50_vd_KL) | [.tar](https://paddle-serving.bj.bcebos.com/model/ResNet50_vd_KL.tar) | +| 图像分类 |ResNet50_vd_FPGM | PaddleClas | [Pipeline Serving](../examples/Pipeline/PaddleClas/ResNet50_vd_FPGM) | [.tar](https://paddle-serving.bj.bcebos.com/model/ResNet50_vd_FPGM.tar) | +| 图像分类 |ResNet50_vd_PACT | PaddleClas | [Pipeline Serving](../examples/Pipeline/PaddleClas/ResNet50_vd_PACT) | [.tar](https://paddle-serving.bj.bcebos.com/model/ResNet50_vd_PACT.tar) | +| 图像分类 |ResNeXt101_vd_64x4d | PaddleClas | [Pipeline Serving](../examples/Pipeline/PaddleClas/ResNeXt101_vd_64x4d) | [.tar](https://paddle-serving.bj.bcebos.com/model/ResNeXt101_vd_64x4d.tar) | +| 图像分类 |DarkNet53 | PaddleClas | [Pipeline Serving](../examples/Pipeline/PaddleClas/DarkNet53) | [.tar](https://paddle-serving.bj.bcebos.com/model/DarkNet53.tar) | +| 图像分类 |MobileNetV1 | PaddleClas | [Pipeline Serving](../examples/Pipeline/PaddleClas/MobileNetV1) | [.tar](https://paddle-serving.bj.bcebos.com/model/MobileNetV1.tar) | +| 图像分类 |MobileNetV2 | PaddleClas | [Pipeline Serving](../examples/Pipeline/PaddleClas/MobileNetV2) | [.tar](https://paddle-serving.bj.bcebos.com/model/MobileNetV2.tar) | +| 图像分类 |MobileNetV3_large_x1_0 | PaddleClas | [Pipeline Serving](../examples/Pipeline/PaddleClas/MobileNetV3_large_x1_0) | [.tar](https://paddle-serving.bj.bcebos.com/model/MobileNetV3_large_x1_0.tar) | +| 图像生成 |HRNet_W18_C | PaddleClas | [Pipeline Serving](../examples/Pipeline/PaddleClas/HRNet_W18_C) | [.tar](https://paddle-serving.bj.bcebos.com/model/HRNet_W18_C.tar) | +| 图像分类 |ShuffleNetV2_x1_0 | PaddleClas | [Pipeline Serving](../examples/Pipeline/PaddleClas/ShuffleNetV2_x1_0) | [.tar](https://paddle-serving.bj.bcebos.com/model/ShuffleNetV2_x1_0.tar) | + +--- + + + +**二.文本类** + +模型部署示例请参阅下表: +| 场景| 模型 | 类型 | 示例使用的框架 | 下载 | +| --- | --- | --- | --- | ---- | +| 文本生成 | bert_chinese_L-12_H-768_A-12 | PaddleNLP | [C++ Serving](../examples/C++/PaddleNLP/bert)
[Pipeline Serving](../examples/Pipeline/PaddleNLP/bert) | [.tar.gz](https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz) | +| 情感分析 |senta_bilstm | PaddleNLP | [C++ Serving](../examples/C++/PaddleNLP/senta) | [.tar.gz](https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SentimentAnalysis/senta_bilstm.tar.gz) |C++ Serving| +| 词法分析 |lac | PaddleNLP | [C++ Serving](../examples/C++/PaddleNLP/lac) | [.tar.gz](https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/LexicalAnalysis/lac.tar.gz) | +| 机器翻译 |transformer | PaddleNLP | [Pipeline Serving](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/examples/machine_translation/transformer/deploy/serving/README.md) | [model](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/machine_translation/transformer) | +| 标点符号预测 | ELECTRA | PaddleNLP | [Pipeline Serving](https://github.com/PaddlePaddle/PaddleNLP/blob/develop/examples/language_model/electra/deploy/serving/README.md) | [model](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/examples/language_model/electra) | +| 抽取文本向量| In-batch Negatives | PaddleNLP | [Pipeline Serving](https://github.com/PaddlePaddle/PaddleNLP/tree/develop/applications/neural_search/recall/in_batch_negative) | [model](https://bj.bcebos.com/v1/paddlenlp/models/inbatch_model.zip) | + +--- + + +**三.推荐系统** + +模型部署示例请参阅下表: +| 场景| 模型 | 类型 | 示例使用的框架 | 下载 | +| --- | --- | --- | --- | ---- | +| CTR预估 | criteo_ctr | PaddleRec | [C++ Serving](../examples/C++/PaddleRec/criteo_ctr) | [.tar.gz](https://paddle-serving.bj.bcebos.com/criteo_ctr_example/criteo_ctr_demo_model.tar.gz) | +| CTR预估 | criteo_ctr_with_cube | PaddleRec | [C++ Serving](../examples/C++/PaddleRec/criteo_ctr_with_cube) | [.tar.gz](https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz) | +| 内容推荐 | wide&deep | PaddleRec | [C++ Serving](https://github.com/PaddlePaddle/PaddleRec/blob/release/2.1.0/doc/serving.md) | [model](https://github.com/PaddlePaddle/PaddleRec/blob/release/2.1.0/models/rank/wide_deep/README.md) | + +--- + + +**四.人脸识别** + +模型部署示例请参阅下表: +| 场景| 模型 | 类型 | 示例使用的框架 | 下载 | +| --- | --- | --- | --- | ---- | +| 人脸识别|blazeface | PaddleDetection | [C++ Serving](../examples/C++/PaddleDetection/blazeface) | [.tar.gz](https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ObjectDetection/blazeface.tar.gz) |C++ Serving| + +--- + + +**五.目标检测** + +模型部署示例请参阅下表: +| 场景| 模型 | 类型 | 示例使用的框架 | 下载 | +| --- | --- | --- | --- | ---- | +| 目标检测 |cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco | PaddleDetection | [C++ Serving](../examples/C++/PaddleDetection/cascade_rcnn) | [.tar.gz](https://paddle-serving.bj.bcebos.com/pddet_demo/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco_serving.tar.gz) | +| 目标检测 | yolov4 | PaddleDetection | [C++ Serving](../examples/C++/PaddleDetection/yolov4) | [.tar.gz](https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ObjectDetection/yolov4.tar.gz) |C++ Serving| +| 目标检测 |fcos_dcn_r50_fpn_1x_coco | PaddleDetection | [C++ Serving](../examples/C++/PaddleDetection/fcos_dcn_r50_fpn_1x_coco) | [.tar.gz](https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/fcos_dcn_r50_fpn_1x_coco.tar) | +| 目标检测 | ssd_vgg16_300_240e_voc | PaddleDetection | [C++ Serving](../examples/C++/PaddleDetection/ssd_vgg16_300_240e_voc) | [.tar](https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ssd_vgg16_300_240e_voc.tar) | +| 目标检测 |yolov3_darknet53_270e_coco | PaddleDetection | [C++ Serving](../examples/C++/PaddleDetection/yolov3_darknet53_270e_coco)
[Pipeline Serving](../examples/Pipeline/PaddleDetection/yolov3) | [.tar](https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/yolov3_darknet53_270e_coco.tar) | +| 目标检测 | faster_rcnn_r50_fpn_1x_coco | PaddleDetection | [C++ Serving](../examples/C++/PaddleDetection/faster_rcnn_r50_fpn_1x_coco)
[Pipeline Serving](../examples/Pipeline/PaddleDetection/faster_rcnn) | [.tar](https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/faster_rcnn_r50_fpn_1x_coco.tar) | +| 目标检测 |ppyolo_r50vd_dcn_1x_coco | PaddleDetection | [C++ Serving](../examples/C++/PaddleDetection/ppyolo_r50vd_dcn_1x_coco) | [.tar](https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ppyolo_r50vd_dcn_1x_coco.tar) | +| 目标检测 | ppyolo_mbv3_large_coco | PaddleDetection | [Pipeline Serving](../examples/Pipeline/PaddleDetection/ppyolo_mbv3) | [.tar](https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ppyolo_mbv3_large_coco.tar) | +| 目标检测 | ttfnet_darknet53_1x_coco | PaddleDetection | [C++ Serving](../examples/C++/PaddleDetection/ttfnet_darknet53_1x_coco) | [.tar](https://paddle-serving.bj.bcebos.com/pddet_demo/ttfnet_darknet53_1x_coco.tar) | +| 目标检测 |YOLOv3-DarkNet | PaddleDetection | [C++ Serving](https://github.com/PaddlePaddle/PaddleDetection/tree/release/2.3/deploy/serving) | [.pdparams](https://paddledet.bj.bcebos.com/models/yolov3_darknet53_270e_coco.pdparams)
[.yml](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/yolov3/yolov3_darknet53_270e_coco.yml) | + +--- + + +**六.文字识别** + +模型部署示例请参阅下表: +| 场景| 模型 | 类型 | 示例使用的框架 | 下载 | +| --- | --- | --- | --- | ---- | +| 文字识别 |ocr_rec | PaddleOCR | [C++ Serving](../examples/C++/PaddleOCR/ocr)
[Pipeline Serving](../examples/Pipeline/PaddleOCR/ocr) | [.tar.gz](https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/ocr_rec.tar.gz) | +| 文字识别 |ocr_det | PaddleOCR | [C++ Serving](../examples/C++/PaddleOCR/ocr)
[Pipeline Serving](../examples/Pipeline/PaddleOCR/ocr) | [.tar.gz](https://paddle-serving.bj.bcebos.com/ocr/ocr_det.tar.gz) | +| 文字识别 |ch_ppocr_mobile_v2.0_det | PaddleOCR | [Pipeline Serving](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/deploy/pdserving/README.md) | [model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tar)
[.yml](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/configs/det/ch_ppocr_v2.0/ch_det_mv3_db_v2.0.yml) | +| 文字识别 |ch_ppocr_server_v2.0_det | PaddleOCR | [Pipeline Serving](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/deploy/pdserving/README.md) | [model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_det_infer.tar)
[.yml](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/configs/det/ch_ppocr_v2.0/ch_det_res18_db_v2.0.yml) | +| 文字识别 |ch_ppocr_mobile_v2.0_rec | PaddleOCR | [Pipeline Serving](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/deploy/pdserving/README.md) | [model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tar)
[.yml](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/configs/rec/ch_ppocr_v2.0/rec_chinese_lite_train_v2.0.yml) | +| 文字识别 |ch_ppocr_server_v2.0_rec | PaddleOCR | [Pipeline Serving](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/deploy/pdserving/README.md) | [model](https://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_server_v2.0_rec_infer.tar)
[.yml](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/configs/rec/ch_ppocr_v2.0/rec_chinese_common_train_v2.0.yml) | +| 文字识别 |ch_ppocr_mobile_v2.0 | PaddleOCR | [Pipeline Serving](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/deploy/pdserving/README.md) | [model](https://github.com/PaddlePaddle/PaddleOCR) | +| 文字识别 |ch_ppocr_server_v2.0 | PaddleOCR | [Pipeline Serving](https://github.com/PaddlePaddle/PaddleOCR/blob/release/2.3/deploy/pdserving/README.md) | [model](https://github.com/PaddlePaddle/PaddleOCR) | + +--- + + +**七.图像分割** + +模型部署示例请参阅下表: +| 场景| 模型 | 类型 | 示例使用的框架 | 下载 | +| --- | --- | --- | --- | ---- | +| 图像分割 | deeplabv3 | PaddleSeg | [C++ Serving](../examples/C++/PaddleSeg/deeplabv3) | [.tar.gz](https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageSegmentation/deeplabv3.tar.gz) | +| 图像分割 | unet | PaddleSeg | [C++ Serving](../examples/C++/PaddleSeg/unet_for_image_seg) | [.tar.gz](https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/ImageSegmentation/unet.tar.gz) | + +--- + + +**八.关键点检测** + +模型部署示例请参阅下表: +| 场景| 模型 | 类型 | 示例使用的框架 | 下载 | +| --- | --- | --- | --- | ---- | +| 关键点检测 |faster_rcnn_hrnetv2p_w18_1x | PaddleDetection | [C++ Serving](../examples/C++/PaddleDetection/faster_rcnn_hrnetv2p_w18_1x) | [.tar.gz](https://paddle-serving.bj.bcebos.com/pddet_demo/faster_rcnn_hrnetv2p_w18_1x.tar.gz) | + +--- + + +**九.视频理解** + +模型部署示例请参阅下表: +| 场景| 模型 | 类型 | 示例使用的框架 | 下载 | +| --- | --- | --- | --- | ---- | +| 视频理解 |PPTSN_K400 | PaddleVideo | [Pipeline Serving](../examples/Pipeline/PaddleVideo/PPTSN_K400) | [model](https://paddle-serving.bj.bcebos.com/model/PaddleVideo/PPTSN_K400.tar) | + +--- + + +## 模型示例库 + +Paddle Serving 代码库下模型部署示例请参考 [examples](../examples) 目录。更多 Paddle Serving 部署模型请参考 [wholechain](https://www.paddlepaddle.org.cn/wholechain)。 + +了解最新模型,请进入 Paddle 模型套件库: + - [PaddleDetection](https://github.com/PaddlePaddle/PaddleDetection) + - [PaddleOCR](https://github.com/PaddlePaddle/PaddleOCR) + - [PaddleClas](https://github.com/PaddlePaddle/PaddleClas) + - [PaddleNLP](https://github.com/PaddlePaddle/PaddleNLP) + - [PaddleRec](https://github.com/PaddlePaddle/PaddleRec) + - [PaddleSeg](https://github.com/PaddlePaddle/PaddleSeg) + - [PaddleGAN](https://github.com/PaddlePaddle/PaddleGAN) + - [PaddleVideo](https://github.com/PaddlePaddle/PaddleVideo) diff --git a/doc/Offical_Docs/5-1_Save_Model_Params_CN.md b/doc/Offical_Docs/5-1_Save_Model_Params_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..2faaae7b4703bbec1aad6111de66890f2b7a149d --- /dev/null +++ b/doc/Offical_Docs/5-1_Save_Model_Params_CN.md @@ -0,0 +1,188 @@ +# 保存用于 Serving 部署的模型参数 + +- [背景介绍](#1) +- [功能设计](#2) +- [功能使用](#3) + - [PYTHON 命令执行](#3.1) + - [代码引入执行](#3.2) +- [Serving 部署](#4) + - [服务端部署示例](#4.1) + - [客户端部署示例](#4.2) + + + +## 背景介绍 + +模型参数信息保存在模型文件中,为什么还要保存用于 Paddle Serving 部署的模型参数呢,原因有3个: + +1. 服务化场景分为客户端和服务端,服务端加载模型,而在客户端没有模型信息,但需要在客户端需实现数据拼装和类型转换。 +2. 模型升级过程中 `feed vars` 和 `fetch vars` 的名称变化会导致代码升级,通过增加一个 `alias_name` 字段映射名称,代码无需升级。 +3. 
部署 `Web` 服务,并使用 `URL` 方式访问时,请求信息中缺少类型和维度信息,在服务端推理前需要进行转换。 + + + +## 功能设计 + +飞桨训推一体框架中,从动态图模型训练到静态图推理部署,一体化流程如下所示 +``` +①动态图训练 → ②模型动转静 → ③静态模型 → ④模型保存 → ⑤Serving 部署 +``` +在飞桨框架2.1对模型与参数的保存与载入相关接口进行了梳理,完整文档参考[模型保存与载入](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/02_paddle2.0_develop/08_model_save_load_cn.html) +- 对于训练调优场景,我们推荐使用 `paddle.save/load` 保存和载入模型; +- 对于推理部署场景,我们推荐使用 `paddle.jit.save/load`(动态图)和 `paddle.static.save/load_inference_model` (静态图)保存载入模型; + +Paddle Serving 模型参数保存接口定位是在 `②模型动转静` 导出 `③静态模型`后,使用 `paddle.static.load_inference_model` 接口加载模型,和 `paddle.static.save_vars` 接口保存模型参数。 + +生成的模型参数信息保存在 `paddle_serving_server/client.prototxt` 文件中,其格式如下 +``` +feed_var { + name: "x" + alias_name: "image" + is_lod_tensor: false + feed_type: 1 + shape: 3 + shape: 960 + shape: 960 +} +fetch_var { + name: "save_infer_model/scale_0.tmp_1" + alias_name: "save_infer_model/scale_0.tmp_1" + is_lod_tensor: false + fetch_type: 1 + shape: 1 + shape: 960 + shape: 960 +} +``` + +| 参数 | 描述 | +|------|---------| +| name | 实际变量名 | +| alias_name | 变量别名,与 name 的关联业务场景中变量名 | +| is_lod_tensor | 是否为 LOD Tensor | +| feed_type | feed 变量类型| +| fetch_type | fetch 变量类型| +| shape 数组 | 变量的 Shape 信息 | + +feed 与 fetch 变量的类型列表如下: +| 类型 | 类型值 | +|------|------| +| int64 | 0 | +| float32 |1 | +| int32 | 2 | +| float64 | 3 | +| int16 | 4 | +| float16 | 5 | +| bfloat16 | 6 | +| uint8 | 7 | +| int8 | 8 | +| bool | 9 | +| complex64 | 10 +| complex128 | 11 | + + + +## 功能使用 + +Paddle 推理模型有3种形式,每种形式的读模型的方式都不同,散列方式必须以路径方式加载,其余2种采用目录或文件方式均可。 +1) Paddle 2.0前版本:`__model__`, `__params__` +2) Paddle 2.0后版本:`*.pdmodel`, `*.pdiparams` +3) 散列:`__model__`, `conv2d_1.w_0`, `conv2d_2.w_0`, `fc_1.w_0`, `conv2d_1.b_0`, ... + +`paddle_serving_client.convert` 接口既支持 PYTHON 命令方式执行,又支持 代码中引入运行。 + + +| 参数 | 类型 | 默认值 | 描述 | +|--------------|------|-----------|--------------------------------| +| `dirname` | str | - | 需要转换的模型文件存储路径,Program结构文件和参数文件均保存在此目录。| +| `serving_server` | str | `"serving_server"` | 转换后的模型文件和配置文件的存储路径。默认值为serving_server | +| `serving_client` | str | `"serving_client"` | 转换后的客户端配置文件存储路径。默认值为serving_client | +| `model_filename` | str | None | 存储需要转换的模型Inference Program结构的文件名称。如果设置为None,则使用 `__model__` 作为默认的文件名 | +| `params_filename` | str | None | 存储需要转换的模型所有参数的文件名称。当且仅当所有模型参数被保>存在一个单独的二进制文件中,它才需要被指定。如果模型参数是存储在各自分离的文件中,设置它的值为None | + + + +**一.PYTHON 命令执行** + +首先需要安装 `paddle_serivng_client` 包,以目录方式加载模型。 + +示例一,是以模型路径方式加载模型,适用于全部3种类型。 +```python +python3 -m paddle_serving_client.convert --dirname ./your_inference_model_dir +``` + +示例二,以指定加载 `当前路径` 下模型 `dygraph_model.pdmodel` 和 `dygraph_model.pdiparams`,保存结果在 `serving_server` 和 `serving_client` 目录。 +```python +python3 -m paddle_serving_client.convert --dirname . 
--model_filename dygraph_model.pdmodel --params_filename dygraph_model.pdiparams --serving_server serving_server --serving_client serving_client +``` + + + +**二.代码引入执行** + +代码引入执行方式,通过 `import io` 包并调用 `inference_model_to_serving` 实现模型参数保存。 +```python +import paddle_serving_client.io as serving_io +serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None) +``` + + + +## Serving 部署 +生成完的模型可直接用于服务化推理,服务端使用和客户端使用。 + + + +**一.服务端部署示例** + +示例一:C++ Serving 启动服务 +``` +python3 -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_id 0 +``` + +示例二:Python Pipeline 启动服务,在 `config.yml` 中指定模型路径 +``` +op: + det: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 6 + + #当op配置没有server_endpoints时,从local_service_conf读取本地服务配置 + local_service_conf: + #client类型,包括brpc, grpc和local_predictor.local_predictor不启动Serving服务,进程内预测 + client_type: local_predictor + + #det模型路径 + model_config: ocr_det_model + + #Fetch结果列表,以client_config中fetch_var的alias_name为准 + fetch_list: ["save_infer_model/scale_0.tmp_1"] + + # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu + device_type: 0 +``` + + + +**二.客户端部署示例** + +通过 `client` 对象的 `load_client_config` 接口加载模型配置信息 +``` +from paddle_serving_client import Client +from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop +from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize + +client = Client() +client.load_client_config( + "serving_client/serving_client_conf.prototxt") +client.connect(["127.0.0.1:9393"]) + +seq = Sequential([ + File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)), + Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True) +]) + +image_file = "daisy.jpg" +img = seq(image_file) +fetch_map = client.predict(feed={"inputs": img}, fetch=["save_infer_model/scale_0.tmp_0"]) +``` diff --git a/doc/Offical_Docs/6-0_C++_Serving_Advanced_Introduction_CN.md b/doc/Offical_Docs/6-0_C++_Serving_Advanced_Introduction_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..23d57162324ae81ba6077c8a2afd22f6a0b4878a --- /dev/null +++ b/doc/Offical_Docs/6-0_C++_Serving_Advanced_Introduction_CN.md @@ -0,0 +1,36 @@ +# 进阶 C++ Serving 介绍 + +## 概述 + +本文将对 C++ Serving 除基本功能之外的高级特性、性能调优等问题进行介绍和说明,本文适合以下用户: +- 想要全面了解 C++ Serving 源码 +- 想要了解模型热加载、A/B Test、加密模型推理服务等高级特性 +- 通过修改 C++ Serving 参数进行性能调优 + +## 协议 + +当您需要自行组装 Request 请求中的数据或者需要二次开发时,您可以参考[相关文档]()。 + +## 模型热加载 + +当您需要在 Server 端不停止的情况下更新模型时,您可以参考[相关文档]()。 + +## A/B Test + +当您需要将用户的请求按照一定的流量比例发送到不同的 Server 端时,您可以参考[相关文档]()。 + +## 加密模型推理服务 + +当您需要将模型加密部署到 Server 端时,您可以参考[相关文档]()。 + +## 多模型串联 + +当您需要将多个模型串联在同一个 Server 中部署时(例如 OCR 需要串联 Det 和 Rec),您可以参考该部分内容。 + +## 性能优化指南 + +当您想要对 C++ Serving 服务端进行性能调优时,您可以参考[相关文档]()。 + +## 性能指标 + +当您想要了解 C++ Serving 与竞品的性能对比数据时,您可以参考[相关文档]()。 diff --git a/doc/Offical_Docs/6-1_Cpp_Asynchronous_Framwork_CN.md b/doc/Offical_Docs/6-1_Cpp_Asynchronous_Framwork_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..37c2f2af9be8aa5efcdf7b8fe87991c406ef6d71 --- /dev/null +++ b/doc/Offical_Docs/6-1_Cpp_Asynchronous_Framwork_CN.md @@ -0,0 +1,170 @@ +# C++ Serving 异步模式 + +- [设计方案](#1) + - [网络同步线程](#1.1) + - [异步调度线程](#1.2) + - [动态批量](#1.3) +- [使用案例](#2) + - [开启同步模式](#2.1) + - [开启异步模式](#2.2) +- [性能测试](#3) + - [测试结果](#3.1) + - [测试数据](#3.2) + + + +## 设计方案 + + + +**一.同步网络线程** + +Paddle Serving 的网络框架层面是同步处理模式,即 bRPC 网络处理线程从系统内核拿到完整请求数据后( epoll 模式),在同一线程内完成业务处理,C++ Serving 
默认使用同步模式。同步模式比较简单直接,适用于模型预测时间短,或单个 Request 请求批量较大的情况。 + +

+ +

+ +Server 端线程数 N = 模型预测引擎数 N = 同时处理 Request 请求数 N,超发的 Request 请求需要等待当前线程处理结束后才能得到响应和处理。 + + + +**二.异步调度线程** + +为了提高计算芯片吞吐和计算资源利用率,C++ Serving 在调度层实现异步多线程并发合并请求,实现动态批量推理。异步模型主要适用于模型支持批量,单个 Request 请求的无批量或较小,单次预测时间较长的情况。 + +

+ +

+ +异步模式下,Server 端 N 个线程只负责接收 Request 请求,实际调用预测引擎是在异步框架的线程池中,异步框架的线程数可以由配置选项来指定。为了方便理解,我们假设每个 Request 请求批量均为1,此时异步框架会尽可能多得从请求池中取 n(n≤M)个 Request 并将其拼装为1个 Request(batch=n),调用1次预测引擎,得到1个 Response(batch = n),再将其对应拆分为 n 个 Response 作为返回结果。 + + + +**三.动态批量** + +通常,异步框架合并多个请求的前提是所有请求的 `feed var` 的维度除 batch 维度外必须是相同的。例如,以 OCR 文字识别案例中检测模型为例,A 请求的 `x` 变量的 shape 是 [1, 3, 960, 960],B 请求的 `x` 变量的 shape 是 [2, 3, 960, 960],虽然第一个维度值不相同,但第一个维度属于 `batch` 维度,因此,请求 A 和 请求 B 可以合并。C 请求的 `x` 变量的 shape 是 [1, 3, 640, 480],由于除了 `batch` 维度外还有2个维度值不同,A 和 C 不能直接合并。 + +从经验来看,当2个请求的同一个变量 shape 维度的数量相等时,通过 `padding` 补0的方式按最大 shape 值对齐即可。即 C 请求的 shape 补齐到 [1, 3, 960, 960],那么就可以与 A 和 B 请求合并了。Paddle Serving 框架实现了动态 Padding 功能补齐 shape。 + +当多个将要合并的请求中有一个 shape 值很大时,所有请求的 shape 都要按最大补齐,导致计算量成倍增长。Paddle Serving 设计了一套合并策略,满足任何一个条件均可合并: + +- 条件 1:绝对值差的字节数小于 **1024** 字节,评估补齐绝对长度 +- 条件 2:相似度的乘积大于 **50%**,评估相似度,评估补齐绝对值整体数据量比例 + +场景1:`Shape-1 = [batch, 500, 500], Shape-2 = [batch, 400, 400]`。此时,`绝对值差 = 500*500 - 400*400 = 90000` 字节,`相对误差= (400/500) * (400/500) = 0.8*0.8 = 0.64`,满足条件1,不满足条件2,触发动态 Padding。 + +场景2:`Shape-1 = [batch, 1, 1], Shape-2 = [batch, 2, 2]`。此时,`绝对值差 = 2*2 - 1*1 = 3`字节,`相对误差 = (1/2) * (1/2) = 0.5*0.5 = 0.25`,满足条件2,不满足条件1,触发动态 Padding。 + +场景3:`Shape-1 = [batch, 3, 320, 320], Shape-2 = [batch, 3, 960, 960]`。此时,`绝对值差 = 3*960*960 - 3*320*320 = 2457600`字节,`相对误差 = (3/3) * (320/960) * (320/960) = 0.3*0.3 = 0.09`,条件1和条件2均不满足,未触发动态 Padding。 + + + +## 使用案例 + + + +**一.开启同步模式** + +启动命令不使用 `--runtime_thread_num` 和 `--batch_infer_size` 时,属于同步处理模式,未开启异步模式。`--thread 16` 表示启动16个同步网络处理线程。 +``` +python3 -m paddle_serving_server.serve --model uci_housing_model --thread 16 --port 9292 +``` + + + +**二.开启异步模式** + +启动命令使用 `--runtime_thread_num 2` 和 `--batch_infer_size 32` 开启异步模式,Serving 框架会启动2个异步线程,单次合并最大批量为32,自动开启动态 Padding。 +``` +python3 -m paddle_serving_server.serve --model uci_housing_model --thread 16 --port 9292 --runtime_thread_num 4 --batch_infer_size 32 --ir_optim --gpu_multi_stream --gpu_ids 0 +``` + + + +## 性能测试 + + +- GPU:Tesla P4 7611 MiB +- CUDA:cuda11.2-cudnn8-trt8 +- Python 版本:python3.7 +- 模型:ResNet_v2_50 +- 测试数据:构造全1输入,单client请求100次,shape 范围(1, 224 ± 50, 224 ± 50) + +同步模式启动命令: +``` +python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --thread 8 --ir_optim --gpu_multi_stream --gpu_ids 1 --enable_prometheus --prometheus_port 1939 +``` + +异步模式启动命令: +``` +python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --port 9393 --thread 64 --runtime_thread_num 8 --ir_optim --gpu_multi_stream --gpu_ids 1 --enable_prometheus --prometheus_port 19393 +``` + + + +**一.测试结果** + +使用异步模式,并开启动态批量后,并发测试不同 shape 数据时,吞吐性能大幅提升。 +

+ +
+ +**二.测试数据** + +1. 同步模式 + +| client_num | batch_size |CPU_util_pre(%) |CPU_util(%) |GPU_memory(mb) |GPU_util(%) |qps(samples/s) |total count |mean(ms) |median(ms) |80 percent(ms) |90 percent(ms) |99 percent(ms) |total cost(s) |each cost(s)|infer_count_total|infer_cost_total(ms)|infer_cost_avg(ms)| +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| 1 |1 |1.30 |18.90 |2066 |71.56 |22.938 |100 |43.594 |23.516 |78.118 |78.323 |133.544 |4.4262 |4.3596 |7100.0000 |1666392.70 | 41.1081 | +| 5 |1 |2.00 |28.20 |3668 |92.57 |33.630 |500 |148.673 |39.531 |373.231 |396.306 |419.088 |15.0606 |14.8676 |7600.0000 |1739372.7480| 145.9601 | +|10 |1 |1.90 |29.80 |4202 |91.98 |34.303 |1000 |291.512 |76.728 |613.963 |632.736 |1217.863 |29.8004 |29.1516 |8600.0000 |1974147.7420| 234.7750 | +|20 |1 |4.70 |49.60 |4736 |92.63 |34.359 |2000 |582.089 |154.952 |1239.115 |1813.371 |1858.128 |59.7303 |58.2093 |12100.0000 |2798459.6330 |235.6248 | +|30 |1 |5.70 |65.70 |4736 |92.60 |34.162 |3000 |878.164 |231.121 |2391.687 |2442.744 |2499.963 |89.6546 |87.8168 |17600.0000 |4100408.9560 |236.6877 | +|40 |1 |5.40 |74.40 |5270 |92.44 |34.090 |4000 |1173.373 |306.244 |3037.038 |3070.198 |3134.894 |119.4162 |117.3377 |21600.0000 |5048139.2170 |236.9326| +|50 |1 |1.40 |64.70 |5270 |92.37 |34.031 |5000 |1469.250 |384.327 |3676.812 |3784.330 |4366.862 |149.7041 |146.9254 |26600.0000 |6236269.4230 |237.6260| +|70 |1 |3.70 |79.70 |5270 |91.89 |33.976 |7000 |2060.246 |533.439 |5429.255 |5552.704 |5661.492 |210.1008 |206.0250 |33600.0000 |7905005.9940 |238.3909| + + +2. 异步模式 - 未开启动态批量 + +| client_num | batch_size |CPU_util_pre(%) |CPU_util(%) |GPU_memory(mb) |GPU_util(%) |qps(samples/s) |total count |mean(ms) |median(ms) |80 percent(ms) |90 percent(ms) |99 percent(ms) |total cost(s) |each cost(s)|infer_count_total|infer_cost_total(ms)|infer_cost_avg(ms)| +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| 1 |1 |6.20 |13.60 |5170 |71.11 |22.894 |100 |43.677 |23.992 |78.285 |78.788 |123.542 |4.4253 |4.3679 |3695.0000 |745061.9120 |40.6655 | +| 5 |1 |6.10 |32.20 |7306 |89.54 |33.532 |500 |149.109 |43.906 |376.889 |401.999 |422.753 |15.1623 |14.9113 |4184.0000 |816834.2250 |146.7736| +|10 |1 |4.90 |43.60 |7306 |91.55 |38.136 |1000 |262.216 |75.393 |575.788 |632.016 |1247.775 |27.1019 |26.2220 |5107.0000 |1026490.3950 |227.1464| +|20 |1 |5.70 |39.60 |7306 |91.36 |58.601 |2000 |341.287 |145.774 |646.824 |994.748 |1132.979 |38.3915 |34.1291 |7461.0000 |1555234.6260 |229.9113| +|30 |1 |1.30 |45.40 |7484 |91.10 |69.008 |3000 |434.728 |204.347 |959.184 |1092.181 |1661.289 |46.3822 |43.4732 |10289.0000 |2269499.9730 |249.4257| +|40 |1 |3.10 |73.00 |7562 |91.83 |80.956 |4000 |494.091 |272.889 |966.072 |1310.011 |1851.887 |52.0609 |49.4095 |12102.0000 |2678878.2010 |225.8016| +|50 |1 |0.80 |68.00 |7522 |91.10 |83.018 |5000 |602.276 |364.064 |1058.261 |1473.051 |1671.025 |72.9869 |60.2280 |14225.0000 |3256628.2820 |272.1385| +|70 |1 |6.10 |78.40 |7584 |92.02 |65.069 |7000 |1075.777 |474.014 |2411.296 |2705.863 |3409.085 |111.6653 |107.5781 |17974.0000 |4139377.4050 |235.4626 + + + +3. 
异步模式 - 开启动态批量 + + +| client_num | batch_size |CPU_util_pre(%) |CPU_util(%) |GPU_memory(mb) |GPU_util(%) |qps(samples/s) |total count |mean(ms) |median(ms) |80 percent(ms) |90 percent(ms) |99 percent(ms) |total cost(s) |each cost(s)|infer_count_total|infer_cost_total(ms)|infer_cost_avg(ms)| +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| 1 |1 |1.20 |13.30 |6048 |70.07 |22.417 |100 |44.606 |24.486 |78.365 |78.707 |139.349 |4.5201 |4.4608 |1569.0000 |462418.6390 |41.7646 | +| 5 |1 |1.20 |50.80 |7116 |87.37 |31.106 |500 |160.740 |42.506 |414.903 |458.841 |481.112 |16.3525 |16.0743 |2059.0000 |539439.3300 |157.1851 +|10 |1 |0.80 |26.20 |7264 |88.74 |37.417 |1000 |267.254 |79.452 |604.451 |686.477 |1345.528 |27.9848 |26.7258 |2950.0000 |752428.0570 |239.0446| +|20 |1 |1.50 |32.80 |7264 |89.52 |70.641 |2000 |283.117 |133.441 |516.066 |652.089 |1274.957 |33.0280 |28.3121 |4805.0000 |1210814.5610 |260.5873| +|30 |1 |0.90 |59.10 |7348 |89.57 |84.894 |3000 |353.380 |217.385 |613.587 |757.829 |1277.283 |40.7093 |35.3384 |6924.0000 |1817515.1710 |276.3695| +|40 |1 |1.30 |57.30 |7356 |89.30 |99.853 |4000 |400.584 |204.425 |666.015 |1031.186 |1380.650 |49.4807 |40.0588 |8104.0000 |2200137.0060 |324.2558| +|50 |1 |1.50 |50.60 |7578 |89.04 |121.545 |5000 |411.364 |331.118 |605.809 |874.543 |1285.650 |48.2343 |41.1369 |9350.0000 |2568777.6400 |295.8593| +|70 |1 |3.80 |83.20 |7602 |89.59 |133.568 |7000 |524.073 |382.653 |799.463 |1202.179 |1576.809 |57.2885 |52.4077 |10761.0000 |3013600.9670 |315.2540| + + + diff --git a/doc/Offical_Docs/6-1_Inference_Protocols_CN.md b/doc/Offical_Docs/6-1_Inference_Protocols_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..094dcbb0be4d5e4675c8b72cd92be3361647c4bb --- /dev/null +++ b/doc/Offical_Docs/6-1_Inference_Protocols_CN.md @@ -0,0 +1,245 @@ +# Inference Protocols + +C++ Serving 基于 BRPC 进行服务构建,支持 BRPC、GRPC、RESTful 请求。请求数据为 protobuf 格式,详见 `core/general-server/proto/general_model_service.proto`。本文介绍构建请求以及解析结果的方法。 + +## Tensor + +**一.Tensor 定义** + +Tensor 可以装载多种类型的数据,是 Request 和 Response 的基础单元。Tensor 的具体定义如下: + +```protobuf +message Tensor { + // VarType: INT64 + repeated int64 int64_data = 1; + + // VarType: FP32 + repeated float float_data = 2; + + // VarType: INT32 + repeated int32 int_data = 3; + + // VarType: FP64 + repeated double float64_data = 4; + + // VarType: UINT32 + repeated uint32 uint32_data = 5; + + // VarType: BOOL + repeated bool bool_data = 6; + + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated float complex64_data = 7; + + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated double complex128_data = 8; + + // VarType: STRING + repeated string data = 9; + + // Element types: + // 0 => INT64 + // 1 => FP32 + // 2 => INT32 + // 3 => FP64 + // 4 => INT16 + // 5 => FP16 + // 6 => BF16 + // 7 => UINT8 + // 8 => INT8 + // 9 => BOOL + // 10 => COMPLEX64 + // 11 => COMPLEX128 + // 20 => STRING + int32 elem_type = 10; + + // Shape of the tensor, including batch dimensions. + repeated int32 shape = 11; + + // Level of data(LOD), support variable length data, only for fetch tensor + // currently. + repeated int32 lod = 12; + + // Correspond to the variable 'name' in the model description prototxt. + string name = 13; + + // Correspond to the variable 'alias_name' in the model description prototxt. 
+ string alias_name = 14; // get from the Model prototxt + + // VarType: FP16, INT16, INT8, BF16, UINT8 + bytes tensor_content = 15; +}; +``` + +- elem_type:数据类型,当前支持 FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16 + +|elem_type|类型| +|---------|----| +|0|INT64| +|1|FLOAT32| +|2|INT32| +|3|FP64| +|4|INT16| +|5|FP16| +|6|BF16| +|7|UINT8| +|8|INT8| + +- shape:数据维度 +- lod:lod 信息,LoD(Level-of-Detail) Tensor 是 Paddle 的高级特性,是对 Tensor 的一种扩充,用于支持更自由的数据输入。Lod 相关原理介绍,请参考[相关文档](../LOD_CN.md) +- name/alias_name: 名称及别名,与模型配置对应 + +**二.构建 Tensor 数据** + +1. FLOAT32 类型 Tensor + +```C +// 原始数据 +std::vector float_data; +Tensor *tensor = new Tensor; +// 设置维度,可以设置多维 +for (uint32_t j = 0; j < float_shape.size(); ++j) { + tensor->add_shape(float_shape[j]); +} +// 设置 LOD 信息 +for (uint32_t j = 0; j < float_lod.size(); ++j) { + tensor->add_lod(float_lod[j]); +} +// 设置类型、名称及别名 +tensor->set_elem_type(1); +tensor->set_name(name); +tensor->set_alias_name(alias_name); +// 拷贝数据 +int total_number = float_data.size(); +tensor->mutable_float_data()->Resize(total_number, 0); +memcpy(tensor->mutable_float_data()->mutable_data(), float_datadata(), total_number * sizeof(float)); +``` + +2. INT8 类型 Tensor + +```C +// 原始数据 +std::string string_data; +Tensor *tensor = new Tensor; +for (uint32_t j = 0; j < string_shape.size(); ++j) { + tensor->add_shape(string_shape[j]); +} +for (uint32_t j = 0; j < string_lod.size(); ++j) { + tensor->add_lod(string_lod[j]); +} +tensor->set_elem_type(8); +tensor->set_name(name); +tensor->set_alias_name(alias_name); +tensor->set_tensor_content(string_data); +``` + +## Request + +**一.Request 定义** + +Request 为客户端需要发送的请求数据,其以 Tensor 为基础数据单元,并包含了额外的请求信息。定义如下: + +```protobuf +message Request { + repeated Tensor tensor = 1; + repeated string fetch_var_names = 2; + bool profile_server = 3; + uint64 log_id = 4; +}; +``` + +- fetch_vat_names: 需要获取的输出数据名称,在GeneralResponseOP会根据该列表进行过滤.请参考模型文件serving_client_conf.prototxt中的`fetch_var`字段下的`alias_name`。 +- profile_server: 调试参数,打开时会输出性能信息 +- log_id: 请求ID + +**二.构建 Request** + +1. Protobuf 形式 + +当使用 BRPC 或 GRPC 进行请求时,使用 protobuf 形式数据,构建方式如下: + +```C +Request req; +req.set_log_id(log_id); +for (auto &name : fetch_name) { + req.add_fetch_var_names(name); +} +// 添加Tensor +Tensor *tensor = req.add_tensor(); +... +``` +2. 
Json 形式 + +当使用 RESTful 请求时,可以使用 Json 形式数据,具体格式如下: + +```Json +{"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"name":"x","alias_name":"x","shape":[1,13]}],"fetch_var_names":["price"],"log_id":0} +``` + +## Response + +**一.Response 定义** + +Response 为服务端返回给客户端的结果,包含了 Tensor 数据、错误码、错误信息等。定义如下: + +```protobuf +message Response { + repeated ModelOutput outputs = 1; + repeated int64 profile_time = 2; + // Error code + int32 err_no = 3; + + // Error messages + string err_msg = 4; +}; + +message ModelOutput { + repeated Tensor tensor = 1; + string engine_name = 2; +} +``` + +- profile_time:当设置 request->set_profile_server(true) 时,会返回性能信息 +- err_no:错误码,详见`core/predictor/common/constant.h` +- err_msg:错误信息,详见`core/predictor/common/constant.h` +- engine_name:输出节点名称 + +|err_no|err_msg| +|---------|----| +|0|OK| +|-5000|"Paddle Serving Framework Internal Error."| +|-5001|"Paddle Serving Memory Alloc Error."| +|-5002|"Paddle Serving Array Overflow Error."| +|-5100|"Paddle Serving Op Inference Error."| + +**二.读取 Response 数据** + +```C +uint32_t model_num = res.outputs_size(); +for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) { + std::string engine_name = output.engine_name(); + int idx = 0; + // 读取 tensor 维度 + int shape_size = output.tensor(idx).shape_size(); + for (int i = 0; i < shape_size; ++i) { + shape[i] = output.tensor(idx).shape(i); + } + // 读取 LOD 信息 + int lod_size = output.tensor(idx).lod_size(); + if (lod_size > 0) { + lod.resize(lod_size); + for (int i = 0; i < lod_size; ++i) { + lod[i] = output.tensor(idx).lod(i); + } + } + // 读取 float 数据 + int size = output.tensor(idx).float_data_size(); + float_data = std::vector( + output.tensor(idx).float_data().begin(), + output.tensor(idx).float_data().begin() + size); + // 读取 int8 数据 + string_data = output.tensor(idx).tensor_content(); +} +``` diff --git a/doc/Offical_Docs/6-2_Cpp_Serving_Protocols_CN.md b/doc/Offical_Docs/6-2_Cpp_Serving_Protocols_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..4b31be083a173a6f2ad641844e9d37a381a161d7 --- /dev/null +++ b/doc/Offical_Docs/6-2_Cpp_Serving_Protocols_CN.md @@ -0,0 +1,290 @@ +# C++ Serving 通讯协议 + +- [网络框架](#0) +- [Tensor](#1) + - [1.1 构建 FLOAT32 Tensor](#1.1) + - [1.2 构建 STRING Tensor](#1.2) +- [Request](#2) + - [2.1 构建 Protobuf Request](#2.1) + - [2.2 构建 Json Request](#2.2) +- [Response](#3) + - [3.1 读取 Response 数据](#3.1) + + + +## 网络框架 + +C++ Serving 基于 [bRPC](https://github.com/apache/incubator-brpc) 网络框架构建服务,支持 bRPC、gRPC 和 RESTful 协议请求。不限于开发语言和框架,甚至 `curl` 方式,只要按照上述协议封装数据并发送,Server 就能够接收、处理和返回结果。 + +对于支持的各种协议我们提供了部分的 Client SDK 示例供用户参考和使用,用户也可以根据自己的需求去开发新的 Client SDK,也欢迎用户添加其他语言/协议(例如 GRPC-Go、GRPC-C++ HTTP2-Go、HTTP2-Java 等)Client SDK 到我们的仓库供其他开发者借鉴和参考。 + +| 通信协议 | 速度 | 是否支持 | 是否提供Client SDK | +|-------------|-----|---------|-------------------| +| bRPC | 最快 | 支持 | [C++]、[Python(Pybind方式)] | +| HTTP 2.0 + Proto | 快 | 不支持 | | +| gRPC | 较快 | 支持 | [Java]、[Python] | +| HTTP 1.1 + Proto | 较快 | 支持 | [Java]、[Python] | +| HTTP 1.1 + Json | 慢 | 支持 | [Java]、[Python]、[Curl] | + + +C++ Serving 请求和应答的数据格式为 protobuf,重要的结构有以下3个: + + + +## Tensor + +[Tensor](https://github.com/PaddlePaddle/Serving/blob/develop/core/general-server/proto/general_model_service.proto#L22) 可以装载多种类型的数据,是 Request 和 Response 的基础单元。Tensor 的定义如下: + +```protobuf +message Tensor { + // VarType: INT64 + repeated int64 int64_data = 1; + + // VarType: FP32 + repeated float float_data = 2; + + // VarType: INT32 + repeated int32 
int_data = 3; + + // VarType: FP64 + repeated double float64_data = 4; + + // VarType: UINT32 + repeated uint32 uint32_data = 5; + + // VarType: BOOL + repeated bool bool_data = 6; + + // (No support)VarType: COMPLEX64, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated float complex64_data = 7; + + // (No support)VarType: COMPLEX128, 2x represents the real part, 2x+1 + // represents the imaginary part + repeated double complex128_data = 8; + + // VarType: STRING + repeated string data = 9; + + // Element types: + // 0 => INT64 + // 1 => FP32 + // 2 => INT32 + // 3 => FP64 + // 4 => INT16 + // 5 => FP16 + // 6 => BF16 + // 7 => UINT8 + // 8 => INT8 + // 9 => BOOL + // 10 => COMPLEX64 + // 11 => COMPLEX128 + // 20 => STRING + int32 elem_type = 10; + + // Shape of the tensor, including batch dimensions. + repeated int32 shape = 11; + + // Level of data(LOD), support variable length data, only for fetch tensor + // currently. + repeated int32 lod = 12; + + // Correspond to the variable 'name' in the model description prototxt. + string name = 13; + + // Correspond to the variable 'alias_name' in the model description prototxt. + string alias_name = 14; // get from the Model prototxt + + // VarType: FP16, INT16, INT8, BF16, UINT8 + bytes tensor_content = 15; +}; +``` + +Tensor 结构中重要成员 `elem_type`、`shape`、`lod` 和 `name/alias_name`。 +- name/alias_name: 名称及别名,与模型配置对应 +- elem_type:数据类型,当前支持FLOAT32, INT64, INT32, UINT8, INT8, FLOAT16 +- shape:数据维度 +- lod:变长结构 LoD(Level-of-Detail) Tensor 是 Paddle 的高级特性,是对 Tensor 的一种扩充,用于支持更自由的数据输入。详见[LOD](../LOD_CN.md) + +|elem_type|类型| +|---------|----| +|0|INT64| +|1|FLOAT32| +|2|INT32| +|3|FP64| +|4|INT16| +|5|FP16| +|6|BF16| +|7|UINT8| +|8|INT8| +|9|BOOL| +|10|COMPLEX64| +|11|COMPLEX128 +|20|STRING| + + + +**一.构建 FLOAT32 Tensor** + +创建 Tensor 对象,通过 `mutable_float_data::Resize()` 设置 FLOAT32 类型数据长度,通过 memcpy 函数拷贝数据。 +```C +// 原始数据 +std::vector float_data; +Tensor *tensor = new Tensor; +// 设置维度,可以设置多维 +for (uint32_t j = 0; j < float_shape.size(); ++j) { + tensor->add_shape(float_shape[j]); +} +// 设置LOD信息 +for (uint32_t j = 0; j < float_lod.size(); ++j) { + tensor->add_lod(float_lod[j]); +} +// 设置类型、名称及别名 +tensor->set_elem_type(1); +tensor->set_name(name); +tensor->set_alias_name(alias_name); +// 拷贝数据 +int total_number = float_data.size(); +tensor->mutable_float_data()->Resize(total_number, 0); +memcpy(tensor->mutable_float_data()->mutable_data(), float_data.data(), total_number * sizeof(float)); +``` + + + +**二.构建 STRING Tensor** + +创建 Tensor 对象,通过 `set_tensor_content` 设置 string 类型数据。 +```C +// 原始数据 +std::string string_data; +Tensor *tensor = new Tensor; +for (uint32_t j = 0; j < string_shape.size(); ++j) { + tensor->add_shape(string_shape[j]); +} +for (uint32_t j = 0; j < string_lod.size(); ++j) { + tensor->add_lod(string_lod[j]); +} +tensor->set_elem_type(8); +tensor->set_name(name); +tensor->set_alias_name(alias_name); +tensor->set_tensor_content(string_data); +``` + + + +## Request + +Request 为客户端需要发送的请求数据,其以 Tensor 为基础数据单元,并包含了额外的请求信息。定义如下: + +```protobuf +message Request { + repeated Tensor tensor = 1; + repeated string fetch_var_names = 2; + bool profile_server = 3; + uint64 log_id = 4; +}; +``` + +- fetch_vat_names: 需要获取的输出数据名称,在 `GeneralResponseOP` 会根据该列表进行过滤.请参考模型文件 `serving_client_conf.prototxt` 中的 `fetch_var` 字段下的 `alias_name`。 +- profile_server: 调试参数,打开时会输出性能信息 +- log_id: 请求ID + +当使用 bRPC 或 gRPC 进行请求时,使用 protobuf 或 Json 格式请求数据。 + + + +**一.构建 Protobuf Request** + +创建 Request 对象,通过 `add_tensor` 接口来设置 Tensor。 +```C +Request 
req; +req.set_log_id(log_id); +for (auto &name : fetch_name) { + req.add_fetch_var_names(name); +} +// 添加Tensor +Tensor *tensor = req.add_tensor(); +... +``` + + +**二.构建 Json Request** + +当使用 RESTful 请求时,可以使用 Json 格式数据,示例如下: + +```JSON +{"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"name":"x","alias_name":"x","shape":[1,13]}],"fetch_var_names":["price"],"log_id":0} +``` + +可参考示例,不用修改整体结构,仅需修改数据类型和数据。 + + + +## Response + +Response 为服务端返回给客户端的结果,包含了 Tensor 数据、错误码、错误信息等。定义如下: + +```protobuf +message Response { + repeated ModelOutput outputs = 1; + repeated int64 profile_time = 2; + // Error code + int32 err_no = 3; + + // Error messages + string err_msg = 4; +}; + +message ModelOutput { + repeated Tensor tensor = 1; + string engine_name = 2; +} +``` + +Response 结构中核心成员: +- profile_time:当设置 `request->set_profile_server(true)` 时,会返回性能信息 +- err_no:错误码 +- err_msg:错误信息 +- engine_name:输出节点名称 + +|err_no|err_msg| +|---------|----| +|0|OK| +|-5000|"Paddle Serving Framework Internal Error."| +|-5001|"Paddle Serving Memory Alloc Error."| +|-5002|"Paddle Serving Array Overflow Error."| +|-5100|"Paddle Serving Op Inference Error."| + + + +**一.读取 Response 数据** + +读取 Response 对象中 Tensor 数据示例如下 +```C +Response res; +uint32_t model_num = res.outputs_size(); +for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) { + std::string engine_name = output.engine_name(); + int idx = 0; + // 读取tensor维度 + int shape_size = output.tensor(idx).shape_size(); + for (int i = 0; i < shape_size; ++i) { + shape[i] = output.tensor(idx).shape(i); + } + // 读取LOD信息 + int lod_size = output.tensor(idx).lod_size(); + if (lod_size > 0) { + lod.resize(lod_size); + for (int i = 0; i < lod_size; ++i) { + lod[i] = output.tensor(idx).lod(i); + } + } + // 读取float数据 + int size = output.tensor(idx).float_data_size(); + float_data = std::vector( + output.tensor(idx).float_data().begin(), + output.tensor(idx).float_data().begin() + size); + // 读取int8数据 + string_data = output.tensor(idx).tensor_content(); +} +``` diff --git a/doc/Offical_Docs/6-2_Hot_Loading_CN.md b/doc/Offical_Docs/6-2_Hot_Loading_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..d99494dcf060652a39b60db0c11ba1eeeea2a37c --- /dev/null +++ b/doc/Offical_Docs/6-2_Hot_Loading_CN.md @@ -0,0 +1,230 @@ +# Paddle Serving 中的模型热加载 + +## 背景 + +在实际的工业场景下,通常是远端定期不间断产出模型,线上服务端需要在服务不中断的情况下拉取新模型对旧模型进行更新迭代。 + +## Server Monitor + +Paddle Serving 提供了一个自动监控脚本,远端地址更新模型后会拉取新模型更新本地模型,同时更新本地模型文件夹中的时间戳文件 `fluid_time_stamp` 实现热加载。 + +目前支持下面几种类型的远端监控 Monitor: + +| Monitor类型 | 描述 | 特殊选项 | +| :---------: | :----------------------------------------------------------: | :----------------------------------------------------------: | +| general | 远端无认证,可以通过 `wget` 直接访问下载文件(如无需认证的FTP,BOS等) | `general_host` 通用远端host | +| hdfs/afs(HadoopMonitor) | 远端为 HDFS 或 AFS,通过 Hadoop-Client 执行相关命令 | `hadoop_bin` Hadoop 二进制的路径
`fs_name` Hadoop fs_name,默认为空
`fs_ugi` Hadoop fs_ugi,默认为空 |
+| ftp | 远端为 FTP,通过 Python 标准库 `ftplib` 进行相关访问(`ftplib` 为标准库模块,无需额外安装) | `ftp_host` FTP host<br>
`ftp_port` FTP port
`ftp_username` FTP username,默认为空
`ftp_password` FTP password,默认为空 | + +| Monitor通用选项 | 描述 | 默认值 | +| :--------------------: | :----------------------------------------------------------: | :--------------------: | +| `type` | 指定 Monitor 类型 | 无 | +| `remote_path` | 指定远端的基础路径 | 无 | +| `remote_model_name` | 指定远端需要拉取的模型名 | 无 | +| `remote_donefile_name` | 指定远端标志模型更新完毕的 donefile 文件名 | 无 | +| `local_path` | 指定本地工作路径 | 无 | +| `local_model_name` | 指定本地模型名 | 无 | +| `local_timestamp_file` | 指定本地用于热加载的时间戳文件,该文件被认为在 `local_path/local_model_name` 下。 | `fluid_time_file` | +| `local_tmp_path` | 指定本地存放临时文件的文件夹路径,若不存在则自动创建。 | `_serving_monitor_tmp` | +| `interval` | 指定轮询间隔时间,单位为秒。 | `10` | +| `unpacked_filename` | Monitor 支持 tarfile 打包的远程模型。如果远程模型是打包格式,则需要设置该选项来告知 Monitor 解压后的文件名。 | `None` | +| `debug` | 如果添加 `--debug` 选项,则输出更详细的中间信息。 | 默认不添加该选项 | + +下面通过 HadoopMonitor 示例来展示 Paddle Serving 的模型热加载功能。 + +## HadoopMonitor 示例 + +示例中在 `product_path` 中生产模型上传至 hdfs,在 `server_path` 中模拟服务端模型热加载: + +```shell +. +├── product_path +└── server_path +``` + +**一.生产模型** + +在 `product_path` 下运行下面的 Python 代码生产模型(运行前需要修改 hadoop 相关的参数),每隔 60 秒会产出 Boston 房价预测模型的打包文件 `uci_housing.tar.gz` 并上传至 hdfs 的`/`路径下,上传完毕后更新时间戳文件 `donefile` 并上传至 hdfs 的`/`路径下。 + +```python +import os +import sys +import time +import tarfile +import paddle +import paddle.fluid as fluid +import paddle_serving_client.io as serving_io + +train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.uci_housing.train(), buf_size=500), + batch_size=16) + +test_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.uci_housing.test(), buf_size=500), + batch_size=16) + +x = fluid.data(name='x', shape=[None, 13], dtype='float32') +y = fluid.data(name='y', shape=[None, 1], dtype='float32') + +y_predict = fluid.layers.fc(input=x, size=1, act=None) +cost = fluid.layers.square_error_cost(input=y_predict, label=y) +avg_loss = fluid.layers.mean(cost) +sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.01) +sgd_optimizer.minimize(avg_loss) + +place = fluid.CPUPlace() +feeder = fluid.DataFeeder(place=place, feed_list=[x, y]) +exe = fluid.Executor(place) +exe.run(fluid.default_startup_program()) + +def push_to_hdfs(local_file_path, remote_path): + afs = 'afs://***.***.***.***:***' # User needs to change + uci = '***,***' # User needs to change + hadoop_bin = '/path/to/haddop/bin' # User needs to change + prefix = '{} fs -Dfs.default.name={} -Dhadoop.job.ugi={}'.format(hadoop_bin, afs, uci) + os.system('{} -rmr {}/{}'.format( + prefix, remote_path, local_file_path)) + os.system('{} -put {} {}'.format( + prefix, local_file_path, remote_path)) + +name = "uci_housing" +for pass_id in range(30): + for data_train in train_reader(): + avg_loss_value, = exe.run(fluid.default_main_program(), + feed=feeder.feed(data_train), + fetch_list=[avg_loss]) + # Simulate the production model every other period of time + time.sleep(60) + model_name = "{}_model".format(name) + client_name = "{}_client".format(name) + serving_io.save_model(model_name, client_name, + {"x": x}, {"price": y_predict}, + fluid.default_main_program()) + # Packing model + tar_name = "{}.tar.gz".format(name) + tar = tarfile.open(tar_name, 'w:gz') + tar.add(model_name) + tar.close() + + # Push packaged model file to hdfs + push_to_hdfs(tar_name, '/') + + # Generate donefile + donefile_name = 'donefile' + os.system('touch {}'.format(donefile_name)) + + # Push donefile to hdfs + push_to_hdfs(donefile_name, '/') +``` + +hdfs 上的文件如下列所示: + +```bash +# hadoop fs -ls / +Found 2 items +-rw-r--r-- 1 root supergroup 0 2020-04-02 02:54 /donefile 
+-rw-r--r-- 1 root supergroup 2101 2020-04-02 02:54 /uci_housing.tar.gz +``` + +**二.服务端加载模型** + +进入 `server_path` 文件夹。 + +1. 用初始模型启动 Server 端 + +这里使用预训练的 Boston 房价预测模型作为初始模型: + +```shell +wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz +tar -xzf uci_housing.tar.gz +``` + +启动 Server 端: + +```shell +python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 +``` + +2. 执行监控程序 + +用下面的命令来执行 HDFS 监控程序: + +```shell +python -m paddle_serving_server.monitor \ + --type='hdfs' --hadoop_bin='/hadoop-3.1.2/bin/hadoop' \ + --remote_path='/' --remote_model_name='uci_housing.tar.gz' \ + --remote_donefile_name='donefile' --local_path='.' \ + --local_model_name='uci_housing_model' --local_timestamp_file='fluid_time_file' \ + --local_tmp_path='_tmp' --unpacked_filename='uci_housing_model' --debug +``` + +上面代码通过轮询方式监控远程 HDFS 地址`/`的时间戳文件`/donefile`,当时间戳变更则认为远程模型已经更新,将远程打包模型`/uci_housing.tar.gz`拉取到本地临时路径`./_tmp/uci_housing.tar.gz`下,解包出模型文件`./_tmp/uci_housing_model`后,更新本地模型`./uci_housing_model`以及Paddle Serving的时间戳文件`./uci_housing_model/fluid_time_file`。 + +预计输出如下: + +```shell +2020-04-02 10:12 INFO [monitor.py:85] _hadoop_bin: /hadoop-3.1.2/bin/hadoop +2020-04-02 10:12 INFO [monitor.py:85] _fs_name: +2020-04-02 10:12 INFO [monitor.py:85] _fs_ugi: +2020-04-02 10:12 INFO [monitor.py:209] AFS prefix cmd: /hadoop-3.1.2/bin/hadoop fs +2020-04-02 10:12 INFO [monitor.py:85] _remote_path: / +2020-04-02 10:12 INFO [monitor.py:85] _remote_model_name: uci_housing.tar.gz +2020-04-02 10:12 INFO [monitor.py:85] _remote_donefile_name: donefile +2020-04-02 10:12 INFO [monitor.py:85] _local_model_name: uci_housing_model +2020-04-02 10:12 INFO [monitor.py:85] _local_path: . +2020-04-02 10:12 INFO [monitor.py:85] _local_timestamp_file: fluid_time_file +2020-04-02 10:12 INFO [monitor.py:85] _local_tmp_path: _tmp +2020-04-02 10:12 INFO [monitor.py:85] _interval: 10 +2020-04-02 10:12 DEBUG [monitor.py:214] check cmd: /hadoop-3.1.2/bin/hadoop fs -ls /donefile 2>/dev/null +2020-04-02 10:12 DEBUG [monitor.py:216] resp: -rw-r--r-- 1 root supergroup 0 2020-04-02 10:11 /donefile +2020-04-02 10:12 INFO [monitor.py:138] doneilfe(donefile) changed. +2020-04-02 10:12 DEBUG [monitor.py:233] pull cmd: /hadoop-3.1.2/bin/hadoop fs -get /uci_housing.tar.gz _tmp/uci_housing.tar.gz 2>/dev/null +2020-04-02 10:12 INFO [monitor.py:144] pull remote model(uci_housing.tar.gz). +2020-04-02 10:12 INFO [monitor.py:98] unpack remote file(uci_housing.tar.gz). +2020-04-02 10:12 DEBUG [monitor.py:108] remove packed file(uci_housing.tar.gz). +2020-04-02 10:12 INFO [monitor.py:110] using unpacked filename: uci_housing_model. +2020-04-02 10:12 DEBUG [monitor.py:175] update model cmd: cp -r _tmp/uci_housing_model/* ./uci_housing_model +2020-04-02 10:12 INFO [monitor.py:152] update local model(uci_housing_model). +2020-04-02 10:12 DEBUG [monitor.py:184] update timestamp cmd: touch ./uci_housing_model/fluid_time_file +2020-04-02 10:12 INFO [monitor.py:157] update model timestamp(fluid_time_file). +2020-04-02 10:12 INFO [monitor.py:161] sleep 10s. +2020-04-02 10:12 DEBUG [monitor.py:214] check cmd: /hadoop-3.1.2/bin/hadoop fs -ls /donefile 2>/dev/null +2020-04-02 10:12 DEBUG [monitor.py:216] resp: -rw-r--r-- 1 root supergroup 0 2020-04-02 10:11 /donefile +2020-04-02 10:12 INFO [monitor.py:161] sleep 10s. +``` + +3. 查看 Server 日志 + +通过下面命令查看 Server 的运行日志: + +```shell +tail -f log/serving.INFO +``` + +日志中显示模型已经被热加载: + +```shell +I0330 09:38:40.087316 7361 server.cpp:150] Begin reload framework... 
+W0330 09:38:40.087399 7361 infer.h:656] Succ reload version engine: 18446744073709551615 +I0330 09:38:40.087414 7361 manager.h:131] Finish reload 1 workflow(s) +I0330 09:38:50.087535 7361 server.cpp:150] Begin reload framework... +W0330 09:38:50.087641 7361 infer.h:250] begin reload model[uci_housing_model]. +I0330 09:38:50.087972 7361 infer.h:66] InferEngineCreationParams: model_path = uci_housing_model, enable_memory_optimization = 0, static_optimization = 0, force_update_static_cache = 0 +I0330 09:38:50.088027 7361 analysis_predictor.cc:88] Profiler is deactivated, and no profiling report will be generated. +I0330 09:38:50.088393 7361 analysis_predictor.cc:841] MODEL VERSION: 1.7.1 +I0330 09:38:50.088413 7361 analysis_predictor.cc:843] PREDICTOR VERSION: 1.6.3 +I0330 09:38:50.089519 7361 graph_pattern_detector.cc:96] --- detected 1 subgraphs +I0330 09:38:50.090925 7361 analysis_predictor.cc:470] ======= optimize end ======= +W0330 09:38:50.090986 7361 infer.h:472] Succ load common model[0x7fc83c06abd0], path[uci_housing_model]. +I0330 09:38:50.091022 7361 analysis_predictor.cc:88] Profiler is deactivated, and no profiling report will be generated. +W0330 09:38:50.091050 7361 infer.h:509] td_core[0x7fc83c0ad770] clone model from pd_core[0x7fc83c06abd0] succ, cur_idx[0]. +... +W0330 09:38:50.091784 7361 infer.h:489] Succ load clone model, path[uci_housing_model] +W0330 09:38:50.091794 7361 infer.h:656] Succ reload version engine: 18446744073709551615 +I0330 09:38:50.091820 7361 manager.h:131] Finish reload 1 workflow(s) +I0330 09:39:00.091987 7361 server.cpp:150] Begin reload framework... +W0330 09:39:00.092161 7361 infer.h:656] Succ reload version engine: 18446744073709551615 +I0330 09:39:00.092177 7361 manager.h:131] Finish reload 1 workflow(s) +``` diff --git a/doc/Offical_Docs/6-5_Cpp_ABTest_CN.md b/doc/Offical_Docs/6-5_Cpp_ABTest_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..ff5c7145f8a3680e66e1bf7b82292c8a78059be4 --- /dev/null +++ b/doc/Offical_Docs/6-5_Cpp_ABTest_CN.md @@ -0,0 +1,198 @@ +# C++ Serving ABTest + +- [功能设计](#1) +- [使用案例](#2) + - [1.1 安装 Paddle Serving Wheels](#2.1) + - [1.2 下载多个模型并保存模型参数](#2.2) + - [1.3 启动 A,B,C 3个服务](#2.3) + - [1.4 客户端注册 A,B,C 服务端地址](#2.4) + - [1.5 启动客户端并验证结果](#2.5) + +ABTest 是一种功能测试方案,一般是为同一个产品目标制定多种方案,让一部分用户使用 A 方案,另一部分用户使用 B 或 C 方案,根据测试效果,如点击率、转化率等来评价方案的优劣。 + +模型服务化部署框架中,ABTest 属于一个重要的基础功能,为模型迭代升级提供实验环境。Paddle Serving 的 PYTHON SDK 中实现 ABTest 功能,为用户提供简单易用功能测试环境。 + + + +## 功能设计 + +Paddle Serving 的 ABTest 功能是基于 PYTHON SDK 和 多个服务端构成。每个服务端加载不同模型,在客户端上注册多个服务端地址和访问比例,最终确定访问。 + +
+ +
+ +## 使用案例 + +以 [imdb](https://github.com/PaddlePaddle/Serving/tree/develop/examples/C%2B%2B/imdb) 示例为例,介绍 ABTest 的使用,部署有5个步骤: + +1. 安装 Paddle Serving Wheels +2. 下载多个模型并保存模型参数 +3. 启动 A,B,C 3个服务 +4. 客户端注册 A,B,C 服务端地址 +5. 启动客户端并验证结果 + + + +**一.安装 Paddle Serving Wheels** + +使用 ABTest 功能的前提是使用 PYTHON SDK,因此需要安装 `paddle_serving_client` 的 wheel 包。[安装方法](./2-1_Docker_Images_CN.md) 如下: + +``` +pip3 install paddle-serving-client==0.8.3 -i https://pypi.tuna.tsinghua.edu.cn/simple +``` + + + +**二.下载多个模型并保存模型参数** + +本示例已提供了一键下载脚本 `sh get_data.sh`,下载自训练的模型 `bow`、`cnn`和`lstm` 3种不同方式训练的模型。 + +``` +sh get_data.sh +``` + +3种模型的所有文件如下所示,已为用户提前保存模型参数,无需执行保存操作。 +``` +├── imdb_bow_client_conf +│   ├── serving_client_conf.prototxt +│   └── serving_client_conf.stream.prototxt +├── imdb_bow_model +│   ├── embedding_0.w_0 +│   ├── fc_0.b_0 +│   ├── fc_0.w_0 +│   ├── fc_1.b_0 +│   ├── fc_1.w_0 +│   ├── fc_2.b_0 +│   ├── fc_2.w_0 +│   ├── fluid_time_file +│   ├── __model__ +│   ├── serving_server_conf.prototxt +│   └── serving_server_conf.stream.prototxt +├── imdb_cnn_client_conf +│   ├── serving_client_conf.prototxt +│   └── serving_client_conf.stream.prototxt +├── imdb_cnn_model +│   ├── embedding_0.w_0 +│   ├── fc_0.b_0 +│   ├── fc_0.w_0 +│   ├── fc_1.b_0 +│   ├── fc_1.w_0 +│   ├── fluid_time_file +│   ├── __model__ +│   ├── sequence_conv_0.b_0 +│   ├── sequence_conv_0.w_0 +│   ├── serving_server_conf.prototxt +│   └── serving_server_conf.stream.prototxt +├── imdb_lstm_client_conf +│   ├── serving_client_conf.prototxt +│   └── serving_client_conf.stream.prototxt +├── imdb_lstm_model +│   ├── embedding_0.w_0 +│   ├── fc_0.b_0 +│   ├── fc_0.w_0 +│   ├── fc_1.b_0 +│   ├── fc_1.w_0 +│   ├── fc_2.b_0 +│   ├── fc_2.w_0 +│   ├── lstm_0.b_0 +│   ├── lstm_0.w_0 +│   ├── __model__ +│   ├── serving_server_conf.prototxt +│   └── serving_server_conf.stream.prototxt +``` + +虽然3个模型的网络结构不同,但是 `feed var` 和 `fetch_var` 都是相同的便于做 ABTest。 +``` +feed_var { + name: "words" + alias_name: "words" + is_lod_tensor: true + feed_type: 0 + shape: -1 +} +fetch_var { + name: "fc_2.tmp_2" + alias_name: "prediction" + is_lod_tensor: false + fetch_type: 1 + shape: 2 +} +``` + + + +**三.启动 A,B,C 3个服务** + +后台启动 `bow`、`cnn` 和 `lstm` 模型服务: + +```python +## 启动 bow 模型服务 +python3 -m paddle_serving_server.serve --model imdb_bow_model/ --port 9297 >/dev/null 2>&1 & + +## 启动 cnn 模型服务 +python3 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9298 >/dev/null 2>&1 & + +## 启动 lstm 模型服务 +python3 -m paddle_serving_server.serve --model imdb_lstm_model/ --port 9299 >/dev/null 2>&1 & +``` + + + +**四.客户端注册 A,B,C 服务端地址** + +使用 `paddle_serving_client` 中 `Client::add_variant(self, tag, cluster, variant_weight)` 接口注册服务标签、服务地址和权重。框架会将所有权重求和后计算每个服务的比例。本示例中,bow 服务的权重是10,cnn 服务的权重是30, lstm的权重是60,每次请求分别请求到3个服务的比例是10%、30%和60%。 + +``` +from paddle_serving_client import Client +from paddle_serving_app.reader.imdb_reader import IMDBDataset +import sys +import numpy as np + +client = Client() +client.load_client_config(sys.argv[1]) +client.add_variant("bow", ["127.0.0.1:9297"], 10) +client.add_variant("cnn", ["127.0.0.1:9298"], 30) +client.add_variant("lstm", ["127.0.0.1:9299"], 60) +client.connect() +``` +如要在结果中打印请求到了哪个服务,在 `client.predict(feed, fetch, batch, need_variant_tag, logid)` 中设置 `need_variant_tag=True`。 + + + +**五.启动客户端并验证结果** + +运行命令: +``` +head test_data/part-0 | python3.7 abtest_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab +``` + +运行结果如下,10次请求中,bow 服务2次,cnn 服务3次,lstm 服务5次,与设置的比例基本相近。 +``` +I0506 04:02:46.720135 44567 
naming_service_thread.cpp:202] brpc::policy::ListNamingService("127.0.0.1:9297"): added 1 +I0506 04:02:46.722630 44567 naming_service_thread.cpp:202] brpc::policy::ListNamingService("127.0.0.1:9298"): added 1 +I0506 04:02:46.723577 44567 naming_service_thread.cpp:202] brpc::policy::ListNamingService("127.0.0.1:9299"): added 1 +I0506 04:02:46.814075 44567 general_model.cpp:490] [client]logid=0,client_cost=9.889ms,server_cost=6.283ms. +server_tag=lstm prediction=[0.500398 0.49960205] +I0506 04:02:46.826339 44567 general_model.cpp:490] [client]logid=0,client_cost=10.261ms,server_cost=9.503ms. +server_tag=lstm prediction=[0.5007235 0.49927652] +I0506 04:02:46.828992 44567 general_model.cpp:490] [client]logid=0,client_cost=1.667ms,server_cost=0.741ms. +server_tag=bow prediction=[0.25859657 0.74140346] +I0506 04:02:46.843299 44567 general_model.cpp:490] [client]logid=0,client_cost=13.402ms,server_cost=12.827ms. +server_tag=lstm prediction=[0.50039905 0.4996009 ] +I0506 04:02:46.850219 44567 general_model.cpp:490] [client]logid=0,client_cost=5.129ms,server_cost=4.332ms. +server_tag=cnn prediction=[0.6369219 0.36307803] +I0506 04:02:46.854203 44567 general_model.cpp:490] [client]logid=0,client_cost=2.804ms,server_cost=0.782ms. +server_tag=bow prediction=[0.15088597 0.849114 ] +I0506 04:02:46.858268 44567 general_model.cpp:490] [client]logid=0,client_cost=3.292ms,server_cost=2.677ms. +server_tag=cnn prediction=[0.4608788 0.5391212] +I0506 04:02:46.869217 44567 general_model.cpp:490] [client]logid=0,client_cost=10.13ms,server_cost=9.556ms. +server_tag=lstm prediction=[0.5000269 0.49997318] +I0506 04:02:46.883790 44567 general_model.cpp:490] [client]logid=0,client_cost=13.312ms,server_cost=12.822ms. +server_tag=lstm prediction=[0.50083774 0.49916226] +I0506 04:02:46.887256 44567 general_model.cpp:490] [client]logid=0,client_cost=2.432ms,server_cost=1.812ms. 
+server_tag=cnn prediction=[0.47895813 0.52104187] + +``` diff --git a/doc/Offical_Docs/7-0_Python_Pipeline_Int_CN.md b/doc/Offical_Docs/7-0_Python_Pipeline_Int_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..71df7d608db65bde046e959b12f81b7cf379926f --- /dev/null +++ b/doc/Offical_Docs/7-0_Python_Pipeline_Int_CN.md @@ -0,0 +1,13 @@ +# Python Pipeline 框架 + +在许多深度学习框架中,模型服务化部署通常用于单模型的一键部署。但在 AI 工业大生产的背景下,端到端的单一深度学习模型不能解决复杂问题,多个深度学习模型组合使用是解决现实复杂问题的常规手段,如文字识别 OCR 服务至少需要检测和识别2种模型;视频理解服务一般需要视频抽帧、切词、音频处理、分类等多种模型组合实现。当前,通用多模型组合服务的设计和实现是非常复杂的,既要能实现复杂的模型拓扑关系,又要保证服务的高并发、高可用和易于开发和维护等。 + +Paddle Serving 实现了一套通用的多模型组合服务编程框架 Python Pipeline,不仅解决上述痛点,同时还能大幅提高 GPU 利用率,并易于开发和维护。 + +Python Pipeline 使用案例请阅读[Python Pipeline 快速部署案例](./3-2_QuickStart_Pipeline_OCR_CN.md) + +通过阅读以下内容掌握 Python Pipeline 核心功能和使用方法、高阶功能用法和性能优化指南等。 +- [Python Pipeline 框架设计](7-1_Python_Pipeline_Design_CN.md) +- [Python Pipeline 核心功能](7-2_Python_Pipeline_Senior_CN.md) +- [Python Pipeline 优化指南](7-3_Python_Pipeline_Optimize_CN.md) +- [Python Pipeline 性能指标](7-4_Python_Pipeline_Benchmark_CN.md) diff --git a/doc/Offical_Docs/7-1_Python_Pipeline_Design_CN.md b/doc/Offical_Docs/7-1_Python_Pipeline_Design_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..24cbbf0d03d7df0bca7841782d1bfaefce165003 --- /dev/null +++ b/doc/Offical_Docs/7-1_Python_Pipeline_Design_CN.md @@ -0,0 +1,514 @@ +# Python Pipeline 框架设计 + +- [目标](#1) +- [框架设计](#2) + - [2.1 网络层设计](#2.1) + - [2.2 图执行引擎层](#2.2) + - [2.3 服务日志](#2.3) + - [2.4 错误信息](#2.4) +- [自定义信息](#3) + - [3.1 自定义 Web 服务 URL](#3.1) + - [3.2 自定义服务输入和输出结构](#3.2) + - [3.3 自定义服务并发和模型配置](#3.3) + - [3.4 自定义推理过程](#3.4) + - [3.5 自定义业务错误类型](#3.5) + + + + +## 目标 +为了解决多个深度学习模型组合的复杂问题,Paddle Serving 团队设计了一个通用端到端多模型组合框架,其核心特点包括: + +1. 通用性:框架既要满足通用模型的输入类型,又要满足模型组合的复杂拓扑关系。 +2. 高性能:与常见互联网后端服务不同,深度学习模型的推理程序属于计算密集型程序,同时 GPU 等计算芯片价格昂贵,因此在平均响应时间不苛刻的场景下,计算资源占用和吞吐量指标格外重要。 +3. 高可用性:高可用的架构依赖每个服务的健壮性,服务状态可查询、异常可监控和管理是必备条件。 +4. 易于开发与调试:使用 Python 语言开发可大幅提升研发效率,运行的错误信息准确帮助开发者快速定位问题。 + + + +## 框架设计 +Python Pipeline 框架分为网络服务层和图执行引擎2部分,网络服务层处理多种网络协议请求和通用输入参数问题,图执行引擎层解决复杂拓扑关系。如下图所示 + +
+ +
+ + + +**一.网络服务层** + +网络服务层包括了 gRPC-gateway 和 gRPC Server。gPRC gateway 接收 HTTP 请求,打包成 proto 格式后转发给 gRPC Server,一套处理程序可同时处理 HTTP、gRPC 2种类型请求。 + +另外,在支持多种模型的输入输出数据类型上,使用统一的 service.proto 结构,具有更好的通用性。 + +```proto +message Request { + repeated string key = 1; + repeated string value = 2; + optional string name = 3; + optional string method = 4; + optional int64 logid = 5; + optional string clientip = 6; +}; + +message Response { + optional int32 err_no = 1; + optional string err_msg = 2; + repeated string key = 3; + repeated string value = 4; +}; +``` +Request 是输入结构,`key` 与 `value` 是配对的 string 数组。 `name` 与 `method` 对应 URL://{ip}:{port}/{name}/{method}。`logid` 和 `clientip` 便于用户串联服务级请求和自定义策略。 + +Response 是输出结构,`err_no` 和 `err_msg` 表达处理结果的正确性和错误信息,`key` 和 `value` 为结果。 + +Pipeline 服务包装了继承于 WebService 类,以 [OCR 示例](https://github.com/PaddlePaddle/Serving/tree/develop/examples/Pipeline/PaddleOCR/ocr)为例,派生出 OcrService 类,get_pipeline_response 函数内实现 DAG 拓扑关系,默认服务入口为 read_op,函数返回的 Op 为最后一个处理,此处要求最后返回的 Op 必须唯一。 + +所有服务和模型的所有配置信息在 `config.yml` 中记录,URL 的 name 字段由 OcrService 初始化定义;run_service 函数启动服务。 + +```python +class OcrService(WebService): + def get_pipeline_response(self, read_op): + det_op = DetOp(name="det", input_ops=[read_op]) + rec_op = RecOp(name="rec", input_ops=[det_op]) + return rec_op + +ocr_service = OcrService(name="ocr") +ocr_service.prepare_pipeline_config("config.yml") +ocr_service.run_service() +``` + +与网络框架相关的配置在 `config.yml` 中设置。其中 `worker_num` 表示框架主线程 gRPC 线程池工作线程数,可理解成网络同步线程并发数。 + +其次,`rpc_port` 和 `http_port` 是服务端口,可同时开启,不允许同时为空。 +``` +worker_num: 10 + +# http 和 gRPC 服务端口 +rpc_port: 9988 +http_port: 18089 +``` + + + +**二.图执行引擎层** + +图执行引擎的设计思路是基于有向无环图实现多模型组合的复杂拓扑关系,有向无环图由单节点或多节点串联、并联结构构成。 + +
+ +
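+
+作为补充,下面给出一个同时包含串联与并联结构的拓扑示意:各 Op 通过 `input_ops` 声明前继关系,即可由图执行引擎自动生成有向无环图。示例中的 `DetOp`、`ClsOp`、`RecOp` 均为示意的自定义 Op(接口用法参考 OCR 示例),并非框架内置类:
+
+```python
+from paddle_serving_server.web_service import WebService, Op
+
+class DetOp(Op):
+    pass
+
+class ClsOp(Op):
+    pass
+
+class RecOp(Op):
+    pass
+
+class DemoService(WebService):
+    def get_pipeline_response(self, read_op):
+        # read_op -> det_op 与 read_op -> cls_op 为并联分支
+        det_op = DetOp(name="det", input_ops=[read_op])
+        cls_op = ClsOp(name="cls", input_ops=[read_op])
+        # det 与 cls 的输出汇聚到 rec,构成串联的汇合节点
+        # 注意:多前继输入时需重写 rec 的 preprocess 函数处理多个输入
+        rec_op = RecOp(name="rec", input_ops=[det_op, cls_op])
+        return rec_op
+```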
+ +图执行引擎抽象归纳出2种数据结构 Op 节点和 Channel 有向边,构建一条异步流水线工作流。核心概念和设计思路如下: +- Op 节点: 可理解成1个推理模型、一个处理方法,甚至是训练前向代码,可独立运行,独立设置并发度。每个 Op 节点的计算结果放入其绑定的 Channel 中。 +- Channel 数据管道: 可理解为一个单向缓冲队列。每个 Channel 只接收上游 Op 节点的计算输出,作为下游 Op 节点的输入。 +- 工作流:根据用户定义的节点依赖关系,图执行引擎自动生成有向无环图。每条用户请求到达图执行引擎时会生成一个唯一自增 ID,通过这种唯一性绑定关系标记流水线中的不同请求。 + +Op 的设计原则: +- 单个 Op 默认的功能是根据输入的 Channel 数据,访问一个 Paddle Serving 的单模型服务,并将结果存在输出的 Channel +- 单个 Op 可以支持用户自定义,包括 preprocess,process,postprocess 三个函数都可以由用户继承和实现 +- 单个 Op 可以控制并发数,从而增加处理并发数 +- 单个 Op 可以获取多个不同 RPC 请求的数据,以实现 Auto-Batching +- Op 可以由线程或进程启动 + +其构造函数如下: + +```python +def __init__(name=None, + input_ops=[], + server_endpoints=[], + fetch_list=[], + client_config=None, + client_type=None, + concurrency=1, + timeout=-1, + retry=1, + batch_size=1, + auto_batching_timeout=None, + local_service_handler=None) +``` + +各参数含义如下: + +| 参数名 | 类型 | 含义 | +| :-------------------: | :---------: |:------------------------------------------------: | +| name | (str) | 用于标识 Op 类型的字符串,该字段必须全局唯一。 | +| input_ops | (list) | 当前 Op 的所有前继 Op 的列表。 | +| server_endpoints | (list) |远程 Paddle Serving Service 的 endpoints 列表。如果不设置该参数,认为是local_precditor模式,从local_service_conf中读取配置。 | +| fetch_list | (list) |远程 Paddle Serving Service 的 fetch 列表。 | +| client_config | (str) |Paddle Serving Service 对应的 Client 端配置文件路径。 | +| client_type | (str) |可选择brpc、grpc或local_predictor。local_predictor不启动Serving服务,进程内预测。 | +| concurrency | (int) | Op 的并发数。 | +| timeout | (int) |process 操作的超时时间,单位为毫秒。若该值小于零,则视作不超时。 | +| retry | (int) |超时重试次数。当该值为 1 时,不进行重试。 | +| batch_size | (int) |进行 Auto-Batching 的期望 batch_size 大小,由于构建 batch 可能超时,实际 batch_size 可能小于设定值,默认为 1。 | +| auto_batching_timeout | (float) |进行 Auto-Batching 构建 batch 的超时时间,单位为毫秒。batch_size > 1时,要设置auto_batching_timeout,否则请求数量不足batch_size时会阻塞等待。 | +| local_service_handler | (object) |local predictor handler,Op init() 入参赋值或在 Op init() 中创建| + +对于 Op 之间需要传输过大数据的情况,可以考虑 RAM DB 外存进行全局存储,通过在 Channel 中传递索引的 Key 来进行数据传输 + + +Channel的设计原则: +- Channel 是 Op 之间共享数据的数据结构,负责共享数据或者共享数据状态信息 +- Channel 可以支持多个OP的输出存储在同一个 Channel,同一个 Channel 中的数据可以被多个 Op 使用 + +下图为图执行引擎中 Channel 的设计,采用 input buffer 和 output buffer 进行多 Op 输入或多 Op 输出的数据对齐,中间采用一个 Queue 进行缓冲 + +
+ +
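+
+为帮助理解上图中 input buffer 按 data_id 对齐的过程,下面给出一段与框架源码无关的简化示意代码,仅演示“对齐后入队、后继 Op 消费”的思路:
+
+```python
+import queue
+
+class SimpleChannel(object):
+    """简化示意:多个前驱 Op 的输出按 data_id 对齐后,放入队列供后继 Op 消费。"""
+
+    def __init__(self, producers):
+        self._producers = set(producers)  # 前驱 Op 名称集合
+        self._input_buffer = {}           # data_id -> {op_name: data}
+        self._que = queue.Queue()         # 对齐完成的数据缓冲队列
+
+    def push(self, op_name, data_id, data):
+        bucket = self._input_buffer.setdefault(data_id, {})
+        bucket[op_name] = data
+        # 同一条请求在所有前驱 Op 上的输出都到齐后,才进入队列
+        if set(bucket.keys()) == self._producers:
+            self._que.put((data_id, self._input_buffer.pop(data_id)))
+
+    def front(self):
+        return self._que.get()
+
+# 用法示意:det、cls 两个前驱 Op 的输出按 data_id=0 对齐
+chl = SimpleChannel(producers=["det", "cls"])
+chl.push("det", data_id=0, data={"boxes": [[10, 10, 50, 50]]})
+chl.push("cls", data_id=0, data={"label": 1})
+print(chl.front())
+```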
+ + + +**三.服务日志** + +Pipeline 服务日志在当前目录的 `PipelineServingLogs` 目录下,有3种类型日志,分别是 `pipeline.log`、`pipeline.log.wf`、`pipeline.tracer`。 +- `pipeline.log` : 记录 debug & info日志信息 +- `pipeline.log.wf` : 记录 warning & error日志 +- `pipeline.tracer` : 统计各个阶段耗时、channel 堆积信息 + +``` +├── config.yml +├── get_data.sh +├── PipelineServingLogs +│   ├── pipeline.log +│   ├── pipeline.log.wf +│   └── pipeline.tracer +├── README_CN.md +├── README.md +├── uci_housing_client +│   ├── serving_client_conf.prototxt +│   └── serving_client_conf.stream.prototxt +├── uci_housing_model +│   ├── fc_0.b_0 +│   ├── fc_0.w_0 +│   ├── __model__ +│   ├── serving_server_conf.prototxt +│   └── serving_server_conf.stream.prototxt +├── web_service_java.py +└── web_service.py +``` + +在服务发生异常时,错误信息会记录在 pipeline.log.wf 日志中。打印 tracer 日志要求在 config.yml 的 DAG 属性中添加 tracer 配置。 + +1. 日志与请求的唯一标识 +Pipeline 中有2种 id 用以串联请求,分别是 data_id 和 log_id,二者区别如下: +- data_id : Pipeline 框架生成的自增 ID,标记请求唯一性标识 +- log_id : 上游模块传入的标识,跟踪多个服务间串联关系,由于用户可不传入或不保证唯一性,因此不能作为唯一性标识 + +通常,Pipeline 框架打印的日志会同时带上 data_id 和 log_id。开启 auto-batching 后,会使用批量中的第一个 data_id 标记 batch 整体,同时框架会在一条日志中打印批量中所有 data_id。 + +2. 日志滚动 +Pipeline 的日志模块在 `logger.py` 中定义,使用了 `logging.handlers.RotatingFileHandler` 支持磁盘日志文件的轮换。根据不同文件级别和日质量分别设置了 `maxBytes` 和 `backupCount`,当即将超出预定大小时,将关闭旧文件并打开一个新文件用于输出。 + +```python +"handlers": { + "f_pipeline.log": { + "class": "logging.handlers.RotatingFileHandler", + "level": "INFO", + "formatter": "normal_fmt", + "filename": os.path.join(log_dir, "pipeline.log"), + "maxBytes": 512000000, + "backupCount": 20, + }, + "f_pipeline.log.wf": { + "class": "logging.handlers.RotatingFileHandler", + "level": "WARNING", + "formatter": "normal_fmt", + "filename": os.path.join(log_dir, "pipeline.log.wf"), + "maxBytes": 512000000, + "backupCount": 10, + }, + "f_tracer.log": { + "class": "logging.handlers.RotatingFileHandler", + "level": "INFO", + "formatter": "tracer_fmt", + "filename": os.path.join(log_dir, "pipeline.tracer"), + "maxBytes": 512000000, + "backupCount": 5, + }, +} + +``` + + + +**四. 
错误信息** + +框架提供的错误信息如下所示, 完整信息在 `error_catch.py` 中 `CustomExceptionCode` 类中定义。 + +| 错误码 | 说明 | +| :---: | :-------------: | +| 0 | 成功 | +| 50 ~ 999 | 产品错误 | +| 3000 ~ 3999 | 框架内部服务错误 | +| 4000 ~ 4999 | 配置错误 | +| 5000 ~ 5999 | 用户输入错误 | +| 6000 ~ 6999 | 超时错误 | +| 7000 ~ 7999 | 类型检查错误 | +| 8000 ~ 8999 | 内部通讯错误 | +| 9000 ~ 9999 | 推理错误 | +| 10000 ~ | 其他错误 | + +具体错误信息如下: + +``` +class CustomExceptionCode(enum.Enum): + OK = 0 + PRODUCT_ERROR = 50 + + NOT_IMPLEMENTED = 3000 + CLOSED_ERROR = 3001 + NO_SERVICE = 3002 + INIT_ERROR = 3003 + CONF_ERROR = 4000 + INPUT_PARAMS_ERROR = 5000 + TIMEOUT = 6000 + TYPE_ERROR = 7000 + RPC_PACKAGE_ERROR = 8000 + CLIENT_ERROR = 9000 + UNKNOW = 10000 +``` + + + +## 自定义信息 + +提供给开发者提供以下自定义信息,包括自定义 Web 服务、自定义服务输入和输出结构、自定义服务并发和模型配置和自定义推理过程 +- 自定义 Web 服务 URL +- 自定义服务输入和输出结构 +- 自定义服务并发和模型配置 +- 自定义推理过程 +- 自定义业务错误类型 + + + +**一.自定义 Web 服务 URL** + +在 Web 服务中自定义服务名称是常见操作,尤其是将已有服务迁移到新框架。URL 中核心字段包括 `ip`、`port`、`name` 和 `method`,根据最新部署的环境信息设置前2个字段,重点介绍如何设置 `name` 和 `method`,框架提供默认的 `methon` 是 `prediciton`,如 `http://127.0.0.1:9999/ocr/prediction` 。 + +框架有2处代码与此相关,分别是 gRPC Gateway 的配置文件 `python/pipeline/gateway/proto/gateway.proto` 和 服务启动文件 `web_server.py`。 + +业务场景中通过设置 `name` 和 验证 `method` 解决问题。以 [OCR 示例]()为例,服务启动文件 `web_server.py` 通过类 `OcrService` 构造函数的 `name` 字段设置 URL 中 `name` 字段; +``` +ocr_service = OcrService(name="ocr") +ocr_service.prepare_pipeline_config("config.yml") +ocr_service.run_service() +``` + +框架提供默认的 `methon` 是 `prediciton`,通过重载 `RequestOp::unpack_request_package` 来验证 `method`。 +``` +def unpack_request_package(self, request): + dict_data = {} + log_id = None + if request is None: + _LOGGER.critical("request is None") + raise ValueError("request is None") + if request.method is not "prediction": + _LOGGER.critical("request method error") + raise ValueError("request method error") + ... +``` + +在 `python/pipeline/gateway/proto/gateway.proto` 文件可以对 `name` 和 `method` 做严格限制,一般不需要修改,如需要特殊指定修改后,需要重新编译 Paddle Serving,[编译方法]() + +```proto +service PipelineService { + rpc inference(Request) returns (Response) { + option (google.api.http) = { + post : "/{name=*}/{method=*}" + body : "*" + }; + } +}; +``` + + +**二.自定义服务输入和输出结构** + +输入和输出结构包括 proto 中 Request 和 Response 结构,以及 Op 前后处理返回。 + +当默认 proto 结构不满足业务需求时,同时下面2个文件的 proto 的 Request 和 Response message 结构,保持一致。 +- pipeline/gateway/proto/gateway.proto +- pipeline/proto/pipeline_service.proto + +修改后,需要[重新编译]() + + + +**三.自定义服务并发和模型配置** + +完整的配置信息可参考[配置信息]() + + + +**四.自定义推理过程** + +推理 Op 为开发者提供3个外部函数接口: + +| 变量或接口 | 说明 | +| :----------------------------------------------: | :----------------------------------------------------------: | +| def preprocess(self, input_dicts) | 对从 Channel 中获取的数据进行处理,处理完的数据将作为 **process** 函数的输入。(该函数对一个 **sample** 进行处理) | +| def process(self, feed_dict_list, typical_logid) | 基于 Paddle Serving Client 进行 RPC 预测,处理完的数据将作为 **postprocess** 函数的输入。(该函数对一个 **batch** 进行处理) | +| def postprocess(self, input_dicts, fetch_dict) | 处理预测结果,处理完的数据将被放入后继 Channel 中,以被后继 Op 获取。(该函数对一个 **sample** 进行处理) | +| def init_op(self) | 用于加载资源(如字典等)。 | +| self.concurrency_idx | 当前进程(非线程)的并发数索引(不同种类的 Op 单独计算)。 | + +Op 在一个运行周期中会依次执行 preprocess,process,postprocess 三个操作(当不设置 `server_endpoints` 参数时,不执行 process 操作),用户可以对这三个函数进行重写,默认实现如下: + +```python +def preprocess(self, input_dicts): + # multiple previous Op + if len(input_dicts) != 1: + raise NotImplementedError( + 'this Op has multiple previous inputs. Please override this func.' 
+ ) + (_, input_dict), = input_dicts.items() + return input_dict + +def process(self, feed_dict_list, typical_logid): + err, err_info = ChannelData.check_batch_npdata(feed_dict_list) + if err != 0: + raise NotImplementedError( + "{} Please override preprocess func.".format(err_info)) + call_result = self.client.predict( + feed=feed_dict_list, fetch=self._fetch_names, log_id=typical_logid) + if isinstance(self.client, MultiLangClient): + if call_result is None or call_result["serving_status_code"] != 0: + return None + call_result.pop("serving_status_code") + return call_result + +def postprocess(self, input_dicts, fetch_dict): + return fetch_dict +``` + +**preprocess** 的参数是前继 Channel 中的数据 `input_dicts`,该变量(作为一个 **sample**)是一个以前继 Op 的 name 为 Key,对应 Op 的输出为 Value 的字典。 + +**process** 的参数是 Paddle Serving Client 预测接口的输入变量 `fetch_dict_list`(preprocess 函数的返回值的列表),该变量(作为一个 **batch**)是一个列表,列表中的元素为以 feed_name 为 Key,对应 ndarray 格式的数据为 Value 的字典。`typical_logid` 作为向 PaddleServingService 穿透的 logid。 + +**postprocess** 的参数是 `input_dicts` 和 `fetch_dict`,`input_dicts` 与 preprocess 的参数一致,`fetch_dict` (作为一个 **sample**)是 process 函数的返回 batch 中的一个 sample(如果没有执行 process ,则该值为 preprocess 的返回值)。 + +用户还可以对 **init_op** 函数进行重写,已加载自定义的一些资源(比如字典等),默认实现如下: + +```python +def init_op(self): + pass +``` + +RequestOp 和 ResponseOp 是 Python Pipeline 的中2个特殊 Op,分别是用分解 RPC 数据加入到图执行引擎中,和拿到图执行引擎的预测结果并打包 RPC 数据到客户端。 +RequestOp 类的设计如下所示,核心是在 unpack_request_package 函数中解析请求数据,因此,当修改 Request 结构后重写此函数实现全新的解包处理。 + +| 接口 | 说明 | +| :---------------------------------------: | :----------------------------------------: | +| init_op(self) | OP初始化,设置默认名称@DAGExecutor | +| unpack_request_package(self, request) | 解析请求数据 | + +```python +class RequestOp(Op): + def __init__(self): + # PipelineService.name = "@DAGExecutor" + super(RequestOp, self).__init__(name="@DAGExecutor", input_ops=[]) + # init op + try: + self.init_op() + except Exception as e: + _LOGGER.critical("Op(Request) Failed to init: {}".format(e)) + os._exit(-1) + + def unpack_request_package(self, request): + dict_data = {} + log_id = None + if request is None: + _LOGGER.critical("request is None") + raise ValueError("request is None") + + for idx, key in enumerate(request.key): + dict_data[key] = request.value[idx] + log_id = request.logid + _LOGGER.info("RequestOp unpack one request. log_id:{}, clientip:{} \ + name:{}, method:{}".format(log_id, request.clientip, request.name, + request.method)) + + return dict_data, log_id, None, "" +``` + +ResponseOp 类的设计如下所示,核心是在 pack_response_package 中打包返回结构,因此修改 Response 结构后重写此函数实现全新的打包格式。 + +| 接口 | 说明 | +| :------------------------------------------: | :-----------------------------------------: | +| init_op(self) | Op 初始化,设置默认名称 @DAGExecutor | +| pack_response_package(self, channeldata) | 处理接收的 RPC 数据 | + +```python +class ResponseOp(Op): + def __init__(self, input_ops): + super(ResponseOp, self).__init__( + name="@DAGExecutor", input_ops=input_ops) + # init op + try: + self.init_op() + except Exception as e: + _LOGGER.critical("Op(ResponseOp) Failed to init: {}".format( + e, exc_info=True)) + os._exit(-1) + + def pack_response_package(self, channeldata): + resp = pipeline_service_pb2.Response() + error_code = channeldata.error_code + error_info = "" + ... 
+ + # pack results + if error_code is None: + error_code = 0 + resp.err_no = error_code + resp.err_msg = error_info + + return resp +``` + + +**五.自定义业务错误类型** + +用户可根据业务场景自定义错误码,继承 ProductErrCode,在 Op 的 preprocess 或 postprocess 中返回列表中返回,下一阶段处理会根据自定义错误码跳过后置OP处理。 +```python +class ProductErrCode(enum.Enum): + """ + ProductErrCode is a base class for recording business error code. + product developers inherit this class and extend more error codes. + """ + pass +``` + +其使用方法如下所示,定义了一种错误类型 `Product_Error` ,在 `preprocess` 函数返回值中设置错误信息,在 `postprocess` 函数中也可以设置。 +```python + +class ProductErrCode(enum.Enum): + """ + ProductErrCode is a base class for recording business error code. + product developers inherit this class and extend more error codes. + """ + Product_Error = 100001, + +def preprocess(self, input_dicts, data_id, log_id): + """ + In preprocess stage, assembling data for process stage. users can + override this function for model feed features. + Args: + input_dicts: input data to be preprocessed + data_id: inner unique id + log_id: global unique id for RTT + Return: + input_dict: data for process stage + is_skip_process: skip process stage or not, False default + prod_errcode: None default, otherwise, product errores occured. + It is handled in the same way as exception. + prod_errinfo: "" default + """ + (_, input_dict), = input_dicts.items() + if input_dict.get_key("product_error"): + return input_dict, False, Product_Error, "Product Error Occured" + return input_dict, False, None, "" + +``` diff --git a/doc/Offical_Docs/7-2_Python_Pipeline_Senior_CN.md b/doc/Offical_Docs/7-2_Python_Pipeline_Senior_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..c372d0800407bd14455bbcab1cbabe928fd5f659 --- /dev/null +++ b/doc/Offical_Docs/7-2_Python_Pipeline_Senior_CN.md @@ -0,0 +1,628 @@ +# Python Pipeline 核心功能 + +从设计上,Python Pipeline 框架实现轻量级的服务化部署,提供了丰富的核心功能,既能满足服务基本使用,又能满足特性需求。 + +- [安装与环境检查](#1) +- [服务启动与关闭](#2) +- [本地与远程推理](#3) +- [批量推理](#4) + - [4.1 客户端打包批量数据](#4.1) + - [4.2 服务端合并多个请求动态合并批量](#4.2) + - [4.3 Mini-Batch](#4.3) +- [单机多卡推理](#5) +- [多种计算芯片上推理](#6) +- [TensorRT 推理加速](#7) +- [MKLDNN 推理加速](#8) +- [低精度推理](#9) + - [9.1 CPU 低精度推理](#9.1) + - [9.2 GPU 和 TensorRT 低精度推理](#9.2) + - [9.3 性能测试](#9.3) +- [复杂图结构 DAG 跳过某个 Op 运行](#10) + + + +## 安装与环境检查 + +在运行 Python Pipeline 服务前,确保当前环境下可部署且通过[安装指南](./2-0_Index_CN.md)已完成安装。其次,`v0.8.0`及以上版本提供了环境检查功能,检验环境是否安装正确。 + +输入以下命令,进入环境检查程序。 +```python +python3 -m paddle_serving_server.serve check +``` + +在环境检验程序中输入多条指令来检查,例如 `check_pipeline`,`check_all`等,完整指令列表如下。 + +| 指令 | 描述| +|---------|----| +|check_all | 检查 Paddle Inference、Pipeline Serving、C++ Serving。只打印检测结果,不记录日志| +|check_pipeline | 检查 Pipeline Serving,只打印检测结果,不记录日志| +|check_cpp | 检查 C++ Serving,只打印检测结果,不记录日志| +|check_inference | 检查 Paddle Inference 是否安装正确,只打印检测结果,不记录日志| +|debug | 发生报错后,该命令将打印提示日志到屏幕,并记录详细日志文件| +|exit | 退出| + + +程序会分别运行 cpu 和 gpu 示例。运行成功则打印 `Pipeline cpu environment running success +` 和 `Pipeline gpu environment running success`。 + +``` +/usr/local/lib/python3.7/runpy.py:125: RuntimeWarning: 'paddle_serving_server.serve' found in sys.modules after import of package 'paddle_serving_server', but prior to execution of 'paddle_serving_server.serve'; this may result in unpredictable behaviour + warn(RuntimeWarning(msg)) +Welcome to the check env shell.Type help to list commands. 
+ +(Cmd) check_pipeline +Pipeline cpu environment running success +Pipeline gpu environment running success +``` + +运行失败时,错误信息会记录到当前目录下 `stderr.log` 文件 和 `Pipeline_test_cpu/PipelineServingLogs` 目录下。用户可根据错误信息调试。 + +``` +(Cmd) check_all +PaddlePaddle inference environment running success +C++ cpu environment running success +C++ gpu environment running failure, if you need this environment, please refer to https://github.com/PaddlePaddle/Serving/blob/develop/doc/Install_CN.md +Traceback (most recent call last): + File "/usr/local/lib/python3.7/runpy.py", line 193, in _run_module_as_main + "__main__", mod_spec) + File "/usr/local/lib/python3.7/runpy.py", line 85, in _run_code + exec(code, run_globals) + File "/usr/local/lib/python3.7/site-packages/paddle_serving_server/serve.py", line 541, in + Check_Env_Shell().cmdloop() + File "/usr/local/lib/python3.7/cmd.py", line 138, in cmdloop + stop = self.onecmd(line) + File "/usr/local/lib/python3.7/cmd.py", line 217, in onecmd + return func(arg) + File "/usr/local/lib/python3.7/site-packages/paddle_serving_server/serve.py", line 501, in do_check_all + check_env("all") + File "/usr/local/lib/python3.7/site-packages/paddle_serving_server/env_check/run.py", line 94, in check_env + run_test_cases(pipeline_test_cases, "Pipeline", is_open_std) + File "/usr/local/lib/python3.7/site-packages/paddle_serving_server/env_check/run.py", line 66, in run_test_cases + mv_log_to_new_dir(new_dir_path) + File "/usr/local/lib/python3.7/site-packages/paddle_serving_server/env_check/run.py", line 48, in mv_log_to_new_dir + shutil.move(file_path, dir_path) + File "/usr/local/lib/python3.7/shutil.py", line 555, in move + raise Error("Destination path '%s' already exists" % real_dst) +shutil.Error: Destination path '/home/work/Pipeline_test_cpu/PipelineServingLogs' already exists + +``` + + + +## 服务启动与关闭 + +服务启动需要三类文件,PYTHON 程序、模型文件和配置文件。以[Python Pipeline 快速部署案例](./3-2_QuickStart_Pipeline_OCR_CN.md)为例, +``` +. 
+├── config.yml +├── imgs +│   └── ggg.png +├── ocr_det_client +│   ├── serving_client_conf.prototxt +│   └── serving_client_conf.stream.prototxt +├── ocr_det_model +│   ├── inference.pdiparams +│   ├── inference.pdmodel +│   ├── serving_server_conf.prototxt +│   └── serving_server_conf.stream.prototxt +├── ocr_det.tar.gz +├── ocr_rec_client +│   ├── serving_client_conf.prototxt +│   └── serving_client_conf.stream.prototxt +├── ocr_rec_model +│   ├── inference.pdiparams +│   ├── inference.pdmodel +│   ├── serving_server_conf.prototxt +│   └── serving_server_conf.stream.prototxt +├── pipeline_http_client.py +├── pipeline_rpc_client.py +├── ppocr_keys_v1.txt +└── web_service.py +``` + +启动服务端程序运行 `web_service.py`,启动客户端程序运行 `pipeline_http_client.py` 或 `pipeline_rpc_client.py`。服务端启动的日志信息在 `PipelineServingLogs` 目录下可用于调试。 +``` +├── PipelineServingLogs +│   ├── pipeline.log +│   ├── pipeline.log.wf +│   └── pipeline.tracer +``` + +关闭程序可使用2种方式, +- 前台关闭程序:`Ctrl+C` 关停服务 +- 后台关闭程序: +```python +python3 -m paddle_serving_server.serve stop # 触发 SIGINT 信号 +python3 -m paddle_serving_server.serve kill # 触发 SIGKILL 信号,强制关闭 +``` + + + +## 本地与远程推理 + +本地推理是指在服务所在机器环境下开启多进程推理,而远程推理是指本地服务请求远程 C++ Serving 推理服务。 + +本地推理的优势是实现简单,一般本地处理相比于远程推理耗时更低。而远程推理的优势是可实现 Python Pipeline 较难实现的功能,如部署加密模型,大模型推理。 + +Python Pipeline 的本地推理可参考如下配置,在 `uci` op 中 增加 `local_service_conf` 配置,并设置 `client_type: local_predictor`。 +``` +op: + uci: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 10 + + #当op配置没有server_endpoints时,从local_service_conf读取本地服务配置 + local_service_conf: + + #uci模型路径 + model_config: uci_housing_model + + #计算硬件类型: 空缺时由devices决定(CPU/GPU),0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu + device_type: 0 + + #计算硬件ID,优先由device_type决定硬件类型。devices为""或空缺时为CPU预测;当为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 + devices: "" # "0,1" + + #client类型,包括brpc, grpc和local_predictor.local_predictor不启动Serving服务,进程内预测 + client_type: local_predictor + + #Fetch结果列表,以client_config中fetch_var的alias_name为准 + fetch_list: ["price"] +``` + +Python Pipeline 的远程推理可参考如下配置,设置 `client_type: brpc`,`server_endpoints`,`timeout` 和本地 `client_config`。 + +``` +op: + bow: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 1 + + #client连接类型,brpc + client_type: brpc + + #Serving交互重试次数,默认不重试 + retry: 1 + + #Serving交互超时时间, 单位ms + timeout: 3000 + + #Serving IPs + server_endpoints: ["127.0.0.1:9393"] + + #bow模型client端配置 + client_config: "imdb_bow_client_conf/serving_client_conf.prototxt" + + #Fetch结果列表,以client_config中fetch_var的alias_name为准 + fetch_list: ["prediction"] +``` + + + +## 批量推理 + +Pipeline 支持批量推理,通过增大 batch size 可以提高 GPU 利用率。Python Pipeline 支持3种 batch 形式以及适用的场景如下: +- 场景1:客户端打包批量数据(Client Batch) +- 场景2:服务端合并多个请求动态合并批量(Server auto-batching) +- 场景3:拆分一个大批量的推理请求为多个小批量推理请求(Server mini-batch) + + + +**一.客户端打包批量数据** + +当输入数据是 numpy 类型,如shape 为[4, 3, 512, 512]的 numpy 数据,即4张图片,可直接作为输入数据。 +当输入数据的 shape 不同时,需要按最大的shape的尺寸 Padding 对齐后发送给服务端 + + + +**二.服务端合并多个请求动态合并批量** + +有助于提升吞吐和计算资源的利用率,当多个请求的 shape 尺寸不相同时,不支持合并。当前有2种合并策略,分别是: + +- 等待时间与最大批量结合(推荐):结合`batch_size`和`auto_batching_timeout`配合使用,实际请求的批量条数超过`batch_size`时会立即执行,不超过时会等待`auto_batching_timeout`时间再执行 +``` +op: + bow: + # 并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 1 + + # client连接类型,brpc, grpc和local_predictor + client_type: brpc + + # Serving IPs + server_endpoints: ["127.0.0.1:9393"] + + # bow模型client端配置 + client_config: "imdb_bow_client_conf/serving_client_conf.prototxt" + + # 批量查询Serving的数量, 默认1。batch_size>1要设置auto_batching_timeout,否则不足batch_size时会阻塞 + batch_size: 2 + + # 批量查询超时,与batch_size配合使用 + 
auto_batching_timeout: 2000 +``` +- 阻塞式等待:仅设置`batch_size`,不设置`auto_batching_timeout`或`auto_batching_timeout=0`,会一直等待接受 `batch_size` 个请求后再推理。 + +``` +op: + bow: + # 并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 1 + + # client连接类型,brpc, grpc和local_predictor + client_type: brpc + + # Serving IPs + server_endpoints: ["127.0.0.1:9393"] + + # bow模型client端配置 + client_config: "imdb_bow_client_conf/serving_client_conf.prototxt" + + # 批量查询Serving的数量, 默认1。batch_size>1要设置auto_batching_timeout,否则不足batch_size时会阻塞 + batch_size: 2 + + # 批量查询超时,与batch_size配合使用 + auto_batching_timeout: 2000 + +``` + + + +**三.Mini-Batch** + +拆分一个批量数据推理请求成为多个小块推理:会降低批量数据 Padding 对齐的大小,从而提升速度。可参考 [OCR 示例](),核心思路是拆分数据成多个小批量,放入 list 对象 feed_list 并返回 + +``` +def preprocess(self, input_dicts, data_id, log_id): + (_, input_dict), = input_dicts.items() + raw_im = input_dict["image"] + data = np.frombuffer(raw_im, np.uint8) + im = cv2.imdecode(data, cv2.IMREAD_COLOR) + dt_boxes = input_dict["dt_boxes"] + dt_boxes = self.sorted_boxes(dt_boxes) + feed_list = [] + img_list = [] + max_wh_ratio = 0 + + ## Many mini-batchs, the type of feed_data is list. + max_batch_size = len(dt_boxes) + + # If max_batch_size is 0, skipping predict stage + if max_batch_size == 0: + return {}, True, None, "" + boxes_size = len(dt_boxes) + batch_size = boxes_size // max_batch_size + rem = boxes_size % max_batch_size + for bt_idx in range(0, batch_size + 1): + imgs = None + boxes_num_in_one_batch = 0 + if bt_idx == batch_size: + if rem == 0: + continue + else: + boxes_num_in_one_batch = rem + elif bt_idx < batch_size: + boxes_num_in_one_batch = max_batch_size + else: + _LOGGER.error("batch_size error, bt_idx={}, batch_size={}". + format(bt_idx, batch_size)) + break + + start = bt_idx * max_batch_size + end = start + boxes_num_in_one_batch + img_list = [] + for box_idx in range(start, end): + boximg = self.get_rotate_crop_image(im, dt_boxes[box_idx]) + img_list.append(boximg) + h, w = boximg.shape[0:2] + wh_ratio = w * 1.0 / h + max_wh_ratio = max(max_wh_ratio, wh_ratio) + _, w, h = self.ocr_reader.resize_norm_img(img_list[0], + max_wh_ratio).shape + + imgs = np.zeros((boxes_num_in_one_batch, 3, w, h)).astype('float32') + for id, img in enumerate(img_list): + norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) + imgs[id] = norm_img + feed = {"x": imgs.copy()} + feed_list.append(feed) + + return feed_list, False, None, "" +``` + + + +## 单机多卡推理 + +单机多卡推理与 `config.yml` 中配置4个参数关系紧密,`is_thread_op`、`concurrency`、`device_type` 和 `devices`,必须在进程模型和 GPU 模式,每张卡上可分配多个进程,即 M 个 Op 进程与 N 个 GPU 卡绑定。 +``` +dag: + #op资源类型, True, 为线程模型;False,为进程模型 + is_thread_op: False + +op: + det: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 6 + + #当op配置没有server_endpoints时,从local_service_conf读取本地服务配置 + local_service_conf: + + client_type: local_predictor + + # device_type, 0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu + device_type: 0 + + # 计算硬件 ID,当 devices 为""或不写时为 CPU 预测;当 devices 为"0", "0,1,2"时为 GPU 预测,表示使用的 GPU 卡 + devices: "0,1,2" +``` +以上述案例为例,`concurrency:6`,即启动6个进程,`devices:0,1,2`,根据轮询分配机制,得到如下绑定关系: + +- 进程ID: 0 绑定 GPU 卡0 +- 进程ID: 1 绑定 GPU 卡1 +- 进程ID: 2 绑定 GPU 卡2 +- 进程ID: 3 绑定 GPU 卡0 +- 进程ID: 4 绑定 GPU 卡1 +- 进程ID: 5 绑定 GPU 卡2 +- 进程ID: 6 绑定 GPU 卡0 + +对于更灵活的进程与 GPU 卡绑定方式,会持续开发。 + + + +## 多种计算芯片上推理 + +除了支持 CPU、GPU 芯片推理之外,Python Pipeline 还支持在多种计算硬件上推理。根据 `config.yml` 中的 `device_type` 和 `devices`来设置推理硬件和加速库如下: +- CPU(Intel) : 0 +- GPU(GPU / Jetson / 海光 DCU) : 1 +- TensorRT : 2 +- CPU(Arm) : 3 +- XPU : 4 +- Ascend310 : 5 +- ascend910 : 6 + 
+当不设置`device_type`时,根据 `devices` 来设置,即当 `device_type` 为 "" 或空缺时为 CPU 推理;当有设定如"0,1,2"时,为 GPU 推理,并指定 GPU 卡。 + +以使用 XPU 的编号为0卡为例,配合 `ir_optim` 一同开启,`config.yml`详细配置如下: +``` +# 计算硬件类型 +device_type: 4 + +# 计算硬件ID,优先由device_type决定硬件类型 +devices: "0" + +# 开启ir优化 +ir_optim: True + +``` + + +## TensorRT 推理加速 + +TensorRT 是一个高性能的深度学习推理优化器,在 Nvdia 的 GPU 硬件平台运行的推理框架,为深度学习应用提供低延迟、高吞吐率的部署推理。 + +通过设置`device_type`、`devices`和`ir_optim` 字段即可实现 TensorRT 高性能推理。必须同时设置 `ir_optim: True` 才能开启 TensorRT。 + +``` +op: + imagenet: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 1 + + #当op配置没有server_endpoints时,从local_service_conf读取本地服务配置 + local_service_conf: + + #uci模型路径 + model_config: serving_server/ + + #计算硬件类型: 空缺时由devices决定(CPU/GPU),0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu + device_type: 2 + + #计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 + devices: "1" # "0,1" + + #client类型,包括brpc, grpc和local_predictor.local_predictor不启动Serving服务,进程内预测 + client_type: local_predictor + + #Fetch结果列表,以client_config中fetch_var的alias_name为准 + fetch_list: ["score"] + + #开启 ir_optim + ir_optim: True +``` + + +## MKL-DNN 推理加速 + +MKL-DNN 针对 Intel CPU 和 GPU 的数学核心库,对深度学习网络进行算子和指令集的性能优化,从而提升执行速度。Paddle 框架已集成了 MKL-DNN。 + +目前仅支持 Intel CPU 推理加速,通过设置`device_type` 和 `devices` 和 `use_mkldnn` 字段使用 MKL-DNN。 + +``` +op: + imagenet: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 1 + + #当op配置没有server_endpoints时,从local_service_conf读取本地服务配置 + local_service_conf: + + #uci模型路径 + model_config: serving_server/ + + #计算硬件类型: 空缺时由devices决定(CPU/GPU),0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu + device_type: 0 + + #计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 + devices: "" + + #client类型,包括brpc, grpc和local_predictor.local_predictor不启动Serving服务,进程内预测 + client_type: local_predictor + + #Fetch结果列表,以client_config中fetch_var的alias_name为准 + fetch_list: ["score"] + + #开启 MKLDNN + use_mkldnn: True +``` + + +## 低精度推理 + +Pipeline Serving支持低精度推理,CPU、GPU和TensoRT支持的精度类型如下图所示: + +低精度推理需要有量化模型,配合`config.yml`配置一起使用,以[低精度示例]() 为例 + + + +**一.CPU 低精度推理** + +通过设置,`device_type` 和 `devices` 字段使用 CPU 推理,通过调整`precision`、`thread_num`和`use_mkldnn`参数选择低精度和性能调优。 + +``` +op: + imagenet: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 1 + + #当op配置没有server_endpoints时,从local_service_conf读取本地服务配置 + local_service_conf: + + #uci模型路径 + model_config: serving_server/ + + #计算硬件类型: 空缺时由devices决定(CPU/GPU),0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu + device_type: 0 + + #计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 + devices: "" + + #client类型,包括brpc, grpc和local_predictor.local_predictor不启动Serving服务,进程内预测 + client_type: local_predictor + + #Fetch结果列表,以client_config中fetch_var的alias_name为准 + fetch_list: ["score"] + + #精度,CPU 支持: "fp32"(default), "bf16"(mkldnn); 不支持: "int8" + precision: "bf16" + + #CPU 算数计算线程数,默认4线程 + thread_num: 10 + + #开启 MKLDNN + use_mkldnn: True +``` + + + +**二.GPU 和 TensorRT 低精度推理** + +通过设置`device_type` 和 `devices` 字段使用原生 GPU 或 TensorRT 推理,通过调整`precision`、`ir_optim`和`use_calib`参数选择低精度和性能调优,如开启 TensorRT,必须一同开启`ir_optim`,`use_calib`仅配合 int8 使用。 +``` +op: + imagenet: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 1 + + #当op配置没有server_endpoints时,从local_service_conf读取本地服务配置 + local_service_conf: + + #uci模型路径 + model_config: serving_server/ + + #计算硬件类型: 空缺时由devices决定(CPU/GPU),0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu + device_type: 2 + + #计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 + devices: "1" # "0,1" + + #client类型,包括brpc, 
grpc和local_predictor.local_predictor不启动Serving服务,进程内预测 + client_type: local_predictor + + #Fetch结果列表,以client_config中fetch_var的alias_name为准 + fetch_list: ["score"] + + #精度,GPU 支持: "fp32"(default), "fp16", "int8" + precision: "int8" + + #开启 TensorRT int8 calibration + use_calib: True + + #开启 ir_optim + ir_optim: True +``` + + + +**三.性能测试** + +测试环境如下: +- GPU 型号: A100-40GB +- CPU 型号: Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz * 160 +- CUDA: CUDA Version: 11.2 +- CuDNN: 8.0 + +测试方法: +- 模型: Resnet50 量化模型 +- 部署方法: Python Pipeline 部署 +- 计时方法: 刨除第一次运行初始化,运行100次计算平均值 + +在此环境下测试不同精度推理结果,GPU 推理性能较好的配置是 +- GPU + int8 + ir_optim + TensorRT + use_calib : 15.1 ms +- GPU + fp16 + ir_optim + TensorRT : 17.2 ms + +CPU 推理性能较好的配置是 +- CPU + bf16 + MKLDNN : 18.2 ms +- CPU + fp32 + thread_num=10 : 18.4 ms + +完整性能指标如下: +
+ +
+ +## 复杂图结构 DAG 跳过某个 Op 运行 + +此应用场景一般在 Op 前后处理中有 if 条件判断时,不满足条件时,跳过后面处理。实际做法是在跳过此 Op 的 process 阶段,只要在 preprocess 做好判断,跳过 process 阶段,在和 postprocess 后直接返回即可。 +preprocess 返回结果列表的第二个结果是 `is_skip_process=True` 表示是否跳过当前 Op 的 process 阶段,直接进入 postprocess 处理。 + +```python +## Op::preprocess() 函数实现 +def preprocess(self, input_dicts, data_id, log_id): + """ + In preprocess stage, assembling data for process stage. users can + override this function for model feed features. + Args: + input_dicts: input data to be preprocessed + data_id: inner unique id + log_id: global unique id for RTT + Return: + input_dict: data for process stage + is_skip_process: skip process stage or not, False default + prod_errcode: None default, otherwise, product errores occured. + It is handled in the same way as exception. + prod_errinfo: "" default + """ + # multiple previous Op + if len(input_dicts) != 1: + _LOGGER.critical( + self._log( + "Failed to run preprocess: this Op has multiple previous " + "inputs. Please override this func.")) + os._exit(-1) + (_, input_dict), = input_dicts.items() + return input_dict, False, None, "" + +``` +以下示例 Jump::preprocess() 重载了原函数,返回了 True 字段 +```python +class JumpOp(Op): + ## Overload func JumpOp::preprocess + def preprocess(self, input_dicts, data_id, log_id): + (_, input_dict), = input_dicts.items() + if input_dict.has_key("jump"): + return input_dict, True, None, "" + else + return input_dict, False, None, "" +``` diff --git a/doc/Offical_Docs/7-3_Python_Pipeline_Optimize_CN.md b/doc/Offical_Docs/7-3_Python_Pipeline_Optimize_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..ef2380193413f564775649e9012a5d642cd044a4 --- /dev/null +++ b/doc/Offical_Docs/7-3_Python_Pipeline_Optimize_CN.md @@ -0,0 +1,214 @@ +# Python Pipeline 优化指南 + +- [优化响应时长](#1) + - [1.1 分析响应时长](#1.1) + - [Pipeline Trace Tool](#1.1.1) + - [Pipeline Profile Tool](#1.1.2) + - [1.2 优化思路](#1.2) +- [优化服务吞吐](#2) + - [2.1 分析吞吐瓶颈](#2.1) + - [2.2 优化思路](#2.2) + - [增加 Op 并发](#2.2.1) + - [动态批量](#2.2.2) + - [CPU 与 GPU 处理分离](#2.2.3) + + +通常,服务的性能优化是基于耗时分析,首先要掌握服务运行的各阶段耗时信息,从中找到耗时最长的性能瓶颈再做针对性优化。对于模型推理服务化不仅要关注耗时,由于 GPU 芯片昂贵,更要关注服务吞吐,从而提升 GPU 利用率实现降本增效。因此,模型推理服务化可总结为: +- 优化响应时长 +- 优化服务吞吐 + +经过分析和调优后,各个阶段实现整体服务的性能最优。 + + + +## 优化响应时长 + +首先,优化响应时长的主要思路首先要掌握各阶段耗时,并分析出性能瓶颈或者耗时占比较高的阶段,再针对性能瓶颈做专项优化。 + +Paddle Serving 提供2种耗时分析工具,`Pipeline Trace Tool` 和 `Pipeline Profile Tool`。2个工具的特点如下: +- Pipeline Trace Tool : 统计服务端所有进程各个阶段的平均耗时,包括每个 `Op` 和 `Channel`,用于定量分析。 +- Pipeline Profile Tool : 是可视化 Trace View 工具,生成多进程并发效果图,用定性和定量分析执行和并发效果。 + + + +**一.耗时分析** + + + +1.Pipeline Trace Tool + +`Pipeline Trace Tool` 统计每个 `Op` 和 `Channel` 中各阶段的处理耗时, + +开启方法在配置文件 `config.yml` 的 `dag` 区段内添加 `tracer` 字段,框架会每隔 `interval_s` 时间生成 Trace 信息。 +``` +dag: + #op资源类型, True, 为线程模型;False,为进程模型 + is_thread_op: True + + #tracer, 跟踪框架吞吐,每个OP和channel的工作情况。无tracer时不生成数据 + tracer: + #每次trace的时间间隔,单位秒/s + interval_s: 10 +``` + +生成的 Trace 信息保存在 `./PipelineServingLogs/pipeline.tracer` 日志中。如下图所示 +``` +==================== TRACER ====================== + Op(uci): + in[8473.507333333333 ms]: # 等待前置 Channel 中数据放入 Op 的耗时,如长时间无请求,此值会变大 + prep[0.6753333333333333 ms] # 推理前处理 preprocess 阶段耗时 + midp[26.476333333333333 ms] # 推理 process 阶段耗时 + postp[1.8616666666666666 ms] # 推理后处理 postprocess 阶段耗时 + out[1.3236666666666668 ms] # 后处理结果放入后置 channel 耗时 + idle[0.9965882097324374] # 框架自循环耗时,间隔 1 ms,如此值很大说明系统负载高,调度变慢 + DAGExecutor: + Query count[30] # interval_s 间隔时间内请求数量 + QPS[27.35 q/s] # interval_s 间隔时间内服务 QPS + Succ[1.0] # interval_s 间隔时间内请求成功率 + Error req[] # 异常请求信息 + 
Latency: + ave[36.55233333333334 ms] # 平均延时 + .50[8.702 ms] # 50分位延时 + .60[8.702 ms] # 60分位延时 + .70[92.346 ms] # 70分位延时 + .80[92.346 ms] # 70分位延时 + .90[92.346 ms] # 90分位延时 + .95[92.346 ms] # 95分位延时 + .99[92.346 ms] # 99分位延时 + Channel (server worker num[1]): + chl0(In: ['@DAGExecutor'], Out: ['uci']) size[0/0] # 框架 RequestOp 与 uci Op 之间 Channel 中堆积请求数。此值较大,说明下游 uci Op 消费能力不足。 + chl1(In: ['uci'], Out: ['@DAGExecutor']) size[0/0] # uci Op 与 框架 ResponseOp 之间 Channel 中堆积的请求数。此值较大,说明下游 ReponseOp 消费能力不足。 + ==================== TRACER ====================== +``` + + +2.Pipeline Profile Tool + +``` +dag: + #op资源类型, True, 为线程模型;False,为进程模型 + is_thread_op: True + + #使用性能分析, 默认为 False,imeline性能数据,对性能有一定影响 + use_profile: True, +``` + +开启后,Server 端在预测的过程中会将对应的日志信息打印到`标准输出`,为了更直观地展现各阶段的耗时,因此服务启动要使用如下命令: +``` +python3.7 web_service.py > profile.txt 2>&1 +``` + +服务接收请求后,输出 Profile 信息到 `profile.txt` 文件中。再粘贴如下代码到 `trace.py`, 使用框架提供 Analyst 模块对日志文件做进一步的分析处理。 +``` +from paddle_serving_server.pipeline import Analyst +import json +import sys + +if __name__ == "__main__": + log_filename = "profile.txt" + trace_filename = "trace" + analyst = Analyst(log_filename) + analyst.save_trace(trace_filename) +``` + +运行命令,脚本将日志中的时间打点信息转换成 json 格式保存到 `trace` 文件。 +``` +python3.7 trace.py +``` + +`trace` 文件可以通过 `chrome` 浏览器的 `tracing` 功能进行可视化。 +``` +打开 chrome 浏览器,在地址栏输入 chrome://tracing/ ,跳转至 tracing 页面,点击 load 按钮,打开保存的 trace 文件,即可将预测服务的各阶段时间信息可视化。 +``` + +通过图示中并发请求的处理流程可观测到推理阶段的流水线状态,以及多个请求在推理阶段的`间隔`信息,进行优化。 + + + +**二.降低响应时长优化思路** + +根据 `Pipeline Trace Tool` 输出结果在不同阶段耗时长的问题,常见场景的优化方法如下: +- Op 推理阶段(midp) 耗时长: + - 增加 Op 并发度 + - 开启 auto-batching (前提是多个请求的 shape 一致) + - 若批量数据中某条数据的 shape 很大,padding 很大导致推理很慢,可参考 OCR 示例中 mini-batch 方法。 + - 开启 TensorRT/MKL-DNN 优化 + - 开启低精度推理 +- Op 前处理阶段(prep) 或 后处理阶段耗时长: + - 增加 OP 并发度 + - 优化前后处理逻辑 +- in/out 耗时长(channel 堆积>5) + - 检查 channel 传递的数据大小,可能为传输的数据大导致延迟大。 + - 优化传入数据,不传递数据或压缩后再传入 + - 增加 Op 并发度 + - 减少上游 Op 并发度 + +根据 `Pipeline Profile Tool` 输出结果优化流水行并发的效果 +- 增加 Op 并发度,或调整不同 Op 的并发度 +- 开启 auto-batching + +此外,还有一些优化思路,如将 CPU 处理较慢的过程转换到 GPU 上处理等,客户端与服务端传输较大数据时,可使用共享内存方式传递内存或显存地址等。 + + + +## 优化服务吞吐 + + + +**一.分析吞吐瓶颈** + +服务的吞吐量受到多种多因素条件制约,如 Op 处理时长、传输数据耗时、并发数和 DAG 图结构等,可以将这些因素进一步拆解,当传输数据不是极端庞大的时候,最重要因素是流水线中`最慢 Op 的处理时长和并发数`。 +``` +Op 处理时长: +op_cost = process(pre + mid + post) + +服务吞吐量: +service_throughput = 1 / 最慢 op_cost * 并发数 + +服务平响: +service_avg_cost = ∑op_concurrency 【关键路径】 + +批量预测平均耗时: +avg_batch_cost = (N * pre + mid + post) / N +``` + + +**二.优化思路** + +优化吞吐的主要方法是 `增大 Op 并发数`、`自动批量` 和 `CPU 与 GPU 处理分离` + + + +1.增加 Op 并发** + +调整 Op 的并发数量通过设置 `is_thread_op: False` 进程类型 Op 和 `uci` Op 的 `concurrency` 字段 +``` +dag: + #op资源类型, True, 为线程模型;False,为进程模型 + is_thread_op: False +op: + uci: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 10 +``` +Op 的进程数量不是越大越好,受到机器 CPU 核数、内存和显存大小的限制,推荐设置 Op 的并发数不超过系统 CPU 核数。 + + + +2.动态批量 + +动态批量是增加吞吐的有一种方法,开启方式可参考[Python Pipeline 核心功能](./7-2_Python_Pipeline_Senior_CN.md#批量推理) + + + +3.CPU 与 GPU 处理分离 + +在 `CV` 模型中,对图片或视频的前后处理成为主要瓶颈时,可考虑此方案,即将前后处理过程独立成一个 Op 并独立设置并发度。 + +将 CPU 前后处理和 GPU 推理过程比例调整到服务最佳配比。以 OCR 为例,原有流水线设计为 `RequestOp -> DetOp -> RecOp -> ResponseOp`。 + +根据耗时分析,`DetOp` 和 `RecOp` 的前处理耗时很长,因此,将2个模型前处理分离成独立 Op,最新的流水线设计为: + +`RequestOp -> PreDetOp -> DetOp -> PreRecOp -> RecOp -> ResponseOp`,并调大 `PreDetOp` 和 `PreRecOp`的并发度,从而获得 20% 的性能提升。 + +由于增加了2次数据传递,单条请求的处理延时会增加。 diff --git a/doc/Offical_Docs/7-4_Python_Pipeline_Benchmark_CN.md b/doc/Offical_Docs/7-4_Python_Pipeline_Benchmark_CN.md new file mode 100644 index 
0000000000000000000000000000000000000000..9d29abf823fd24b98176df093bbb0258456f200f --- /dev/null +++ b/doc/Offical_Docs/7-4_Python_Pipeline_Benchmark_CN.md @@ -0,0 +1,59 @@ +# Python Pipeline 性能测试 + +- [测试环境](#1) +- [性能指标与结论](#2) + + + +## 测试环境 + +测试环境如下表所示: +| | GPU | 显存 | CPU | 内存 | +|----------|---------|----------|----------------------------------------------|------| +| Serving端 | 4x Tesla P4-8GB | 7611MiB | Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz 48核 | 216G | +| Client端 | 4x Tesla P4-8GB | 7611MiB | Intel(R) Xeon(R) Gold 5117 CPU @ 2.00GHz 48核 | 216G | + +使用单卡GPU,未开启TensorRT。 +模型:ResNet_v2_50 + + + +## 性能指标与结论 + +通过测试,使用 Python Pipeline 模式通过多进程并发,充分利用 GPU 显卡,具有较好的吞吐性能。 + + +测试数据如下: + +|model_name |thread_num |batch_size |CPU_util(%) |GPU_memory(mb) |GPU_util(%) |qps(samples/s) |total count |mean(ms) |median(ms) |80 percent(ms) |90 percent(ms) |99 percent(ms) |total cost(s) |each cost(s)| +|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:--|:-- +|ResNet_v2_50 |1 |1 |2.2 |3327 |17.25 |17.633658869240787 |355 |56.428481238996476 |38.646728515625 |39.496826171875 |39.98369140625 |1273.1911083984373 |20.131953477859497 |20.033540725708008| +|ResNet_v2_50 |1 |4 |2.7 |3617 |28.122 |53.50748430453522 |268 |74.71539215543378 |74.6181640625 |75.3138671875 |75.6051025390625 |77.85322998046874 |20.03458046913147 |20.024930953979492| +|ResNet_v2_50 |1 |8 |1.7 |3877 |25.7869 |59.60582783086999 |150 |133.5897119140625 |132.7503662109375 |134.968310546875 |136.470703125 |140.79039062499996 |20.132259607315063 |20.03933620452881| +|ResNet_v2_50 |1 |16 |7.0 |4777 |27.0175 |63.2627646819339 |80 |252.30162048339844 |251.8448486328125 |253.046630859375 |253.91142578125 |263.361640625 |20.233070850372314 |20.18476152420044| +|ResNet_v2_50 |1 |32 |7.5 |6567 |38.532 |62.945314687348024 |40 |506.8969482421875 |507.3531494140625 |510.562353515625 |511.421240234375 |536.8068920898437 |20.335111618041992 |20.276386737823486| +|ResNet_v2_50 |2 |1 |4.7 |6567 |49.4828 |50.40600094376044 |1010 |39.63352195815285 |39.5345458984375 |40.452880859375 |41.1375 |42.940522460937494 |20.037296772003174 |20.01696753501892| +|ResNet_v2_50 |2 |4 |2.7 |6567 |44.4744 |83.4255836891382 |420 |95.38548002697172 |95.7069091796875 |97.599951171875 |98.098583984375 |102.39680908203125 |20.137707471847534 |20.03199553489685| +|ResNet_v2_50 |2 |8 |2.2 |6567 |42.898 |91.3727510505176 |230 |174.89108568274457 |175.0452880859375 |175.82001953125 |176.7634033203125 |178.64064453125002 |20.13729453086853 |20.1132071018219| +|ResNet_v2_50 |2 |16 |2.2 |6567 |45 |97.5591285698611 |124 |327.16720088835683 |328.6126708984375 |329.75185546875 |330.386962890625 |336.86397460937496 |20.336385011672974 |20.284939169883728| +|ResNet_v2_50 |2 |32 |3.2 |6567 |59.5714 |100.70765418116333 |64 |633.9812698364258 |637.8568115234375 |648.103515625 |650.7439697265625 |659.2212915039062 |20.336090803146362 |20.28787398338318| +|ResNet_v2_50 |4 |1 |3.1 |6567 |64.3333 |80.27845081929433 |1617 |49.56464230756223 |49.4873046875 |51.5537109375 |52.693408203125 |55.207568359374996 |20.142391681671143 |20.038144528865814| +|ResNet_v2_50 |4 |4 |3.3 |6567 |70.4563 |136.62061939701394 |688 |116.51574919944586 |121.8629150390625 |129.8181640625 |133.384423828125 |142.69500732421875 |20.143372297286987 |20.041599333286285| +|ResNet_v2_50 |4 |8 |3.0 |6567 |70.896 |158.46554975132275 |399 |201.30669079926378 |210.69775390625 |228.51748046875 |236.427294921875 |252.24822753906233 |20.143179416656494 |20.081032752990723| +|ResNet_v2_50 |4 |16 |3.2 |6567 |66.3832 
|156.4935247130092 |197 |407.6668608224937 |423.974609375 |450.368212890625 |464.45986328125 |482.93658203125 |20.141408443450928 |20.078101694583893| +|ResNet_v2_50 |4 |32 |3.3 |6567 |72.4791 |162.01742190796557 |104 |785.5079204852765 |813.0341796875 |887.107958984375 |909.6556640625 |935.3334838867188 |20.541000843048096 |20.423666059970856| +|ResNet_v2_50 |8 |1 |3.5 |6567 |93.977 |115.9749228558386 |2337 |68.5580409078145 |65.45849609375 |76.13930664062501 |83.542041015625 |91.45666015624998 |20.15090799331665 |20.028797417879105| +|ResNet_v2_50 |8 |4 |4.2 |6567 |90.0952 |175.58748591910316 |889 |180.7330482920592 |170.5810546875 |218.99931640625 |240.06337890625002 |254.413759765625 |20.252012729644775 |20.084695398807526| +|ResNet_v2_50 |8 |8 |2.6 |6567 |93.8693 |206.76595246418208 |526 |306.52158695119414 |303.043212890625 |321.0791015625 |350.5477294921875 |400.32452392578125 |20.351513147354126 |20.15437400341034| +|ResNet_v2_50 |8 |16 |3.2 |6567 |85.7273 |205.31850043117367 |265 |614.1745522553066 |552.372314453125 |775.89169921875 |802.022607421875 |902.2763183593761 |20.650842428207397 |20.345011442899704| +|ResNet_v2_50 |8 |32 |5.0 |6567 |89.8717 |219.8410273718835 |146 |1138.4533474020761 |1039.640869140625 |1364.289794921875 |1474.6744384765625 |1788.2614379882834 |21.251720190048218 |20.777225106954575| +|ResNet_v2_50 |12 |1 |5.0 |6567 |89.4762 |110.00858327847862 |2218 |108.50048552943953 |103.015625 |121.09404296875003 |137.1392333984375 |151.80401123046872 |20.162063121795654 |20.055511037508648| +|ResNet_v2_50 |12 |4 |4.1 |6567 |77.7619 |153.7824464757549 |779 |309.68895575507463 |285.585205078125 |378.07421875 |413.481640625 |424.70853515625 |20.262390613555908 |20.104551911354065| +|ResNet_v2_50 |12 |8 |3.6 |6567 |72.6977 |165.36021780846013 |425 |571.1991590073529 |510.995849609375 |731.9383300781251 |747.6568359375 |757.304716796875 |20.56117272377014 |20.230452219645183| +|ResNet_v2_50 |12 |16 |1.5 |6567 |76.2222 |189.6414991568285 |252 |987.7153136238219 |926.00390625 |1080.99130859375 |1249.4956298828126 |1434.4802392578124 |21.26116919517517 |20.74245794614156| +|ResNet_v2_50 |12 |32 |2.8 |6567 |84.25 |203.868228281784 |138 |1811.640237559443 |1764.2760009765625 |1855.28046875 |2023.56826171875 |2586.8038134765625 |21.66105055809021 |20.834286351998646| +|ResNet_v2_50 |16 |1 |4.8 |6567 |94.3333 |116.34927733312234 |2347 |136.7957122373642 |135.959716796875 |144.1568359375 |146.105517578125 |175.05707519531248 |20.172020435333252 |20.067057371139526| +|ResNet_v2_50 |16 |4 |15.4 |6567 |83.6364 |160.59012047270738 |822 |393.3079394412447 |396.446533203125 |426.272216796875 |429.777734375 |564.1119360351562 |20.47448492050171 |20.206754431128502| +|ResNet_v2_50 |16 |8 |6.8 |6567 |81.0233 |169.95774070621547 |437 |741.5512622684854 |751.521484375 |763.199169921875 |948.8041992187501 |1001.156142578125 |20.56981921195984 |20.254074171185493| +|ResNet_v2_50 |16 |16 |3.5 |6567 |77.8706 |186.56600081516 |248 |1332.1007946383568 |1365.2745361328125 |1399.212255859375 |1432.4037353515625 |1771.4374853515626 |21.26861262321472 |20.64799252152443| +|ResNet_v2_50 |16 |32 |4.3 |6567 |83.6371 |201.1293408638195 |140 |2419.3400198800223 |2561.09228515625 |2616.081103515625 |2642.0835205078124 |2883.8197412109366 |22.274224042892456 |21.169659316539764| diff --git a/doc/Offical_Docs/9-0_Kubernetes_Int_CN.md b/doc/Offical_Docs/9-0_Kubernetes_Int_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..cd06680961ae7272524bbf6852f30b5b45e6f401 --- /dev/null +++ 
b/doc/Offical_Docs/9-0_Kubernetes_Int_CN.md @@ -0,0 +1,7 @@ +Kubernetes 集群部署 + +服务部署经历从物理机、虚拟机、容器化、云原生4个阶段。云原生,提供集装箱组合模式的乐高生态,Docker、Kubernetes 已称为云原生时代基础设施,推动应用程序大发展。Kubernetes 的可扩展性和分布式架构一直是人工智能和机器学习的绝佳选择,随着解决方案不断成熟,推动机器学习大规模工程落地。 + +本章节介绍 Kubernetes 上集群化部署 Paddle Serving 方案以及企业级安全网关部署案例。 +- [Kubernetes 集群部署方案](./9-1_Kubernetes_CN.md) +- [Kubernetes 安全网关部署案例]() diff --git a/doc/Offical_Docs/9-1_Kubernetes_CN.md b/doc/Offical_Docs/9-1_Kubernetes_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..96fb5473fb4602c11d69200016f37183665efb73 --- /dev/null +++ b/doc/Offical_Docs/9-1_Kubernetes_CN.md @@ -0,0 +1,205 @@ +# Kubernetes 集群部署 + +Kubernetes 是一个基于容器技术的分布式架构的解决方案,是云原生容器集群管理系统,提供服务发现与负载均衡、存储编排、自动部署和回滚、资源管理、自动恢复以及密钥和配置管理。Paddle Serving 支持 Kubenetes 集群部署方案,为企业级用户提供集群部署示例。 + +## 部署方案 + +为了解决 Pod 迁移、Node Pod 端口、域名动态分配等问题,选择使用 Ingress 解决方案,对外提供可访问的 URL、负载均衡、SSL、基于名称的虚拟主机等功能。在众多 Ingress 插件中选用 Kong 作为微服务的 API 网关,因其具备以下优势: +- 拥有丰富的微服务功能,如 API认证、鉴权、DDos保护和灰度部署等 +- 提供一些 API、服务的定义,可抽象成 Kubernetes 的 CRD,通过 Kubernetes Ingress 配置实现同步状态到 Kong 集群 +- 集群配置信息存储在 postgres 数据库,配置信息实现全局节点共享和实时同步 +- 有成熟的第三方管理 UI,实现可视化管理 Kong 配置 + +Paddle Serving 的 Kubernetes 集群部署方案设计如下图所示,用户流量通过 Kong Ingress 转发到 Kubernetes 集群。Kubernetes 集群负责管理 Service 和 Pod 实例。 + +

+ +

+ +## 部署步骤 + +**一. 准备环境** + +推荐[购买并使用百度智能云 CCE 集群](https://cloud.baidu.com/doc/CCE/index.html),提供完整的部署环境。如自行安装 Kubenetes 集群,请参考[教程](https://kubernetes.io/zh/docs/setup/)。 + +此外,还需要准备一个用于 Kubenetes 集群部署的镜像仓库,通常与云服务提供商绑定,如果使用百度智能云的CCE集群,可以参照[百度智能云 CCR 镜像仓库使用方式](https://cloud.baidu.com/doc/CCR/index.html)。当然 Docker Hub 也可以作为镜像仓库,但下载速度慢,集群扩容时间较长。 + +在 Kubenetes 集群中运行下面命令,安装网关工具 Kong + +``` +kubectl apply -f https://bit.ly/kong-ingress-dbless +``` + +**二. 安装 Kubernetes ** +kubernetes 集群环境安装和启动步骤如下,并使用 kubectl 命令与通过它与 Kubernetes 进行交互和管理。 +``` +// close OS firewall +systemctl disable firewarlld +systemctl stop firewarlld + +// install etcd & kubernetes +yum install -y etcd kubernetes + +// start etcd & kubernetes +systemctl start etcd +systemctl start docker +systemctl start kube-apiserver +systemctl start kube-controller-manager +systemctl start kube-scheduler +systemctl start kubelet +systemctl start kube-proxy +``` + +**二. 制作镜像** + +首先,可直接使用 Paddle Serving 提供的镜像作为 Base 制作业务镜像,或者重新制作镜像。Paddle Serving 提供以下3种镜像,区别如下: +- 开发镜像:安装多种开发工具,可用于调试和编译代码,镜像体积较大。 +- 运行镜像:安装运行 Serving 的必备工具,经过裁剪后镜像体积较小,适合在存储受限场景使用 +- Java 镜像:为 Java SDK 提供基础环境,包括 JRE、JDK 和 Maven +- XPU 镜像:为 Arm 或 异构硬件(百度昆仑、海光DCU)环境部署 + +完整镜像列表,请参考 [DOCKER 开发镜像列表](./Docker_Images_CN.md) + +制作镜像的整体步骤如下,这里选定 Serving 运行镜像,相比于开发镜像体积更小,镜像内已安装相关的依赖和 Serving wheel 包。 +1.选定运行镜像:registry.baidubce.com/paddlepaddle/serving:0.8.3-cuda10.1-cudnn7-runtime +2.运行镜像并拷贝模型和服务代码到镜像中,当你需要部署外部其他模型时,更换模型和代码即可。 +3.制作并上传新镜像 + +假定已完成上述3个前置运行镜像并拷贝模型到镜像中,看具体操作。 +```bash +# Run docker +nvidia-docker run --rm -dit --name pipeline_serving_demo registry.baidubce.com/paddlepaddle/serving:0.8.0-cuda10.1-cudnn7-runtime bash + +# Enter your serving repo, and download OCR models +cd /home/work/Serving/examples/Pipeline/PaddleOCR/ocr + +python3 -m paddle_serving_app.package --get_model ocr_rec +tar -xzvf ocr_rec.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_det +tar -xzvf ocr_det.tar.gz +cd .. + +# Copy OCR directory to your docker +docker cp ocr pipeline_serving_demo:/home/ + +# Commit and push it +docker commit pipeline_serving_demo registry.baidubce.com/paddlepaddle/serving:k8s_ocr_pipeline_0.8.3_post101 +docker push registry.baidubce.com/paddlepaddle/serving:k8s_ocr_pipeline_0.8.3_post101 +``` + +最终,你完成了业务镜像制作环节。通过拉取制作的镜像,创建Docker示例后,在`/home`路径下验证模型目录,通过以下命令验证 Wheel 包安装。 +``` +pip3.7 list | grep paddle +``` +输出显示已安装3个 Serving Wheel 包和1个 Paddle Wheel 包。 +``` +paddle-serving-app 0.8.3 +paddle-serving-client 0.8.3 +paddle-serving-server-gpu 0.8.3.post101 +paddlepaddle-gpu 2.2.2.post101 +``` + +**三. 集群部署** + +Serving/tools/generate_k8s_yamls.sh 会生成 Kubernetes 部署配置。以 OCR 为例,运行以下命令生成 Kubernetes 集群配置。 +``` +sh tools/generate_k8s_yamls.sh --app_name ocr --image_name registry.baidubce.com/paddlepaddle/serving:k8s_ocr_pipeline_0.8.3_post101 --workdir /home/ocr --command "python3.7 web_service.py" --port 9999 +``` +生成信息如下: +``` +named arg: app_name: ocr +named arg: image_name: registry.baidubce.com/paddlepaddle/serving:k8s_ocr_pipeline_0.8.3_post101 +named arg: workdir: /home/ocr +named arg: command: python3.7 web_service.py +named arg: port: 9999 +check k8s_serving.yaml and k8s_ingress.yaml please. 
+``` + +运行命令后,生成2个 yaml 文件,分别是 k8s_serving.yaml 和 k8s_ingress.yaml。执行以下命令启动 Kubernetes 集群 和 Ingress 网关。 + +``` +kubectl create -f k8s_serving.yaml +kubectl create -f k8s_ingress.yaml +``` + +Kubernetes 下常用命令 +| 命令 | 说明 | +| --- | --- | +| kubectl create -f xxx.yaml | 使用 xxx.yml 创建资源对象 | +| kubectl apply -f xxx.yaml | 使用 xxx.yml 更新资源对象 | +| kubectl delete po mysql| 删除名为 mysql 的 pods | +| kubectl get all --all-namespace | 查询所有资源信息 | +| kubectl get po | 查询所有 pods | +| kubectl get namespace | 查询所有命名空间 | +| kubectl get rc | 查询所有| +| kubectl get services | 查询所有 services | +| kubectl get node | 查询所有 node 节点 | +| kubectl get deploy | 查询集群部署状态 | + +按下面4个步骤查询集群状态并进入 Pod 容器: + +1. 最终通过输入以下命令检验集群部署状态: +``` +kubectl get deploy + +``` + +部署状态如下: +``` +NAME READY UP-TO-DATE AVAILABLE AGE +ocr 1/1 1 1 10m +``` + +2. 查询全部 Pod 信息 运行命令: +``` +kubectl get pods +``` +查询 Pod 信息如下: +``` +NAME READY STATUS RESTARTS AGE +ocr-c5bd77d49-mfh72 1/1 Running 0 10m +uci-5bc7d545f5-zfn65 1/1 Running 0 52d +``` + +3. 进入 Pod container 运行命令: +``` +kubectl exec -ti ocr-c5bd77d49-mfh72 -n bash +``` + +4. 查询集群服务状态: +``` +kubectl get service --all-namespaces +``` + +集群部署状态如下: +``` +NAMESPACE NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +default bert ClusterIP 172.16.86.12 9292/TCP 20m +default kubernetes ClusterIP 172.16.0.1 443/TCP 28d +default ocr ClusterIP 172.16.152.43 9999/TCP 50m +kong kong-proxy LoadBalancer 172.16.88.132 80:8893/TCP,443:8805/TCP 25d +kong kong-validation-webhook ClusterIP 172.16.38.100 443/TCP 25d +kube-system heapster ClusterIP 172.16.240.64 80/TCP 28d +kube-system kube-dns ClusterIP 172.16.0.10 53/UDP,53/TCP,9153/TCP 28d +kube-system metrics-server ClusterIP 172.16.34.157 443/TCP 28d +``` + +根据 kong-proxy 的 CLUSTER-IP 和 端口信息,访问 URL: http://172.16.88.132:80/ocr/prediction 查询 OCR 服务。 + +**四.更新镜像** + +假定更新了文件或数据,重新生成 k8s_serving.yaml 和 k8s_ingress.yaml。 +``` +sh tools/generate_k8s_yamls.sh --app_name ocr --image_name registry.baidubce.com/paddlepaddle/serving:k8s_ocr_pipeline_0.8.3_post101 --workdir /home/ocr --command "python3.7 web_service.py" --port 9999 +``` +更新配置,并重启Pod +``` +kubectl apply -f k8s_serving.yaml +kubectl apply -f k8s_ingress.yaml + +# 查找 ocr 的 pod name +kubectl get pods + +# 更新 pod +kubectl exec -it ocr-c5bd77d49-s8jwh -n default -- /bin/sh +``` diff --git a/doc/Offical_Docs/Home_Page_CN.md b/doc/Offical_Docs/Home_Page_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..183e060b326a5d9faffe391b5aa8239b3e07c4b0 --- /dev/null +++ b/doc/Offical_Docs/Home_Page_CN.md @@ -0,0 +1,88 @@ +# Paddle Serving - 端到端服务化推理框架 + +## 1.Paddle Serving 介绍 +面向模型服务化部署场景的端到端服务化推理框架 Paddle Serving,可以实现飞桨模型在 X86、ARM 平台多种硬件上高性能服务化部署,支持5种以上的 GPU、NPU 硬件推理加速;此外,Paddle Serving 提供 Docker 和 Kubernetes 的云端部署方案。 + +## 2.快速上手-代码模块 + +进入到 Serving 的 git 目录下,进入到 [fit_a_line](https://github.com/PaddlePaddle/Serving/tree/v0.8.3/examples/C%2B%2B/fit_a_line) 示例 +``` +## 下载模型 +sh get_data.sh + +## 启动服务 +python3 -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 + +## HTTP curl +curl -XPOST http://0.0.0.0:9393/GeneralModelService/inference -d ' {"tensor":[{"float_data":[0.0137,-0.1136,0.2553,-0.0692,0.0582,-0.0727,-0.1583,-0.0584,0.6283,0.4919,0.1856,0.0795,-0.0332],"elem_type":1,"name":"x","alias_name":"x","shape":[1,13]}],"fetch_var_names":["price"],"log_id":0}' +``` + +## 3.部署流程图 + +开发流程:①准备部署环境;②准备部署模型;③Serving程序开发;④服务启动与优化 + +**①准备部署环境** +docker 是一个开源的应用容器引擎,可以让应用程序更加方便地被打包和移植。Paddle Serving 容器化部署建议在 docker 中进行Serving服务化部署。在 Serving Docker 环境中安装 PYTHON Wheel 包 + 
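+下面给出准备部署环境的参考命令,其中镜像名与 wheel 包版本仅为示例,请以实际发布版本为准:
+
+```
+# 拉取并启动 Serving 运行镜像(GPU 环境,镜像名仅为示例)
+docker pull registry.baidubce.com/paddlepaddle/serving:0.8.3-cuda10.1-cudnn7-runtime
+nvidia-docker run -dit --name paddle_serving registry.baidubce.com/paddlepaddle/serving:0.8.3-cuda10.1-cudnn7-runtime bash
+docker exec -it paddle_serving bash
+
+# 在容器内安装 Serving 相关 wheel 包(版本号仅为示例)
+pip3 install paddle-serving-client==0.8.3 paddle-serving-app==0.8.3 -i https://pypi.tuna.tsinghua.edu.cn/simple
+pip3 install paddle-serving-server-gpu==0.8.3.post101 -i https://pypi.tuna.tsinghua.edu.cn/simple
+```
+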
+**②准备部署模型** + +下载推理模型后,为了便于模型服务化部署,需要将推理模型保存成用于 Serving 部署的参数形式 + +**③Serving程序开发** + +修改服务端和客户端代码适配模型的前后处理,通过修改配置或命令行参数,如端口、指定硬件和并发数量等指定部署参数。 + +**④服务启动与优化** + +命令方式启动服务端和客户端,根据输出结果和性能指标做进一步的性能优化。 + +## 4.Demo 展示区 + +参考 [模型库](./4-0_ModelZoo_CN.md) + +## 5.核心优势 + +Paddle Serving 具备工业级功能、高性能等优势。 + +**一.工业级** + +- 支持 HTTP、gRPC、bRPC 等多种协议;提供 C++、Python、Java 语言 SDK +- 设计并实现基于有向无环图(DAG)的异步流水线高性能推理框架,具有多模型组合、异步调度、并发推理、动态批量、多卡多流推理、请求缓存等特性 +- 适配 x86(Intel) CPU、ARM CPU、Nvidia GPU、昆仑 XPU、华为昇腾310/910、海光 DCU、Nvidia Jetson 等多种硬件 +- 集成 Intel MKLDNN、Nvidia TensorRT 加速库,以及低精度和量化推理 +- 提供一套模型安全部署解决方案,包括加密模型部署、鉴权校验、HTTPs 安全网关,并在实际项目中应用 +- 支持云端部署,提供百度云智能云 kubernetes 集群部署 Paddle Serving 案例 +- 提供丰富的经典模型部署示例,如 PaddleOCR、PaddleClas、PaddleDetection、PaddleSeg、PaddleNLP 和 PaddleRec等套件,共计40多个预训练精品模型 + +**二.高性能** + +# 1. 测试环境和说明 +1) GPU型号:Tesla P4(7611 Mib) +2) Cuda版本:11.0 +3) 模型:ResNet_v2_50 +4) 为了测试异步合并batch的效果,测试数据中batch=1 +5) [使用的测试代码和使用的数据集](../../examples/C++/PaddleClas/resnet_v2_50) +6) 下图中蓝色是C++ Serving,灰色为TF-Serving。 +7) 折线图为QPS,数值越大表示每秒钟处理的请求数量越大,性能就越好。 +8) 柱状图为平均处理时延,数值越大表示单个请求处理时间越长,性能就越差。 + +同步模型默认参数配置情况下,C++ Serving QPS 和平均时延指标均优于 TF-Serving。 +

+
+ +
+

+ +异步模式情况下,两者性能接近,但当 Client 并发数达到70的时候,TF-Serving 服务直接超时,而 C++ Serving 能够正常返回结果。 +

+
+ +
+

+ + +## 6.合作案例 + +## 7.资源汇总 + +## 8.开发者贡献&社区 diff --git a/doc/Offical_Docs/images/6-1_Cpp_Asynchronous_Framwork_CN_1.png b/doc/Offical_Docs/images/6-1_Cpp_Asynchronous_Framwork_CN_1.png new file mode 100644 index 0000000000000000000000000000000000000000..88c36f7688719d7081598e5177cd64042ab5c9de Binary files /dev/null and b/doc/Offical_Docs/images/6-1_Cpp_Asynchronous_Framwork_CN_1.png differ diff --git a/doc/Offical_Docs/images/6-5_Cpp_ABTest_CN_1.png b/doc/Offical_Docs/images/6-5_Cpp_ABTest_CN_1.png new file mode 100644 index 0000000000000000000000000000000000000000..5e8f8980dffb46f4960390e6edb281968ae8bd83 Binary files /dev/null and b/doc/Offical_Docs/images/6-5_Cpp_ABTest_CN_1.png differ diff --git a/doc/Run_On_NPU_CN.md b/doc/Run_On_NPU_CN.md index 2919ae7290071b409852634e4274911d8f46992b..74a438bca1e4bfa0016c4165ab1edb372ef62268 100644 --- a/doc/Run_On_NPU_CN.md +++ b/doc/Run_On_NPU_CN.md @@ -75,7 +75,7 @@ go env -w GOPROXY=https://goproxy.cn,direct go install github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2 go install github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2 go install github.com/golang/protobuf/protoc-gen-go@v1.4.3 -go install google.golang.org/grpc@v1.33.0 +go install google.golang.org/grpc@v1.33.1 go env -w GO111MODULE=auto ``` @@ -193,4 +193,4 @@ python3 -m paddle_serving_server.serve --model resnet_v2_50_imagenet_model --thr ## 其他说明 ### NPU芯片支持相关参考资料 -* [昇腾NPU芯片运行飞桨](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/09_hardware_support/npu_docs/paddle_install_cn.html) \ No newline at end of file +* [昇腾NPU芯片运行飞桨](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/09_hardware_support/npu_docs/paddle_install_cn.html) diff --git a/doc/Save_CN.md b/doc/Save_CN.md index 29b260bfb029333dff8da7db83f651aa6b8bb8af..6c8122ea4517d3b4f731b8f27c311cbdec2f58db 100644 --- a/doc/Save_CN.md +++ b/doc/Save_CN.md @@ -2,6 +2,12 @@ (简体中文|[English](./Save_EN.md)) +## 保存用于 Serving 部署模型的意义 + + + + + ## 从已保存的模型文件中导出 如果已使用Paddle 的`save_inference_model`接口保存出预测要使用的模型,你可以使用Paddle Serving提供的名为`paddle_serving_client.convert`的内置模块进行转换。 ```python diff --git a/doc/TensorRT_Dynamic_Shape_CN.md b/doc/TensorRT_Dynamic_Shape_CN.md index 7ffc8693a4b8070c6395ad0c0fe200d646fc1df6..7a9759c33ce6e9b5658af91d0deed3919856d340 100644 --- a/doc/TensorRT_Dynamic_Shape_CN.md +++ b/doc/TensorRT_Dynamic_Shape_CN.md @@ -33,6 +33,7 @@ python -m paddle_serving_server.serve \ **二. C++ Serving 设置动态 shape** +1. 方法一: 在`**/paddle_inference/paddle/include/paddle_engine.h` 修改如下代码 ``` @@ -127,6 +128,55 @@ python -m paddle_serving_server.serve \ } ``` +2. 
方法二: +在`**/python/paddle_serving_server/serve.py` 参考如下代码生成配置信息, +并使用`server.set_trt_dynamic_shape_info(info)`方法进行设置 + +``` +def set_ocr_dynamic_shape_info(): + info = [] + min_input_shape = { + "x": [1, 3, 50, 50], + "conv2d_182.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_2.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_3.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_4.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_5.tmp_0": [1, 1, 20, 20] + } + max_input_shape = { + "x": [1, 3, 1536, 1536], + "conv2d_182.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_2.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_3.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_4.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_5.tmp_0": [20, 200, 960, 960], + } + opt_input_shape = { + "x": [1, 3, 960, 960], + "conv2d_182.tmp_0": [3, 96, 240, 240], + "nearest_interp_v2_2.tmp_0": [3, 96, 240, 240], + "nearest_interp_v2_3.tmp_0": [3, 24, 240, 240], + "nearest_interp_v2_4.tmp_0": [3, 24, 240, 240], + "nearest_interp_v2_5.tmp_0": [3, 24, 240, 240], + } + det_info = { + "min_input_shape": min_input_shape, + "max_input_shape": max_input_shape, + "opt_input_shape": opt_input_shape, + } + info.append(det_info) + min_input_shape = {"x": [1, 3, 32, 10], "lstm_1.tmp_0": [1, 1, 128]} + max_input_shape = {"x": [50, 3, 32, 1000], "lstm_1.tmp_0": [500, 50, 128]} + opt_input_shape = {"x": [6, 3, 32, 100], "lstm_1.tmp_0": [25, 5, 128]} + rec_info = { + "min_input_shape": min_input_shape, + "max_input_shape": max_input_shape, + "opt_input_shape": opt_input_shape, + } + info.append(rec_info) + return info +``` + ## Pipeline Serving diff --git a/doc/TensorRT_Dynamic_Shape_EN.md b/doc/TensorRT_Dynamic_Shape_EN.md index c9c9aabf2025028cf0aa22e9e86187cdc8f8cfad..47a6704971eda2172e19b460cdd22eea2d16279f 100644 --- a/doc/TensorRT_Dynamic_Shape_EN.md +++ b/doc/TensorRT_Dynamic_Shape_EN.md @@ -16,6 +16,8 @@ The following is the dynamic shape api For detail, please refer to API doc [C++](https://paddleinference.paddlepaddle.org.cn/api_reference/cxx_api_doc/Config/GPUConfig.html#tensorrt)/[Python](https://paddleinference.paddlepaddle.org.cn/api_reference/python_api_doc/Config/GPUConfig.html#tensorrt) ### C++ Serving + +1. Method 1: Modify the following code in `**/paddle_inference/paddle/include/paddle_engine.h` ``` @@ -110,6 +112,54 @@ Modify the following code in `**/paddle_inference/paddle/include/paddle_engine.h } ``` +2. Method 2: +Refer to the code of `**/python/paddle_serving_server/serve.py` below to generate the configuration information, +and using method `server.set_trt_dynamic_shape_info(info)` to set information. 
+ +``` +def set_ocr_dynamic_shape_info(): + info = [] + min_input_shape = { + "x": [1, 3, 50, 50], + "conv2d_182.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_2.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_3.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_4.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_5.tmp_0": [1, 1, 20, 20] + } + max_input_shape = { + "x": [1, 3, 1536, 1536], + "conv2d_182.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_2.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_3.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_4.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_5.tmp_0": [20, 200, 960, 960], + } + opt_input_shape = { + "x": [1, 3, 960, 960], + "conv2d_182.tmp_0": [3, 96, 240, 240], + "nearest_interp_v2_2.tmp_0": [3, 96, 240, 240], + "nearest_interp_v2_3.tmp_0": [3, 24, 240, 240], + "nearest_interp_v2_4.tmp_0": [3, 24, 240, 240], + "nearest_interp_v2_5.tmp_0": [3, 24, 240, 240], + } + det_info = { + "min_input_shape": min_input_shape, + "max_input_shape": max_input_shape, + "opt_input_shape": opt_input_shape, + } + info.append(det_info) + min_input_shape = {"x": [1, 3, 32, 10], "lstm_1.tmp_0": [1, 1, 128]} + max_input_shape = {"x": [50, 3, 32, 1000], "lstm_1.tmp_0": [500, 50, 128]} + opt_input_shape = {"x": [6, 3, 32, 100], "lstm_1.tmp_0": [25, 5, 128]} + rec_info = { + "min_input_shape": min_input_shape, + "max_input_shape": max_input_shape, + "opt_input_shape": opt_input_shape, + } + info.append(rec_info) + return info +``` ### Pipeline Serving @@ -151,4 +201,4 @@ if use_trt: names[3]: [10, head_number, 60, 60] }) -``` \ No newline at end of file +``` diff --git a/doc/images/kubernetes_design.png b/doc/images/kubernetes_design.png new file mode 100644 index 0000000000000000000000000000000000000000..05ace675ba636f7d271cb9db8cdba741db494026 Binary files /dev/null and b/doc/images/kubernetes_design.png differ diff --git a/doc/images/low_precision_profile.png b/doc/images/low_precision_profile.png new file mode 100644 index 0000000000000000000000000000000000000000..82356df740bf4a31bc47ae2017103e7fca3792be Binary files /dev/null and b/doc/images/low_precision_profile.png differ diff --git a/doc/images/wechat_group_1.jpeg b/doc/images/wechat_group_1.jpeg index 4e9e65b14c5dcd594ecd95e7ea56add14d0f92d4..80e8acc728faaae7bcb254e4fed93dfaffd84d59 100644 Binary files a/doc/images/wechat_group_1.jpeg and b/doc/images/wechat_group_1.jpeg differ diff --git a/doc/wechat_group_1.jpeg b/doc/wechat_group_1.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..443a1549d5c79e86b26038d7eb2e704ed5f9213f Binary files /dev/null and b/doc/wechat_group_1.jpeg differ diff --git a/examples/C++/PaddleClas/resnet_50_vd/README_CN.md b/examples/C++/PaddleClas/resnet_50_vd/README_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..0034b5b4c03e572b7a27ff6296e185fed713eabb --- /dev/null +++ b/examples/C++/PaddleClas/resnet_50_vd/README_CN.md @@ -0,0 +1,69 @@ +# 图像分类 + +## 1.获取模型 + +``` +wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/inference/ResNet50_vd_infer.tar && tar xf ResNet50_vd_infer.tar + +``` + +## 2.用 paddle_serving_client 把下载的推理模型保存用于 Serving 部署的模型参数 +``` +# 保存 ResNet50_vd 模型参数 +python3 -m paddle_serving_client.convert --dirname ./ResNet50_vd_infer/ \ + --model_filename inference.pdmodel \ + --params_filename inference.pdiparams \ + --serving_server ./ResNet50_vd_serving/ \ + --serving_client ./ResNet50_vd_client/ +``` +会在当前文件夹多出 `ResNet50_vd_serving` 和 `ResNet50_vd_client` 的文件夹 + + +保存参数后,会在当前文件夹多出 `ResNet50_vd_serving` 和 
`ResNet50_vd_client` 的文件夹:
+```
+├── daisy.jpg
+├── http_client.py
+├── imagenet.label
+├── ResNet50_vd_client
+│   ├── serving_client_conf.prototxt
+│   └── serving_client_conf.stream.prototxt
+├── ResNet50_vd_infer
+│   ├── inference.pdiparams
+│   ├── inference.pdiparams.info
+│   └── inference.pdmodel
+├── ResNet50_vd_serving
+│   ├── fluid_time_file
+│   ├── inference.pdiparams
+│   ├── inference.pdmodel
+│   ├── serving_server_conf.prototxt
+│   └── serving_server_conf.stream.prototxt
+└── rpc_client.py
+```
+
+## 3.启动服务
+
+C++ Serving 服务可以指定一个网络端口同时接收 HTTP、gRPC 和 bRPC 请求。命令参数 `--model` 指定模型路径,`--gpu_ids` 指定 GPU 卡,`--port` 指定端口。
+
+```
+python3 -m paddle_serving_server.serve --model ResNet50_vd_serving --gpu_ids 0 --port 9394
+```
+
+## 4.启动客户端
+
+1. `http_client.py` 封装了 HTTP 请求客户端(也可切换为 gRPC 请求)
+
+```
+python3 http_client.py
+```
+
+2. `rpc_client.py` 封装了 bRPC 请求客户端
+
+```
+python3 rpc_client.py
+```
+
+成功运行后,模型预测的结果会打印如下:
+
+```
+prediction: daisy, probability: 0.9341399073600769
+```
diff --git a/examples/C++/PaddleClas/resnet_50_vd/daisy.jpg b/examples/C++/PaddleClas/resnet_50_vd/daisy.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..7edeca63e5f32e68550ef720d81f59df58a8eabc
Binary files /dev/null and b/examples/C++/PaddleClas/resnet_50_vd/daisy.jpg differ
diff --git a/examples/C++/PaddleClas/resnet_50_vd/http_client.py b/examples/C++/PaddleClas/resnet_50_vd/http_client.py
new file mode 100644
index 0000000000000000000000000000000000000000..722f489e30d5e7e98408f287540ff1b6c1cf0cfc
--- /dev/null
+++ b/examples/C++/PaddleClas/resnet_50_vd/http_client.py
@@ -0,0 +1,82 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+from paddle_serving_client import HttpClient
+
+#app
+from paddle_serving_app.reader import Sequential, URL2Image, Resize
+from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize
+import time
+
+client = HttpClient()
+client.load_client_config("./ResNet50_vd_client/serving_client_conf.prototxt")
+'''
+if you want use GRPC-client, set_use_grpc_client(True)
+or you can directly use client.grpc_client_predict(...)
+as for HTTP-client,set_use_grpc_client(False)(which is default)
+or you can directly use client.http_client_predict(...)
+''' +#client.set_use_grpc_client(True) +''' +if you want to enable Encrypt Module,uncommenting the following line +''' +#client.use_key("./key") +''' +if you want to compress,uncommenting the following line +''' +#client.set_response_compress(True) +#client.set_request_compress(True) +''' +we recommend use Proto data format in HTTP-body, set True(which is default) +if you want use JSON data format in HTTP-body, set False +''' +#client.set_http_proto(True) +client.connect(["127.0.0.1:9394"]) + +label_dict = {} +label_idx = 0 +with open("imagenet.label") as fin: + for line in fin: + label_dict[label_idx] = line.strip() + label_idx += 1 + +#preprocess +seq = Sequential([ + URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)), + Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True) +]) + +start = time.time() +image_file = "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg" +for i in range(1): + img = seq(image_file) + res = client.predict(feed={"inputs": img}, fetch=[], batch=False) + + if res is None: + raise ValueError("predict error") + + if res.err_no != 0: + raise ValueError("predict error. Response : {}".format(res)) + + max_val = res.outputs[0].tensor[0].float_data[0] + max_idx = 0 + for one_data in enumerate(res.outputs[0].tensor[0].float_data): + if one_data[1] > max_val: + max_val = one_data[1] + max_idx = one_data[0] + label = label_dict[max_idx].strip().replace(",", "") + print("prediction: {}, probability: {}".format(label, max_val)) +end = time.time() +print(end - start) diff --git a/examples/C++/PaddleClas/resnet_50_vd/imagenet.label b/examples/C++/PaddleClas/resnet_50_vd/imagenet.label new file mode 100644 index 0000000000000000000000000000000000000000..d7146735146ea1894173d6d0e20fb90af36be849 --- /dev/null +++ b/examples/C++/PaddleClas/resnet_50_vd/imagenet.label @@ -0,0 +1,1000 @@ +tench, Tinca tinca, +goldfish, Carassius auratus, +great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias, +tiger shark, Galeocerdo cuvieri, +hammerhead, hammerhead shark, +electric ray, crampfish, numbfish, torpedo, +stingray, +cock, +hen, +ostrich, Struthio camelus, +brambling, Fringilla montifringilla, +goldfinch, Carduelis carduelis, +house finch, linnet, Carpodacus mexicanus, +junco, snowbird, +indigo bunting, indigo finch, indigo bird, Passerina cyanea, +robin, American robin, Turdus migratorius, +bulbul, +jay, +magpie, +chickadee, +water ouzel, dipper, +kite, +bald eagle, American eagle, Haliaeetus leucocephalus, +vulture, +great grey owl, great gray owl, Strix nebulosa, +European fire salamander, Salamandra salamandra, +common newt, Triturus vulgaris, +eft, +spotted salamander, Ambystoma maculatum, +axolotl, mud puppy, Ambystoma mexicanum, +bullfrog, Rana catesbeiana, +tree frog, tree-frog, +tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui, +loggerhead, loggerhead turtle, Caretta caretta, +leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea, +mud turtle, +terrapin, +box turtle, box tortoise, +banded gecko, +common iguana, iguana, Iguana iguana, +American chameleon, anole, Anolis carolinensis, +whiptail, whiptail lizard, +agama, +frilled lizard, Chlamydosaurus kingi, +alligator lizard, +Gila monster, Heloderma suspectum, +green lizard, Lacerta viridis, +African chameleon, Chamaeleo chamaeleon, +Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis, +African crocodile, Nile crocodile, Crocodylus niloticus, +American alligator, Alligator 
mississipiensis, +triceratops, +thunder snake, worm snake, Carphophis amoenus, +ringneck snake, ring-necked snake, ring snake, +hognose snake, puff adder, sand viper, +green snake, grass snake, +king snake, kingsnake, +garter snake, grass snake, +water snake, +vine snake, +night snake, Hypsiglena torquata, +boa constrictor, Constrictor constrictor, +rock python, rock snake, Python sebae, +Indian cobra, Naja naja, +green mamba, +sea snake, +horned viper, cerastes, sand viper, horned asp, Cerastes cornutus, +diamondback, diamondback rattlesnake, Crotalus adamanteus, +sidewinder, horned rattlesnake, Crotalus cerastes, +trilobite, +harvestman, daddy longlegs, Phalangium opilio, +scorpion, +black and gold garden spider, Argiope aurantia, +barn spider, Araneus cavaticus, +garden spider, Aranea diademata, +black widow, Latrodectus mactans, +tarantula, +wolf spider, hunting spider, +tick, +centipede, +black grouse, +ptarmigan, +ruffed grouse, partridge, Bonasa umbellus, +prairie chicken, prairie grouse, prairie fowl, +peacock, +quail, +partridge, +African grey, African gray, Psittacus erithacus, +macaw, +sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita, +lorikeet, +coucal, +bee eater, +hornbill, +hummingbird, +jacamar, +toucan, +drake, +red-breasted merganser, Mergus serrator, +goose, +black swan, Cygnus atratus, +tusker, +echidna, spiny anteater, anteater, +platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus, +wallaby, brush kangaroo, +koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus, +wombat, +jellyfish, +sea anemone, anemone, +brain coral, +flatworm, platyhelminth, +nematode, nematode worm, roundworm, +conch, +snail, +slug, +sea slug, nudibranch, +chiton, coat-of-mail shell, sea cradle, polyplacophore, +chambered nautilus, pearly nautilus, nautilus, +Dungeness crab, Cancer magister, +rock crab, Cancer irroratus, +fiddler crab, +king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica, +American lobster, Northern lobster, Maine lobster, Homarus americanus, +spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish, +crayfish, crawfish, crawdad, crawdaddy, +hermit crab, +isopod, +white stork, Ciconia ciconia, +black stork, Ciconia nigra, +spoonbill, +flamingo, +little blue heron, Egretta caerulea, +American egret, great white heron, Egretta albus, +bittern, +crane, +limpkin, Aramus pictus, +European gallinule, Porphyrio porphyrio, +American coot, marsh hen, mud hen, water hen, Fulica americana, +bustard, +ruddy turnstone, Arenaria interpres, +red-backed sandpiper, dunlin, Erolia alpina, +redshank, Tringa totanus, +dowitcher, +oystercatcher, oyster catcher, +pelican, +king penguin, Aptenodytes patagonica, +albatross, mollymawk, +grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus, +killer whale, killer, orca, grampus, sea wolf, Orcinus orca, +dugong, Dugong dugon, +sea lion, +Chihuahua, +Japanese spaniel, +Maltese dog, Maltese terrier, Maltese, +Pekinese, Pekingese, Peke, +Shih-Tzu, +Blenheim spaniel, +papillon, +toy terrier, +Rhodesian ridgeback, +Afghan hound, Afghan, +basset, basset hound, +beagle, +bloodhound, sleuthhound, +bluetick, +black-and-tan coonhound, +Walker hound, Walker foxhound, +English foxhound, +redbone, +borzoi, Russian wolfhound, +Irish wolfhound, +Italian greyhound, +whippet, +Ibizan hound, Ibizan Podenco, +Norwegian elkhound, elkhound, +otterhound, otter hound, +Saluki, gazelle hound, +Scottish deerhound, deerhound, +Weimaraner, 
+Staffordshire bullterrier, Staffordshire bull terrier, +American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier, +Bedlington terrier, +Border terrier, +Kerry blue terrier, +Irish terrier, +Norfolk terrier, +Norwich terrier, +Yorkshire terrier, +wire-haired fox terrier, +Lakeland terrier, +Sealyham terrier, Sealyham, +Airedale, Airedale terrier, +cairn, cairn terrier, +Australian terrier, +Dandie Dinmont, Dandie Dinmont terrier, +Boston bull, Boston terrier, +miniature schnauzer, +giant schnauzer, +standard schnauzer, +Scotch terrier, Scottish terrier, Scottie, +Tibetan terrier, chrysanthemum dog, +silky terrier, Sydney silky, +soft-coated wheaten terrier, +West Highland white terrier, +Lhasa, Lhasa apso, +flat-coated retriever, +curly-coated retriever, +golden retriever, +Labrador retriever, +Chesapeake Bay retriever, +German short-haired pointer, +vizsla, Hungarian pointer, +English setter, +Irish setter, red setter, +Gordon setter, +Brittany spaniel, +clumber, clumber spaniel, +English springer, English springer spaniel, +Welsh springer spaniel, +cocker spaniel, English cocker spaniel, cocker, +Sussex spaniel, +Irish water spaniel, +kuvasz, +schipperke, +groenendael, +malinois, +briard, +kelpie, +komondor, +Old English sheepdog, bobtail, +Shetland sheepdog, Shetland sheep dog, Shetland, +collie, +Border collie, +Bouvier des Flandres, Bouviers des Flandres, +Rottweiler, +German shepherd, German shepherd dog, German police dog, alsatian, +Doberman, Doberman pinscher, +miniature pinscher, +Greater Swiss Mountain dog, +Bernese mountain dog, +Appenzeller, +EntleBucher, +boxer, +bull mastiff, +Tibetan mastiff, +French bulldog, +Great Dane, +Saint Bernard, St Bernard, +Eskimo dog, husky, +malamute, malemute, Alaskan malamute, +Siberian husky, +dalmatian, coach dog, carriage dog, +affenpinscher, monkey pinscher, monkey dog, +basenji, +pug, pug-dog, +Leonberg, +Newfoundland, Newfoundland dog, +Great Pyrenees, +Samoyed, Samoyede, +Pomeranian, +chow, chow chow, +keeshond, +Brabancon griffon, +Pembroke, Pembroke Welsh corgi, +Cardigan, Cardigan Welsh corgi, +toy poodle, +miniature poodle, +standard poodle, +Mexican hairless, +timber wolf, grey wolf, gray wolf, Canis lupus, +white wolf, Arctic wolf, Canis lupus tundrarum, +red wolf, maned wolf, Canis rufus, Canis niger, +coyote, prairie wolf, brush wolf, Canis latrans, +dingo, warrigal, warragal, Canis dingo, +dhole, Cuon alpinus, +African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus, +hyena, hyaena, +red fox, Vulpes vulpes, +kit fox, Vulpes macrotis, +Arctic fox, white fox, Alopex lagopus, +grey fox, gray fox, Urocyon cinereoargenteus, +tabby, tabby cat, +tiger cat, +Persian cat, +Siamese cat, Siamese, +Egyptian cat, +cougar, puma, catamount, mountain lion, painter, panther, Felis concolor, +lynx, catamount, +leopard, Panthera pardus, +snow leopard, ounce, Panthera uncia, +jaguar, panther, Panthera onca, Felis onca, +lion, king of beasts, Panthera leo, +tiger, Panthera tigris, +cheetah, chetah, Acinonyx jubatus, +brown bear, bruin, Ursus arctos, +American black bear, black bear, Ursus americanus, Euarctos americanus, +ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus, +sloth bear, Melursus ursinus, Ursus ursinus, +mongoose, +meerkat, mierkat, +tiger beetle, +ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle, +ground beetle, carabid beetle, +long-horned beetle, longicorn, longicorn beetle, +leaf beetle, chrysomelid, +dung beetle, +rhinoceros beetle, +weevil, +fly, +bee, +ant, 
emmet, pismire, +grasshopper, hopper, +cricket, +walking stick, walkingstick, stick insect, +cockroach, roach, +mantis, mantid, +cicada, cicala, +leafhopper, +lacewing, lacewing fly, +"dragonfly, darning needle, devils darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", +damselfly, +admiral, +ringlet, ringlet butterfly, +monarch, monarch butterfly, milkweed butterfly, Danaus plexippus, +cabbage butterfly, +sulphur butterfly, sulfur butterfly, +lycaenid, lycaenid butterfly, +starfish, sea star, +sea urchin, +sea cucumber, holothurian, +wood rabbit, cottontail, cottontail rabbit, +hare, +Angora, Angora rabbit, +hamster, +porcupine, hedgehog, +fox squirrel, eastern fox squirrel, Sciurus niger, +marmot, +beaver, +guinea pig, Cavia cobaya, +sorrel, +zebra, +hog, pig, grunter, squealer, Sus scrofa, +wild boar, boar, Sus scrofa, +warthog, +hippopotamus, hippo, river horse, Hippopotamus amphibius, +ox, +water buffalo, water ox, Asiatic buffalo, Bubalus bubalis, +bison, +ram, tup, +bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis, +ibex, Capra ibex, +hartebeest, +impala, Aepyceros melampus, +gazelle, +Arabian camel, dromedary, Camelus dromedarius, +llama, +weasel, +mink, +polecat, fitch, foulmart, foumart, Mustela putorius, +black-footed ferret, ferret, Mustela nigripes, +otter, +skunk, polecat, wood pussy, +badger, +armadillo, +three-toed sloth, ai, Bradypus tridactylus, +orangutan, orang, orangutang, Pongo pygmaeus, +gorilla, Gorilla gorilla, +chimpanzee, chimp, Pan troglodytes, +gibbon, Hylobates lar, +siamang, Hylobates syndactylus, Symphalangus syndactylus, +guenon, guenon monkey, +patas, hussar monkey, Erythrocebus patas, +baboon, +macaque, +langur, +colobus, colobus monkey, +proboscis monkey, Nasalis larvatus, +marmoset, +capuchin, ringtail, Cebus capucinus, +howler monkey, howler, +titi, titi monkey, +spider monkey, Ateles geoffroyi, +squirrel monkey, Saimiri sciureus, +Madagascar cat, ring-tailed lemur, Lemur catta, +indri, indris, Indri indri, Indri brevicaudatus, +Indian elephant, Elephas maximus, +African elephant, Loxodonta africana, +lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens, +giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca, +barracouta, snoek, +eel, +coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch, +rock beauty, Holocanthus tricolor, +anemone fish, +sturgeon, +gar, garfish, garpike, billfish, Lepisosteus osseus, +lionfish, +puffer, pufferfish, blowfish, globefish, +abacus, +abaya, +"academic gown, academic robe, judges robe", +accordion, piano accordion, squeeze box, +acoustic guitar, +aircraft carrier, carrier, flattop, attack aircraft carrier, +airliner, +airship, dirigible, +altar, +ambulance, +amphibian, amphibious vehicle, +analog clock, +apiary, bee house, +apron, +ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin, +assault rifle, assault gun, +backpack, back pack, knapsack, packsack, rucksack, haversack, +bakery, bakeshop, bakehouse, +balance beam, beam, +balloon, +ballpoint, ballpoint pen, ballpen, Biro, +Band Aid, +banjo, +bannister, banister, balustrade, balusters, handrail, +barbell, +barber chair, +barbershop, +barn, +barometer, +barrel, cask, +barrow, garden cart, lawn cart, wheelbarrow, +baseball, +basketball, +bassinet, +bassoon, +bathing cap, swimming cap, +bath towel, +bathtub, bathing tub, bath, tub, +beach wagon, station wagon, wagon, estate car, beach waggon, 
station waggon, waggon, +beacon, lighthouse, beacon light, pharos, +beaker, +bearskin, busby, shako, +beer bottle, +beer glass, +bell cote, bell cot, +bib, +bicycle-built-for-two, tandem bicycle, tandem, +bikini, two-piece, +binder, ring-binder, +binoculars, field glasses, opera glasses, +birdhouse, +boathouse, +bobsled, bobsleigh, bob, +bolo tie, bolo, bola tie, bola, +bonnet, poke bonnet, +bookcase, +bookshop, bookstore, bookstall, +bottlecap, +bow, +bow tie, bow-tie, bowtie, +brass, memorial tablet, plaque, +brassiere, bra, bandeau, +breakwater, groin, groyne, mole, bulwark, seawall, jetty, +breastplate, aegis, egis, +broom, +bucket, pail, +buckle, +bulletproof vest, +bullet train, bullet, +butcher shop, meat market, +cab, hack, taxi, taxicab, +caldron, cauldron, +candle, taper, wax light, +cannon, +canoe, +can opener, tin opener, +cardigan, +car mirror, +carousel, carrousel, merry-go-round, roundabout, whirligig, +"carpenters kit, tool kit", +carton, +car wheel, +cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM, +cassette, +cassette player, +castle, +catamaran, +CD player, +cello, violoncello, +cellular telephone, cellular phone, cellphone, cell, mobile phone, +chain, +chainlink fence, +chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour, +chain saw, chainsaw, +chest, +chiffonier, commode, +chime, bell, gong, +china cabinet, china closet, +Christmas stocking, +church, church building, +cinema, movie theater, movie theatre, movie house, picture palace, +cleaver, meat cleaver, chopper, +cliff dwelling, +cloak, +clog, geta, patten, sabot, +cocktail shaker, +coffee mug, +coffeepot, +coil, spiral, volute, whorl, helix, +combination lock, +computer keyboard, keypad, +confectionery, confectionary, candy store, +container ship, containership, container vessel, +convertible, +corkscrew, bottle screw, +cornet, horn, trumpet, trump, +cowboy boot, +cowboy hat, ten-gallon hat, +cradle, +crane, +crash helmet, +crate, +crib, cot, +Crock Pot, +croquet ball, +crutch, +cuirass, +dam, dike, dyke, +desk, +desktop computer, +dial telephone, dial phone, +diaper, nappy, napkin, +digital clock, +digital watch, +dining table, board, +dishrag, dishcloth, +dishwasher, dish washer, dishwashing machine, +disk brake, disc brake, +dock, dockage, docking facility, +dogsled, dog sled, dog sleigh, +dome, +doormat, welcome mat, +drilling platform, offshore rig, +drum, membranophone, tympan, +drumstick, +dumbbell, +Dutch oven, +electric fan, blower, +electric guitar, +electric locomotive, +entertainment center, +envelope, +espresso maker, +face powder, +feather boa, boa, +file, file cabinet, filing cabinet, +fireboat, +fire engine, fire truck, +fire screen, fireguard, +flagpole, flagstaff, +flute, transverse flute, +folding chair, +football helmet, +forklift, +fountain, +fountain pen, +four-poster, +freight car, +French horn, horn, +frying pan, frypan, skillet, +fur coat, +garbage truck, dustcart, +gasmask, respirator, gas helmet, +gas pump, gasoline pump, petrol pump, island dispenser, +goblet, +go-kart, +golf ball, +golfcart, golf cart, +gondola, +gong, tam-tam, +gown, +grand piano, grand, +greenhouse, nursery, glasshouse, +grille, radiator grille, +grocery store, grocery, food market, market, +guillotine, +hair slide, +hair spray, +half track, +hammer, +hamper, +hand blower, blow dryer, blow drier, hair dryer, hair drier, +hand-held computer, hand-held microcomputer, +handkerchief, hankie, hanky, hankey, +hard disc, 
hard disk, fixed disk, +harmonica, mouth organ, harp, mouth harp, +harp, +harvester, reaper, +hatchet, +holster, +home theater, home theatre, +honeycomb, +hook, claw, +hoopskirt, crinoline, +horizontal bar, high bar, +horse cart, horse-cart, +hourglass, +iPod, +iron, smoothing iron, +"jack-o-lantern", +jean, blue jean, denim, +jeep, landrover, +jersey, T-shirt, tee shirt, +jigsaw puzzle, +jinrikisha, ricksha, rickshaw, +joystick, +kimono, +knee pad, +knot, +lab coat, laboratory coat, +ladle, +lampshade, lamp shade, +laptop, laptop computer, +lawn mower, mower, +lens cap, lens cover, +letter opener, paper knife, paperknife, +library, +lifeboat, +lighter, light, igniter, ignitor, +limousine, limo, +liner, ocean liner, +lipstick, lip rouge, +Loafer, +lotion, +loudspeaker, speaker, speaker unit, loudspeaker system, speaker system, +"loupe, jewelers loupe", +lumbermill, sawmill, +magnetic compass, +mailbag, postbag, +mailbox, letter box, +maillot, +maillot, tank suit, +manhole cover, +maraca, +marimba, xylophone, +mask, +matchstick, +maypole, +maze, labyrinth, +measuring cup, +medicine chest, medicine cabinet, +megalith, megalithic structure, +microphone, mike, +microwave, microwave oven, +military uniform, +milk can, +minibus, +miniskirt, mini, +minivan, +missile, +mitten, +mixing bowl, +mobile home, manufactured home, +Model T, +modem, +monastery, +monitor, +moped, +mortar, +mortarboard, +mosque, +mosquito net, +motor scooter, scooter, +mountain bike, all-terrain bike, off-roader, +mountain tent, +mouse, computer mouse, +mousetrap, +moving van, +muzzle, +nail, +neck brace, +necklace, +nipple, +notebook, notebook computer, +obelisk, +oboe, hautboy, hautbois, +ocarina, sweet potato, +odometer, hodometer, mileometer, milometer, +oil filter, +organ, pipe organ, +oscilloscope, scope, cathode-ray oscilloscope, CRO, +overskirt, +oxcart, +oxygen mask, +packet, +paddle, boat paddle, +paddlewheel, paddle wheel, +padlock, +paintbrush, +"pajama, pyjama, pjs, jammies", +palace, +panpipe, pandean pipe, syrinx, +paper towel, +parachute, chute, +parallel bars, bars, +park bench, +parking meter, +passenger car, coach, carriage, +patio, terrace, +pay-phone, pay-station, +pedestal, plinth, footstall, +pencil box, pencil case, +pencil sharpener, +perfume, essence, +Petri dish, +photocopier, +pick, plectrum, plectron, +pickelhaube, +picket fence, paling, +pickup, pickup truck, +pier, +piggy bank, penny bank, +pill bottle, +pillow, +ping-pong ball, +pinwheel, +pirate, pirate ship, +pitcher, ewer, +"plane, carpenters plane, woodworking plane", +planetarium, +plastic bag, +plate rack, +plow, plough, +"plunger, plumbers helper", +Polaroid camera, Polaroid Land camera, +pole, +police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria, +poncho, +pool table, billiard table, snooker table, +pop bottle, soda bottle, +pot, flowerpot, +"potters wheel", +power drill, +prayer rug, prayer mat, +printer, +prison, prison house, +projectile, missile, +projector, +puck, hockey puck, +punching bag, punch bag, punching ball, punchball, +purse, +quill, quill pen, +quilt, comforter, comfort, puff, +racer, race car, racing car, +racket, racquet, +radiator, +radio, wireless, +radio telescope, radio reflector, +rain barrel, +recreational vehicle, RV, R.V., +reel, +reflex camera, +refrigerator, icebox, +remote control, remote, +restaurant, eating house, eating place, eatery, +revolver, six-gun, six-shooter, +rifle, +rocking chair, rocker, +rotisserie, +rubber eraser, rubber, pencil eraser, +rugby ball, +rule, ruler, +running 
shoe, +safe, +safety pin, +saltshaker, salt shaker, +sandal, +sarong, +sax, saxophone, +scabbard, +scale, weighing machine, +school bus, +schooner, +scoreboard, +screen, CRT screen, +screw, +screwdriver, +seat belt, seatbelt, +sewing machine, +shield, buckler, +shoe shop, shoe-shop, shoe store, +shoji, +shopping basket, +shopping cart, +shovel, +shower cap, +shower curtain, +ski, +ski mask, +sleeping bag, +slide rule, slipstick, +sliding door, +slot, one-armed bandit, +snorkel, +snowmobile, +snowplow, snowplough, +soap dispenser, +soccer ball, +sock, +solar dish, solar collector, solar furnace, +sombrero, +soup bowl, +space bar, +space heater, +space shuttle, +spatula, +speedboat, +"spider web, spiders web", +spindle, +sports car, sport car, +spotlight, spot, +stage, +steam locomotive, +steel arch bridge, +steel drum, +stethoscope, +stole, +stone wall, +stopwatch, stop watch, +stove, +strainer, +streetcar, tram, tramcar, trolley, trolley car, +stretcher, +studio couch, day bed, +stupa, tope, +submarine, pigboat, sub, U-boat, +suit, suit of clothes, +sundial, +sunglass, +sunglasses, dark glasses, shades, +sunscreen, sunblock, sun blocker, +suspension bridge, +swab, swob, mop, +sweatshirt, +swimming trunks, bathing trunks, +swing, +switch, electric switch, electrical switch, +syringe, +table lamp, +tank, army tank, armored combat vehicle, armoured combat vehicle, +tape player, +teapot, +teddy, teddy bear, +television, television system, +tennis ball, +thatch, thatched roof, +theater curtain, theatre curtain, +thimble, +thresher, thrasher, threshing machine, +throne, +tile roof, +toaster, +tobacco shop, tobacconist shop, tobacconist, +toilet seat, +torch, +totem pole, +tow truck, tow car, wrecker, +toyshop, +tractor, +trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi, +tray, +trench coat, +tricycle, trike, velocipede, +trimaran, +tripod, +triumphal arch, +trolleybus, trolley coach, trackless trolley, +trombone, +tub, vat, +turnstile, +typewriter keyboard, +umbrella, +unicycle, monocycle, +upright, upright piano, +vacuum, vacuum cleaner, +vase, +vault, +velvet, +vending machine, +vestment, +viaduct, +violin, fiddle, +volleyball, +waffle iron, +wall clock, +wallet, billfold, notecase, pocketbook, +wardrobe, closet, press, +warplane, military plane, +washbasin, handbasin, washbowl, lavabo, wash-hand basin, +washer, automatic washer, washing machine, +water bottle, +water jug, +water tower, +whiskey jug, +whistle, +wig, +window screen, +window shade, +Windsor tie, +wine bottle, +wing, +wok, +wooden spoon, +wool, woolen, woollen, +worm fence, snake fence, snake-rail fence, Virginia fence, +wreck, +yawl, +yurt, +web site, website, internet site, site, +comic book, +crossword puzzle, crossword, +street sign, +traffic light, traffic signal, stoplight, +book jacket, dust cover, dust jacket, dust wrapper, +menu, +plate, +guacamole, +consomme, +hot pot, hotpot, +trifle, +ice cream, icecream, +ice lolly, lolly, lollipop, popsicle, +French loaf, +bagel, beigel, +pretzel, +cheeseburger, +hotdog, hot dog, red hot, +mashed potato, +head cabbage, +broccoli, +cauliflower, +zucchini, courgette, +spaghetti squash, +acorn squash, +butternut squash, +cucumber, cuke, +artichoke, globe artichoke, +bell pepper, +cardoon, +mushroom, +Granny Smith, +strawberry, +orange, +lemon, +fig, +pineapple, ananas, +banana, +jackfruit, jak, jack, +custard apple, +pomegranate, +hay, +carbonara, +chocolate sauce, chocolate syrup, +dough, +meat loaf, meatloaf, +pizza, pizza pie, +potpie, +burrito, +red wine, 
+espresso, +cup, +eggnog, +alp, +bubble, +cliff, drop, drop-off, +coral reef, +geyser, +lakeside, lakeshore, +promontory, headland, head, foreland, +sandbar, sand bar, +seashore, coast, seacoast, sea-coast, +valley, vale, +volcano, +ballplayer, baseball player, +groom, bridegroom, +scuba diver, +rapeseed, +daisy, +"yellow ladys slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", +corn, +acorn, +hip, rose hip, rosehip, +buckeye, horse chestnut, conker, +coral fungus, +agaric, +gyromitra, +stinkhorn, carrion fungus, +earthstar, +hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa, +bolete, +ear, spike, capitulum, +toilet tissue, toilet paper, bathroom tissue diff --git a/examples/C++/PaddleClas/resnet_50_vd/rpc_client.py b/examples/C++/PaddleClas/resnet_50_vd/rpc_client.py new file mode 100644 index 0000000000000000000000000000000000000000..5e4c42841fccc257b057fd09c06ae495b0fa77bc --- /dev/null +++ b/examples/C++/PaddleClas/resnet_50_vd/rpc_client.py @@ -0,0 +1,51 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +from paddle_serving_client import Client + +#app +from paddle_serving_app.reader import Sequential, URL2Image, Resize +from paddle_serving_app.reader import CenterCrop, RGB2BGR, Transpose, Div, Normalize +import time + +client = Client() +client.load_client_config("./ResNet50_vd_client/serving_client_conf.prototxt") +client.connect(["127.0.0.1:9394"]) + +label_dict = {} +label_idx = 0 +with open("imagenet.label") as fin: + for line in fin: + label_dict[label_idx] = line.strip() + label_idx += 1 + +#preprocess +seq = Sequential([ + URL2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)), + Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True) +]) + +start = time.time() +image_file = "https://paddle-serving.bj.bcebos.com/imagenet-example/daisy.jpg" +for i in range(1): + img = seq(image_file) + fetch_map = client.predict(feed={"inputs": img}, fetch=[], batch=False) + + prob = max(fetch_map["save_infer_model/scale_0.tmp_1"][0]) + label = label_dict[fetch_map["save_infer_model/scale_0.tmp_1"][0].tolist() + .index(prob)].strip().replace(",", "") + print("prediction: {}, probability: {}".format(label, prob)) +end = time.time() +print(end - start) diff --git a/examples/C++/PaddleNLP/bert/bert_gpu_server.py b/examples/C++/PaddleNLP/bert/bert_gpu_server.py old mode 100644 new mode 100755 index 7708a078636fd876c40e88d1441bc711d599f8a6..1d683e59cf05062065d1277827d4bc456e1031ea --- a/examples/C++/PaddleNLP/bert/bert_gpu_server.py +++ b/examples/C++/PaddleNLP/bert/bert_gpu_server.py @@ -19,9 +19,9 @@ from paddle_serving_server import OpSeqMaker from paddle_serving_server import Server op_maker = OpMaker() -read_op = op_maker.create('general_reader') -general_infer_op = op_maker.create('general_infer') -general_response_op = op_maker.create('general_response') +read_op = op_maker.create('GeneralReaderOp') +general_infer_op = 
op_maker.create('GeneralInferOp') +general_response_op = op_maker.create('GeneralResponseOp') op_seq_maker = OpSeqMaker() op_seq_maker.add_op(read_op) diff --git a/examples/C++/PaddleNLP/bert/bert_server.py b/examples/C++/PaddleNLP/bert/bert_server.py old mode 100644 new mode 100755 index 35d38be0cac50b899b58085c7f103f32537859c4..c2ccd7ef2d662534b91c4d6acd255b237c05f26e --- a/examples/C++/PaddleNLP/bert/bert_server.py +++ b/examples/C++/PaddleNLP/bert/bert_server.py @@ -19,9 +19,9 @@ from paddle_serving_server import OpSeqMaker from paddle_serving_server import Server op_maker = OpMaker() -read_op = op_maker.create('general_reader') -general_infer_op = op_maker.create('general_infer') -general_response_op = op_maker.create('general_response') +read_op = op_maker.create('GeneralReaderOp') +general_infer_op = op_maker.create('GeneralInferOp') +general_response_op = op_maker.create('GeneralResponseOp') op_seq_maker = OpSeqMaker() op_seq_maker.add_op(read_op) diff --git a/examples/C++/PaddleOCR/ocr/README.md b/examples/C++/PaddleOCR/ocr/README.md index 28849bf3278ace80340344a1661c620d70c7b062..12f0fd6cd5f06a97b0f5e846037217a40e37fe89 100755 --- a/examples/C++/PaddleOCR/ocr/README.md +++ b/examples/C++/PaddleOCR/ocr/README.md @@ -4,9 +4,9 @@ ## Get Model ``` -wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/ocr_rec.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_rec tar -xzvf ocr_rec.tar.gz -wget https://paddle-serving.bj.bcebos.com/ocr/ocr_det.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_det tar -xzvf ocr_det.tar.gz ``` @@ -108,7 +108,7 @@ python3 rec_web_client.py When a service starts the concatenation of two models, it only needs to pass in the relative path of the model folder in order after `--model`, and the custom C++ OP class name after `--op`. The order of the model after `--model` and the class name after `--OP` needs to correspond. 
Here, it is assumed that we have defined the two OPs as GeneralDetectionOp and GeneralRecOp respectively, The script code is as follows: ```python #One service starts the concatenation of two models -python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralRecOp --port 9293 +python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralInferOp --port 9293 #ocr_det_model correspond to GeneralDetectionOp, ocr_rec_model correspond to GeneralRecOp ``` diff --git a/examples/C++/PaddleOCR/ocr/README_CN.md b/examples/C++/PaddleOCR/ocr/README_CN.md index d9671115e0c4970af48fcd224551a96526be1356..a737531d778a25ef225c46240736e1bbc7381f35 100755 --- a/examples/C++/PaddleOCR/ocr/README_CN.md +++ b/examples/C++/PaddleOCR/ocr/README_CN.md @@ -4,9 +4,9 @@ ## 获取模型 ``` -wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/image/OCR/ocr_rec.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_rec tar -xzvf ocr_rec.tar.gz -wget https://paddle-serving.bj.bcebos.com/ocr/ocr_det.tar.gz +python3 -m paddle_serving_app.package --get_model ocr_det tar -xzvf ocr_det.tar.gz ``` ## 获取数据集(可选) @@ -106,7 +106,7 @@ python3 rec_web_client.py 一个服务启动两个模型串联,只需要在`--model后依次按顺序传入模型文件夹的相对路径`,且需要在`--op后依次传入自定义C++OP类名称`,其中--model后面的模型与--op后面的类名称的顺序需要对应,`这里假设我们已经定义好了两个OP分别为GeneralDetectionOp和GeneralRecOp`,则脚本代码如下: ```python #一个服务启动多模型串联 -python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralRecOp --port 9293 +python3 -m paddle_serving_server.serve --model ocr_det_model ocr_rec_model --op GeneralDetectionOp GeneralInferOp --port 9293 #多模型串联 ocr_det_model对应GeneralDetectionOp ocr_rec_model对应GeneralRecOp ``` diff --git a/examples/C++/PaddleOCR/ocr/det_debugger_server.py b/examples/C++/PaddleOCR/ocr/det_debugger_server.py index 5b40fe9372a56b2b663c1bfeff02619a8ec9730b..6679ee0f4e0f18000ee33e5331a3d2e44197c846 100644 --- a/examples/C++/PaddleOCR/ocr/det_debugger_server.py +++ b/examples/C++/PaddleOCR/ocr/det_debugger_server.py @@ -47,18 +47,18 @@ class OCRService(WebService): }) def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) self.ori_h, self.ori_w, _ = im.shape det_img = self.det_preprocess(im) _, self.new_h, self.new_w = det_img.shape return { - "image": det_img[np.newaxis, :].copy() - }, ["concat_1.tmp_0"], True + "x": det_img[np.newaxis, :].copy() + }, ["save_infer_model/scale_0.tmp_1"], True def postprocess(self, feed={}, fetch=[], fetch_map=None): - det_out = fetch_map["concat_1.tmp_0"] + det_out = fetch_map["save_infer_model/scale_0.tmp_1"] ratio_list = [ float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w ] diff --git a/examples/C++/PaddleOCR/ocr/det_web_server.py b/examples/C++/PaddleOCR/ocr/det_web_server.py index d38686e5a86c4f2df45db7f495a8c08a72270919..d52f4b447006e220ed2f6362afee253c0b9eb69d 100644 --- a/examples/C++/PaddleOCR/ocr/det_web_server.py +++ b/examples/C++/PaddleOCR/ocr/det_web_server.py @@ -47,17 +47,17 @@ class OCRService(WebService): }) def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) self.ori_h, self.ori_w, _ = im.shape det_img = self.det_preprocess(im) _, self.new_h, 
self.new_w = det_img.shape print(det_img) - return {"image": det_img}, ["concat_1.tmp_0"], False + return {"x": det_img}, ["save_infer_model/scale_0.tmp_1"], False def postprocess(self, feed={}, fetch=[], fetch_map=None): - det_out = fetch_map["concat_1.tmp_0"] + det_out = fetch_map["save_infer_model/scale_0.tmp_1"] ratio_list = [ float(self.new_h) / self.ori_h, float(self.new_w) / self.ori_w ] diff --git a/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py b/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py index b3187f50e6f0d677d5377dac1735bd6e679b4755..507971c36c0e900cce471ceb2c636f4dd6232ccd 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py +++ b/examples/C++/PaddleOCR/ocr/ocr_cpp_client.py @@ -42,13 +42,11 @@ for img_file in os.listdir(test_img_dir): image_data = file.read() image = cv2_to_base64(image_data) fetch_map = client.predict( - feed={"image": image}, - fetch=["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"], + feed={"x": image}, + fetch=["save_infer_model/scale_0.tmp_1"], batch=True) result = {} - result["score"] = fetch_map["softmax_0.tmp_0"] - del fetch_map["softmax_0.tmp_0"] - rec_res = OCRReader().postprocess(fetch_map, with_score=False) + rec_res = OCRReader().postprocess_ocrv2(fetch_map, with_score=False) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py b/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py index 88dd94a8224fc5c9c6f972b96d81af60ce518763..bb10dba44d4baf3a9ed1e6b1f2f9af02178c1261 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py +++ b/examples/C++/PaddleOCR/ocr/ocr_debugger_server.py @@ -48,7 +48,7 @@ class OCRService(WebService): self.ocr_reader = OCRReader() def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) ori_h, ori_w, _ = im.shape @@ -57,7 +57,7 @@ class OCRService(WebService): det_img = det_img[np.newaxis, :] det_img = det_img.copy() det_out = self.det_client.predict( - feed={"image": det_img}, fetch=["concat_1.tmp_0"], batch=True) + feed={"x": det_img}, fetch=["save_infer_model/scale_0.tmp_1"], batch=True) filter_func = FilterBoxes(10, 10) post_func = DBPostProcess({ "thresh": 0.3, @@ -68,7 +68,7 @@ class OCRService(WebService): }) sorted_boxes = SortedBoxes() ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w] - dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list]) + dt_boxes_list = post_func(det_out["save_infer_model/scale_0.tmp_1"], [ratio_list]) dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w]) dt_boxes = sorted_boxes(dt_boxes) get_rotate_crop_image = GetRotateCropImage() @@ -88,12 +88,12 @@ class OCRService(WebService): for id, img in enumerate(img_list): norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) imgs[id] = norm_img - feed = {"image": imgs.copy()} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed = {"x": imgs.copy()} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/ocr_web_client.py b/examples/C++/PaddleOCR/ocr/ocr_web_client.py index 
ce96a8bbcd585f37368d70070d649e25a0129029..91620e8ed9b0973a6bb31b09afbdce7b99aac8b6 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_web_client.py +++ b/examples/C++/PaddleOCR/ocr/ocr_web_client.py @@ -34,7 +34,7 @@ for img_file in os.listdir(test_img_dir): with open(os.path.join(test_img_dir, img_file), 'rb') as file: image_data1 = file.read() image = cv2_to_base64(image_data1) - data = {"feed": [{"image": image}], "fetch": ["res"]} + data = {"feed": [{"x": image}], "fetch": ["res"]} r = requests.post(url=url, headers=headers, data=json.dumps(data)) print(r) print(r.json()) diff --git a/examples/C++/PaddleOCR/ocr/ocr_web_server.py b/examples/C++/PaddleOCR/ocr/ocr_web_server.py index 58fc850c94a5e8d2f37ae5d03f14b60d343a2203..2273c7e1d2d90a1f4d3dd0a14a8469974a09ba98 100644 --- a/examples/C++/PaddleOCR/ocr/ocr_web_server.py +++ b/examples/C++/PaddleOCR/ocr/ocr_web_server.py @@ -44,13 +44,13 @@ class OCRService(WebService): self.ocr_reader = OCRReader() def preprocess(self, feed=[], fetch=[]): - data = base64.b64decode(feed[0]["image"].encode('utf8')) + data = base64.b64decode(feed[0]["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) ori_h, ori_w, _ = im.shape det_img = self.det_preprocess(im) det_out = self.det_client.predict( - feed={"image": det_img}, fetch=["concat_1.tmp_0"], batch=False) + feed={"x": det_img}, fetch=["save_infer_model/scale_0.tmp_1"], batch=False) _, new_h, new_w = det_img.shape filter_func = FilterBoxes(10, 10) post_func = DBPostProcess({ @@ -62,7 +62,7 @@ class OCRService(WebService): }) sorted_boxes = SortedBoxes() ratio_list = [float(new_h) / ori_h, float(new_w) / ori_w] - dt_boxes_list = post_func(det_out["concat_1.tmp_0"], [ratio_list]) + dt_boxes_list = post_func(det_out["save_infer_model/scale_0.tmp_1"], [ratio_list]) dt_boxes = filter_func(dt_boxes_list[0], [ori_h, ori_w]) dt_boxes = sorted_boxes(dt_boxes) get_rotate_crop_image = GetRotateCropImage() @@ -78,12 +78,12 @@ class OCRService(WebService): for img in img_list: norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) feed_list.append(norm_img[np.newaxis, :]) - feed_batch = {"image": np.concatenate(feed_list, axis=0)} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed_batch = {"x": np.concatenate(feed_list, axis=0)} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed_batch, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/rec_debugger_server.py b/examples/C++/PaddleOCR/ocr/rec_debugger_server.py index f84463238af859a00983f515e405686c00fdf9fa..cb096ac1a7ab9475da0d61fb396bdec471fc2f2d 100644 --- a/examples/C++/PaddleOCR/ocr/rec_debugger_server.py +++ b/examples/C++/PaddleOCR/ocr/rec_debugger_server.py @@ -38,7 +38,7 @@ class OCRService(WebService): def preprocess(self, feed=[], fetch=[]): img_list = [] for feed_data in feed: - data = base64.b64decode(feed_data["image"].encode('utf8')) + data = base64.b64decode(feed_data["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) img_list.append(im) @@ -53,12 +53,12 @@ class OCRService(WebService): for i, img in enumerate(img_list): norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) imgs[i] = norm_img - feed = {"image": imgs.copy()} - fetch = ["ctc_greedy_decoder_0.tmp_0", 
"softmax_0.tmp_0"] + feed = {"x": imgs.copy()} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleOCR/ocr/rec_web_client.py b/examples/C++/PaddleOCR/ocr/rec_web_client.py index 312a2148886d6f084a1c077d84e907cb28c0652a..e78145b60d0c2d6c6032acbdba679bbdf89df51b 100644 --- a/examples/C++/PaddleOCR/ocr/rec_web_client.py +++ b/examples/C++/PaddleOCR/ocr/rec_web_client.py @@ -36,6 +36,6 @@ for img_file in os.listdir(test_img_dir): image_data1 = file.read() image = cv2_to_base64(image_data1) #data = {"feed": [{"image": image}], "fetch": ["res"]} - data = {"feed": [{"image": image}] * 3, "fetch": ["res"]} + data = {"feed": [{"x": image}] * 3, "fetch": ["res"]} r = requests.post(url=url, headers=headers, data=json.dumps(data)) print(r.json()) diff --git a/examples/C++/PaddleOCR/ocr/rec_web_server.py b/examples/C++/PaddleOCR/ocr/rec_web_server.py index 2db6e398d3a025e739761fabd50c5bb8a6609f07..1a6e45812c42280100fb14b029a3a16508c3b9a5 100644 --- a/examples/C++/PaddleOCR/ocr/rec_web_server.py +++ b/examples/C++/PaddleOCR/ocr/rec_web_server.py @@ -39,7 +39,7 @@ class OCRService(WebService): # TODO: to handle batch rec images img_list = [] for feed_data in feed: - data = base64.b64decode(feed_data["image"].encode('utf8')) + data = base64.b64decode(feed_data["x"].encode('utf8')) data = np.fromstring(data, np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) img_list.append(im) @@ -55,12 +55,12 @@ class OCRService(WebService): norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio) imgs[i] = norm_img - feed = {"image": imgs.copy()} - fetch = ["ctc_greedy_decoder_0.tmp_0", "softmax_0.tmp_0"] + feed = {"x": imgs.copy()} + fetch = ["save_infer_model/scale_0.tmp_1"] return feed, fetch, True def postprocess(self, feed={}, fetch=[], fetch_map=None): - rec_res = self.ocr_reader.postprocess(fetch_map, with_score=True) + rec_res = self.ocr_reader.postprocess_ocrv2(fetch_map, with_score=True) res_lst = [] for res in rec_res: res_lst.append(res[0]) diff --git a/examples/C++/PaddleRec/criteo_ctr/test_server.py b/examples/C++/PaddleRec/criteo_ctr/test_server.py old mode 100644 new mode 100755 index 34f859daab4c808aa9d50d2109a81a69eed96df6..ca4c6afb0ddc0da3e24540797f242c748d4b128f --- a/examples/C++/PaddleRec/criteo_ctr/test_server.py +++ b/examples/C++/PaddleRec/criteo_ctr/test_server.py @@ -20,14 +20,14 @@ from paddle_serving_server import OpSeqMaker from paddle_serving_server import Server op_maker = OpMaker() -read_op = op_maker.create('general_reader') -general_infer_op = op_maker.create('general_infer') -response_op = op_maker.create('general_response') +read_op = op_maker.create('GeneralReaderOp') +general_infer_op = op_maker.create('GeneralInferOp') +general_response_op = op_maker.create('GeneralResponseOp') op_seq_maker = OpSeqMaker() op_seq_maker.add_op(read_op) op_seq_maker.add_op(general_infer_op) -op_seq_maker.add_op(response_op) +op_seq_maker.add_op(general_response_op) server = Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) diff --git a/examples/C++/imdb/README_CN.md b/examples/C++/imdb/README_CN.md index a1fecc8af35dcd2f5a38f47480b9b80b3cf96054..42841c6b8d0b4d1f064a33b54503193b88eed635 100755 --- a/examples/C++/imdb/README_CN.md +++ b/examples/C++/imdb/README_CN.md @@ -1,4 +1,4 @@ 
-## IMDB评论情绪预测服务 +## IMDB 评论情绪预测 ABTest 服务 (简体中文|[English](./README.md)) @@ -11,16 +11,24 @@ sh get_data.sh ### 启动预测服务(支持BRPC-Client/GRPC-Client/Http-Client) +```python +## 启动 bow 模型服务 +python3 -m paddle_serving_server.serve --model imdb_bow_model/ --port 9297 >/dev/null 2>&1 & + +## 启动 cnn 模型服务 +python3 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9298 >/dev/null 2>&1 & + +## 启动 lstm 模型服务 +python3 -m paddle_serving_server.serve --model imdb_lstm_model/ --port 9299 >/dev/null 2>&1 & ``` -python3 -m paddle_serving_server.serve --model imdb_cnn_model/ --port 9292 -``` -### BRPC-Client预测 + +### ABTest 预测 ``` -head test_data/part-0 | python3 test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab +head test_data/part-0 | python3 abtest_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab ``` 预测test_data/part-0的前十个样例。 -### BRPC-Client预测 +### http预测 ``` head test_data/part-0 | python3 test_http_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab ``` diff --git a/examples/C++/imdb/abtest_client.py b/examples/C++/imdb/abtest_client.py index 1a14c87c355552248394fa504d37f54a4c58132a..e0f910e37c7cf5a99c0b5ec1249f0ceed68f21ae 100644 --- a/examples/C++/imdb/abtest_client.py +++ b/examples/C++/imdb/abtest_client.py @@ -11,35 +11,35 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +# pylint: disable=doc-string-missing from paddle_serving_client import Client +from paddle_serving_app.reader.imdb_reader import IMDBDataset +import sys import numpy as np client = Client() -client.load_client_config('imdb_bow_client_conf/serving_client_conf.prototxt') -client.add_variant("bow", ["127.0.0.1:8000"], 10) -client.add_variant("lstm", ["127.0.0.1:9000"], 90) +client.load_client_config(sys.argv[1]) +client.add_variant("bow", ["127.0.0.1:9297"], 10) +client.add_variant("cnn", ["127.0.0.1:9298"], 30) +client.add_variant("lstm", ["127.0.0.1:9299"], 60) client.connect() -print('please wait for about 10s') -with open('processed.data') as f: - cnt = {"bow": {'acc': 0, 'total': 0}, "lstm": {'acc': 0, 'total': 0}} - for line in f: - word_ids, label = line.split(';') - word_ids = [int(x) for x in word_ids.split(',')] - word_len = len(word_ids) - feed = { - "words": np.array(word_ids).reshape(word_len, 1), - "words.lod": [0, word_len] - } - fetch = ["acc", "cost", "prediction"] - [fetch_map, tag] = client.predict( - feed=feed, fetch=fetch, need_variant_tag=True, batch=True) - if (float(fetch_map["prediction"][0][1]) - 0.5) * (float(label[0]) - 0.5 - ) > 0: - cnt[tag]['acc'] += 1 - cnt[tag]['total'] += 1 +# you can define any english sentence or dataset here +# This example reuses imdb reader in training, you +# can define your own data preprocessing easily. 
+imdb_dataset = IMDBDataset() +imdb_dataset.load_resource(sys.argv[2]) - for tag, data in cnt.items(): - print('[{}](total: {}) acc: {}'.format(tag, data[ - 'total'], float(data['acc']) / float(data['total']))) +for line in sys.stdin: + word_ids, label = imdb_dataset.get_words_and_label(line) + word_len = len(word_ids) + feed = { + "words": np.array(word_ids).reshape(word_len, 1), + "words.lod": [0, word_len] + } + #print(feed) + fetch = ["prediction"] + fetch_map = client.predict( + feed=feed, fetch=fetch, batch=True, need_variant_tag=True) + print("server_tag={} prediction={} ".format(fetch_map[1], fetch_map[0][ + "prediction"][0])) diff --git a/examples/C++/imdb/test_http_client.py b/examples/C++/imdb/test_http_client.py index e3cc705150ccc197ab1be24bf11e0a92e1d62380..d22b92d7850097130e24d3cf86857ddcc8caef2f 100755 --- a/examples/C++/imdb/test_http_client.py +++ b/examples/C++/imdb/test_http_client.py @@ -40,7 +40,7 @@ we recommend use Proto data format in HTTP-body, set True(which is default) if you want use JSON data format in HTTP-body, set False ''' #client.set_http_proto(True) -client.connect(["127.0.0.1:9292"]) +client.connect(["127.0.0.1:9297"]) # you can define any english sentence or dataset here # This example reuses imdb reader in training, you diff --git a/examples/Pipeline/LowPrecision/ResNet50_Slim/README.md b/examples/Pipeline/LowPrecision/ResNet50_Slim/README.md new file mode 100644 index 0000000000000000000000000000000000000000..8b115baee975c2a96eb6c1f4b43dcf8059fe4fea --- /dev/null +++ b/examples/Pipeline/LowPrecision/ResNet50_Slim/README.md @@ -0,0 +1,25 @@ +# Low precsion examples of python pipeline + +Here we take the ResNet50 quantization model as an example to introduce the low-precision deployment case of Python Pipline. + +## 1.Get model +``` +wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz +tar zxvf ResNet50_quant.tar.gz +``` + +## 2.Save model var for serving +``` +python3 -m paddle_serving_client.convert --dirname ResNet50_quant --serving_server serving_server --serving_client serving_client +``` + +## 3.Start server +``` +python3 resnet50_web_service.py &>log.txt & +``` + +## 4.Test +``` +python3 pipeline_rpc_client.py +python3 pipeline_http_client.py +``` diff --git a/examples/Pipeline/LowPrecision/ResNet50_Slim/README_CN.md b/examples/Pipeline/LowPrecision/ResNet50_Slim/README_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..1396b29858710eed8bc54a9a27f05022ef3a443f --- /dev/null +++ b/examples/Pipeline/LowPrecision/ResNet50_Slim/README_CN.md @@ -0,0 +1,25 @@ +# Python Pipeline 低精度部署案例 + +这里以 ResNet50 量化模型为例,介绍 Python Pipline 低精度量化模型部署案例。 + +## 1.获取模型 +``` +wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz +tar zxvf ResNet50_quant.tar.gz +``` + +## 2.保存模型参数 +``` +python3 -m paddle_serving_client.convert --dirname ResNet50_quant --serving_server serving_server --serving_client serving_client +``` + +## 3.启动服务 +``` +python3 resnet50_web_service.py &>log.txt & +``` + +## 4.测试 +``` +python3 pipeline_rpc_client.py +python3 pipeline_http_client.py +``` diff --git a/examples/Pipeline/LowPrecision/ResNet50_Slim/benchmark.py b/examples/Pipeline/LowPrecision/ResNet50_Slim/benchmark.py new file mode 100644 index 0000000000000000000000000000000000000000..4b0336f97c2c520a46d596bf5e435c2b9e3094a9 --- /dev/null +++ b/examples/Pipeline/LowPrecision/ResNet50_Slim/benchmark.py @@ -0,0 +1,153 @@ +# Copyright (c) 2021 PaddlePaddle Authors. 
All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import os +import base64 +import yaml +import requests +import time +import json + +from paddle_serving_server.pipeline import PipelineClient +import numpy as np +from paddle_serving_client.utils import MultiThreadRunner +from paddle_serving_client.utils import benchmark_args, show_latency + + +def parse_benchmark(filein, fileout): + with open(filein, "r") as fin: + res = yaml.load(fin, yaml.FullLoader) + del_list = [] + for key in res["DAG"].keys(): + if "call" in key: + del_list.append(key) + for key in del_list: + del res["DAG"][key] + with open(fileout, "w") as fout: + yaml.dump(res, fout, default_flow_style=False) + + +def gen_yml(device, gpu_id): + fin = open("config.yml", "r") + config = yaml.load(fin, yaml.FullLoader) + fin.close() + config["dag"]["tracer"] = {"interval_s": 10} + if device == "gpu": + config["op"]["imagenet"]["local_service_conf"]["device_type"] = 1 + config["op"]["imagenet"]["local_service_conf"]["devices"] = gpu_id + else: + config["op"]["imagenet"]["local_service_conf"]["device_type"] = 0 + with open("config2.yml", "w") as fout: + yaml.dump(config, fout, default_flow_style=False) + + +def cv2_to_base64(image): + return base64.b64encode(image).decode('utf8') + + +def run_http(idx, batch_size): + print("start thread ({})".format(idx)) + url = "http://127.0.0.1:18000/imagenet/prediction" + start = time.time() + + with open(os.path.join(".", "daisy.jpg"), 'rb') as file: + image_data1 = file.read() + image = cv2_to_base64(image_data1) + keys, values = [], [] + for i in range(batch_size): + keys.append("image_{}".format(i)) + values.append(image) + data = {"key": keys, "value": values} + latency_list = [] + start_time = time.time() + total_num = 0 + while True: + l_start = time.time() + r = requests.post(url=url, data=json.dumps(data)) + print(r.json()) + l_end = time.time() + latency_list.append(l_end * 1000 - l_start * 1000) + total_num += 1 + if time.time() - start_time > 20: + break + end = time.time() + return [[end - start], latency_list, [total_num]] + + +def multithread_http(thread, batch_size): + multi_thread_runner = MultiThreadRunner() + start = time.time() + result = multi_thread_runner.run(run_http, thread, batch_size) + end = time.time() + total_cost = end - start + avg_cost = 0 + total_number = 0 + for i in range(thread): + avg_cost += result[0][i] + total_number += result[2][i] + avg_cost = avg_cost / thread + print("Total cost: {}s".format(total_cost)) + print("Each thread cost: {}s. ".format(avg_cost)) + print("Total count: {}. 
".format(total_number)) + print("AVG QPS: {} samples/s".format(batch_size * total_number / + total_cost)) + show_latency(result[1]) + + +def run_rpc(thread, batch_size): + client = PipelineClient() + client.connect(['127.0.0.1:18080']) + start = time.time() + test_img_dir = "imgs/" + for img_file in os.listdir(test_img_dir): + with open(os.path.join(test_img_dir, img_file), 'rb') as file: + image_data = file.read() + image = cv2_to_base64(image_data) + start_time = time.time() + while True: + ret = client.predict(feed_dict={"image": image}, fetch=["res"]) + if time.time() - start_time > 10: + break + end = time.time() + return [[end - start]] + + +def multithread_rpc(thraed, batch_size): + multi_thread_runner = MultiThreadRunner() + result = multi_thread_runner.run(run_rpc, thread, batch_size) + + +if __name__ == "__main__": + if sys.argv[1] == "yaml": + mode = sys.argv[2] # brpc/ local predictor + thread = int(sys.argv[3]) + device = sys.argv[4] + if device == "gpu": + gpu_id = sys.argv[5] + else: + gpu_id = None + gen_yml(device, gpu_id) + elif sys.argv[1] == "run": + mode = sys.argv[2] # http/ rpc + thread = int(sys.argv[3]) + batch_size = int(sys.argv[4]) + if mode == "http": + multithread_http(thread, batch_size) + elif mode == "rpc": + multithread_rpc(thread, batch_size) + elif sys.argv[1] == "dump": + filein = sys.argv[2] + fileout = sys.argv[3] + parse_benchmark(filein, fileout) diff --git a/examples/Pipeline/LowPrecision/ResNet50_Slim/benchmark.sh b/examples/Pipeline/LowPrecision/ResNet50_Slim/benchmark.sh new file mode 100644 index 0000000000000000000000000000000000000000..14c82dfcca801bc00bec57ef972f2260dd1d844a --- /dev/null +++ b/examples/Pipeline/LowPrecision/ResNet50_Slim/benchmark.sh @@ -0,0 +1,44 @@ +export FLAGS_profile_pipeline=1 +alias python3="python3.6" +modelname="clas-ResNet_v2_50" + +# HTTP +#ps -ef | grep web_service | awk '{print $2}' | xargs kill -9 +sleep 3 +# Create yaml,If you already have the config.yaml, ignore it. +#python3 benchmark.py yaml local_predictor 1 gpu +rm -rf profile_log_$modelname + +echo "Starting HTTP Clients..." +# Start a client in each thread, tesing the case of multiple threads. +for thread_num in 1 2 4 8 12 16 +do + for batch_size in 1 + do + echo "----${modelname} thread num: ${thread_num} batch size: ${batch_size} mode:http ----" >>profile_log_$modelname + # Start one web service, If you start the service yourself, you can ignore it here. + #python3 web_service.py >web.log 2>&1 & + #sleep 3 + + # --id is the serial number of the GPU card, Must be the same as the gpu id used by the server. 
+ nvidia-smi --id=3 --query-gpu=memory.used --format=csv -lms 1000 > gpu_use.log 2>&1 & + nvidia-smi --id=3 --query-gpu=utilization.gpu --format=csv -lms 1000 > gpu_utilization.log 2>&1 & + echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py + # Start http client + python3 benchmark.py run http $thread_num $batch_size > profile 2>&1 + + # Collect CPU metrics, Filter data that is zero momentarily, Record the maximum value of GPU memory and the average value of GPU utilization + python3 cpu_utilization.py >> profile_log_$modelname + grep -av '^0 %' gpu_utilization.log > gpu_utilization.log.tmp + awk 'BEGIN {max = 0} {if(NR>1){if ($modelname > max) max=$modelname}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$modelname + awk -F' ' '{sum+=$1} END {print "GPU_UTILIZATION:", sum/NR, sum, NR }' gpu_utilization.log.tmp >> profile_log_$modelname + + # Show profiles + python3 ../../../util/show_profile.py profile $thread_num >> profile_log_$modelname + tail -n 8 profile >> profile_log_$modelname + echo '' >> profile_log_$modelname + done +done + +# Kill all nvidia-smi background task. +pkill nvidia-smi diff --git a/examples/Pipeline/LowPrecision/ResNet50_Slim/config.yml b/examples/Pipeline/LowPrecision/ResNet50_Slim/config.yml new file mode 100644 index 0000000000000000000000000000000000000000..eb5e918b9b0ec8780e833c2ef01150fafcbc312d --- /dev/null +++ b/examples/Pipeline/LowPrecision/ResNet50_Slim/config.yml @@ -0,0 +1,44 @@ +#worker_num, 最大并发数。当build_dag_each_worker=True时, 框架会创建worker_num个进程,每个进程内构建grpcSever和DAG +##当build_dag_each_worker=False时,框架会设置主线程grpc线程池的max_workers=worker_num +worker_num: 10 + +#http端口, rpc_port和http_port不允许同时为空。当rpc_port可用且http_port为空时,不自动生成http_port +http_port: 18080 +rpc_port: 9993 + +dag: + #op资源类型, True, 为线程模型;False,为进程模型 + is_thread_op: False +op: + imagenet: + #并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 1 + + #当op配置没有server_endpoints时,从local_service_conf读取本地服务配置 + local_service_conf: + + #uci模型路径 + model_config: serving_server/ + + #计算硬件类型: 空缺时由devices决定(CPU/GPU),0=cpu, 1=gpu, 2=tensorRT, 3=arm cpu, 4=kunlun xpu + device_type: 2 + + #计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 + devices: "0" # "0,1" + + #client类型,包括brpc, grpc和local_predictor.local_predictor不启动Serving服务,进程内预测 + client_type: local_predictor + + #Fetch结果列表,以client_config中fetch_var的alias_name为准 + fetch_list: ["save_infer_model/scale_0.tmp_0"] + + #precsion, 预测精度,降低预测精度可提升预测速度 + #GPU 支持: "fp32"(default), "fp16", "int8"; + #CPU 支持: "fp32"(default), "fp16", "bf16"(mkldnn); 不支持: "int8" + precision: "int8" + + #开启 TensorRT calibration, 量化模型要设置 use_calib: False, 非量化模型离线生成int8需要开启 use_calib: True + use_calib: False + + #开启 ir_optim + ir_optim: True diff --git a/examples/Pipeline/LowPrecision/ResNet50_Slim/daisy.jpg b/examples/Pipeline/LowPrecision/ResNet50_Slim/daisy.jpg new file mode 100644 index 0000000000000000000000000000000000000000..7edeca63e5f32e68550ef720d81f59df58a8eabc Binary files /dev/null and b/examples/Pipeline/LowPrecision/ResNet50_Slim/daisy.jpg differ diff --git a/examples/Pipeline/LowPrecision/ResNet50_Slim/imagenet.label b/examples/Pipeline/LowPrecision/ResNet50_Slim/imagenet.label new file mode 100644 index 0000000000000000000000000000000000000000..d7146735146ea1894173d6d0e20fb90af36be849 --- /dev/null +++ b/examples/Pipeline/LowPrecision/ResNet50_Slim/imagenet.label @@ -0,0 +1,1000 @@ +tench, Tinca tinca, +goldfish, Carassius auratus, +great white shark, white 
shark, man-eater, man-eating shark, Carcharodon carcharias, +tiger shark, Galeocerdo cuvieri, +hammerhead, hammerhead shark, +electric ray, crampfish, numbfish, torpedo, +stingray, +cock, +hen, +ostrich, Struthio camelus, +brambling, Fringilla montifringilla, +goldfinch, Carduelis carduelis, +house finch, linnet, Carpodacus mexicanus, +junco, snowbird, +indigo bunting, indigo finch, indigo bird, Passerina cyanea, +robin, American robin, Turdus migratorius, +bulbul, +jay, +magpie, +chickadee, +water ouzel, dipper, +kite, +bald eagle, American eagle, Haliaeetus leucocephalus, +vulture, +great grey owl, great gray owl, Strix nebulosa, +European fire salamander, Salamandra salamandra, +common newt, Triturus vulgaris, +eft, +spotted salamander, Ambystoma maculatum, +axolotl, mud puppy, Ambystoma mexicanum, +bullfrog, Rana catesbeiana, +tree frog, tree-frog, +tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui, +loggerhead, loggerhead turtle, Caretta caretta, +leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea, +mud turtle, +terrapin, +box turtle, box tortoise, +banded gecko, +common iguana, iguana, Iguana iguana, +American chameleon, anole, Anolis carolinensis, +whiptail, whiptail lizard, +agama, +frilled lizard, Chlamydosaurus kingi, +alligator lizard, +Gila monster, Heloderma suspectum, +green lizard, Lacerta viridis, +African chameleon, Chamaeleo chamaeleon, +Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis, +African crocodile, Nile crocodile, Crocodylus niloticus, +American alligator, Alligator mississipiensis, +triceratops, +thunder snake, worm snake, Carphophis amoenus, +ringneck snake, ring-necked snake, ring snake, +hognose snake, puff adder, sand viper, +green snake, grass snake, +king snake, kingsnake, +garter snake, grass snake, +water snake, +vine snake, +night snake, Hypsiglena torquata, +boa constrictor, Constrictor constrictor, +rock python, rock snake, Python sebae, +Indian cobra, Naja naja, +green mamba, +sea snake, +horned viper, cerastes, sand viper, horned asp, Cerastes cornutus, +diamondback, diamondback rattlesnake, Crotalus adamanteus, +sidewinder, horned rattlesnake, Crotalus cerastes, +trilobite, +harvestman, daddy longlegs, Phalangium opilio, +scorpion, +black and gold garden spider, Argiope aurantia, +barn spider, Araneus cavaticus, +garden spider, Aranea diademata, +black widow, Latrodectus mactans, +tarantula, +wolf spider, hunting spider, +tick, +centipede, +black grouse, +ptarmigan, +ruffed grouse, partridge, Bonasa umbellus, +prairie chicken, prairie grouse, prairie fowl, +peacock, +quail, +partridge, +African grey, African gray, Psittacus erithacus, +macaw, +sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita, +lorikeet, +coucal, +bee eater, +hornbill, +hummingbird, +jacamar, +toucan, +drake, +red-breasted merganser, Mergus serrator, +goose, +black swan, Cygnus atratus, +tusker, +echidna, spiny anteater, anteater, +platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus, +wallaby, brush kangaroo, +koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus, +wombat, +jellyfish, +sea anemone, anemone, +brain coral, +flatworm, platyhelminth, +nematode, nematode worm, roundworm, +conch, +snail, +slug, +sea slug, nudibranch, +chiton, coat-of-mail shell, sea cradle, polyplacophore, +chambered nautilus, pearly nautilus, nautilus, +Dungeness crab, Cancer magister, +rock crab, Cancer irroratus, +fiddler crab, +king crab, Alaska crab, Alaskan king crab, Alaska 
king crab, Paralithodes camtschatica, +American lobster, Northern lobster, Maine lobster, Homarus americanus, +spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish, +crayfish, crawfish, crawdad, crawdaddy, +hermit crab, +isopod, +white stork, Ciconia ciconia, +black stork, Ciconia nigra, +spoonbill, +flamingo, +little blue heron, Egretta caerulea, +American egret, great white heron, Egretta albus, +bittern, +crane, +limpkin, Aramus pictus, +European gallinule, Porphyrio porphyrio, +American coot, marsh hen, mud hen, water hen, Fulica americana, +bustard, +ruddy turnstone, Arenaria interpres, +red-backed sandpiper, dunlin, Erolia alpina, +redshank, Tringa totanus, +dowitcher, +oystercatcher, oyster catcher, +pelican, +king penguin, Aptenodytes patagonica, +albatross, mollymawk, +grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus, +killer whale, killer, orca, grampus, sea wolf, Orcinus orca, +dugong, Dugong dugon, +sea lion, +Chihuahua, +Japanese spaniel, +Maltese dog, Maltese terrier, Maltese, +Pekinese, Pekingese, Peke, +Shih-Tzu, +Blenheim spaniel, +papillon, +toy terrier, +Rhodesian ridgeback, +Afghan hound, Afghan, +basset, basset hound, +beagle, +bloodhound, sleuthhound, +bluetick, +black-and-tan coonhound, +Walker hound, Walker foxhound, +English foxhound, +redbone, +borzoi, Russian wolfhound, +Irish wolfhound, +Italian greyhound, +whippet, +Ibizan hound, Ibizan Podenco, +Norwegian elkhound, elkhound, +otterhound, otter hound, +Saluki, gazelle hound, +Scottish deerhound, deerhound, +Weimaraner, +Staffordshire bullterrier, Staffordshire bull terrier, +American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier, +Bedlington terrier, +Border terrier, +Kerry blue terrier, +Irish terrier, +Norfolk terrier, +Norwich terrier, +Yorkshire terrier, +wire-haired fox terrier, +Lakeland terrier, +Sealyham terrier, Sealyham, +Airedale, Airedale terrier, +cairn, cairn terrier, +Australian terrier, +Dandie Dinmont, Dandie Dinmont terrier, +Boston bull, Boston terrier, +miniature schnauzer, +giant schnauzer, +standard schnauzer, +Scotch terrier, Scottish terrier, Scottie, +Tibetan terrier, chrysanthemum dog, +silky terrier, Sydney silky, +soft-coated wheaten terrier, +West Highland white terrier, +Lhasa, Lhasa apso, +flat-coated retriever, +curly-coated retriever, +golden retriever, +Labrador retriever, +Chesapeake Bay retriever, +German short-haired pointer, +vizsla, Hungarian pointer, +English setter, +Irish setter, red setter, +Gordon setter, +Brittany spaniel, +clumber, clumber spaniel, +English springer, English springer spaniel, +Welsh springer spaniel, +cocker spaniel, English cocker spaniel, cocker, +Sussex spaniel, +Irish water spaniel, +kuvasz, +schipperke, +groenendael, +malinois, +briard, +kelpie, +komondor, +Old English sheepdog, bobtail, +Shetland sheepdog, Shetland sheep dog, Shetland, +collie, +Border collie, +Bouvier des Flandres, Bouviers des Flandres, +Rottweiler, +German shepherd, German shepherd dog, German police dog, alsatian, +Doberman, Doberman pinscher, +miniature pinscher, +Greater Swiss Mountain dog, +Bernese mountain dog, +Appenzeller, +EntleBucher, +boxer, +bull mastiff, +Tibetan mastiff, +French bulldog, +Great Dane, +Saint Bernard, St Bernard, +Eskimo dog, husky, +malamute, malemute, Alaskan malamute, +Siberian husky, +dalmatian, coach dog, carriage dog, +affenpinscher, monkey pinscher, monkey dog, +basenji, +pug, pug-dog, +Leonberg, +Newfoundland, Newfoundland dog, +Great Pyrenees, 
+Samoyed, Samoyede, +Pomeranian, +chow, chow chow, +keeshond, +Brabancon griffon, +Pembroke, Pembroke Welsh corgi, +Cardigan, Cardigan Welsh corgi, +toy poodle, +miniature poodle, +standard poodle, +Mexican hairless, +timber wolf, grey wolf, gray wolf, Canis lupus, +white wolf, Arctic wolf, Canis lupus tundrarum, +red wolf, maned wolf, Canis rufus, Canis niger, +coyote, prairie wolf, brush wolf, Canis latrans, +dingo, warrigal, warragal, Canis dingo, +dhole, Cuon alpinus, +African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus, +hyena, hyaena, +red fox, Vulpes vulpes, +kit fox, Vulpes macrotis, +Arctic fox, white fox, Alopex lagopus, +grey fox, gray fox, Urocyon cinereoargenteus, +tabby, tabby cat, +tiger cat, +Persian cat, +Siamese cat, Siamese, +Egyptian cat, +cougar, puma, catamount, mountain lion, painter, panther, Felis concolor, +lynx, catamount, +leopard, Panthera pardus, +snow leopard, ounce, Panthera uncia, +jaguar, panther, Panthera onca, Felis onca, +lion, king of beasts, Panthera leo, +tiger, Panthera tigris, +cheetah, chetah, Acinonyx jubatus, +brown bear, bruin, Ursus arctos, +American black bear, black bear, Ursus americanus, Euarctos americanus, +ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus, +sloth bear, Melursus ursinus, Ursus ursinus, +mongoose, +meerkat, mierkat, +tiger beetle, +ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle, +ground beetle, carabid beetle, +long-horned beetle, longicorn, longicorn beetle, +leaf beetle, chrysomelid, +dung beetle, +rhinoceros beetle, +weevil, +fly, +bee, +ant, emmet, pismire, +grasshopper, hopper, +cricket, +walking stick, walkingstick, stick insect, +cockroach, roach, +mantis, mantid, +cicada, cicala, +leafhopper, +lacewing, lacewing fly, +"dragonfly, darning needle, devils darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk", +damselfly, +admiral, +ringlet, ringlet butterfly, +monarch, monarch butterfly, milkweed butterfly, Danaus plexippus, +cabbage butterfly, +sulphur butterfly, sulfur butterfly, +lycaenid, lycaenid butterfly, +starfish, sea star, +sea urchin, +sea cucumber, holothurian, +wood rabbit, cottontail, cottontail rabbit, +hare, +Angora, Angora rabbit, +hamster, +porcupine, hedgehog, +fox squirrel, eastern fox squirrel, Sciurus niger, +marmot, +beaver, +guinea pig, Cavia cobaya, +sorrel, +zebra, +hog, pig, grunter, squealer, Sus scrofa, +wild boar, boar, Sus scrofa, +warthog, +hippopotamus, hippo, river horse, Hippopotamus amphibius, +ox, +water buffalo, water ox, Asiatic buffalo, Bubalus bubalis, +bison, +ram, tup, +bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis, +ibex, Capra ibex, +hartebeest, +impala, Aepyceros melampus, +gazelle, +Arabian camel, dromedary, Camelus dromedarius, +llama, +weasel, +mink, +polecat, fitch, foulmart, foumart, Mustela putorius, +black-footed ferret, ferret, Mustela nigripes, +otter, +skunk, polecat, wood pussy, +badger, +armadillo, +three-toed sloth, ai, Bradypus tridactylus, +orangutan, orang, orangutang, Pongo pygmaeus, +gorilla, Gorilla gorilla, +chimpanzee, chimp, Pan troglodytes, +gibbon, Hylobates lar, +siamang, Hylobates syndactylus, Symphalangus syndactylus, +guenon, guenon monkey, +patas, hussar monkey, Erythrocebus patas, +baboon, +macaque, +langur, +colobus, colobus monkey, +proboscis monkey, Nasalis larvatus, +marmoset, +capuchin, ringtail, Cebus capucinus, +howler monkey, howler, +titi, titi monkey, +spider monkey, Ateles geoffroyi, +squirrel monkey, Saimiri 
sciureus, +Madagascar cat, ring-tailed lemur, Lemur catta, +indri, indris, Indri indri, Indri brevicaudatus, +Indian elephant, Elephas maximus, +African elephant, Loxodonta africana, +lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens, +giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca, +barracouta, snoek, +eel, +coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch, +rock beauty, Holocanthus tricolor, +anemone fish, +sturgeon, +gar, garfish, garpike, billfish, Lepisosteus osseus, +lionfish, +puffer, pufferfish, blowfish, globefish, +abacus, +abaya, +"academic gown, academic robe, judges robe", +accordion, piano accordion, squeeze box, +acoustic guitar, +aircraft carrier, carrier, flattop, attack aircraft carrier, +airliner, +airship, dirigible, +altar, +ambulance, +amphibian, amphibious vehicle, +analog clock, +apiary, bee house, +apron, +ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin, +assault rifle, assault gun, +backpack, back pack, knapsack, packsack, rucksack, haversack, +bakery, bakeshop, bakehouse, +balance beam, beam, +balloon, +ballpoint, ballpoint pen, ballpen, Biro, +Band Aid, +banjo, +bannister, banister, balustrade, balusters, handrail, +barbell, +barber chair, +barbershop, +barn, +barometer, +barrel, cask, +barrow, garden cart, lawn cart, wheelbarrow, +baseball, +basketball, +bassinet, +bassoon, +bathing cap, swimming cap, +bath towel, +bathtub, bathing tub, bath, tub, +beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon, +beacon, lighthouse, beacon light, pharos, +beaker, +bearskin, busby, shako, +beer bottle, +beer glass, +bell cote, bell cot, +bib, +bicycle-built-for-two, tandem bicycle, tandem, +bikini, two-piece, +binder, ring-binder, +binoculars, field glasses, opera glasses, +birdhouse, +boathouse, +bobsled, bobsleigh, bob, +bolo tie, bolo, bola tie, bola, +bonnet, poke bonnet, +bookcase, +bookshop, bookstore, bookstall, +bottlecap, +bow, +bow tie, bow-tie, bowtie, +brass, memorial tablet, plaque, +brassiere, bra, bandeau, +breakwater, groin, groyne, mole, bulwark, seawall, jetty, +breastplate, aegis, egis, +broom, +bucket, pail, +buckle, +bulletproof vest, +bullet train, bullet, +butcher shop, meat market, +cab, hack, taxi, taxicab, +caldron, cauldron, +candle, taper, wax light, +cannon, +canoe, +can opener, tin opener, +cardigan, +car mirror, +carousel, carrousel, merry-go-round, roundabout, whirligig, +"carpenters kit, tool kit", +carton, +car wheel, +cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM, +cassette, +cassette player, +castle, +catamaran, +CD player, +cello, violoncello, +cellular telephone, cellular phone, cellphone, cell, mobile phone, +chain, +chainlink fence, +chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour, +chain saw, chainsaw, +chest, +chiffonier, commode, +chime, bell, gong, +china cabinet, china closet, +Christmas stocking, +church, church building, +cinema, movie theater, movie theatre, movie house, picture palace, +cleaver, meat cleaver, chopper, +cliff dwelling, +cloak, +clog, geta, patten, sabot, +cocktail shaker, +coffee mug, +coffeepot, +coil, spiral, volute, whorl, helix, +combination lock, +computer keyboard, keypad, +confectionery, confectionary, candy store, +container ship, containership, container vessel, +convertible, +corkscrew, bottle screw, +cornet, horn, trumpet, trump, +cowboy boot, 
+cowboy hat, ten-gallon hat, +cradle, +crane, +crash helmet, +crate, +crib, cot, +Crock Pot, +croquet ball, +crutch, +cuirass, +dam, dike, dyke, +desk, +desktop computer, +dial telephone, dial phone, +diaper, nappy, napkin, +digital clock, +digital watch, +dining table, board, +dishrag, dishcloth, +dishwasher, dish washer, dishwashing machine, +disk brake, disc brake, +dock, dockage, docking facility, +dogsled, dog sled, dog sleigh, +dome, +doormat, welcome mat, +drilling platform, offshore rig, +drum, membranophone, tympan, +drumstick, +dumbbell, +Dutch oven, +electric fan, blower, +electric guitar, +electric locomotive, +entertainment center, +envelope, +espresso maker, +face powder, +feather boa, boa, +file, file cabinet, filing cabinet, +fireboat, +fire engine, fire truck, +fire screen, fireguard, +flagpole, flagstaff, +flute, transverse flute, +folding chair, +football helmet, +forklift, +fountain, +fountain pen, +four-poster, +freight car, +French horn, horn, +frying pan, frypan, skillet, +fur coat, +garbage truck, dustcart, +gasmask, respirator, gas helmet, +gas pump, gasoline pump, petrol pump, island dispenser, +goblet, +go-kart, +golf ball, +golfcart, golf cart, +gondola, +gong, tam-tam, +gown, +grand piano, grand, +greenhouse, nursery, glasshouse, +grille, radiator grille, +grocery store, grocery, food market, market, +guillotine, +hair slide, +hair spray, +half track, +hammer, +hamper, +hand blower, blow dryer, blow drier, hair dryer, hair drier, +hand-held computer, hand-held microcomputer, +handkerchief, hankie, hanky, hankey, +hard disc, hard disk, fixed disk, +harmonica, mouth organ, harp, mouth harp, +harp, +harvester, reaper, +hatchet, +holster, +home theater, home theatre, +honeycomb, +hook, claw, +hoopskirt, crinoline, +horizontal bar, high bar, +horse cart, horse-cart, +hourglass, +iPod, +iron, smoothing iron, +"jack-o-lantern", +jean, blue jean, denim, +jeep, landrover, +jersey, T-shirt, tee shirt, +jigsaw puzzle, +jinrikisha, ricksha, rickshaw, +joystick, +kimono, +knee pad, +knot, +lab coat, laboratory coat, +ladle, +lampshade, lamp shade, +laptop, laptop computer, +lawn mower, mower, +lens cap, lens cover, +letter opener, paper knife, paperknife, +library, +lifeboat, +lighter, light, igniter, ignitor, +limousine, limo, +liner, ocean liner, +lipstick, lip rouge, +Loafer, +lotion, +loudspeaker, speaker, speaker unit, loudspeaker system, speaker system, +"loupe, jewelers loupe", +lumbermill, sawmill, +magnetic compass, +mailbag, postbag, +mailbox, letter box, +maillot, +maillot, tank suit, +manhole cover, +maraca, +marimba, xylophone, +mask, +matchstick, +maypole, +maze, labyrinth, +measuring cup, +medicine chest, medicine cabinet, +megalith, megalithic structure, +microphone, mike, +microwave, microwave oven, +military uniform, +milk can, +minibus, +miniskirt, mini, +minivan, +missile, +mitten, +mixing bowl, +mobile home, manufactured home, +Model T, +modem, +monastery, +monitor, +moped, +mortar, +mortarboard, +mosque, +mosquito net, +motor scooter, scooter, +mountain bike, all-terrain bike, off-roader, +mountain tent, +mouse, computer mouse, +mousetrap, +moving van, +muzzle, +nail, +neck brace, +necklace, +nipple, +notebook, notebook computer, +obelisk, +oboe, hautboy, hautbois, +ocarina, sweet potato, +odometer, hodometer, mileometer, milometer, +oil filter, +organ, pipe organ, +oscilloscope, scope, cathode-ray oscilloscope, CRO, +overskirt, +oxcart, +oxygen mask, +packet, +paddle, boat paddle, +paddlewheel, paddle wheel, +padlock, +paintbrush, +"pajama, pyjama, 
pjs, jammies", +palace, +panpipe, pandean pipe, syrinx, +paper towel, +parachute, chute, +parallel bars, bars, +park bench, +parking meter, +passenger car, coach, carriage, +patio, terrace, +pay-phone, pay-station, +pedestal, plinth, footstall, +pencil box, pencil case, +pencil sharpener, +perfume, essence, +Petri dish, +photocopier, +pick, plectrum, plectron, +pickelhaube, +picket fence, paling, +pickup, pickup truck, +pier, +piggy bank, penny bank, +pill bottle, +pillow, +ping-pong ball, +pinwheel, +pirate, pirate ship, +pitcher, ewer, +"plane, carpenters plane, woodworking plane", +planetarium, +plastic bag, +plate rack, +plow, plough, +"plunger, plumbers helper", +Polaroid camera, Polaroid Land camera, +pole, +police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria, +poncho, +pool table, billiard table, snooker table, +pop bottle, soda bottle, +pot, flowerpot, +"potters wheel", +power drill, +prayer rug, prayer mat, +printer, +prison, prison house, +projectile, missile, +projector, +puck, hockey puck, +punching bag, punch bag, punching ball, punchball, +purse, +quill, quill pen, +quilt, comforter, comfort, puff, +racer, race car, racing car, +racket, racquet, +radiator, +radio, wireless, +radio telescope, radio reflector, +rain barrel, +recreational vehicle, RV, R.V., +reel, +reflex camera, +refrigerator, icebox, +remote control, remote, +restaurant, eating house, eating place, eatery, +revolver, six-gun, six-shooter, +rifle, +rocking chair, rocker, +rotisserie, +rubber eraser, rubber, pencil eraser, +rugby ball, +rule, ruler, +running shoe, +safe, +safety pin, +saltshaker, salt shaker, +sandal, +sarong, +sax, saxophone, +scabbard, +scale, weighing machine, +school bus, +schooner, +scoreboard, +screen, CRT screen, +screw, +screwdriver, +seat belt, seatbelt, +sewing machine, +shield, buckler, +shoe shop, shoe-shop, shoe store, +shoji, +shopping basket, +shopping cart, +shovel, +shower cap, +shower curtain, +ski, +ski mask, +sleeping bag, +slide rule, slipstick, +sliding door, +slot, one-armed bandit, +snorkel, +snowmobile, +snowplow, snowplough, +soap dispenser, +soccer ball, +sock, +solar dish, solar collector, solar furnace, +sombrero, +soup bowl, +space bar, +space heater, +space shuttle, +spatula, +speedboat, +"spider web, spiders web", +spindle, +sports car, sport car, +spotlight, spot, +stage, +steam locomotive, +steel arch bridge, +steel drum, +stethoscope, +stole, +stone wall, +stopwatch, stop watch, +stove, +strainer, +streetcar, tram, tramcar, trolley, trolley car, +stretcher, +studio couch, day bed, +stupa, tope, +submarine, pigboat, sub, U-boat, +suit, suit of clothes, +sundial, +sunglass, +sunglasses, dark glasses, shades, +sunscreen, sunblock, sun blocker, +suspension bridge, +swab, swob, mop, +sweatshirt, +swimming trunks, bathing trunks, +swing, +switch, electric switch, electrical switch, +syringe, +table lamp, +tank, army tank, armored combat vehicle, armoured combat vehicle, +tape player, +teapot, +teddy, teddy bear, +television, television system, +tennis ball, +thatch, thatched roof, +theater curtain, theatre curtain, +thimble, +thresher, thrasher, threshing machine, +throne, +tile roof, +toaster, +tobacco shop, tobacconist shop, tobacconist, +toilet seat, +torch, +totem pole, +tow truck, tow car, wrecker, +toyshop, +tractor, +trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi, +tray, +trench coat, +tricycle, trike, velocipede, +trimaran, +tripod, +triumphal arch, +trolleybus, trolley coach, trackless trolley, +trombone, +tub, 
vat, +turnstile, +typewriter keyboard, +umbrella, +unicycle, monocycle, +upright, upright piano, +vacuum, vacuum cleaner, +vase, +vault, +velvet, +vending machine, +vestment, +viaduct, +violin, fiddle, +volleyball, +waffle iron, +wall clock, +wallet, billfold, notecase, pocketbook, +wardrobe, closet, press, +warplane, military plane, +washbasin, handbasin, washbowl, lavabo, wash-hand basin, +washer, automatic washer, washing machine, +water bottle, +water jug, +water tower, +whiskey jug, +whistle, +wig, +window screen, +window shade, +Windsor tie, +wine bottle, +wing, +wok, +wooden spoon, +wool, woolen, woollen, +worm fence, snake fence, snake-rail fence, Virginia fence, +wreck, +yawl, +yurt, +web site, website, internet site, site, +comic book, +crossword puzzle, crossword, +street sign, +traffic light, traffic signal, stoplight, +book jacket, dust cover, dust jacket, dust wrapper, +menu, +plate, +guacamole, +consomme, +hot pot, hotpot, +trifle, +ice cream, icecream, +ice lolly, lolly, lollipop, popsicle, +French loaf, +bagel, beigel, +pretzel, +cheeseburger, +hotdog, hot dog, red hot, +mashed potato, +head cabbage, +broccoli, +cauliflower, +zucchini, courgette, +spaghetti squash, +acorn squash, +butternut squash, +cucumber, cuke, +artichoke, globe artichoke, +bell pepper, +cardoon, +mushroom, +Granny Smith, +strawberry, +orange, +lemon, +fig, +pineapple, ananas, +banana, +jackfruit, jak, jack, +custard apple, +pomegranate, +hay, +carbonara, +chocolate sauce, chocolate syrup, +dough, +meat loaf, meatloaf, +pizza, pizza pie, +potpie, +burrito, +red wine, +espresso, +cup, +eggnog, +alp, +bubble, +cliff, drop, drop-off, +coral reef, +geyser, +lakeside, lakeshore, +promontory, headland, head, foreland, +sandbar, sand bar, +seashore, coast, seacoast, sea-coast, +valley, vale, +volcano, +ballplayer, baseball player, +groom, bridegroom, +scuba diver, +rapeseed, +daisy, +"yellow ladys slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum", +corn, +acorn, +hip, rose hip, rosehip, +buckeye, horse chestnut, conker, +coral fungus, +agaric, +gyromitra, +stinkhorn, carrion fungus, +earthstar, +hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa, +bolete, +ear, spike, capitulum, +toilet tissue, toilet paper, bathroom tissue diff --git a/examples/Pipeline/LowPrecision/ResNet50_Slim/pipeline_http_client.py b/examples/Pipeline/LowPrecision/ResNet50_Slim/pipeline_http_client.py new file mode 100644 index 0000000000000000000000000000000000000000..119a412113dac95621ef8fcad06398fed40a3da0 --- /dev/null +++ b/examples/Pipeline/LowPrecision/ResNet50_Slim/pipeline_http_client.py @@ -0,0 +1,35 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
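+# Minimal HTTP client for this pipeline example: it base64-encodes daisy.jpg,
+# POSTs it to the imagenet op exposed on http_port 18080 (see config.yml), and
+# prints the JSON response carrying the predicted label and probability.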
+ +import numpy as np +import requests +import json +import cv2 +import base64 +import os + + +def cv2_to_base64(image): + return base64.b64encode(image).decode('utf8') + + +if __name__ == "__main__": + url = "http://127.0.0.1:18080/imagenet/prediction" + with open(os.path.join(".", "daisy.jpg"), 'rb') as file: + image_data1 = file.read() + image = cv2_to_base64(image_data1) + data = {"key": ["image"], "value": [image]} + for i in range(1): + r = requests.post(url=url, data=json.dumps(data)) + print(r.json()) diff --git a/examples/Pipeline/LowPrecision/ResNet50_Slim/pipeline_rpc_client.py b/examples/Pipeline/LowPrecision/ResNet50_Slim/pipeline_rpc_client.py new file mode 100644 index 0000000000000000000000000000000000000000..82a570244cecc51061a38b64c25602f8dfbe931d --- /dev/null +++ b/examples/Pipeline/LowPrecision/ResNet50_Slim/pipeline_rpc_client.py @@ -0,0 +1,37 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from paddle_serving_server.pipeline import PipelineClient +import numpy as np +import requests +import json +import cv2 +import base64 +import os + +client = PipelineClient() +client.connect(['127.0.0.1:9993']) + + +def cv2_to_base64(image): + return base64.b64encode(image).decode('utf8') + + +with open("daisy.jpg", 'rb') as file: + image_data = file.read() +image = cv2_to_base64(image_data) + +for i in range(1): + ret = client.predict(feed_dict={"image": image}, fetch=["label", "prob"]) + print(ret) diff --git a/examples/Pipeline/LowPrecision/ResNet50_Slim/resnet50_web_service.py b/examples/Pipeline/LowPrecision/ResNet50_Slim/resnet50_web_service.py new file mode 100644 index 0000000000000000000000000000000000000000..0a9c5f97e3536944ee5c7f0bbae1448a19bf4eb7 --- /dev/null +++ b/examples/Pipeline/LowPrecision/ResNet50_Slim/resnet50_web_service.py @@ -0,0 +1,71 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
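+# Pipeline web service for the quantized ResNet50 model: ImagenetOp decodes the
+# base64 image, applies the Resize/CenterCrop/Normalize preprocessing defined in
+# init_op, runs the int8 model configured in config.yml, and maps the highest
+# score to its ImageNet label in postprocess.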
+import sys +from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage +from paddle_serving_server.web_service import WebService, Op +import logging +import numpy as np +import base64, cv2 + + +class ImagenetOp(Op): + def init_op(self): + self.seq = Sequential([ + Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)), + Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], + True) + ]) + self.label_dict = {} + label_idx = 0 + with open("imagenet.label") as fin: + for line in fin: + self.label_dict[label_idx] = line.strip() + label_idx += 1 + + def preprocess(self, input_dicts, data_id, log_id): + (_, input_dict), = input_dicts.items() + batch_size = len(input_dict.keys()) + imgs = [] + for key in input_dict.keys(): + data = base64.b64decode(input_dict[key].encode('utf8')) + data = np.fromstring(data, np.uint8) + im = cv2.imdecode(data, cv2.IMREAD_COLOR) + img = self.seq(im) + imgs.append(img[np.newaxis, :].copy()) + input_imgs = np.concatenate(imgs, axis=0) + return {"image": input_imgs}, False, None, "" + + def postprocess(self, input_dicts, fetch_dict, data_id, log_id): + score_list = fetch_dict["save_infer_model/scale_0.tmp_0"] + result = {"label": [], "prob": []} + for score in score_list: + score = score.tolist() + max_score = max(score) + result["label"].append(self.label_dict[score.index(max_score)] + .strip().replace(",", "")) + result["prob"].append(max_score) + result["label"] = str(result["label"]) + result["prob"] = str(result["prob"]) + return result, None, "" + + +class ImageService(WebService): + def get_pipeline_response(self, read_op): + image_op = ImagenetOp(name="imagenet", input_ops=[read_op]) + return image_op + + +uci_service = ImageService(name="imagenet") +uci_service.prepare_pipeline_config("config.yml") +uci_service.run_service() diff --git a/examples/Pipeline/PaddleNLP/bert/README.md b/examples/Pipeline/PaddleNLP/bert/README.md index c396b77c9d2b9198d0474540872cb1c4dcdce5b1..0fb8739b430f49f8bc066eb9a357ee92850b89d3 100644 --- a/examples/Pipeline/PaddleNLP/bert/README.md +++ b/examples/Pipeline/PaddleNLP/bert/README.md @@ -1,6 +1,6 @@ -# Imagenet Pipeline WebService +# Bert Pipeline WebService -This document will takes Imagenet service as an example to introduce how to use Pipeline WebService. +This document will takes Bert service as an example to introduce how to use Pipeline WebService. ## Get model ``` diff --git a/examples/Pipeline/PaddleNLP/bert/README_CN.md b/examples/Pipeline/PaddleNLP/bert/README_CN.md index 841abdadf5a3848fcf1e042d8e73c051610eefaa..eb93951d50ffb0c3bd899d816294ffde66e646a9 100644 --- a/examples/Pipeline/PaddleNLP/bert/README_CN.md +++ b/examples/Pipeline/PaddleNLP/bert/README_CN.md @@ -1,6 +1,6 @@ -# Imagenet Pipeline WebService +# Bert Pipeline WebService -这里以 Imagenet 服务为例来介绍 Pipeline WebService 的使用。 +这里以 Bert 服务为例来介绍 Pipeline WebService 的使用。 ## 获取模型 ``` diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/README.md b/examples/Pipeline/PaddleNLP/semantic_indexing/README.md new file mode 100644 index 0000000000000000000000000000000000000000..20e0bc04a2b0de6c3fb21355b8636de73c625d42 --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/README.md @@ -0,0 +1,201 @@ +# In-batch Negatives + + **目录** + +* [模型下载](#模型下载) +* [模型部署](#模型部署) + + + + +## 1. 
语义索引模型 + +**语义索引训练模型下载链接:** + +以下模型结构参数为: `TransformerLayer:12, Hidden:768, Heads:12, OutputEmbSize: 256` + +|Model|训练参数配置|硬件|MD5| +| ------------ | ------------ | ------------ |-----------| +|[batch_neg](https://bj.bcebos.com/v1/paddlenlp/models/inbatch_model.zip)|margin:0.2 scale:30 epoch:3 lr:5E-5 bs:64 max_len:64|4卡 v100-16g
|f3e5c7d7b0b718c2530c5e1b136b2d74| + +``` +wget https://bj.bcebos.com/v1/paddlenlp/models/inbatch_model.zip +unzip inbatch_model.zip -d checkpoints +``` + + + +## 2. 模型部署 + +### 2.1 动转静导出 + +首先把动态图模型转换为静态图: + +``` +python export_model.py --params_path checkpoints/model_40/model_state.pdparams --output_path=./output +``` +也可以运行下面的bash脚本: + +``` +sh scripts/export_model.sh +``` + +### 2.2 Paddle Inference预测 + +预测既可以抽取向量也可以计算两个文本的相似度。 + +修改id2corpus的样本: + +``` +# 抽取向量 +id2corpus={0:'国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据'} +# 计算相似度 +corpus_list=[['中西方语言与文化的差异','中西方文化差异以及语言体现中西方文化,差异,语言体现'], + ['中西方语言与文化的差异','飞桨致力于让深度学习技术的创新与应用更简单']] + +``` + +然后使用PaddleInference + +``` +python deploy/python/predict.py --model_dir=./output +``` +也可以运行下面的bash脚本: + +``` +sh deploy.sh +``` +最终输出的是256维度的特征向量和句子对的预测概率: + +``` +(1, 256) +[[-0.0394925 -0.04474756 -0.065534 0.00939134 0.04359895 0.14659195 + -0.0091779 -0.07303623 0.09413272 -0.01255222 -0.08685658 0.02762237 + 0.10138468 0.00962821 0.10888419 0.04553023 0.05898942 0.00694253 + .... + +[0.959269642829895, 0.04725276678800583] +``` + +### 2.3 Paddle Serving部署 + +Paddle Serving 的详细文档请参考 [Pipeline_Design](https://github.com/PaddlePaddle/Serving/blob/v0.7.0/doc/Python_Pipeline/Pipeline_Design_CN.md)和[Serving_Design](https://github.com/PaddlePaddle/Serving/blob/v0.7.0/doc/Serving_Design_CN.md),首先把静态图模型转换成Serving的格式: + +``` +python export_to_serving.py \ + --dirname "output" \ + --model_filename "inference.get_pooled_embedding.pdmodel" \ + --params_filename "inference.get_pooled_embedding.pdiparams" \ + --server_path "./serving_server" \ + --client_path "./serving_client" \ + --fetch_alias_names "output_embedding" + +``` + +参数含义说明 +* `dirname`: 需要转换的模型文件存储路径,Program 结构文件和参数文件均保存在此目录。 +* `model_filename`: 存储需要转换的模型 Inference Program 结构的文件名称。如果设置为 None ,则使用 `__model__` 作为默认的文件名 +* `params_filename`: 存储需要转换的模型所有参数的文件名称。当且仅当所有模型参数被保>存在一个单独的二进制文件中,它才需要被指定。如果模型参数是存储在各自分离的文件中,设置它的值为 None +* `server_path`: 转换后的模型文件和配置文件的存储路径。默认值为 serving_server +* `client_path`: 转换后的客户端配置文件存储路径。默认值为 serving_client +* `fetch_alias_names`: 模型输出的别名设置,比如输入的 input_ids 等,都可以指定成其他名字,默认不指定 +* `feed_alias_names`: 模型输入的别名设置,比如输出 pooled_out 等,都可以重新指定成其他模型,默认不指定 + +也可以运行下面的 bash 脚本: +``` +sh scripts/export_to_serving.sh +``` + +Paddle Serving的部署有两种方式,第一种方式是Pipeline的方式,第二种是C++的方式,下面分别介绍这两种方式的用法: + +#### 2.3.1 Pipeline方式 + +启动 Pipeline Server: + +``` +python web_service.py +``` + +启动客户端调用 Server。 + +首先修改rpc_client.py中需要预测的样本: + +``` +list_data = [ + "国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据", + "试论翻译过程中的文化差异与语言空缺翻译过程,文化差异,语言空缺,文化对比" +] +``` +然后运行: + +``` +python rpc_client.py +``` +模型的输出为: + +``` +{'0': '国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据', '1': '试论翻译过程中的文化差异与语言空缺翻译过程,文化差异,语言空缺,文化对比'} +PipelineClient::predict pack_data time:1641450851.3752182 +PipelineClient::predict before time:1641450851.375738 +['output_embedding'] +(2, 256) +[[ 0.07830612 -0.14036864 0.03433796 -0.14967982 -0.03386067 0.06630666 + 0.01357943 0.03531194 0.02411093 0.02000859 0.05724002 -0.08119463 + ...... +``` + +可以看到客户端发送了2条文本,返回了2个 embedding 向量 + +#### 2.3.2 C++的方式 + +启动C++的Serving: + +``` +python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_id 2 --thread 5 --ir_optim True --use_trt --precision FP16 +``` +也可以使用脚本: + +``` +sh deploy/C++/start_server.sh +``` +Client 可以使用 http 或者 rpc 两种方式,rpc 的方式为: + +``` +python deploy/C++/rpc_client.py +``` +运行的输出为: +``` +I0209 20:40:07.978225 20896 general_model.cpp:490] [client]logid=0,client_cost=395.695ms,server_cost=392.559ms. 
+time to cost :0.3960278034210205 seconds +{'output_embedding': array([[ 9.01343748e-02, -1.21870913e-01, 1.32834800e-02, + -1.57673359e-01, -2.60387752e-02, 6.98455423e-02, + 1.58108603e-02, 3.89952064e-02, 3.22783105e-02, + 3.49135026e-02, 7.66086206e-02, -9.12970975e-02, + 6.25643134e-02, 7.21886680e-02, 7.03565404e-02, + 5.44054210e-02, 3.25332815e-03, 5.01751155e-02, +...... +``` +可以看到服务端返回了向量 + +或者使用 http 的客户端访问模式: + +``` +python deploy/C++/http_client.py +``` +运行的输出为: + +``` +(2, 64) +(2, 64) +outputs { + tensor { + float_data: 0.09013437479734421 + float_data: -0.12187091261148453 + float_data: 0.01328347995877266 + float_data: -0.15767335891723633 +...... +``` +可以看到服务端返回了向量 + + + diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/base_model.py b/examples/Pipeline/PaddleNLP/semantic_indexing/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..c471d126c2649fee7554fa8f026284c7300ada2f --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/base_model.py @@ -0,0 +1,187 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import abc +import sys + +import numpy as np + +import paddle +import paddle.nn as nn +import paddle.nn.functional as F + + +class SemanticIndexBase(nn.Layer): + def __init__(self, pretrained_model, dropout=None, output_emb_size=None): + super().__init__() + self.ptm = pretrained_model + self.dropout = nn.Dropout(dropout if dropout is not None else 0.1) + + # if output_emb_size is not None, then add Linear layer to reduce embedding_size, + # we recommend set output_emb_size = 256 considering the trade-off beteween + # recall performance and efficiency + + self.output_emb_size = output_emb_size + if output_emb_size > 0: + weight_attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.TruncatedNormal(std=0.02)) + self.emb_reduce_linear = paddle.nn.Linear( + 768, output_emb_size, weight_attr=weight_attr) + + @paddle.jit.to_static(input_spec=[ + paddle.static.InputSpec( + shape=[None, None], dtype='int64'), paddle.static.InputSpec( + shape=[None, None], dtype='int64') + ]) + def get_pooled_embedding(self, + input_ids, + token_type_ids=None, + position_ids=None, + attention_mask=None): + _, cls_embedding = self.ptm(input_ids, token_type_ids, position_ids, + attention_mask) + + if self.output_emb_size > 0: + cls_embedding = self.emb_reduce_linear(cls_embedding) + cls_embedding = self.dropout(cls_embedding) + cls_embedding = F.normalize(cls_embedding, p=2, axis=-1) + + return cls_embedding + + def get_semantic_embedding(self, data_loader): + self.eval() + with paddle.no_grad(): + for batch_data in data_loader: + input_ids, token_type_ids = batch_data + input_ids = paddle.to_tensor(input_ids) + token_type_ids = paddle.to_tensor(token_type_ids) + + text_embeddings = self.get_pooled_embedding( + input_ids, token_type_ids=token_type_ids) + + yield text_embeddings + + def cosine_sim(self, + query_input_ids, + title_input_ids, + query_token_type_ids=None, + 
query_position_ids=None, + query_attention_mask=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None): + + query_cls_embedding = self.get_pooled_embedding( + query_input_ids, query_token_type_ids, query_position_ids, + query_attention_mask) + + title_cls_embedding = self.get_pooled_embedding( + title_input_ids, title_token_type_ids, title_position_ids, + title_attention_mask) + + cosine_sim = paddle.sum(query_cls_embedding * title_cls_embedding, + axis=-1) + return cosine_sim + + @abc.abstractmethod + def forward(self): + pass + + +class SemanticIndexBaseStatic(nn.Layer): + def __init__(self, pretrained_model, dropout=None, output_emb_size=None): + super().__init__() + self.ptm = pretrained_model + self.dropout = nn.Dropout(dropout if dropout is not None else 0.1) + + # if output_emb_size is not None, then add Linear layer to reduce embedding_size, + # we recommend set output_emb_size = 256 considering the trade-off beteween + # recall performance and efficiency + + self.output_emb_size = output_emb_size + if output_emb_size > 0: + weight_attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.TruncatedNormal(std=0.02)) + self.emb_reduce_linear = paddle.nn.Linear( + 768, output_emb_size, weight_attr=weight_attr) + + @paddle.jit.to_static(input_spec=[ + paddle.static.InputSpec( + shape=[None, None], dtype='int64'), paddle.static.InputSpec( + shape=[None, None], dtype='int64') + ]) + def get_pooled_embedding(self, + input_ids, + token_type_ids=None, + position_ids=None, + attention_mask=None): + _, cls_embedding = self.ptm(input_ids, token_type_ids, position_ids, + attention_mask) + + if self.output_emb_size > 0: + cls_embedding = self.emb_reduce_linear(cls_embedding) + cls_embedding = self.dropout(cls_embedding) + cls_embedding = F.normalize(cls_embedding, p=2, axis=-1) + + return cls_embedding + + def get_semantic_embedding(self, data_loader): + self.eval() + with paddle.no_grad(): + for batch_data in data_loader: + input_ids, token_type_ids = batch_data + input_ids = paddle.to_tensor(input_ids) + token_type_ids = paddle.to_tensor(token_type_ids) + + text_embeddings = self.get_pooled_embedding( + input_ids, token_type_ids=token_type_ids) + + yield text_embeddings + + def cosine_sim(self, + query_input_ids, + title_input_ids, + query_token_type_ids=None, + query_position_ids=None, + query_attention_mask=None, + title_token_type_ids=None, + title_position_ids=None, + title_attention_mask=None): + + query_cls_embedding = self.get_pooled_embedding( + query_input_ids, query_token_type_ids, query_position_ids, + query_attention_mask) + + title_cls_embedding = self.get_pooled_embedding( + title_input_ids, title_token_type_ids, title_position_ids, + title_attention_mask) + + cosine_sim = paddle.sum(query_cls_embedding * title_cls_embedding, + axis=-1) + return cosine_sim + + def forward(self, + input_ids, + token_type_ids=None, + position_ids=None, + attention_mask=None): + _, cls_embedding = self.ptm(input_ids, token_type_ids, position_ids, + attention_mask) + + if self.output_emb_size > 0: + cls_embedding = self.emb_reduce_linear(cls_embedding) + cls_embedding = self.dropout(cls_embedding) + cls_embedding = F.normalize(cls_embedding, p=2, axis=-1) + + return cls_embedding diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/http_client.py b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/http_client.py new file mode 100644 index 0000000000000000000000000000000000000000..a976ad9fc33b06ce7148adc7153d4b35183e31c0 --- /dev/null +++ 
b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/http_client.py @@ -0,0 +1,81 @@ +# coding:utf-8 +# pylint: disable=doc-string-missing +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import time +import numpy as np +import requests +import json + +from paddle_serving_client import HttpClient +import paddlenlp as ppnlp + + +def convert_example(example, + tokenizer, + max_seq_length=512, + pad_to_max_seq_len=True): + list_input_ids = [] + list_token_type_ids = [] + for text in example: + encoded_inputs = tokenizer( + text=text, + max_seq_len=max_seq_length, + pad_to_max_seq_len=pad_to_max_seq_len) + input_ids = encoded_inputs["input_ids"] + token_type_ids = encoded_inputs["token_type_ids"] + + list_input_ids.append(input_ids) + list_token_type_ids.append(token_type_ids) + return list_input_ids, list_token_type_ids + + +# 启动python客户端 +endpoint_list = ['127.0.0.1:9393'] +client = HttpClient() +client.load_client_config('serving_client') +client.connect(endpoint_list) +feed_names = client.feed_names_ +fetch_names = client.fetch_names_ +print(feed_names) +print(fetch_names) + +# 创建tokenizer +tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0') +max_seq_len = 64 + +# 数据预处理 + +list_data = ['国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据.', '面向生态系统服务的生态系统分类方案研发与应用'] +# for i in range(5): +# list_data.extend(list_data) +# print(len(list_data)) +examples = convert_example(list_data, tokenizer, max_seq_length=max_seq_len) +print(examples) + +feed_dict = {} +feed_dict['input_ids'] = np.array(examples[0]) +feed_dict['token_type_ids'] = np.array(examples[1]) + +print(feed_dict['input_ids'].shape) +print(feed_dict['token_type_ids'].shape) + +# batch设置为True表示的是批量预测 +b_start = time.time() +result = client.predict(feed=feed_dict, fetch=fetch_names, batch=True) +b_end = time.time() +print(result) +print("time to cost :{} seconds".format(b_end - b_start)) diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/rpc_client.py b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/rpc_client.py new file mode 100644 index 0000000000000000000000000000000000000000..9ea4c245f2a10256166a512f9282282e69d9997b --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/rpc_client.py @@ -0,0 +1,77 @@ +# coding:utf-8 +# pylint: disable=doc-string-missing +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
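+# RPC client for the C++ Serving deployment: it tokenizes two example sentences
+# with the ernie-1.0 tokenizer (padded to max_seq_len=64), sends input_ids and
+# token_type_ids to the server listening on port 9393, and prints the returned
+# output_embedding together with the elapsed time.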
+ +import sys +import time +import numpy as np + +from paddle_serving_client import Client +import paddlenlp as ppnlp + + +def convert_example(example, + tokenizer, + max_seq_length=512, + pad_to_max_seq_len=True): + list_input_ids = [] + list_token_type_ids = [] + for text in example: + encoded_inputs = tokenizer( + text=text, + max_seq_len=max_seq_length, + pad_to_max_seq_len=pad_to_max_seq_len) + input_ids = encoded_inputs["input_ids"] + token_type_ids = encoded_inputs["token_type_ids"] + list_input_ids.append(input_ids) + list_token_type_ids.append(token_type_ids) + return list_input_ids, list_token_type_ids + + +# 启动python客户端 +endpoint_list = ['127.0.0.1:9393'] +client = Client() +client.load_client_config('serving_client') +client.connect(endpoint_list) +feed_names = client.feed_names_ +fetch_names = client.fetch_names_ +print(feed_names) +print(fetch_names) + +# 创建tokenizer +tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0') +max_seq_len = 64 + +# 数据预处理 + +list_data = ['国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据.', '面向生态系统服务的生态系统分类方案研发与应用'] +# for i in range(5): +# list_data.extend(list_data) +# print(len(list_data)) +examples = convert_example(list_data, tokenizer, max_seq_length=max_seq_len) +print(examples) + +feed_dict = {} +feed_dict['input_ids'] = np.array(examples[0]) +feed_dict['token_type_ids'] = np.array(examples[1]) + +print(feed_dict['input_ids'].shape) +print(feed_dict['token_type_ids'].shape) +# batch设置为True表示的是批量预测 +b_start = time.time() +result = client.predict(feed=feed_dict, fetch=fetch_names, batch=True) +b_end = time.time() +print("time to cost :{} seconds".format(b_end - b_start)) +print(result) diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/start_server.sh b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/start_server.sh new file mode 100644 index 0000000000000000000000000000000000000000..55d380d6f87396887675a008c54bb8544ce2a793 --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/C++/start_server.sh @@ -0,0 +1 @@ +python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_id 2 --thread 5 --ir_optim True --use_trt --precision FP16 \ No newline at end of file diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/config_nlp.yml b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/config_nlp.yml new file mode 100644 index 0000000000000000000000000000000000000000..d896adbfa1f9671cb569137637cf5f3ec169ef69 --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/config_nlp.yml @@ -0,0 +1,34 @@ +# worker_num, 最大并发数。当build_dag_each_worker=True时, 框架会创建worker_num个进程,每个进程内构建grpcSever和DAG +# 当build_dag_each_worker=False时,框架会设置主线程grpc线程池的max_workers=worker_num +worker_num: 20 +# build_dag_each_worker, False,框架在进程内创建一条DAG;True,框架会每个进程内创建多个独立的DAG +build_dag_each_worker: false + +dag: + # op资源类型, True, 为线程模型;False,为进程模型 + is_thread_op: False + # 使用性能分析, True,生成Timeline性能数据,对性能有一定影响;False为不使用 + tracer: + interval_s: 10 +# http端口, rpc_port和http_port不允许同时为空。当rpc_port可用且http_port为空时,不自动生成http_port +http_port: 18082 +# rpc端口, rpc_port和http_port不允许同时为空。当rpc_port为空且http_port不为空时,会自动将rpc_port设置为http_port+1 +rpc_port: 8088 +op: + ernie: + # 并发数,is_thread_op=True时,为线程并发;否则为进程并发 + concurrency: 1 + # 当op配置没有server_endpoints时,从local_service_conf读取本地服务配置 + local_service_conf: + # client类型,包括brpc, grpc和local_predictor.local_predictor不启动Serving服务,进程内预测 + client_type: local_predictor + #ir_optim + ir_optim: True + # device_type, 0=cpu, 1=gpu, 2=tensorRT, 
3=arm cpu, 4=kunlun xpu + device_type: 1 + # 计算硬件ID,当devices为""或不写时为CPU预测;当devices为"0", "0,1,2"时为GPU预测,表示使用的GPU卡 + devices: '2' + # Fetch结果列表,以client_config中fetch_var的alias_name为准, 如果没有设置则全部返回 + fetch_list: ['output_embedding'] + # 模型路径 + model_config: ../../serving_server/ diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/deploy.sh b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/deploy.sh new file mode 100644 index 0000000000000000000000000000000000000000..fe8f071e0a47a47f5dc24d84ea4eaaf8e7503c06 --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/deploy.sh @@ -0,0 +1 @@ +python predict.py --model_dir=../../output \ No newline at end of file diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/predict.py b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/predict.py new file mode 100644 index 0000000000000000000000000000000000000000..0e81dbb5092ce6178587f5aa8f40d758f4446a42 --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/predict.py @@ -0,0 +1,292 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import os +import sys + +import numpy as np +import paddle +import paddlenlp as ppnlp +from scipy.special import softmax +from scipy import spatial +from paddle import inference +from paddlenlp.data import Stack, Tuple, Pad +from paddlenlp.datasets import load_dataset +from paddlenlp.utils.log import logger + +sys.path.append('.') + +# yapf: disable +parser = argparse.ArgumentParser() +parser.add_argument("--model_dir", type=str, required=True, + help="The directory to static model.") + +parser.add_argument("--max_seq_length", default=128, type=int, + help="The maximum total input sequence length after tokenization. 
Sequences " + "longer than this will be truncated, sequences shorter will be padded.") +parser.add_argument("--batch_size", default=15, type=int, + help="Batch size per GPU/CPU for training.") +parser.add_argument('--device', choices=['cpu', 'gpu', 'xpu'], default="gpu", + help="Select which device to train model, defaults to gpu.") + +parser.add_argument('--use_tensorrt', default=False, type=eval, choices=[True, False], + help='Enable to use tensorrt to speed up.') +parser.add_argument("--precision", default="fp32", type=str, choices=["fp32", "fp16", "int8"], + help='The tensorrt precision.') + +parser.add_argument('--cpu_threads', default=10, type=int, + help='Number of threads to predict when using cpu.') +parser.add_argument('--enable_mkldnn', default=False, type=eval, choices=[True, False], + help='Enable to use mkldnn to speed up when using cpu.') + +parser.add_argument("--benchmark", type=eval, default=False, + help="To log some information about environment and running.") +parser.add_argument("--save_log_path", type=str, default="./log_output/", + help="The file path to save log.") +args = parser.parse_args() +# yapf: enable + + +def convert_example(example, + tokenizer, + max_seq_length=512, + pad_to_max_seq_len=False): + """ + Builds model inputs from a sequence. + + A BERT sequence has the following format: + + - single sequence: ``[CLS] X [SEP]`` + + Args: + example(obj:`list(str)`): The list of text to be converted to ids. + tokenizer(obj:`PretrainedTokenizer`): This tokenizer inherits from :class:`~paddlenlp.transformers.PretrainedTokenizer` + which contains most of the methods. Users should refer to the superclass for more information regarding methods. + max_seq_len(obj:`int`): The maximum total input sequence length after tokenization. + Sequences longer than this will be truncated, sequences shorter will be padded. + is_test(obj:`False`, defaults to `False`): Whether the example contains label or not. + + Returns: + input_ids(obj:`list[int]`): The list of query token ids. + token_type_ids(obj: `list[int]`): List of query sequence pair mask. 
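+
+    Example (a minimal sketch; assumes ``tokenizer`` is the ErnieTokenizer
+    built in ``__main__`` below):
+        input_ids, token_type_ids = convert_example(
+            {0: "some query text"}, tokenizer, max_seq_length=64)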
+ """ + + result = [] + for key, text in example.items(): + encoded_inputs = tokenizer( + text=text, + max_seq_len=max_seq_length, + pad_to_max_seq_len=pad_to_max_seq_len) + input_ids = encoded_inputs["input_ids"] + token_type_ids = encoded_inputs["token_type_ids"] + result += [input_ids, token_type_ids] + return result + + +class Predictor(object): + def __init__(self, + model_dir, + device="gpu", + max_seq_length=128, + batch_size=32, + use_tensorrt=False, + precision="fp32", + cpu_threads=10, + enable_mkldnn=False): + self.max_seq_length = max_seq_length + self.batch_size = batch_size + + model_file = model_dir + "/inference.pdmodel" + params_file = model_dir + "/inference.pdiparams" + if not os.path.exists(model_file): + raise ValueError("not find model file path {}".format(model_file)) + if not os.path.exists(params_file): + raise ValueError("not find params file path {}".format(params_file)) + config = paddle.inference.Config(model_file, params_file) + + if device == "gpu": + # set GPU configs accordingly + # such as intialize the gpu memory, enable tensorrt + config.enable_use_gpu(100, 0) + precision_map = { + "fp16": inference.PrecisionType.Half, + "fp32": inference.PrecisionType.Float32, + "int8": inference.PrecisionType.Int8 + } + precision_mode = precision_map[precision] + + if args.use_tensorrt: + config.enable_tensorrt_engine( + max_batch_size=batch_size, + min_subgraph_size=30, + precision_mode=precision_mode) + elif device == "cpu": + # set CPU configs accordingly, + # such as enable_mkldnn, set_cpu_math_library_num_threads + config.disable_gpu() + if args.enable_mkldnn: + # cache 10 different shapes for mkldnn to avoid memory leak + config.set_mkldnn_cache_capacity(10) + config.enable_mkldnn() + config.set_cpu_math_library_num_threads(args.cpu_threads) + elif device == "xpu": + # set XPU configs accordingly + config.enable_xpu(100) + + config.switch_use_feed_fetch_ops(False) + self.predictor = paddle.inference.create_predictor(config) + self.input_handles = [ + self.predictor.get_input_handle(name) + for name in self.predictor.get_input_names() + ] + self.output_handle = self.predictor.get_output_handle( + self.predictor.get_output_names()[0]) + + if args.benchmark: + import auto_log + pid = os.getpid() + self.autolog = auto_log.AutoLogger( + model_name="ernie-1.0", + model_precision=precision, + batch_size=self.batch_size, + data_shape="dynamic", + save_path=args.save_log_path, + inference_config=config, + pids=pid, + process_name=None, + gpu_ids=0, + time_keys=[ + 'preprocess_time', 'inference_time', 'postprocess_time' + ], + warmup=0, + logger=logger) + + def extract_embedding(self, data, tokenizer): + """ + Predicts the data labels. + + Args: + data (obj:`List(str)`): The batch data whose each element is a raw text. + tokenizer(obj:`PretrainedTokenizer`): This tokenizer inherits from :class:`~paddlenlp.transformers.PretrainedTokenizer` + which contains most of the methods. Users should refer to the superclass for more information regarding methods. + + Returns: + results(obj:`dict`): All the feature vectors. 
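+
+        Example (a minimal sketch; ``predictor`` and ``tokenizer`` are the
+        objects built in ``__main__`` below):
+            embeddings = predictor.extract_embedding(
+                [{0: "some corpus text"}], tokenizer)
+            # expected: a numpy ndarray of shape (len(data), output_emb_size)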
+ """ + if args.benchmark: + self.autolog.times.start() + + examples = [] + for text in data: + input_ids, segment_ids = convert_example(text, tokenizer) + examples.append((input_ids, segment_ids)) + + batchify_fn = lambda samples, fn=Tuple( + Pad(axis=0, pad_val=tokenizer.pad_token_id), # input + Pad(axis=0, pad_val=tokenizer.pad_token_id), # segment + ): fn(samples) + + if args.benchmark: + self.autolog.times.stamp() + + input_ids, segment_ids = batchify_fn(examples) + self.input_handles[0].copy_from_cpu(input_ids) + self.input_handles[1].copy_from_cpu(segment_ids) + self.predictor.run() + logits = self.output_handle.copy_to_cpu() + if args.benchmark: + self.autolog.times.stamp() + + if args.benchmark: + self.autolog.times.end(stamp=True) + + return logits + + def predict(self, data, tokenizer): + """ + Predicts the data labels. + + Args: + data (obj:`List(str)`): The batch data whose each element is a raw text. + tokenizer(obj:`PretrainedTokenizer`): This tokenizer inherits from :class:`~paddlenlp.transformers.PretrainedTokenizer` + which contains most of the methods. Users should refer to the superclass for more information regarding methods. + + Returns: + results(obj:`dict`): All the predictions probs. + """ + if args.benchmark: + self.autolog.times.start() + + examples = [] + for idx, text in enumerate(data): + input_ids, segment_ids = convert_example({idx: text[0]}, tokenizer) + title_ids, title_segment_ids = convert_example({ + idx: text[1] + }, tokenizer) + examples.append( + (input_ids, segment_ids, title_ids, title_segment_ids)) + + batchify_fn = lambda samples, fn=Tuple( + Pad(axis=0, pad_val=tokenizer.pad_token_id), # input + Pad(axis=0, pad_val=tokenizer.pad_token_id), # segment + Pad(axis=0, pad_val=tokenizer.pad_token_id), # segment + Pad(axis=0, pad_val=tokenizer.pad_token_id), # segment + ): fn(samples) + + if args.benchmark: + self.autolog.times.stamp() + + query_ids, query_segment_ids, title_ids, title_segment_ids = batchify_fn( + examples) + self.input_handles[0].copy_from_cpu(query_ids) + self.input_handles[1].copy_from_cpu(query_segment_ids) + self.predictor.run() + query_logits = self.output_handle.copy_to_cpu() + + self.input_handles[0].copy_from_cpu(title_ids) + self.input_handles[1].copy_from_cpu(title_segment_ids) + self.predictor.run() + title_logits = self.output_handle.copy_to_cpu() + + if args.benchmark: + self.autolog.times.stamp() + + if args.benchmark: + self.autolog.times.end(stamp=True) + result = [ + float(1 - spatial.distance.cosine(arr1, arr2)) + for arr1, arr2 in zip(query_logits, title_logits) + ] + return result + + +if __name__ == "__main__": + # Define predictor to do prediction. + predictor = Predictor(args.model_dir, args.device, args.max_seq_length, + args.batch_size, args.use_tensorrt, args.precision, + args.cpu_threads, args.enable_mkldnn) + + # ErnieTinyTokenizer is special for ernie-tiny pretained model. 
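+    # The remainder of this block is a small self-test: build the ernie-1.0
+    # tokenizer, embed a single corpus entry via extract_embedding(), then score
+    # two query/title pairs via predict(), which returns the cosine similarity
+    # of their pooled embeddings.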
+ output_emb_size = 256 + tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0') + id2corpus = {0: '国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据'} + corpus_list = [{idx: text} for idx, text in id2corpus.items()] + res = predictor.extract_embedding(corpus_list, tokenizer) + print(res.shape) + print(res) + corpus_list = [['中西方语言与文化的差异', '中西方文化差异以及语言体现中西方文化,差异,语言体现'], + ['中西方语言与文化的差异', '飞桨致力于让深度学习技术的创新与应用更简单']] + res = predictor.predict(corpus_list, tokenizer) + print(res) diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/rpc_client.py b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/rpc_client.py new file mode 100644 index 0000000000000000000000000000000000000000..03863db6114b7c381dae17ee3bf33f00f15d8f4a --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/rpc_client.py @@ -0,0 +1,39 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import time +import numpy as np + +from paddle_serving_server.pipeline import PipelineClient + +client = PipelineClient() +client.connect(['127.0.0.1:8088']) + +list_data = [ + "国有企业引入非国有资本对创新绩效的影响——基于制造业国有上市公司的经验证据", + "试论翻译过程中的文化差异与语言空缺翻译过程,文化差异,语言空缺,文化对比" +] +feed = {} +for i, item in enumerate(list_data): + feed[str(i)] = item + +print(feed) +start_time = time.time() +ret = client.predict(feed_dict=feed) +end_time = time.time() +print("time to cost :{} seconds".format(end_time - start_time)) + +result = np.array(eval(ret.value[0])) +print(ret.key) +print(result.shape) +print(result) diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/web_service.py b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/web_service.py new file mode 100644 index 0000000000000000000000000000000000000000..1ad12032b3c92d72a5297f15d732b7dfbd19589e --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/deploy/python/web_service.py @@ -0,0 +1,82 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
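+
+# Minimal request sketch (assumptions: the service is launched with
+# `python web_service.py`, config_nlp.yml keeps http_port 18082, and the
+# Pipeline HTTP route follows the usual /<name>/prediction pattern):
+#
+#   import json, requests
+#   data = {"key": ["0", "1"], "value": ["text a", "text b"]}
+#   r = requests.post("http://127.0.0.1:18082/ernie/prediction",
+#                     data=json.dumps(data))
+#   print(r.json())
+#
+# The numeric keys match the str(i) indexing used in ErnieOp.preprocess below;
+# rpc_client.py in this directory shows the equivalent gRPC call on port 8088.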
+ +import logging +import numpy as np +import sys + +from paddle_serving_server.web_service import WebService, Op + +_LOGGER = logging.getLogger() + + +def convert_example(example, + tokenizer, + max_seq_length=512, + pad_to_max_seq_len=False): + result = [] + for text in example: + encoded_inputs = tokenizer( + text=text, + max_seq_len=max_seq_length, + pad_to_max_seq_len=pad_to_max_seq_len) + input_ids = encoded_inputs["input_ids"] + token_type_ids = encoded_inputs["token_type_ids"] + result += [input_ids, token_type_ids] + return result + + +class ErnieOp(Op): + def init_op(self): + import paddlenlp as ppnlp + self.tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained( + 'ernie-1.0') + + def preprocess(self, input_dicts, data_id, log_id): + from paddlenlp.data import Stack, Tuple, Pad + + (_, input_dict), = input_dicts.items() + print("input dict", input_dict) + batch_size = len(input_dict.keys()) + examples = [] + for i in range(batch_size): + input_ids, segment_ids = convert_example([input_dict[str(i)]], + self.tokenizer) + examples.append((input_ids, segment_ids)) + batchify_fn = lambda samples, fn=Tuple( + Pad(axis=0, pad_val=self.tokenizer.pad_token_id), # input + Pad(axis=0, pad_val=self.tokenizer.pad_token_id), # segment + ): fn(samples) + input_ids, segment_ids = batchify_fn(examples) + feed_dict = {} + feed_dict['input_ids'] = input_ids + feed_dict['token_type_ids'] = segment_ids + return feed_dict, False, None, "" + + def postprocess(self, input_dicts, fetch_dict, data_id, log_id): + new_dict = {} + new_dict["output_embedding"] = str(fetch_dict["output_embedding"] + .tolist()) + return new_dict, None, "" + + +class ErnieService(WebService): + def get_pipeline_response(self, read_op): + ernie_op = ErnieOp(name="ernie", input_ops=[read_op]) + return ernie_op + + +ernie_service = ErnieService(name="ernie") +ernie_service.prepare_pipeline_config("config_nlp.yml") +ernie_service.run_service() diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/export_model.py b/examples/Pipeline/PaddleNLP/semantic_indexing/export_model.py new file mode 100644 index 0000000000000000000000000000000000000000..da468ea7b2c3af6eff093eef98a3e4f9393f9b3d --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/export_model.py @@ -0,0 +1,65 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
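+
+# Usage sketch (paths are illustrative; scripts/export_model.sh in this example
+# uses the same flags):
+#
+#   python export_model.py \
+#       --params_path checkpoints/model_40/model_state.pdparams \
+#       --output_path ./output
+#
+# The static-graph files written under --output_path are then converted into
+# serving_server/serving_client configs by export_to_serving.py
+# (see scripts/export_to_serving.sh).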
+ +import argparse +import os +from functools import partial + +import numpy as np +import paddle +import paddle.nn.functional as F +import paddlenlp as ppnlp +from paddlenlp.data import Stack, Tuple, Pad + +from base_model import SemanticIndexBase, SemanticIndexBaseStatic + +# yapf: disable +parser = argparse.ArgumentParser() +parser.add_argument("--params_path", type=str, required=True, + default='./checkpoint/model_900/model_state.pdparams', help="The path to model parameters to be loaded.") +parser.add_argument("--output_path", type=str, default='./output', + help="The path of model parameter in static graph to be saved.") +args = parser.parse_args() +# yapf: enable + +if __name__ == "__main__": + # If you want to use ernie1.0 model, plesace uncomment the following code + output_emb_size = 256 + + pretrained_model = ppnlp.transformers.ErnieModel.from_pretrained( + "ernie-1.0") + + tokenizer = ppnlp.transformers.ErnieTokenizer.from_pretrained('ernie-1.0') + model = SemanticIndexBaseStatic( + pretrained_model, output_emb_size=output_emb_size) + + if args.params_path and os.path.isfile(args.params_path): + state_dict = paddle.load(args.params_path) + model.set_dict(state_dict) + print("Loaded parameters from %s" % args.params_path) + + model.eval() + + # Convert to static graph with specific input description + model = paddle.jit.to_static( + model, + input_spec=[ + paddle.static.InputSpec( + shape=[None, None], dtype="int64"), # input_ids + paddle.static.InputSpec( + shape=[None, None], dtype="int64") # segment_ids + ]) + # Save in static graph model. + save_path = os.path.join(args.output_path, "inference") + paddle.jit.save(model, save_path) diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/export_to_serving.py b/examples/Pipeline/PaddleNLP/semantic_indexing/export_to_serving.py new file mode 100644 index 0000000000000000000000000000000000000000..c24f931510e5662ae1b824049d1ac35c4ef34076 --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/export_to_serving.py @@ -0,0 +1,47 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import paddle_serving_client.io as serving_io +# yapf: disable +parser = argparse.ArgumentParser() +parser.add_argument("--dirname", type=str, required=True, + default='./output', help="Path of saved model files. Program file and parameter files are saved in this directory.") +parser.add_argument("--model_filename", type=str, required=True, + default='inference.get_pooled_embedding.pdmodel', help="The name of file to load the inference program. If it is None, the default filename __model__ will be used.") +parser.add_argument("--params_filename", type=str, required=True, + default='inference.get_pooled_embedding.pdiparams', help="The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. 
Default: None.") +parser.add_argument("--server_path", type=str, default='./serving_server', + help="The path of server parameter in static graph to be saved.") +parser.add_argument("--client_path", type=str, default='./serving_client', + help="The path of client parameter in static graph to be saved.") +parser.add_argument("--feed_alias_names", type=str, default=None, + help='set alias names for feed vars, split by comma \',\', you should run --show_proto to check the number of feed vars') +parser.add_argument("--fetch_alias_names", type=str, default=None, + help='set alias names for feed vars, split by comma \',\', you should run --show_proto to check the number of fetch vars') +parser.add_argument("--show_proto", type=bool, default=False, + help='If yes, you can preview the proto and then determine your feed var alias name and fetch var alias name.') +# yapf: enable + +if __name__ == "__main__": + args = parser.parse_args() + serving_io.inference_model_to_serving( + dirname=args.dirname, + serving_server=args.server_path, + serving_client=args.client_path, + model_filename=args.model_filename, + params_filename=args.params_filename, + show_proto=args.show_proto, + feed_alias_names=args.feed_alias_names, + fetch_alias_names=args.fetch_alias_names) diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_model.sh b/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_model.sh new file mode 100644 index 0000000000000000000000000000000000000000..7c79266219cea03e16968ed0d00a3755615c7432 --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_model.sh @@ -0,0 +1 @@ +python export_model.py --params_path checkpoints/model_40/model_state.pdparams --output_path=./output \ No newline at end of file diff --git a/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_to_serving.sh b/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_to_serving.sh new file mode 100644 index 0000000000000000000000000000000000000000..b0d7a422551fd09eb1a28cfacdf47237a8efc795 --- /dev/null +++ b/examples/Pipeline/PaddleNLP/semantic_indexing/scripts/export_to_serving.sh @@ -0,0 +1,7 @@ +python export_to_serving.py \ + --dirname "output" \ + --model_filename "inference.get_pooled_embedding.pdmodel" \ + --params_filename "inference.get_pooled_embedding.pdiparams" \ + --server_path "serving_server" \ + --client_path "serving_client" \ + --fetch_alias_names "output_embedding" diff --git a/examples/Pipeline/PaddleOCR/ocr/pipeline_http_client.py b/examples/Pipeline/PaddleOCR/ocr/pipeline_http_client.py index 9952271430b2e260be292b63f39eaf1105b06521..f60d357df7b1ad4e2ce0e4e3f327733090db2947 100644 --- a/examples/Pipeline/PaddleOCR/ocr/pipeline_http_client.py +++ b/examples/Pipeline/PaddleOCR/ocr/pipeline_http_client.py @@ -29,9 +29,26 @@ test_img_dir = "imgs/" for img_file in os.listdir(test_img_dir): with open(os.path.join(test_img_dir, img_file), 'rb') as file: image_data1 = file.read() + # print file name + print('{}{}{}'.format('*' * 10, img_file, '*' * 10)) image = cv2_to_base64(image_data1) -for i in range(4): data = {"key": ["image"], "value": [image]} r = requests.post(url=url, data=json.dumps(data)) - print(r.json()) + result = r.json() + print("erro_no:{}, err_msg:{}".format(result["err_no"], result["err_msg"])) + # check success + if result["err_no"] == 0: + ocr_result = result["value"][0] + try: + for item in eval(ocr_result): + # return transcription and points + print("{}, {}".format(item[0], item[1])) + except Exception as e: + print("No results") + 
continue + + else: + print( + "For details about error message, see PipelineServingLogs/pipeline.log.wf" + ) diff --git a/examples/Pipeline/PaddleOCR/ocr/pipeline_rpc_client.py b/examples/Pipeline/PaddleOCR/ocr/pipeline_rpc_client.py index 66faa0428cfcbd97a5fb0d340d4ee07be828cda2..e37182359fa66005e112a9fc04e66b2fb4326718 100644 --- a/examples/Pipeline/PaddleOCR/ocr/pipeline_rpc_client.py +++ b/examples/Pipeline/PaddleOCR/ocr/pipeline_rpc_client.py @@ -34,8 +34,24 @@ test_img_dir = "imgs/" for img_file in os.listdir(test_img_dir): with open(os.path.join(test_img_dir, img_file), 'rb') as file: image_data = file.read() + # print file name + print('{}{}{}'.format('*' * 10, img_file, '*' * 10)) image = cv2_to_base64(image_data) -for i in range(1): - ret = client.predict(feed_dict={"image": image}, fetch=["res"]) - print(ret) + result = client.predict(feed_dict={"image": image}, fetch=["res"]) + print("erro_no:{}, err_msg:{}".format(result.err_no, result.err_msg)) + # check success + if result.err_no == 0: + ocr_result = result.value[0] + try: + for item in eval(ocr_result): + # return transcription and points + print("{}, {}".format(item[0], item[1])) + except Exception as e: + print("No results") + continue + + else: + print( + "For details about error message, see PipelineServingLogs/pipeline.log.wf" + ) diff --git a/examples/Pipeline/PaddleOCR/ocr/web_service.py b/examples/Pipeline/PaddleOCR/ocr/web_service.py index 5cb1ada570287f98340d9456175b58558f71e8f6..0ef748cedf45aecdfa3193329ac440ef4ab1f58a 100644 --- a/examples/Pipeline/PaddleOCR/ocr/web_service.py +++ b/examples/Pipeline/PaddleOCR/ocr/web_service.py @@ -14,6 +14,7 @@ from paddle_serving_server.web_service import WebService, Op import logging import numpy as np +import copy import cv2 import base64 from paddle_serving_app.reader import OCRReader @@ -34,17 +35,18 @@ class DetOp(Op): self.filter_func = FilterBoxes(10, 10) self.post_func = DBPostProcess({ "thresh": 0.3, - "box_thresh": 0.5, + "box_thresh": 0.6, "max_candidates": 1000, "unclip_ratio": 1.5, "min_size": 3 }) - + """ when opening tensorrt(configure in config.yml) and each time the input shape for inferring is different, using this method for configuring tensorrt dynamic shape to infer in each op model """ + def set_dynamic_shape_info(self): min_input_shape = { "x": [1, 3, 50, 50], @@ -74,7 +76,7 @@ class DetOp(Op): "min_input_shape": min_input_shape, "max_input_shape": max_input_shape, "opt_input_shape": opt_input_shape, - } + } def preprocess(self, input_dicts, data_id, log_id): (_, input_dict), = input_dicts.items() @@ -107,25 +109,20 @@ class RecOp(Op): self.ocr_reader = OCRReader() self.get_rotate_crop_image = GetRotateCropImage() self.sorted_boxes = SortedBoxes() - + """ when opening tensorrt(configure in config.yml) and each time the input shape for inferring is different, using this method for configuring tensorrt dynamic shape to infer in each op model """ + def set_dynamic_shape_info(self): - min_input_shape = { - "x": [1, 3, 32, 10], - "lstm_1.tmp_0": [1, 1, 128] - } + min_input_shape = {"x": [1, 3, 32, 10], "lstm_1.tmp_0": [1, 1, 128]} max_input_shape = { "x": [50, 3, 32, 1000], "lstm_1.tmp_0": [500, 50, 128] } - opt_input_shape = { - "x": [6, 3, 32, 100], - "lstm_1.tmp_0": [25, 5, 128] - } + opt_input_shape = {"x": [6, 3, 32, 100], "lstm_1.tmp_0": [25, 5, 128]} self.dynamic_shape_info = { "min_input_shape": min_input_shape, "max_input_shape": max_input_shape, @@ -137,8 +134,10 @@ class RecOp(Op): raw_im = input_dict["image"] data = np.frombuffer(raw_im, 
np.uint8) im = cv2.imdecode(data, cv2.IMREAD_COLOR) - dt_boxes = input_dict["dt_boxes"] - dt_boxes = self.sorted_boxes(dt_boxes) + self.dt_list = input_dict["dt_boxes"] + self.dt_list = self.sorted_boxes(self.dt_list) + # deepcopy to save origin dt_boxes + dt_boxes = copy.deepcopy(self.dt_list) feed_list = [] img_list = [] max_wh_ratio = 0 @@ -205,26 +204,31 @@ class RecOp(Op): imgs[id] = norm_img feed = {"x": imgs.copy()} feed_list.append(feed) - #_LOGGER.info("feed_list : {}".format(feed_list)) return feed_list, False, None, "" def postprocess(self, input_dicts, fetch_data, data_id, log_id): - res_list = [] + rec_list = [] + dt_num = len(self.dt_list) if isinstance(fetch_data, dict): if len(fetch_data) > 0: rec_batch_res = self.ocr_reader.postprocess_ocrv2( fetch_data, with_score=True) for res in rec_batch_res: - res_list.append(res[0]) + rec_list.append(res) elif isinstance(fetch_data, list): for one_batch in fetch_data: one_batch_res = self.ocr_reader.postprocess_ocrv2( one_batch, with_score=True) for res in one_batch_res: - res_list.append(res[0]) - - res = {"res": str(res_list)} + rec_list.append(res) + + result_list = [] + for i in range(dt_num): + text = rec_list[i] + dt_box = self.dt_list[i] + result_list.append([text, dt_box.tolist()]) + res = {"result": str(result_list)} return res, None, "" diff --git a/java/src/main/proto/general_model_service.proto b/java/src/main/proto/general_model_service.proto index aa06d388a468d71e968aa53b19f25c55f8c42ee1..6b0fc7e0d585d45ca45f26cf9083a7f116a25d33 100644 --- a/java/src/main/proto/general_model_service.proto +++ b/java/src/main/proto/general_model_service.proto @@ -89,11 +89,13 @@ message Request { message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; - // Error code - int32 err_no = 3; + bool profile_server = 3; + uint64 log_id = 4; + // Error code + int32 err_no = 5; // Error messages - string err_msg = 4; + string err_msg = 6; }; message ModelOutput { diff --git a/paddle_inference/paddle/include/paddle_engine.h b/paddle_inference/paddle/include/paddle_engine.h index bf8c98ede60bc4266965d1aa12e2627dd0d0647a..2d76730555acb6ed0408584db1334e842db126c3 100644 --- a/paddle_inference/paddle/include/paddle_engine.h +++ b/paddle_inference/paddle/include/paddle_engine.h @@ -37,6 +37,7 @@ using paddle_infer::PrecisionType; using paddle_infer::Predictor; using paddle_infer::Tensor; using paddle_infer::CreatePredictor; +using paddle_infer::DistConfig; DECLARE_int32(gpuid); DECLARE_string(precision); @@ -206,11 +207,44 @@ class PaddleInferenceEngine : public EngineCore { config.SetModel(model_path); } + // Enable distributed model inferencing + DistConfig distCfg; + if (engine_conf.has_enable_dist_model() && + engine_conf.enable_dist_model()) { + int ep_size = engine_conf.dist_endpoints_size(); + int cur_index = engine_conf.dist_subgraph_index(); + if (ep_size <= cur_index) { + LOG(ERROR) << "create paddle predictor failed, Distributed model error." 
+ << " dist_endpoints_size=" << ep_size + << " is not bigger than dist_subgraph_index=" << cur_index; + return -1; + } + std::vector vec_eps; + for (int i = 0; i < ep_size; ++i) { + vec_eps.emplace_back(engine_conf.dist_endpoints(i)); + } + distCfg.EnableDistModel(true); + distCfg.SetCarrierId(engine_conf.dist_carrier_id()); + distCfg.SetRanks(engine_conf.dist_nranks(), cur_index); + distCfg.SetEndpoints(vec_eps, engine_conf.dist_endpoints(cur_index)); + distCfg.SetCommInitConfig(engine_conf.dist_cfg_file()); + + config.SetDistConfig(distCfg); + LOG(INFO) << "Create Distributed predictor! dist_carrier_id=" + << engine_conf.dist_carrier_id() + << ", Ranks=" << engine_conf.dist_nranks() + << ", current index of ranks=" << cur_index + << ", current endpoint=" + << engine_conf.dist_endpoints(cur_index) + << ", communicate init config file=" + << engine_conf.dist_cfg_file(); + } + config.SwitchSpecifyInputNames(true); - config.SetCpuMathLibraryNumThreads(1); + config.SetCpuMathLibraryNumThreads(engine_conf.cpu_math_thread_num()); if (engine_conf.has_use_gpu() && engine_conf.use_gpu()) { // 2000MB GPU memory - config.EnableUseGpu(50, gpu_id); + config.EnableUseGpu(engine_conf.gpu_memory_mb(), gpu_id); if (engine_conf.has_gpu_multi_stream() && engine_conf.gpu_multi_stream()) { config.EnableGpuMultiStream(); @@ -225,21 +259,70 @@ class PaddleInferenceEngine : public EngineCore { config.SwitchIrOptim(true); } + int local_min_subgraph_size = min_subgraph_size; + if (engine_conf.has_min_subgraph_size()) { + local_min_subgraph_size = engine_conf.min_subgraph_size(); + } + if (engine_conf.has_use_trt() && engine_conf.use_trt()) { config.SwitchIrOptim(true); if (!engine_conf.has_use_gpu() || !engine_conf.use_gpu()) { - config.EnableUseGpu(50, gpu_id); + config.EnableUseGpu(engine_conf.gpu_memory_mb(), gpu_id); if (engine_conf.has_gpu_multi_stream() && engine_conf.gpu_multi_stream()) { config.EnableGpuMultiStream(); } } - config.EnableTensorRtEngine(1 << 20, + config.EnableTensorRtEngine(engine_conf.trt_workspace_size(), max_batch, - min_subgraph_size, + local_min_subgraph_size, precision_type, - false, + engine_conf.trt_use_static(), FLAGS_use_calib); + std::map> min_input_shape; + std::map> max_input_shape; + std::map> optim_input_shape; + if (engine_conf.min_input_shape_size() > 0) { + for (auto& iter : engine_conf.min_input_shape()) { + std::string key = iter.first; + std::string value = iter.second; + std::istringstream ss(value); + std::string word; + std::vector arr; + while (ss >> word) { + arr.push_back(std::stoi(word)); + } + min_input_shape[key] = arr; + } + } + if (engine_conf.max_input_shape_size() > 0) { + for (auto& iter : engine_conf.max_input_shape()) { + std::string key = iter.first; + std::string value = iter.second; + std::istringstream ss(value); + std::string word; + std::vector arr; + while (ss >> word) { + arr.push_back(std::stoi(word)); + } + max_input_shape[key] = arr; + } + } + if (engine_conf.opt_input_shape_size() > 0) { + for (auto& iter : engine_conf.opt_input_shape()) { + std::string key = iter.first; + std::string value = iter.second; + std::istringstream ss(value); + std::string word; + std::vector arr; + while (ss >> word) { + arr.push_back(std::stoi(word)); + } + optim_input_shape[key] = arr; + } + } + config.SetTRTDynamicShapeInfo( + min_input_shape, max_input_shape, optim_input_shape); LOG(INFO) << "create TensorRT predictor"; } @@ -314,6 +397,28 @@ class PaddleInferenceEngine : public EngineCore { return -1; } + LOG(INFO) << "paddle_engine params : 
enable_dist_model:" + << engine_conf.enable_dist_model() + << ", use_gpu: " << engine_conf.has_use_gpu() + << ", gpu_id: " << gpu_id + << ", use_gpu_multi_stream: " << engine_conf.gpu_multi_stream() + << ", precision: " << FLAGS_precision + << ", enable_ir_optimization: " + << engine_conf.enable_ir_optimization() + << ", use_trt: " << engine_conf.use_trt() + << ", trt_max_batch: " << max_batch + << ", trt_min_subgraph_size: " << min_subgraph_size + << ", use_calib: " << FLAGS_use_calib + << ", use_lite: " << engine_conf.use_lite() + << ", use_ascend_cl: " << engine_conf.has_use_ascend_cl() + << ", use_xpu: " << engine_conf.use_xpu() + << ", enable_memory_optimization: " + << engine_conf.enable_memory_optimization() + << ", gpu_memory_mb: " << engine_conf.gpu_memory_mb() + << ", cpu_math_thread_num: " << engine_conf.cpu_math_thread_num() + << ", trt_workspace_size: " << engine_conf.trt_workspace_size() + << ", trt_use_static: " << engine_conf.trt_use_static(); + VLOG(2) << "create paddle predictor sucess, path: " << model_path; return 0; } diff --git a/python/paddle_serving_app/local_predict.py b/python/paddle_serving_app/local_predict.py index 5f922a28f849866fcd08a29b63c70a986d064c68..7ad11e1c2bf0abc4d447311e3081b434cbb25dc9 100644 --- a/python/paddle_serving_app/local_predict.py +++ b/python/paddle_serving_app/local_predict.py @@ -93,7 +93,9 @@ class LocalPredictor(object): use_ascend_cl=False, min_subgraph_size=3, dynamic_shape_info={}, - use_calib=False): + use_calib=False, + collect_shape_range_info="", + tuned_dynamic_shape_info=""): """ Load model configs and create the paddle predictor by Paddle Inference API. @@ -160,12 +162,14 @@ class LocalPredictor(object): "use_trt:{}, use_lite:{}, use_xpu:{}, precision:{}, use_calib:{}, " "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, " "mkldnn_bf16_op_list:{}, use_feed_fetch_ops:{}, " - "use_ascend_cl:{}, min_subgraph_size:{}, dynamic_shape_info:{}". + "use_ascend_cl:{}, min_subgraph_size:{}, dynamic_shape_info:{}," + "collect_shape_range_info:{},tuned_dynamic_shape_info:{}". 
format(model_path, use_gpu, gpu_id, use_profile, thread_num, mem_optim, ir_optim, use_trt, use_lite, use_xpu, precision, use_calib, use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list, mkldnn_bf16_op_list, use_feed_fetch_ops, use_ascend_cl, - min_subgraph_size, dynamic_shape_info)) + min_subgraph_size, dynamic_shape_info, + collect_shape_range_info,tuned_dynamic_shape_info)) self.feed_names_ = [var.alias_name for var in model_conf.feed_var] self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var] @@ -213,6 +217,8 @@ class LocalPredictor(object): if mkldnn_op_list is not None: config.set_mkldnn_op(mkldnn_op_list) # set gpu + if collect_shape_range_info != "": + config.collect_shape_range_info(collect_shape_range_info) if not use_gpu: config.disable_gpu() else: @@ -226,6 +232,9 @@ class LocalPredictor(object): use_static=False, use_calib_mode=use_calib) + if tuned_dynamic_shape_info != "": + config.enable_tuned_tensorrt_dynamic_shape(tuned_dynamic_shape_info, True) + @ErrorCatch @ParamChecker def dynamic_shape_info_helper(dynamic_shape_info:lambda dynamic_shape_info: check_dynamic_shape_info(dynamic_shape_info)): @@ -235,7 +244,7 @@ class LocalPredictor(object): print("dynamic_shape_info configure error, it should contain [min_input_shape', 'max_input_shape', 'opt_input_shape' {}".format(resp.err_msg)) kill_stop_process_by_pid("kill", os.getpgid(os.getpid())) - if len(dynamic_shape_info): + if len(dynamic_shape_info) and tuned_dynamic_shape_info == "": config.set_trt_dynamic_shape_info( dynamic_shape_info['min_input_shape'], dynamic_shape_info['max_input_shape'], diff --git a/python/paddle_serving_app/reader/pptsn_reader.py b/python/paddle_serving_app/reader/pptsn_reader.py index c74c9db6aa1440bab3a163337b3b1c26652210f4..beff557aff18f765d60ffab97c0a842b45224f9f 100644 --- a/python/paddle_serving_app/reader/pptsn_reader.py +++ b/python/paddle_serving_app/reader/pptsn_reader.py @@ -1,20 +1,19 @@ import numpy as np -import av import cv2 import pickle -import decord as de import math import random import os from PIL import Image -import SimpleITK as sitk - try: import cPickle as pickle from cStringIO import StringIO + import av + import decord as de + import SimpleITK as sitk except ImportError: import pickle from io import BytesIO diff --git a/python/paddle_serving_server/dag.py b/python/paddle_serving_server/dag.py index 50a92fa1c8e9fbea5337e2cae0708dc3aeadb004..91c9014e9042830754a909a6d996748a4f328e84 100755 --- a/python/paddle_serving_server/dag.py +++ b/python/paddle_serving_server/dag.py @@ -30,10 +30,16 @@ class OpMaker(object): "GeneralDistKVOp", "GeneralCopyOp", "GeneralDetectionOp", + "GeneralRemoteOp", ] self.node_name_suffix_ = collections.defaultdict(int) - def create(self, node_type, engine_name=None, inputs=[], outputs=[]): + def create(self, + node_type, + engine_name=None, + inputs=[], + outputs=[], + addresses=[]): if node_type not in self.op_list: raise Exception("Op type {} is not supported right now".format( node_type)) @@ -55,6 +61,11 @@ class OpMaker(object): dep.name = dep_node.name dep.mode = "RO" node.dependencies.extend([dep]) + + # for general_remote op. + if addresses: + node.address.extend(addresses) + # Because the return value will be used as the key value of the # dict, and the proto object is variable which cannot be hashed, # so it is processed into a string. 
This has little effect on diff --git a/python/paddle_serving_server/env_check/run.py b/python/paddle_serving_server/env_check/run.py index 2c4216b22e464ab2e3fe4ec363b62eb2fe074540..0c29426af37f8cc2c9e82ea57ce2ad7d852aa281 100644 --- a/python/paddle_serving_server/env_check/run.py +++ b/python/paddle_serving_server/env_check/run.py @@ -21,21 +21,27 @@ Usage: export PYTHON_EXECUTABLE=/usr/local/bin/python3.6 python3.6 -m paddle_serving_server.serve check ''' - import sys import os import pytest - inference_test_cases = ["test_fit_a_line.py::TestFitALine::test_inference"] -cpp_test_cases = ["test_fit_a_line.py::TestFitALine::test_cpu", "test_fit_a_line.py::TestFitALine::test_gpu"] -pipeline_test_cases = ["test_uci_pipeline.py::TestUCIPipeline::test_cpu", "test_uci_pipeline.py::TestUCIPipeline::test_gpu"] +cpp_test_cases = [ + "test_fit_a_line.py::TestFitALine::test_cpu", + "test_fit_a_line.py::TestFitALine::test_gpu" +] +pipeline_test_cases = [ + "test_uci_pipeline.py::TestUCIPipeline::test_cpu", + "test_uci_pipeline.py::TestUCIPipeline::test_gpu" +] log_files = ["PipelineServingLogs", "log", "stderr.log", "stdout.log"] + def set_serving_log_path(): if 'SERVING_LOG_PATH' not in os.environ: serving_log_path = os.path.expanduser(os.getcwd()) + '/' - os.environ['SERVING_LOG_PATH']=serving_log_path + os.environ['SERVING_LOG_PATH'] = serving_log_path + def mv_log_to_new_dir(dir_path): import shutil @@ -46,8 +52,8 @@ def mv_log_to_new_dir(dir_path): file_path = os.path.join(serving_log_path, file_name) dir_path_temp = os.path.join(dir_path, file_name) if os.path.exists(file_path): - shutil.move(file_path, dir_path_temp) - + shutil.move(file_path, dir_path_temp) + def run_test_cases(cases_list, case_type, is_open_std): old_stdout, old_stderr = sys.stdout, sys.stderr @@ -66,33 +72,41 @@ def run_test_cases(cases_list, case_type, is_open_std): new_dir_path = os.path.join(serving_log_path, dir_name) mv_log_to_new_dir(new_dir_path) if res == 0: - print("{} {} environment running success".format(case_type, case_name)) + print("{} {} environment running success".format(case_type, + case_name)) elif res == 1: if case_name == "inference": - print("{} {} environment running failure. Please refer to https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html to configure environment".format(case_type, case_name)) + print( + "{} {} environment running failure. Please refer to https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html to configure environment". + format(case_type, case_name)) os._exit(0) else: - print("{} {} environment running failure, if you need this environment, please refer to https://github.com/PaddlePaddle/Serving/blob/develop/doc/Install_CN.md".format(case_type, case_name)) + print( + "{} {} environment running failure, if you need this environment, please refer to https://github.com/PaddlePaddle/Serving/blob/develop/doc/Install_CN.md". 
+ format(case_type, case_name)) + def unset_env(key): del os.environ[key] + def check_env(mode): set_serving_log_path() if 'https_proxy' in os.environ or 'http_proxy' in os.environ: - unset_env("https_proxy") - unset_env("http_proxy") + unset_env("https_proxy") + unset_env("http_proxy") if 'GREP_OPTIONS' in os.environ: - unset_env("GREP_OPTIONS") - is_open_std = False - if mode is "debug": + unset_env("GREP_OPTIONS") + is_open_std = False + if mode == "debug": is_open_std = True - if mode is "all" or mode is "inference" or mode is "debug": + if mode == "all" or mode == "inference" or mode == "debug": run_test_cases(inference_test_cases, "PaddlePaddle", is_open_std) - if mode is "all" or mode is "cpp" or mode is "debug": + if mode == "all" or mode == "cpp" or mode == "debug": run_test_cases(cpp_test_cases, "C++", is_open_std) - if mode is "all" or mode is "pipeline" or mode is "debug": + if mode == "all" or mode == "pipeline" or mode == "debug": run_test_cases(pipeline_test_cases, "Pipeline", is_open_std) + if __name__ == '__main__': check_env("debug") diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 09931dad80e19b364cb4e17a4b878662ec190aff..8ac473d36b11bd41909975692215caa166bc94e0 100755 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -37,12 +37,15 @@ from paddle_serving_server.util import * from paddle_serving_server.env_check.run import check_env import cmd + def signal_handler(signal, frame): print('Process stopped') sys.exit(0) + signal.signal(signal.SIGINT, signal_handler) + # web_service.py is still used by Pipeline. def port_is_available(port): with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: @@ -185,10 +188,10 @@ def serve_args(): action="store_true", help="Use encryption model") parser.add_argument( - "--encryption_rpc_port", - type=int, - required=False, - default=12000, + "--encryption_rpc_port", + type=int, + required=False, + default=12000, help="Port of encryption model, only valid for arg.use_encryption_model") parser.add_argument( "--use_trt", default=False, action="store_true", help="Use TensorRT") @@ -217,11 +220,87 @@ def serve_args(): action="store_true", help="Use gpu_multi_stream") parser.add_argument( - "--enable_prometheus", default=False, action="store_true", help="Use Prometheus") + "--enable_prometheus", + default=False, + action="store_true", + help="Use Prometheus") + parser.add_argument( + "--prometheus_port", + type=int, + default=19393, + help="Port of the Prometheus") + parser.add_argument( + "--request_cache_size", + type=int, + default=0, + help="Max request cache size") + parser.add_argument( + "--use_dist_model", + default=False, + action="store_true", + help="Use distributed model") + parser.add_argument( + "--dist_carrier_id", + type=str, + default="", + help="carrier id of distributed model") + parser.add_argument( + "--dist_cfg_file", + type=str, + default="", + help="config file of distributed model") + parser.add_argument( + "--dist_endpoints", + type=str, + default="+", + help="endpoints of distributed model. 
splited by comma") + parser.add_argument( + "--dist_nranks", + type=int, + default=0, + help="nranks of distributed model") parser.add_argument( - "--prometheus_port", type=int, default=19393, help="Port of the Prometheus") + "--dist_subgraph_index", + type=int, + default=-1, + help="index of distributed model") parser.add_argument( - "--request_cache_size", type=int, default=0, help="Port of the Prometheus") + "--dist_worker_serving_endpoints", + type=str, + default=None, + help="endpoints of worker serving endpoints") + parser.add_argument( + "--dist_master_serving", + default=False, + action="store_true", + help="The master serving of distributed inference") + parser.add_argument( + "--min_subgraph_size", + type=str, + default="", + nargs="+", + help="min_subgraph_size") + parser.add_argument( + "--gpu_memory_mb", + type=int, + default=50, + help="Initially allocate GPU storage size") + parser.add_argument( + "--cpu_math_thread_num", + type=int, + default=1, + help="Initialize the number of CPU computing threads") + parser.add_argument( + "--trt_workspace_size", + type=int, + default=33554432, + help="Initialize allocation 1 << 25 GPU storage size") + parser.add_argument( + "--trt_use_static", + default=False, + action="store_true", + help="Initialize TRT with static data") + return parser.parse_args() @@ -245,7 +324,7 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi workdir = "{}_{}".format(args.workdir, port) dag_list_op = [] - if model == "": + if model == "" and not args.dist_master_serving: print("You must specify your serving model") exit(-1) for single_model_config in args.model: @@ -270,30 +349,55 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi dag_list_op.append(temp_str_list[0]) - read_op = op_maker.create('GeneralReaderOp') - op_seq_maker.add_op(read_op) - #如果dag_list_op不是空,那么证明通过--op 传入了自定义OP或自定义的DAG串联关系。 - #此时,根据--op 传入的顺序去组DAG串联关系 - if len(dag_list_op) > 0: - for single_op in dag_list_op: - op_seq_maker.add_op(op_maker.create(single_op)) - #否则,仍然按照原有方式根虎--model去串联。 - else: - for idx, single_model in enumerate(model): - infer_op_name = "GeneralInferOp" - # 目前由于ocr的节点Det模型依赖于opencv的第三方库 - # 只有使用ocr的时候,才会加入opencv的第三方库并编译GeneralDetectionOp - # 故此处做特殊处理,当不满足下述情况时,所添加的op默认为GeneralInferOp - # 以后可能考虑不用python脚本来生成配置 - if len(model) == 2 and idx == 0 and single_model == "ocr_det_model": - infer_op_name = "GeneralDetectionOp" - else: + # The workflows of master serving in distributed model is different from + # worker servings. The workflow of worker servings is same to non-distributed + # model, but workerflow of master serving needs to add IP address of other + # worker serving in the machine. 
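+    # Illustrative launch commands for the two roles (endpoints, ports and file
+    # names below are placeholders; see serve_args() for the full flag list):
+    #   worker:  python -m paddle_serving_server.serve --model serving_server \
+    #              --port 9393 --use_dist_model --dist_nranks 2 \
+    #              --dist_endpoints ip0:port0,ip1:port1 --dist_subgraph_index 0 \
+    #              --dist_carrier_id 0 --dist_cfg_file dist_config.ini
+    #   master:  python -m paddle_serving_server.serve --port 9494 \
+    #              --dist_master_serving \
+    #              --dist_worker_serving_endpoints ip0:9393,ip1:9393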
+ if not args.dist_master_serving: + read_op = op_maker.create('GeneralReaderOp') + op_seq_maker.add_op(read_op) + is_ocr = False + #如果dag_list_op不是空,那么证明通过--op 传入了自定义OP或自定义的DAG串联关系。 + #此时,根据--op 传入的顺序去组DAG串联关系 + if len(dag_list_op) > 0: + for single_op in dag_list_op: + op_seq_maker.add_op(op_maker.create(single_op)) + if single_op == "GeneralDetectionOp": + is_ocr = True + #否则,仍然按照原有方式根虎--model去串联。 + else: + for idx, single_model in enumerate(model): infer_op_name = "GeneralInferOp" - general_infer_op = op_maker.create(infer_op_name) - op_seq_maker.add_op(general_infer_op) + # 目前由于ocr的节点Det模型依赖于opencv的第三方库 + # 只有使用ocr的时候,才会加入opencv的第三方库并编译GeneralDetectionOp + # 故此处做特殊处理,当不满足下述情况时,所添加的op默认为GeneralInferOp + # 以后可能考虑不用python脚本来生成配置 + if len(model + ) == 2 and idx == 0 and single_model == "ocr_det_model": + infer_op_name = "GeneralDetectionOp" + is_ocr = True + else: + infer_op_name = "GeneralInferOp" + general_infer_op = op_maker.create(infer_op_name) + op_seq_maker.add_op(general_infer_op) - general_response_op = op_maker.create('GeneralResponseOp') - op_seq_maker.add_op(general_response_op) + general_response_op = op_maker.create('GeneralResponseOp') + op_seq_maker.add_op(general_response_op) + else: + # for the master serving of distributed model only add one general_remote op. + if args.dist_worker_serving_endpoints is None: + raise ValueError( + "Params Error!. dist_worker_serving_endpoints is empty when dist_master_serving is set" + ) + worker_serving_endpoints = args.dist_worker_serving_endpoints.split(",") + if len(worker_serving_endpoints) == 0: + raise ValueError( + "Params Error!. dist_worker_serving_endpoints is empty when dist_master_serving is set" + ) + + general_remote_op = op_maker.create( + 'GeneralRemoteOp', None, [], [], addresses=worker_serving_endpoints) + op_seq_maker.add_op(general_remote_op, ) server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(thread_num) @@ -306,10 +410,24 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi server.set_enable_prometheus(args.enable_prometheus) server.set_prometheus_port(args.prometheus_port) server.set_request_cache_size(args.request_cache_size) + server.set_enable_dist_model(args.use_dist_model) + server.set_dist_carrier_id(args.dist_carrier_id) + server.set_dist_cfg_file(args.dist_cfg_file) + server.set_dist_nranks(args.dist_nranks) + server.set_dist_endpoints(args.dist_endpoints.split(",")) + server.set_dist_subgraph_index(args.dist_subgraph_index) + server.set_min_subgraph_size(args.min_subgraph_size) + server.set_gpu_memory_mb(args.gpu_memory_mb) + server.set_cpu_math_thread_num(args.cpu_math_thread_num) if args.use_trt and device == "gpu": server.set_trt() server.set_ir_optimize(True) + server.set_trt_workspace_size(args.trt_workspace_size) + server.set_trt_use_static(args.trt_use_static) + if is_ocr: + info = set_ocr_dynamic_shape_info() + server.set_trt_dynamic_shape_info(info) if args.gpu_multi_stream and device == "gpu": server.set_gpu_multi_stream() @@ -345,6 +463,50 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi server.run_server() +def set_ocr_dynamic_shape_info(): + info = [] + min_input_shape = { + "x": [1, 3, 50, 50], + "conv2d_182.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_2.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_3.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_4.tmp_0": [1, 1, 20, 20], + "nearest_interp_v2_5.tmp_0": [1, 1, 20, 20] + } + max_input_shape = { + "x": [1, 3, 1536, 1536], + "conv2d_182.tmp_0": [20, 200, 
960, 960], + "nearest_interp_v2_2.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_3.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_4.tmp_0": [20, 200, 960, 960], + "nearest_interp_v2_5.tmp_0": [20, 200, 960, 960], + } + opt_input_shape = { + "x": [1, 3, 960, 960], + "conv2d_182.tmp_0": [3, 96, 240, 240], + "nearest_interp_v2_2.tmp_0": [3, 96, 240, 240], + "nearest_interp_v2_3.tmp_0": [3, 24, 240, 240], + "nearest_interp_v2_4.tmp_0": [3, 24, 240, 240], + "nearest_interp_v2_5.tmp_0": [3, 24, 240, 240], + } + det_info = { + "min_input_shape": min_input_shape, + "max_input_shape": max_input_shape, + "opt_input_shape": opt_input_shape, + } + info.append(det_info) + min_input_shape = {"x": [1, 3, 32, 10], "lstm_1.tmp_0": [1, 1, 128]} + max_input_shape = {"x": [50, 3, 32, 1000], "lstm_1.tmp_0": [500, 50, 128]} + opt_input_shape = {"x": [6, 3, 32, 100], "lstm_1.tmp_0": [25, 5, 128]} + rec_info = { + "min_input_shape": min_input_shape, + "max_input_shape": max_input_shape, + "opt_input_shape": opt_input_shape, + } + info.append(rec_info) + return info + + def start_multi_card(args, serving_port=None): # pylint: disable=doc-string-missing if serving_port == None: @@ -489,8 +651,10 @@ def stop_serving(command: str, port: int=None): os.remove(filepath) return True + class Check_Env_Shell(cmd.Cmd): intro = "Welcome to the check env shell.Type help to list commands.\n" + # ----- basic commands ----- def do_help(self, arg): print("\nCommand list\t\tDescription\n"\ @@ -507,23 +671,23 @@ class Check_Env_Shell(cmd.Cmd): def do_check_all(self, arg): "Check Environment of Paddle Inference, Pipeline Serving, C++ Serving" - check_env("all") - + check_env("all") + def do_check_pipeline(self, arg): "Check Environment of Pipeline Serving" - check_env("pipeline") - + check_env("pipeline") + def do_check_cpp(self, arg): "Check Environment of C++ Serving" - check_env("cpp") + check_env("cpp") def do_check_inference(self, arg): "Check Environment of Paddle Inference" - check_env("inference") - + check_env("inference") + def do_debug(self, arg): "Open pytest log to debug" - check_env("debug") + check_env("debug") def do_exit(self, arg): "Exit Check Env Shell" @@ -531,6 +695,7 @@ class Check_Env_Shell(cmd.Cmd): os._exit(0) return True + if __name__ == "__main__": # args.device is not used at all. # just keep the interface. @@ -547,7 +712,7 @@ if __name__ == "__main__": else: os._exit(-1) elif args.server == "check": - Check_Env_Shell().cmdloop() + Check_Env_Shell().cmdloop() for single_model_config in args.model: if os.path.isdir(single_model_config): pass diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py index 45f826470fff16cfab577caa4937dc81de61a2e9..4c05b43f1970fc0c690413a8bcae5f50b1acce86 100755 --- a/python/paddle_serving_server/server.py +++ b/python/paddle_serving_server/server.py @@ -53,6 +53,14 @@ class Server(object): self.general_model_config_fn:'list'=[] # ["GeneralInferOp_0/general_model.prototxt"]The quantity is equal to the InferOp quantity,Feed and Fetch --OP self.subdirectory:'list'=[] # The quantity is equal to the InferOp quantity, and name = node.name = engine.name self.model_config_paths:'collections.OrderedDict()' # Save the serving_server_conf.prototxt path (feed and fetch information) this is a map for multi-model in a workflow + self.enable_dist_model: bool, enable distributed model, false default + self.dist_carrier_id: string, mark distributed model carrier name, "" default. 
+ self.dist_cfg_file: string, file name of distributed configure, "" default. + self.dist_nranks: int, number of distributed nodes, 0 default. + self.dist_endpoints: list of string, all endpoints(ip:port) of distributed nodes, [] default. + self.dist_subgraph_index: index of distributed subgraph model, -1 default. It is used to select the endpoint of the current shard in distribute model. -1 default. + self.dist_worker_serving_endpoints: all endpoints of worker serving in the same machine. [] default. + self.dist_master_serving: the master serving is used for receiving client requests, only in pp0 of pipeline parallel, False default. """ self.server_handle_ = None self.infer_service_conf = None @@ -101,6 +109,20 @@ class Server(object): self.enable_prometheus = False self.prometheus_port = 19393 self.request_cache_size = 0 + self.enable_dist_model = False + self.dist_carrier_id = "" + self.dist_cfg_file = "" + self.dist_nranks = 0 + self.dist_endpoints = [] + self.dist_subgraph_index = -1 + self.dist_worker_serving_endpoints = [] + self.dist_master_serving = False + self.min_subgraph_size = [] + self.trt_dynamic_shape_info = [] + self.gpu_memory_mb = 50 + self.cpu_math_thread_num = 1 + self.trt_workspace_size = 33554432 # 1 << 25 + self.trt_use_static = False def get_fetch_list(self, infer_node_idx=-1): fetch_names = [ @@ -211,6 +233,78 @@ class Server(object): def set_request_cache_size(self, request_cache_size): self.request_cache_size = request_cache_size + def set_enable_dist_model(self, status): + self.enable_dist_model = status + + def set_dist_carrier_id(self, carrier_id): + if isinstance(carrier_id, int): + carrier_id = str(carrier_id) + self.dist_carrier_id = carrier_id + + def set_dist_cfg_file(self, dist_cfg_file): + self.dist_cfg_file = dist_cfg_file + + def set_dist_nranks(self, nranks): + if isinstance(nranks, str): + nranks = int(nranks) + elif not isinstance(nranks, int): + raise ValueError("dist_nranks type error! must be int or string") + + self.dist_nranks = nranks + + def set_dist_endpoints(self, endpoints): + if isinstance(endpoints, list): + self.dist_endpoints = endpoints + elif isinstance(endpoints, str): + self.dist_endpoints = [endpoints] + else: + raise ValueError( + "dist_endpoints type error! must be list or string") + + def set_dist_subgraph_index(self, subgraph_index): + if isinstance(subgraph_index, str): + subgraph_index = int(subgraph_index) + elif not isinstance(subgraph_index, int): + raise ValueError("subgraph type error! must be int or string") + + self.dist_subgraph_index = subgraph_index + + def set_dist_worker_serving_endpoint(self, serving_endpoints): + if isinstance(serving_endpoints, list): + self.dist_worker_serving_endpoint = serving_endpoints + elif not isinstance(serving_endpoints, str): + self.dist_worker_serving_endpoint = [serving_endpoints] + else: + raise ValueError( + "dist_worker_serving_endpoint type error! 
must be list or string" + ) + + def set_dist_master_serving(self, is_master): + self.dist_master_serving = is_master + + def set_min_subgraph_size(self, min_subgraph_size): + for s in min_subgraph_size: + try: + size = int(s) + except: + size = 3 + self.min_subgraph_size.append(size) + + def set_trt_dynamic_shape_info(self, info): + self.trt_dynamic_shape_info = info + + def set_gpu_memory_mb(self, gpu_memory_mb): + self.gpu_memory_mb = gpu_memory_mb + + def set_cpu_math_thread_num(self, cpu_math_thread_num): + self.cpu_math_thread_num = cpu_math_thread_num + + def set_trt_workspace_size(self, trt_workspace_size): + self.trt_workspace_size = trt_workspace_size + + def set_trt_use_static(self, trt_use_static): + self.trt_use_static = trt_use_static + def _prepare_engine(self, model_config_paths, device, use_encryption_model): self.device = device if self.model_toolkit_conf == None: @@ -264,6 +358,19 @@ class Server(object): engine.use_xpu = self.use_xpu engine.use_ascend_cl = self.use_ascend_cl engine.use_gpu = False + #engine.gpu_memory_mb = self.gpu_memory_mb + #engine.cpu_math_thread_num = self.cpu_math_thread_num + #engine.trt_workspace_size = self.trt_workspace_size + #engine.trt_use_static = self.trt_use_static + + # use distributed model. + if self.dist_subgraph_index >= 0: + engine.enable_dist_model = True + engine.dist_carrier_id = self.dist_carrier_id + engine.dist_cfg_file = self.dist_cfg_file + engine.dist_nranks = self.dist_nranks + engine.dist_endpoints.extend(self.dist_endpoints) + engine.dist_subgraph_index = self.dist_subgraph_index if len(self.gpuid) == 0: raise ValueError("CPU: self.gpuid = -1, GPU: must set it ") @@ -292,6 +399,25 @@ class Server(object): if use_encryption_model: engine.encrypted_model = True engine.type = "PADDLE_INFER" + if len(self.min_subgraph_size) > index: + engine.min_subgraph_size = self.min_subgraph_size[index] + if len(self.trt_dynamic_shape_info) > index: + dynamic_shape_info = self.trt_dynamic_shape_info[index] + try: + for key, value in dynamic_shape_info.items(): + shape_type = key + if shape_type == "min_input_shape": + local_map = engine.min_input_shape + if shape_type == "max_input_shape": + local_map = engine.max_input_shape + if shape_type == "opt_input_shape": + local_map = engine.opt_input_shape + for name, shape in value.items(): + local_value = ' '.join(str(i) for i in shape) + local_map[name] = local_value + except: + raise ValueError("Set TRT dynamic shape info error!") + self.model_toolkit_conf.append(server_sdk.ModelToolkitConf()) self.model_toolkit_conf[-1].engines.extend([engine]) index = index + 1 @@ -473,7 +599,7 @@ class Server(object): folder_name = "serving-%s-%s" % (self.get_serving_bin_name(), version_tag) tar_name = "%s.tar.gz" % folder_name - bin_url = "https://paddle-serving.bj.bcebos.com/bin/%s" % tar_name + bin_url = "https://paddle-serving.bj.bcebos.com/test-dev/bin/%s" % tar_name self.server_path = os.path.join(self.module_path, folder_name) diff --git a/python/pipeline/local_service_handler.py b/python/pipeline/local_service_handler.py old mode 100644 new mode 100755 index 1013278d8b8795f3bb3b7f76d6e6a2d388b7f1e9..70b82095266e474330d8e7efebd0f2ee9656bf61 --- a/python/pipeline/local_service_handler.py +++ b/python/pipeline/local_service_handler.py @@ -53,7 +53,9 @@ class LocalServiceHandler(object): mkldnn_bf16_op_list=None, min_subgraph_size=3, dynamic_shape_info={}, - use_calib=False): + use_calib=False, + collect_shape_range_info="", + tuned_dynamic_shape_info=""): """ Initialization of localservicehandler @@ 
-99,6 +101,8 @@ class LocalServiceHandler(object): self.min_subgraph_size = 3 self.dynamic_shape_info = {} self._use_calib = False + self.collect_shape_range_info = "" + self.tuned_dynamic_shape_info = "" if device_type == -1: # device_type is not set, determined by `devices`, @@ -179,6 +183,8 @@ class LocalServiceHandler(object): self._mkldnn_op_list = mkldnn_op_list self._mkldnn_bf16_op_list = mkldnn_bf16_op_list self._use_calib = use_calib + self.collect_shape_range_info = collect_shape_range_info + self.tuned_dynamic_shape_info = tuned_dynamic_shape_info _LOGGER.info( "Models({}) will be launched by device {}. use_gpu:{}, " @@ -187,15 +193,16 @@ class LocalServiceHandler(object): "client_type:{}, fetch_names:{}, precision:{}, use_calib:{}, " "use_mkldnn:{}, mkldnn_cache_capacity:{}, mkldnn_op_list:{}, " "mkldnn_bf16_op_list:{}, use_ascend_cl:{}, min_subgraph_size:{}," - "is_set_dynamic_shape_info:{}".format( + "is_set_dynamic_shape_info:{},collect_shape_range_info:{}," + "tuned_dynamic_shape_info:{}".format( model_config, self._device_name, self._use_gpu, self._use_trt, - self._use_lite, self._use_xpu, device_type, self._devices, - self._mem_optim, self._ir_optim, self._use_profile, - self._thread_num, self._client_type, self._fetch_names, - self._precision, self._use_calib, self._use_mkldnn, - self._mkldnn_cache_capacity, self._mkldnn_op_list, - self._mkldnn_bf16_op_list, self._use_ascend_cl, - self.min_subgraph_size, bool(len(self.dynamic_shape_info)))) + self._use_lite, self._use_xpu, device_type, self._devices, self. + _mem_optim, self._ir_optim, self._use_profile, self._thread_num, + self._client_type, self._fetch_names, self._precision, self. + _use_calib, self._use_mkldnn, self._mkldnn_cache_capacity, self. + _mkldnn_op_list, self._mkldnn_bf16_op_list, self._use_ascend_cl, + self.min_subgraph_size, bool(len(self.dynamic_shape_info)), + self.collect_shape_range_info, self.tuned_dynamic_shape_info)) def get_fetch_list(self): return self._fetch_names @@ -255,7 +262,9 @@ class LocalServiceHandler(object): use_ascend_cl=self._use_ascend_cl, min_subgraph_size=self.min_subgraph_size, dynamic_shape_info=self.dynamic_shape_info, - use_calib=self._use_calib) + use_calib=self._use_calib, + collect_shape_range_info=self.collect_shape_range_info, + tuned_dynamic_shape_info=self.tuned_dynamic_shape_info) return self._local_predictor_client def get_client_config(self): @@ -282,9 +291,9 @@ class LocalServiceHandler(object): if self._device_name == "cpu": from paddle_serving_server import OpMaker, OpSeqMaker, Server op_maker = OpMaker() - read_op = op_maker.create('general_reader') - general_infer_op = op_maker.create('general_infer') - general_response_op = op_maker.create('general_response') + read_op = op_maker.create('GeneralReaderOp') + general_infer_op = op_maker.create('GeneralInferOp') + general_response_op = op_maker.create('GeneralResponseOp') op_seq_maker = OpSeqMaker() op_seq_maker.add_op(read_op) @@ -296,9 +305,9 @@ class LocalServiceHandler(object): #gpu or arm from paddle_serving_server import OpMaker, OpSeqMaker, Server op_maker = OpMaker() - read_op = op_maker.create('general_reader') - general_infer_op = op_maker.create('general_infer') - general_response_op = op_maker.create('general_response') + read_op = op_maker.create('GeneralReaderOp') + general_infer_op = op_maker.create('GeneralInferOp') + general_response_op = op_maker.create('GeneralResponseOp') op_seq_maker = OpSeqMaker() op_seq_maker.add_op(read_op) diff --git a/python/pipeline/operator.py 
b/python/pipeline/operator.py index b04f79f971c5212242c2dad71abf19d2cd21af21..84a72d09cf2fee4be844128b31f7e15943b114bc 100644 --- a/python/pipeline/operator.py +++ b/python/pipeline/operator.py @@ -35,12 +35,12 @@ else: raise Exception("Error Python version") from .error_catch import ErrorCatch, CustomException, CustomExceptionCode, ParamChecker, ParamVerify -check_feed_dict=ParamVerify.check_feed_dict -check_fetch_list=ParamVerify.check_fetch_list +check_feed_dict = ParamVerify.check_feed_dict +check_fetch_list = ParamVerify.check_fetch_list from .proto import pipeline_service_pb2 -from .channel import (ThreadChannel, ProcessChannel,ChannelData, +from .channel import (ThreadChannel, ProcessChannel, ChannelData, ChannelDataType, ChannelStopError, ChannelTimeoutError) -from .error_catch import ProductErrCode +from .error_catch import ProductErrCode from .error_catch import CustomExceptionCode as ChannelDataErrcode from .util import NameGenerator from .profiler import UnsafeTimeProfiler as TimeProfiler @@ -119,9 +119,11 @@ class Op(object): self._for_close_op_lock = threading.Lock() self._succ_init_op = False self._succ_close_op = False - self.dynamic_shape_info = {} + self.dynamic_shape_info = {} self.set_dynamic_shape_info() - + self.collect_shape_range_info = "" + self.tuned_dynamic_shape_info = "" + def set_dynamic_shape_info(self): """ when opening tensorrt(configure in config.yml) and each time the input shape @@ -141,7 +143,6 @@ class Op(object): feed_names = client.feed_names_ fetch_names = client.fetch_names_ return feed_names, fetch_names - def init_from_dict(self, conf): """ @@ -164,9 +165,10 @@ class Op(object): if self._client_config is None: self._client_config = conf.get("client_config") if self._use_encryption_model is None: - print ("config use_encryption model here", conf.get("use_encryption_model")) + print("config use_encryption model here", + conf.get("use_encryption_model")) self._use_encryption_model = conf.get("use_encryption_model") - if self._encryption_key is None or self._encryption_key=="": + if self._encryption_key is None or self._encryption_key == "": self._encryption_key = conf.get("encryption_key") if self._timeout is None: self._timeout = conf["timeout"] @@ -235,6 +237,14 @@ class Op(object): "mkldnn_bf16_op_list") self.min_subgraph_size = local_service_conf.get( "min_subgraph_size") + self.collect_shape_range_info = local_service_conf.get( + "collect_shape_range_info") + self.tuned_dynamic_shape_info = local_service_conf.get( + "tuned_dynamic_shape_info") + if self.collect_shape_range_info is None: + self.collect_shape_range_info = "" + if self.tuned_dynamic_shape_info is None: + self.tuned_dynamic_shape_info = "" if self.model_config is None: self.with_serving = False @@ -259,7 +269,9 @@ class Op(object): mkldnn_bf16_op_list=self.mkldnn_bf16_op_list, min_subgraph_size=self.min_subgraph_size, dynamic_shape_info=self.dynamic_shape_info, - use_calib=self.use_calib) + use_calib=self.use_calib, + collect_shape_range_info=self.collect_shape_range_info, + tuned_dynamic_shape_info=self.tuned_dynamic_shape_info) service_handler.prepare_server() # get fetch_list serivce_ports = service_handler.get_port_list() self._server_endpoints = [ @@ -290,7 +302,9 @@ class Op(object): mkldnn_bf16_op_list=self.mkldnn_bf16_op_list, min_subgraph_size=self.min_subgraph_size, dynamic_shape_info=self.dynamic_shape_info, - use_calib=self.use_calib) + use_calib=self.use_calib, + collect_shape_range_info=self.collect_shape_range_info, + 
tuned_dynamic_shape_info=self.tuned_dynamic_shape_info) if self._client_config is None: self._client_config = service_handler.get_client_config( ) @@ -401,14 +415,16 @@ class Op(object): if self.client_type == 'brpc': client = Client() client.load_client_config(client_config) - self.right_feed_names, self.right_fetch_names = self.get_feed_fetch_list(client) + self.right_feed_names, self.right_fetch_names = self.get_feed_fetch_list( + client) elif self.client_type == 'pipeline_grpc': client = PPClient() elif self.client_type == 'local_predictor': if self.local_predictor is None: raise ValueError("local predictor not yet created") client = self.local_predictor - self.right_feed_names, self.right_fetch_names = self.get_feed_fetch_list(client) + self.right_feed_names, self.right_fetch_names = self.get_feed_fetch_list( + client) else: raise ValueError("Failed to init client: unknow client " "type {}".format(self.client_type)) @@ -417,12 +433,13 @@ class Op(object): _LOGGER.info("Op({}) has no fetch name set. So fetch all vars") if self.client_type != "local_predictor": if self._use_encryption_model is None or self._use_encryption_model is False: - client.connect(server_endpoints) + client.connect(server_endpoints) else: - print("connect to encryption rpc client") - client.use_key(self._encryption_key) - client.connect(server_endpoints, encryption=True) - _LOGGER.info("init_client, feed_list:{}, fetch_list: {}".format(self.right_feed_names, self.right_fetch_names)) + print("connect to encryption rpc client") + client.use_key(self._encryption_key) + client.connect(server_endpoints, encryption=True) + _LOGGER.info("init_client, feed_list:{}, fetch_list: {}".format( + self.right_feed_names, self.right_fetch_names)) return client def get_input_ops(self): @@ -599,7 +616,7 @@ class Op(object): (_, input_dict), = input_dicts.items() return input_dict, False, None, "" - + def process(self, feed_batch, typical_logid=0): """ In process stage, send requests to the inference server or predict locally. 
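For readers of the operator.py and local_service_handler.py hunks above: the two new per-op options, collect_shape_range_info and tuned_dynamic_shape_info, are plain string fields that travel from an op's local_service_conf into LocalServiceHandler and on to the local predictor. The sketch below shows such a local_service_conf as the Python dict the pipeline would hold after parsing config.yml; only the last two keys come from this patch, every other key and value (model directory, fetch name, device_type and so on) is an illustrative placeholder, and the file-path semantics are inferred from the option names rather than stated in the patch.

# Hypothetical per-op local_service_conf after parsing config.yml.
# Only the last two keys are introduced by this patch; the rest are placeholders.
local_service_conf = {
    "model_config": "./ocr_det_serving_model",   # placeholder model directory
    "client_type": "local_predictor",
    "device_type": 2,                            # e.g. GPU + TensorRT in pipeline configs
    "devices": "0",
    "fetch_list": ["save_infer_model/scale_0.tmp_1"],  # placeholder fetch var
    "precision": "fp32",
    "use_calib": False,
    "use_mkldnn": False,
    "mkldnn_cache_capacity": 0,
    "min_subgraph_size": 3,
    # New in this patch; both default to "" when absent (see init_from_dict above).
    # Presumably a file path where Paddle Inference dumps collected shape ranges:
    "collect_shape_range_info": "shape_range_info.pbtxt",
    # Presumably the path of a previously collected file, fed back in as tuned
    # dynamic-shape information for TensorRT:
    "tuned_dynamic_shape_info": "shape_range_info.pbtxt",
}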
@@ -616,19 +633,23 @@ class Op(object): call_result = None err_code = ChannelDataErrcode.OK.value err_info = "" - @ErrorCatch + + @ErrorCatch @ParamChecker - def feed_fetch_list_check_helper(feed_batch : lambda feed_batch: check_feed_dict(feed_batch[0], self.right_feed_names), - fetch_list : lambda fetch_list: check_fetch_list(fetch_list, self.right_fetch_names), - log_id): + def feed_fetch_list_check_helper( + feed_batch: lambda feed_batch: check_feed_dict(feed_batch[0], self.right_feed_names), + fetch_list: lambda fetch_list: check_fetch_list(fetch_list, self.right_fetch_names), + log_id): return None - _, resp = feed_fetch_list_check_helper(feed_batch, self._fetch_names, log_id=typical_logid) + + _, resp = feed_fetch_list_check_helper( + feed_batch, self._fetch_names, log_id=typical_logid) if resp.err_no != CustomExceptionCode.OK.value: err_code = resp.err_no err_info = resp.err_msg call_result = None return call_result, err_code, err_info - + if self.client_type == "local_predictor": err, err_info = ChannelData.check_batch_npdata(feed_batch) if err != 0: @@ -804,7 +825,7 @@ class Op(object): self.mkldnn_cache_capacity, self.mkldnn_op_list, self.mkldnn_bf16_op_list, self.is_jump_op(), self.get_output_channels_of_jump_ops(), - self.min_subgraph_size, self.dynamic_shape_info, + self.min_subgraph_size, self.dynamic_shape_info, self.use_calib)) p.daemon = True p.start() @@ -839,9 +860,9 @@ class Op(object): self._get_output_channels(), True, trace_buffer, self.model_config, self.workdir, self.thread_num, self.device_type, self.devices, self.mem_optim, - self.ir_optim, self.precision, self.use_mkldnn, - self.mkldnn_cache_capacity, self.mkldnn_op_list, - self.mkldnn_bf16_op_list, self.is_jump_op(), + self.ir_optim, self.precision, self.use_mkldnn, + self.mkldnn_cache_capacity, self.mkldnn_op_list, + self.mkldnn_bf16_op_list, self.is_jump_op(), self.get_output_channels_of_jump_ops(), self.min_subgraph_size, self.dynamic_shape_info, self.use_calib)) @@ -873,40 +894,43 @@ class Op(object): preped_data_dict = collections.OrderedDict() err_channeldata_dict = collections.OrderedDict() skip_process_dict = {} + @ErrorCatch def preprocess_help(self, parsed_data, data_id, logid_dict): preped_data, is_skip_process, prod_errcode, prod_errinfo = self.preprocess( parsed_data, data_id, logid_dict.get(data_id)) return preped_data, is_skip_process, prod_errcode, prod_errinfo - + for data_id, parsed_data in parsed_data_dict.items(): preped_data, error_channeldata = None, None is_skip_process = False prod_errcode, prod_errinfo = None, None log_id = logid_dict.get(data_id) - process_res, resp = preprocess_help(self, parsed_data, data_id = data_id, - logid_dict = logid_dict) + process_res, resp = preprocess_help( + self, parsed_data, data_id=data_id, logid_dict=logid_dict) if resp.err_no == CustomExceptionCode.OK.value: preped_data, is_skip_process, prod_errcode, prod_errinfo = process_res if is_skip_process is True: skip_process_dict[data_id] = True if prod_errcode is not None: - _LOGGER.error("data_id: {} return product error. Product ErrNo:{}, Product ErrMsg: {}".format(data_id, prod_errcode, prod_errinfo)) + _LOGGER.error( + "data_id: {} return product error. Product ErrNo:{}, Product ErrMsg: {}". 
+ format(data_id, prod_errcode, prod_errinfo)) error_channeldata = ChannelData( - error_code=ChannelDataErrcode.PRODUCT_ERROR.value, - error_info="", - prod_error_code=prod_errcode, - prod_error_info=prod_errinfo, - data_id=data_id, - log_id=log_id) + error_code=ChannelDataErrcode.PRODUCT_ERROR.value, + error_info="", + prod_error_code=prod_errcode, + prod_error_info=prod_errinfo, + data_id=data_id, + log_id=log_id) else: - + error_channeldata = ChannelData( - error_code=resp.err_no, - error_info=resp.err_msg, - data_id=data_id, - log_id=log_id) - skip_process_dict[data_id] = True + error_code=resp.err_no, + error_info=resp.err_msg, + data_id=data_id, + log_id=log_id) + skip_process_dict[data_id] = True if error_channeldata is not None: err_channeldata_dict[data_id] = error_channeldata @@ -1086,8 +1110,8 @@ class Op(object): # 2 kinds of errors if error_code != ChannelDataErrcode.OK.value or midped_batch is None: error_info = "[{}] failed to predict. {}. Please check the input dict and checkout PipelineServingLogs/pipeline.log for more details.".format( - self.name, error_info) - + self.name, error_info) + _LOGGER.error(error_info) for data_id in data_ids: err_channeldata_dict[data_id] = ChannelData( @@ -1162,12 +1186,16 @@ class Op(object): _LOGGER.debug("{} Running postprocess".format(op_info_prefix)) postped_data_dict = collections.OrderedDict() err_channeldata_dict = collections.OrderedDict() + @ErrorCatch - def postprocess_help(self, parsed_data_dict, midped_data, data_id, logid_dict): - postped_data, prod_errcode, prod_errinfo = self.postprocess(parsed_data_dict[data_id], - midped_data, data_id, logid_dict.get(data_id)) + def postprocess_help(self, parsed_data_dict, midped_data, data_id, + logid_dict): + postped_data, prod_errcode, prod_errinfo = self.postprocess( + parsed_data_dict[data_id], midped_data, data_id, + logid_dict.get(data_id)) if not isinstance(postped_data, dict): - raise CustomException(CustomExceptionCode.TYPE_ERROR, "postprocess should return dict", True) + raise CustomException(CustomExceptionCode.TYPE_ERROR, + "postprocess should return dict", True) return postped_data, prod_errcode, prod_errinfo for data_id, midped_data in midped_data_dict.items(): @@ -1175,19 +1203,23 @@ class Op(object): postped_data, err_channeldata = None, None prod_errcode, prod_errinfo = None, None - post_res, resp = postprocess_help(self, parsed_data_dict, midped_data, data_id - = data_id, logid_dict = logid_dict) + post_res, resp = postprocess_help( + self, + parsed_data_dict, + midped_data, + data_id=data_id, + logid_dict=logid_dict) if resp.err_no == CustomExceptionCode.OK.value: postped_data, prod_errcode, prod_errinfo = post_res if prod_errcode is not None: - # product errors occured + # product errors occured err_channeldata = ChannelData( - error_code=ChannelDataErrcode.PRODUCT_ERROR.value, - error_info="", - prod_error_code=prod_errcode, - prod_error_info=prod_errinfo, - data_id=data_id, - log_id=log_id) + error_code=ChannelDataErrcode.PRODUCT_ERROR.value, + error_info="", + prod_error_code=prod_errcode, + prod_error_info=prod_errinfo, + data_id=data_id, + log_id=log_id) else: err_channeldata = ChannelData( error_code=resp.err_no, @@ -1203,16 +1235,16 @@ class Op(object): err, _ = ChannelData.check_npdata(postped_data) if err == 0: output_data = ChannelData( - ChannelDataType.CHANNEL_NPDATA.value, - npdata=postped_data, - data_id=data_id, - log_id=log_id) + ChannelDataType.CHANNEL_NPDATA.value, + npdata=postped_data, + data_id=data_id, + log_id=log_id) else: output_data = 
ChannelData( - ChannelDataType.DICT.value, - dictdata=postped_data, - data_id=data_id, - log_id=log_id) + ChannelDataType.DICT.value, + dictdata=postped_data, + data_id=data_id, + log_id=log_id) postped_data_dict[data_id] = output_data _LOGGER.debug("{} Succ postprocess".format(op_info_prefix)) return postped_data_dict, err_channeldata_dict @@ -1303,10 +1335,10 @@ class Op(object): def _run(self, concurrency_idx, input_channel, output_channels, is_thread_op, trace_buffer, model_config, workdir, thread_num, - device_type, devices, mem_optim, ir_optim, precision, - use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list, - mkldnn_bf16_op_list, is_jump_op, output_channels_of_jump_ops, - min_subgraph_size, dynamic_shape_info, use_calib): + device_type, devices, mem_optim, ir_optim, precision, use_mkldnn, + mkldnn_cache_capacity, mkldnn_op_list, mkldnn_bf16_op_list, + is_jump_op, output_channels_of_jump_ops, min_subgraph_size, + dynamic_shape_info, use_calib): """ _run() is the entry function of OP process / thread model.When client type is local_predictor in process mode, the CUDA environment needs to @@ -1344,12 +1376,14 @@ class Op(object): # init ops profiler = None + @ErrorCatch - def check_helper(self, is_thread_op, model_config, workdir, - thread_num, device_type, devices, mem_optim, ir_optim, - precision, use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list, - mkldnn_bf16_op_list, min_subgraph_size, dynamic_shape_info): - + def check_helper(self, is_thread_op, model_config, workdir, thread_num, + device_type, devices, mem_optim, ir_optim, precision, + use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list, + mkldnn_bf16_op_list, min_subgraph_size, + dynamic_shape_info): + if is_thread_op == False and self.client_type == "local_predictor": self.service_handler = local_service_handler.LocalServiceHandler( model_config=model_config, @@ -1367,7 +1401,9 @@ class Op(object): mkldnn_bf16_op_list=mkldnn_bf16_op_list, min_subgraph_size=min_subgraph_size, dynamic_shape_info=dynamic_shape_info, - use_calib=use_calib) + use_calib=use_calib, + collect_shape_range_info=self.collect_shape_range_info, + tuned_dynamic_shape_info=self.tuned_dynamic_shape_info) _LOGGER.info("Init cuda env in process {}".format( concurrency_idx)) @@ -1377,17 +1413,19 @@ class Op(object): profiler = self._initialize(is_thread_op, concurrency_idx) return profiler - profiler, resp = check_helper(self, is_thread_op, model_config, workdir, - thread_num, device_type, devices, mem_optim, ir_optim, - precision, use_mkldnn, mkldnn_cache_capacity, mkldnn_op_list, - mkldnn_bf16_op_list, min_subgraph_size, dynamic_shape_info) + profiler, resp = check_helper( + self, is_thread_op, model_config, workdir, thread_num, device_type, + devices, mem_optim, ir_optim, precision, use_mkldnn, + mkldnn_cache_capacity, mkldnn_op_list, mkldnn_bf16_op_list, + min_subgraph_size, dynamic_shape_info) if resp.err_no != CustomExceptionCode.OK.value: _LOGGER.critical( "{} failed to init op: {}".format(op_info_prefix, resp.err_msg), exc_info=False) - print("{} failed to init op: {}".format(op_info_prefix, resp.err_msg)) + print("{} failed to init op: {}".format(op_info_prefix, + resp.err_msg)) kill_stop_process_by_pid("kill", os.getpgid(os.getpid())) _LOGGER.info("{} Succ init".format(op_info_prefix)) @@ -1583,6 +1621,7 @@ class Op(object): Returns: TimeProfiler """ + @ErrorCatch def init_helper(self, is_thread_op, concurrency_idx): if is_thread_op: @@ -1592,7 +1631,7 @@ class Op(object): self.concurrency_idx = None # init client self.client = 
self.init_client(self._client_config, - self._server_endpoints) + self._server_endpoints) # user defined self.init_op() self._succ_init_op = True @@ -1601,10 +1640,10 @@ class Op(object): self.concurrency_idx = concurrency_idx # init client self.client = self.init_client(self._client_config, - self._server_endpoints) + self._server_endpoints) # user defined - self.init_op() - + self.init_op() + init_helper(self, is_thread_op, concurrency_idx) print("[OP Object] init success") # use a separate TimeProfiler per thread or process @@ -1910,8 +1949,8 @@ class VirtualOp(Op): \-> E ----------/ DAG view: [[A], [B, E], [C], [D], [F]] - BUILD DAG: [A -> B -> C -> D -> E -> F] - \-> E -> V1-> V2-> V3/ + BUILD DAG: [A -> B -> C -> D -> F] + \-> E -> V1-> V2->/ """ def __init__(self, name, concurrency=1): diff --git a/python/pipeline/pipeline_server.py b/python/pipeline/pipeline_server.py index 3ff765c4725a31c31a0de4effc01303507f1ebbc..17c408609b2066c47f184474c3b8ee8a6115bd86 100644 --- a/python/pipeline/pipeline_server.py +++ b/python/pipeline/pipeline_server.py @@ -261,6 +261,8 @@ class PipelineServer(object): "use_mkldnn": False, "mkldnn_cache_capacity": 0, "min_subgraph_size": 3, + "collect_shape_range_info": "", + "tuned_dynamic_shape_info": "", }, } for op in self._used_op: @@ -422,6 +424,8 @@ class ServerYamlConfChecker(object): "use_mkldnn": False, "mkldnn_cache_capacity": 0, "min_subgraph_size": 3, + "collect_shape_range_info": "", + "tuned_dynamic_shape_info": "", } conf_type = { "model_config": str, @@ -438,6 +442,8 @@ class ServerYamlConfChecker(object): "mkldnn_op_list": list, "mkldnn_bf16_op_list": list, "min_subgraph_size": int, + "collect_shape_range_info": str, + "tuned_dynamic_shape_info": str, } conf_qualification = {"thread_num": (">=", 1), } ServerYamlConfChecker.check_conf(conf, default_conf, conf_type, diff --git a/python/requirements.txt b/python/requirements.txt index c6938505b2cf5d38eda35192c930ac69ce2144ed..c47b6e5c158c6d2c665eaed0d4edfc6eb51f3a9c 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,4 +1,5 @@ numpy>=1.12, <=1.16.4 ; python_version<"3.5" +numpy<=1.19.4 ; platform_machine == "aarch64" shapely==1.8.0 wheel>=0.34.0, <0.35.0 setuptools>=44.1.0 @@ -24,7 +25,7 @@ pytest==7.0.1 prometheus-client==0.12.0 pillow==8.4.0 ; python_version == "3.6" pillow==9.0.0 ; python_version > "3.6" -av==8.0.3 -decord==0.4.2 -SimpleITK +av==8.0.3; platform_machine != "aarch64" +decord==0.4.2; platform_machine != "aarch64" +SimpleITK; platform_machine != "aarch64" diff --git a/tools/Dockerfile.cuda10.1-cudnn7-gcc54.devel b/tools/Dockerfile.cuda10.1-cudnn7-gcc54.devel index 7c2d19dc1a303cff2fb0cf16e857d0652be89e0b..6a162e89fc997239d57709f4949ed52d174266ec 100644 --- a/tools/Dockerfile.cuda10.1-cudnn7-gcc54.devel +++ b/tools/Dockerfile.cuda10.1-cudnn7-gcc54.devel @@ -84,7 +84,7 @@ RUN ln -sf /usr/local/bin/python3.6 /usr/local/bin/python3 && ln -sf /usr/local/ RUN rm -r /root/python_build # Install Go and glide -RUN wget -qO- https://dl.google.com/go/go1.14.linux-amd64.tar.gz | \ +RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.17.2.linux-amd64.tar.gz | \ tar -xz -C /usr/local && \ mkdir /root/go && \ mkdir /root/go/bin && \ diff --git a/tools/Dockerfile.java b/tools/Dockerfile.java index 661943ed033c15b8a8a4084a0585411db200a361..2cb085c2ead72ddace34428caeefc22385a89246 100644 --- a/tools/Dockerfile.java +++ b/tools/Dockerfile.java @@ -1,7 +1,7 @@ # A image for building paddle binaries # # Use cuda devel base image for both cpu and gpu environment # # When you 
modify it, please be aware of cudnn-runtime version -FROM hub.baidubce.com/paddlepaddle/serving:latest-cuda10.2-cudnn8-devel +FROM registry.baidubce.com/paddlepaddle/serving:0.8.0-cuda10.2-cudnn8-devel MAINTAINER PaddlePaddle Authors diff --git a/tools/Dockerfile.runtime_template b/tools/Dockerfile.runtime_template index b900e772f07eae981f3d7f4dc46734a7a79939de..a72fd72acdcd1e2034b390fa8db681df344e1549 100644 --- a/tools/Dockerfile.runtime_template +++ b/tools/Dockerfile.runtime_template @@ -40,7 +40,7 @@ WORKDIR /home RUN bash /build_scripts/install_trt.sh <> && rm -rf /build_scripts # install go -RUN wget -qO- https://dl.google.com/go/go1.14.linux-amd64.tar.gz | \ +RUN wget -qO- https://paddle-ci.cdn.bcebos.com/go1.17.2.linux-amd64.tar.gz | \ tar -xz -C /usr/local && \ mkdir /root/go && \ mkdir /root/go/bin && \ diff --git a/tools/cpp_examples/demo-serving/proto/general_model_service.proto b/tools/cpp_examples/demo-serving/proto/general_model_service.proto index 8fedb60e97ec5b81263687b47ff0794880da8671..3a1cba2c72fde19c9288dca1e6302b40273aac93 100755 --- a/tools/cpp_examples/demo-serving/proto/general_model_service.proto +++ b/tools/cpp_examples/demo-serving/proto/general_model_service.proto @@ -42,6 +42,13 @@ message Request { message Response { repeated ModelOutput outputs = 1; repeated int64 profile_time = 2; + bool profile_server = 3; + uint64 log_id = 4; + + // Error code + int32 err_no = 5; + // Error messages + string err_msg = 6; }; message ModelOutput { diff --git a/tools/generate_k8s_yamls.sh b/tools/generate_k8s_yamls.sh index c1b542ef5146739fde75d2e80e7898a2b832512e..892c03bf02e2f5d96b9ba69b637359f2bc362194 100644 --- a/tools/generate_k8s_yamls.sh +++ b/tools/generate_k8s_yamls.sh @@ -12,6 +12,7 @@ function usage echo " --workdir : workdir in image"; echo " --command : command to launch serving" echo " --port : serving port" + echo " --pod_num : number of pod replicas" echo " -h | --help : helper"; } @@ -20,6 +21,9 @@ function parse_args # positional args args=() + # default + pod_num=1 + # named args while [ "$1" != "" ]; do case "$1" in @@ -28,6 +32,7 @@ function parse_args --workdir ) workdir="$2"; shift;; --command ) start_command="$2"; shift;; --port ) port="$2"; shift;; + --pod_num ) pod_num="$2"; shift;; -h | --help ) usage; exit;; # quit and show usage * ) args+=("$1") # if no match, add it to the positional args esac @@ -41,7 +46,7 @@ function parse_args positional_2="${args[1]}" # validate required args - if [[ -z "${app_name}" || -z "${image_name}" || -z "${workdir}" || -z "${start_command}" || -z "${port}" ]]; then + if [[ -z "${app_name}" || -z "${image_name}" || -z "${workdir}" || -z "${start_command}" || -z "${port}" || -z "${pod_num}"]]; then echo "Invalid arguments. check your params again." 
usage exit; @@ -59,6 +64,7 @@ function run echo "named arg: workdir: $workdir" echo "named arg: command: $start_command" echo "named arg: port: $port" + echo "named arg: pod_num: $pod_num" sed -e "s/<< APP_NAME >>/$app_name/g" -e "s/<< IMAGE_NAME >>/$(echo $image_name | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< WORKDIR >>/$(echo $workdir | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< COMMAND >>/\"$(echo $start_command | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')\"/g" -e "s/<< PORT >>/$port/g" tools/k8s_serving.yaml_template > k8s_serving.yaml sed -e "s/<< APP_NAME >>/$app_name/g" -e "s/<< IMAGE_NAME >>/$(echo $image_name | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< WORKDIR >>/$(echo $workdir | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< COMMAND >>/\"$(echo $start_command | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')\"/g" -e "s/<< PORT >>/$port/g" tools/k8s_ingress.yaml_template > k8s_ingress.yaml diff --git a/tools/generate_runtime_docker.sh b/tools/generate_runtime_docker.sh index 1e58527795aeea6b156277a12af2ea36a2086724..e2a2262267565ef52cb4475d2ccd584dc0414bad 100644 --- a/tools/generate_runtime_docker.sh +++ b/tools/generate_runtime_docker.sh @@ -78,7 +78,8 @@ function run echo "named arg: image_name: $image_name" sed -e "s/<>/$base_image/g" -e "s/<>/$python/g" -e "s/<>/$env/g" -e "s/<>/$serving/g" -e "s/<>/$paddle/g" tools/Dockerfile.runtime_template > Dockerfile.tmp - docker build --network=host --build-arg ftp_proxy=http://172.19.57.45:3128 --build-arg https_proxy=http://172.19.57.45:3128 --build-arg http_proxy=http://172.19.57.45:3128 --build-arg HTTP_PROXY=http://172.19.57.45:3128 --build-arg HTTPS_PROXY=http://172.19.57.45:3128 -t $image_name -f Dockerfile.tmp . + #docker build --network=host --build-arg ftp_proxy=http://172.19.57.45:3128 --build-arg https_proxy=http://172.19.57.45:3128 --build-arg http_proxy=http://172.19.57.45:3128 --build-arg HTTP_PROXY=http://172.19.57.45:3128 --build-arg HTTPS_PROXY=http://172.19.57.45:3128 -t $image_name -f Dockerfile.tmp . + docker build --network=host -t $image_name -f Dockerfile.tmp . 
} run "$@"; diff --git a/tools/k8s_serving.yaml_template b/tools/k8s_serving.yaml_template index b66d929bf5e3856c50ba4871cb02a5192a26b6ff..dfe2f3ee670adb88e7ffffbee077f3a1f42a64b2 100644 --- a/tools/k8s_serving.yaml_template +++ b/tools/k8s_serving.yaml_template @@ -20,7 +20,7 @@ metadata: app: << APP_NAME >> name: << APP_NAME >> spec: - replicas: 1 + replicas: << POD_NUM >> selector: matchLabels: app: << APP_NAME >> diff --git a/tools/paddle_env_install.sh b/tools/paddle_env_install.sh index 3f062027b427daaf3cc64612ab5982bdc2c1374c..5dc776f50bb1cca48c3c6352f1a6baf4c075d16d 100644 --- a/tools/paddle_env_install.sh +++ b/tools/paddle_env_install.sh @@ -21,7 +21,7 @@ function env_install() { apt install -y libcurl4-openssl-dev libbz2-dev wget https://paddle-serving.bj.bcebos.com/others/centos_ssl.tar && tar xf centos_ssl.tar && rm -rf centos_ssl.tar && mv libcrypto.so.1.0.2k /usr/lib/libcrypto.so.1.0.2k && mv libssl.so.1.0.2k /usr/lib/libssl.so.1.0.2k && ln -sf /usr/lib/libcrypto.so.1.0.2k /usr/lib/libcrypto.so.10 && ln -sf /usr/lib/libssl.so.1.0.2k /usr/lib/libssl.so.10 && ln -sf /usr/lib/libcrypto.so.10 /usr/lib/libcrypto.so && ln -sf /usr/lib/libssl.so.10 /usr/lib/libssl.so - rm -rf /usr/local/go && wget -qO- https://paddle-ci.gz.bcebos.com/go1.15.12.linux-amd64.tar.gz | \ + rm -rf /usr/local/go && wget -qO- https://paddle-ci.cdn.bcebos.com/go1.17.2.linux-amd64.tar.gz | \ tar -xz -C /usr/local && \ mkdir /root/go && \ mkdir /root/go/bin && \