diff --git a/README.md b/README.md index 3bf8ca7f9f3cb82e6440a746ebb6eddfa8a96365..f95b58d263e146a42f016706c22ce334cb6d965e 100755 --- a/README.md +++ b/README.md @@ -42,8 +42,9 @@ The goal of Paddle Serving is to provide high-performance, flexible and easy-to- - AIStudio tutorial(Chinese) : [Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/1975340) - - Video tutorial(Chinese) : [深度学习服务化部署-以互联网应用为例](https://aistudio.baidu.com/aistudio/course/introduce/19084) +- Edge AI solution based on Paddle Serving & Baidu Intelligent Edge(Chinese) : [基于Paddle Serving&百度智能边缘BIE的边缘AI解决方案](https://mp.weixin.qq.com/s/j0EVlQXaZ7qmoz9Fv96Yrw) +

diff --git a/README_CN.md b/README_CN.md index ae94a50e2cecbf9168ece65d5a040d0ca6fd5218..903d357a9a2994935d87fe0516a6d70ebb31e822 100755 --- a/README_CN.md +++ b/README_CN.md @@ -40,8 +40,9 @@ Paddle Serving依托深度学习框架PaddlePaddle旨在帮助深度学习开发

教程

- AIStudio教程-[Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/1975340) - - 视频教程-[深度学习服务化部署-以互联网应用为例](https://aistudio.baidu.com/aistudio/course/introduce/19084) +- 边缘AI解决方案-[基于Paddle Serving&百度智能边缘BIE的边缘AI解决方案](https://mp.weixin.qq.com/s/j0EVlQXaZ7qmoz9Fv96Yrw) +

diff --git a/core/pdcodegen/src/pdcodegen.cpp b/core/pdcodegen/src/pdcodegen.cpp index 1ad3fe658223b654efa642cda5e9b7de9ea6f794..be34307014e9e689d94a7f7071e133235d5ba53d 100644 --- a/core/pdcodegen/src/pdcodegen.cpp +++ b/core/pdcodegen/src/pdcodegen.cpp @@ -301,15 +301,33 @@ class PdsCodeGenerator : public CodeGenerator { inference_body += "\"\]\";\n"; inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") "; inference_body += "service_name=\[\" << \"$name$\" << \"\]\";\n"; // NOLINT - inference_body += " int err_code = svr->inference(request, response, log_id);\n"; - inference_body += " if (err_code != 0) {\n"; - inference_body += " LOG(WARNING)\n"; - inference_body += " << \"(logid=\" << log_id << \") Failed call "; - inference_body += "inferservice[$name$], name[$service$]\"\n"; - inference_body += " << \", error_code: \" << err_code;\n"; - inference_body += " cntl->SetFailed(err_code, \"InferService inference "; - inference_body += "failed!\");\n"; - inference_body += " }\n"; + if (service_name == "GeneralModelService") { + inference_body += "uint64_t key = 0;"; + inference_body += "int err_code = 0;"; + inference_body += "if (RequestCache::GetSingleton()->Get(*request, response, &key) != 0) {"; + inference_body += " err_code = svr->inference(request, response, log_id);"; + inference_body += " if (err_code != 0) {"; + inference_body += " LOG(WARNING)"; + inference_body += " << \"(logid=\" << log_id << \") Failed call inferservice[GeneralModelService], name[GeneralModelService]\""; + inference_body += " << \", error_code: \" << err_code;"; + inference_body += " cntl->SetFailed(err_code, \"InferService inference failed!\");"; + inference_body += " } else {"; + inference_body += " RequestCache::GetSingleton()->Put(*request, *response, &key);"; + inference_body += " }"; + inference_body += "} else {"; + inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") Get from cache\";"; + inference_body += "}"; + } else { + inference_body += " int err_code = 
svr->inference(request, response, log_id);\n"; + inference_body += " if (err_code != 0) {\n"; + inference_body += " LOG(WARNING)\n"; + inference_body += " << \"(logid=\" << log_id << \") Failed call "; + inference_body += "inferservice[$name$], name[$service$]\"\n"; + inference_body += " << \", error_code: \" << err_code;\n"; + inference_body += " cntl->SetFailed(err_code, \"InferService inference "; + inference_body += "failed!\");\n"; + inference_body += " }\n"; + } inference_body += " gettimeofday(&tv, NULL);\n"; inference_body += " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"; if (service_name == "GeneralModelService") { @@ -1085,15 +1103,33 @@ class PdsCodeGenerator : public CodeGenerator { inference_body += "\"\]\";\n"; inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") "; inference_body += "service_name=\[\" << \"$name$\" << \"\]\";\n"; // NOLINT - inference_body += " int err_code = svr->inference(request, response, log_id);\n"; - inference_body += " if (err_code != 0) {\n"; - inference_body += " LOG(WARNING)\n"; - inference_body += " << \"(logid=\" << log_id << \") Failed call "; - inference_body += "inferservice[$name$], name[$service$]\"\n"; - inference_body += " << \", error_code: \" << err_code;\n"; - inference_body += " cntl->SetFailed(err_code, \"InferService inference "; - inference_body += "failed!\");\n"; - inference_body += " }\n"; + if (service_name == "GeneralModelService") { + inference_body += "uint64_t key = 0;"; + inference_body += "int err_code = 0;"; + inference_body += "if (RequestCache::GetSingleton()->Get(*request, response, &key) != 0) {"; + inference_body += " err_code = svr->inference(request, response, log_id);"; + inference_body += " if (err_code != 0) {"; + inference_body += " LOG(WARNING)"; + inference_body += " << \"(logid=\" << log_id << \") Failed call inferservice[GeneralModelService], name[GeneralModelService]\""; + inference_body += " << \", error_code: \" << err_code;"; + inference_body += " 
cntl->SetFailed(err_code, \"InferService inference failed!\");"; + inference_body += " } else {"; + inference_body += " RequestCache::GetSingleton()->Put(*request, *response, &key);"; + inference_body += " }"; + inference_body += "} else {"; + inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") Get from cache\";"; + inference_body += "}"; + } else { + inference_body += " int err_code = svr->inference(request, response, log_id);\n"; + inference_body += " if (err_code != 0) {\n"; + inference_body += " LOG(WARNING)\n"; + inference_body += " << \"(logid=\" << log_id << \") Failed call "; + inference_body += "inferservice[$name$], name[$service$]\"\n"; + inference_body += " << \", error_code: \" << err_code;\n"; + inference_body += " cntl->SetFailed(err_code, \"InferService inference "; + inference_body += "failed!\");\n"; + inference_body += " }\n"; + } inference_body += " gettimeofday(&tv, NULL);\n"; inference_body += " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n"; if (service_name == "GeneralModelService") { diff --git a/core/predictor/common/constant.cpp b/core/predictor/common/constant.cpp index 8e7044a918abf330f6356bfa3c4bb598a384e0dc..b0acb886950face9383518cb7da227137a9c14be 100644 --- a/core/predictor/common/constant.cpp +++ b/core/predictor/common/constant.cpp @@ -44,8 +44,9 @@ DEFINE_bool(enable_cube, false, "enable cube"); DEFINE_string(general_model_path, "./conf", ""); DEFINE_string(general_model_file, "general_model.prototxt", ""); DEFINE_bool(enable_general_model, true, "enable general model"); -DEFINE_bool(enable_prometheus, true, "enable prometheus"); -DEFINE_int32(prometheus_port, 18010, ""); +DEFINE_bool(enable_prometheus, false, "enable prometheus"); +DEFINE_int32(prometheus_port, 19393, ""); +DEFINE_int64(request_cache_size, 0, "request cache size"); const char* START_OP_NAME = "startup_op"; } // namespace predictor diff --git a/core/predictor/common/constant.h b/core/predictor/common/constant.h index 
b74f69557bc3184566638806bcc5c7ea47b2df53..e0727ce458e04a6982692357150bfaf9c3c2c1f5 100644 --- a/core/predictor/common/constant.h +++ b/core/predictor/common/constant.h @@ -45,6 +45,7 @@ DECLARE_bool(enable_cube); DECLARE_bool(enable_general_model); DECLARE_bool(enable_prometheus); DECLARE_int32(prometheus_port); +DECLARE_int64(request_cache_size); // STATIC Variables extern const char* START_OP_NAME; diff --git a/core/predictor/common/inner_common.h b/core/predictor/common/inner_common.h index 703f14a596ff257cd7e00f316e265322c56b8672..9a7627aef393cac4891e58b0d029706625348209 100644 --- a/core/predictor/common/inner_common.h +++ b/core/predictor/common/inner_common.h @@ -61,6 +61,7 @@ #include "core/predictor/common/utils.h" #include "core/predictor/framework/prometheus_metric.h" +#include "core/predictor/framework/request_cache.h" #ifdef BCLOUD namespace brpc = baidu::rpc; diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h index 0051814593aa409b2c889ec5ffa2724083c00328..5c5ef8730dd82a1ea721600b24f27a7fd1e46594 100644 --- a/core/predictor/framework/infer.h +++ b/core/predictor/framework/infer.h @@ -236,6 +236,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine { } LOG(WARNING) << "Succ load engine, path: " << conf.model_dir(); + RequestCache::GetSingleton()->Clear(); return 0; } diff --git a/core/predictor/framework/request_cache.cpp b/core/predictor/framework/request_cache.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8ac9b7e448f5d1ad60691630c985df414c48b5ac --- /dev/null +++ b/core/predictor/framework/request_cache.cpp @@ -0,0 +1,236 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#include "core/predictor/framework/request_cache.h" + +#include "core/predictor/common/inner_common.h" + +#include "core/sdk-cpp/general_model_service.pb.h" + +namespace baidu { +namespace paddle_serving { +namespace predictor { + +using baidu::paddle_serving::predictor::general_model::Request; +using baidu::paddle_serving::predictor::general_model::Response; + +RequestCache::RequestCache(const int64_t size) + : cache_size_(size), used_size_(0) { + bstop_ = false; + thread_ptr_ = std::unique_ptr( + new std::thread([this]() { this->ThreadLoop(); })); +} + +RequestCache::~RequestCache() { + bstop_ = true; + condition_.notify_all(); + thread_ptr_->join(); +} + +RequestCache* RequestCache::GetSingleton() { + static RequestCache cache(FLAGS_request_cache_size); + return &cache; +} + +int RequestCache::Hash(const Request& req, uint64_t* key) { + uint64_t log_id = req.log_id(); + bool profile_server = req.profile_server(); + Request* r = const_cast(&req); + r->clear_log_id(); + r->clear_profile_server(); + std::string buf = req.SerializeAsString(); + *key = std::hash{}(buf); + r->set_log_id(log_id); + r->set_profile_server(profile_server); + return 0; +} + +int RequestCache::Get(const Request& req, Response* res, uint64_t* key) { + if (!Enabled()) { + return -1; + } + uint64_t local_key = 0; + Hash(req, &local_key); + if (key != nullptr) { + *key = local_key; + } + std::lock_guard lk(cache_mtx_); + auto iter = map_.find(local_key); + if (iter == map_.end()) { + LOG(INFO) << "key not found in cache"; + return -1; + } + auto entry = iter->second; + 
BuildResponse(entry, res); + UpdateLru(local_key); + + return 0; +} + +int RequestCache::Put(const Request& req, const Response& res, uint64_t* key) { + if (!Enabled()) { + return -1; + } + uint64_t local_key = 0; + if (key != nullptr && *key != 0) { + local_key = *key; + } else { + Hash(req, &local_key); + } + if (key != nullptr) { + *key = local_key; + } + + AddTask(local_key, res); + return 0; +} + +int RequestCache::PutImpl(const Response& res, uint64_t key) { + std::lock_guard lk(cache_mtx_); + auto iter = map_.find(key); + if (iter != map_.end()) { + LOG(WARNING) << "key[" << key << "] already exists in cache"; + return -1; + } + + CacheEntry entry; + if (BuildCacheEntry(res, &entry) != 0) { + LOG(WARNING) << "key[" << key << "] build cache entry failed"; + return -1; + } + map_.insert({key, entry}); + UpdateLru(key); + + return 0; +} + +int RequestCache::BuildResponse(const CacheEntry& entry, + predictor::general_model::Response* res) { + if (res == nullptr) { + return -1; + } + res->ParseFromString(entry.buf_); + res->clear_profile_time(); + return 0; +} + +int RequestCache::BuildCacheEntry(const Response& res, CacheEntry* entry) { + if (entry == nullptr) { + return -1; + } + std::lock_guard lk(cache_mtx_); + int size = res.ByteSize(); + if (size >= cache_size_) { + LOG(INFO) << "res size[" << size << "] larger than cache_size[" + << cache_size_ << "]"; + return -1; + } + while (size > GetFreeCacheSize()) { + if (RemoveOne() != 0) { + LOG(ERROR) << "RemoveOne failed so can not build entry"; + return -1; + } + } + entry->buf_ = res.SerializeAsString(); + used_size_ += size; + return 0; +} + +void RequestCache::UpdateLru(uint64_t key) { + std::lock_guard lk(cache_mtx_); + auto lru_iter = std::find(lru_.begin(), lru_.end(), key); + if (lru_iter != lru_.end()) { + lru_.erase(lru_iter); + } + lru_.push_front(key); +} + +bool RequestCache::Enabled() { return cache_size_ > 0; } + +int64_t RequestCache::GetFreeCacheSize() { return cache_size_ - used_size_; } + +int 
RequestCache::RemoveOne() { + std::lock_guard lk(cache_mtx_); + uint64_t lru_key = lru_.back(); + VLOG(1) << "Remove key[" << lru_key << "] from cache"; + auto iter = map_.find(lru_key); + if (iter == map_.end()) { + LOG(ERROR) << "Remove key[" << lru_key << "] not find in cache"; + return -1; + } + auto entry = iter->second; + used_size_ -= entry.buf_.size(); + map_.erase(iter); + lru_.pop_back(); + + return 0; +} + +void RequestCache::ThreadLoop() { + std::queue>> exec_task_queue; + for (;;) { + { + std::unique_lock lock(queue_mutex_); + condition_.wait( + lock, [this]() { return this->bstop_ || this->task_queue_.size(); }); + + if (!task_queue_.size()) { + if (bstop_) { + return; + } + continue; + } + swap(exec_task_queue, task_queue_); + } + while (!exec_task_queue.empty()) { + auto [key, res_ptr] = exec_task_queue.front(); + exec_task_queue.pop(); + PutImpl(*res_ptr, key); + } + } +} + +int RequestCache::AddTask(uint64_t key, const Response& res) { + std::unique_lock lock(queue_mutex_); + std::shared_ptr res_ptr = std::make_shared(res); + task_queue_.push(std::make_pair(key, res_ptr)); + condition_.notify_one(); + return 0; +} + +bool RequestCache::Empty() { + std::lock_guard lk(cache_mtx_); + return lru_.empty(); +} + +int RequestCache::Clear() { + { + std::unique_lock lock(queue_mutex_); + std::queue>> empty; + swap(empty, task_queue_); + } + int count = 0; + { + std::lock_guard lk(cache_mtx_); + count = lru_.size(); + lru_.clear(); + map_.clear(); + } + LOG(INFO) << "Clear " << count << " key!"; + return 0; +} + +} // namespace predictor +} // namespace paddle_serving +} // namespace baidu \ No newline at end of file diff --git a/core/predictor/framework/request_cache.h b/core/predictor/framework/request_cache.h new file mode 100644 index 0000000000000000000000000000000000000000..014775eca553a074ac904f6a779947a0ebbcb011 --- /dev/null +++ b/core/predictor/framework/request_cache.h @@ -0,0 +1,98 @@ +// Copyright (c) 2021 PaddlePaddle Authors. 
All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace baidu { +namespace paddle_serving { +namespace predictor { + +namespace general_model { +class Request; +class Response; +} // namespace general_model + +struct CacheEntry { + explicit CacheEntry() {} + std::string buf_; +}; + +class RequestCache { + public: + explicit RequestCache(const int64_t size); + ~RequestCache(); + + static RequestCache* GetSingleton(); + + int Hash(const predictor::general_model::Request& req, uint64_t* key); + + int Get(const predictor::general_model::Request& req, + predictor::general_model::Response* res, + uint64_t* key = nullptr); + + int Put(const predictor::general_model::Request& req, + const predictor::general_model::Response& res, + uint64_t* key = nullptr); + + void ThreadLoop(); + + bool Empty(); + + int Clear(); + + private: + int BuildResponse(const CacheEntry& entry, + predictor::general_model::Response* res); + + int BuildCacheEntry(const predictor::general_model::Response& res, + CacheEntry* entry); + + void UpdateLru(uint64_t key); + + bool Enabled(); + + int64_t GetFreeCacheSize(); + + int RemoveOne(); + + int AddTask(uint64_t key, const predictor::general_model::Response& res); + + int PutImpl(const predictor::general_model::Response& res, uint64_t key); + + uint64_t cache_size_; + uint64_t used_size_; + std::unordered_map map_; + 
std::list lru_; + std::recursive_mutex cache_mtx_; + std::atomic bstop_{false}; + std::condition_variable condition_; + std::mutex queue_mutex_; + std::queue< + std::pair>> + task_queue_; + std::unique_ptr thread_ptr_; +}; + +} // namespace predictor +} // namespace paddle_serving +} // namespace baidu \ No newline at end of file diff --git a/doc/Install_CN.md b/doc/Install_CN.md index 8648d26d42a88859e5744badc74629b92f395da1..24dbaba054ad7638d9d50a021e0bb3e29b9ff338 100644 --- a/doc/Install_CN.md +++ b/doc/Install_CN.md @@ -10,7 +10,7 @@ ## 1.启动开发镜像 -**同时支持使用Serving镜像和Paddle镜像,1.1和1.2章节中的操作2选1即可。** +**同时支持使用Serving镜像和Paddle镜像,1.1和1.2章节中的操作2选1即可。** 在Paddle docker镜像上部署Serving服务需要安装额外依赖库,因此,我们直接使用Serving开发镜像。 ### 1.1 Serving开发镜像(CPU/GPU 2选1) **CPU:** ``` diff --git a/doc/Install_EN.md b/doc/Install_EN.md index db4fbe211a50c9a4219d4d4e36c51f54c869fc0b..c687e30bd915a74fb0181a7df306adc9fce5d782 100644 --- a/doc/Install_EN.md +++ b/doc/Install_EN.md @@ -9,7 +9,7 @@ **Tip-2**: The GPU environments in the following examples are all cuda10.2-cudnn7. If you use Python Pipeline to deploy and need Nvidia TensorRT to optimize prediction performance, please refer to [Supported Mirroring Environment and Instructions](#4.-Supported-Docker-Images-and-Instruction) to choose other versions. ## 1. Start the Docker Container -**Both Serving Dev Image and Paddle Dev Image are supported at the same time. You can choose 1 from the operation 2 in chapters 1.1 and 1.2.** +**Both Serving Dev Image and Paddle Dev Image are supported at the same time. You can choose 1 from the operation 2 in chapters 1.1 and 1.2.**Deploying the Serving service on the Paddle docker image requires the installation of additional dependency libraries. Therefore, we directly use the Serving development image. 
### 1.1 Serving Dev Images (CPU/GPU 2 choose 1) **CPU:** diff --git a/doc/Run_On_Kubernetes_CN.md b/doc/Run_On_Kubernetes_CN.md index 951fda78dd0c04d2faa7db5b84cfa845235fbaa5..9b676d4aa18d09f8eee7f4b965898e9eb632f967 100644 --- a/doc/Run_On_Kubernetes_CN.md +++ b/doc/Run_On_Kubernetes_CN.md @@ -2,13 +2,13 @@ Paddle Serving在0.6.0版本开始支持在Kubenetes集群上部署,并提供反向代理和安全网关支持。与Paddle Serving在Docker镜像中开发类似,Paddle Serving 模型在Kubenetes集群部署需要制作轻量化的运行镜像,并使用kubectl工具在集群上部署。 -### 集群准备 +### 1.集群准备 如果您还没有Kubenetes集群,我们推荐[购买并使用百度智能云CCE集群](https://cloud.baidu.com/doc/CCE/index.html). 如果是其他云服务商提供的集群,或者自行安装Kubenetes集群,请遵照对应的教程。 您还需要准备一个用于Kubenetes集群部署使用的镜像仓库,通常与云服务提供商绑定,如果您使用的是百度智能云的CCE集群,可以参照[百度智能云CCR镜像仓库使用方式](https://cloud.baidu.com/doc/CCR/index.html)。当然Docker Hub也可以作为镜像仓库,但是可能在部署时会出现下载速度慢的情况。 -### 环境准备 +### 2.环境准备 需要在Kubenetes集群上安装网关工具KONG。 @@ -16,20 +16,20 @@ Paddle Serving在0.6.0版本开始支持在Kubenetes集群上部署,并提供 kubectl apply -f https://bit.ly/kong-ingress-dbless ``` +### 选择Serving开发镜像 (可选) +您可以直接选择已生成的Serving [DOCKER开发镜像列表](./Docker_Images_CN.md)作为Kubernetes部署的首选,携带了开发工具,可用于调试和编译代码。 +### 制作Serving运行镜像(可选) -### 制作Serving运行镜像(可选): +与[DOCKER开发镜像列表](./Docker_Images_CN.md)文档相比,开发镜像用于调试、编译代码,携带了大量的开发工具,因此镜像体积较大。运行镜像通常容器体积更小的轻量级容器,可在边缘端设备上部署。如您不需要轻量级运行容器,请直接跳过这一部分。 -首先您需要确定运行镜像的具体环境。和[DOCKER开发镜像列表](./Docker_Images_CN.md)文档相比,开发镜像用于调试、编译代码,携带了大量的开发工具,因此镜像体积较大。运行镜像通常要求缩小容器体积以提高部署的灵活性。如果您不太需要轻量级的运行容器,请直接跳过这一部分。 - -在`tools/generate_runtime_docker.sh`文件下,它的使用方式如下 +我们提供了运行镜像的生成脚本在Serving代码库下`tools/generate_runtime_docker.sh`文件,通过以下命令可生成代码。 ```bash -bash tools/generate_runtime_docker.sh --env cuda10.1 --python 3.6 --name serving_runtime:cuda10.1-py36 +bash tools/generate_runtime_docker.sh --env cuda10.1 --python 3.7 --image_name serving_runtime:cuda10.1-py37 --paddle 2.2.0 --serving 0.7.0 ``` -会生成 cuda10.1,python 3.6,serving版本0.7.0 还有 paddle版本2.2.0的运行镜像。如果有其他疑问,可以执行下列语句得到帮助信息。 -如果您需要老版本Serving运行镜像,请checkout到老版本分支。 +会生成 cuda10.1,python 3.7,serving版本0.7.0 还有 
paddle版本2.2.0的运行镜像。如果有其他疑问，可以执行下列语句得到帮助信息。强烈建议您使用最新的paddle和serving的版本（2个版本是对应的如paddle 2.2.x 与serving 0.7.0对应，paddle 2.1.x 与 serving 0.6.x对应），因为更早的版本上出现的错误只在最新版本修复，无法在历史版本中修复。 ``` bash tools/generate_runtime_docker.sh --help ``` @@ -40,7 +40,7 @@ bash tools/generate_runtime_docker.sh --help - paddle-serving-server, paddle-serving-client,paddle-serving-app,paddlepaddle,具体版本可以在tools/runtime.dockerfile当中查看，同时，如果有定制化的需求，也可以在该文件中进行定制化。 - paddle-serving-server 二进制可执行程序 -也就是说，运行镜像在生成之后，我们只需要将我们运行的代码（如果有）和模型搬运到镜像中就可以。生成后的镜像名为`paddle_serving:cuda10.2-py36` +也就是说，运行镜像在生成之后，我们只需要将我们运行的代码（如果有）和模型搬运到镜像中就可以。生成后的镜像名为`paddle_serving:cuda10.2-py37` ### 添加您的代码和模型 diff --git a/doc/images/wechat_group_1.jpeg b/doc/images/wechat_group_1.jpeg index d907bee65143b83107393ba763e194937d72111a..9259862d82879075dd40ca8243a58f049378c887 100644 Binary files a/doc/images/wechat_group_1.jpeg and b/doc/images/wechat_group_1.jpeg differ diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 19f93dfe1782690333c32411a7545b1641b18a0e..9e9d87a652e56a46981423821dd9d01e7b4288f5 100755 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -209,6 +209,8 @@ def serve_args(): "--enable_prometheus", default=False, action="store_true", help="Use Prometheus") parser.add_argument( "--prometheus_port", type=int, default=19393, help="Port of the Prometheus") + parser.add_argument( + "--request_cache_size", type=int, default=0, help="Size of the request cache") return parser.parse_args() @@ -292,6 +294,7 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi server.set_max_body_size(max_body_size) server.set_enable_prometheus(args.enable_prometheus) server.set_prometheus_port(args.prometheus_port) + server.set_request_cache_size(args.request_cache_size) if args.use_trt and device == "gpu": server.set_trt() diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py index 
f1d0b63178a24ed9f506bf6a2fdbb67a6cc01002..e369c57d4d350207d65d048a96eb052db279bd30 100755 --- a/python/paddle_serving_server/server.py +++ b/python/paddle_serving_server/server.py @@ -100,6 +100,7 @@ class Server(object): ] self.enable_prometheus = False self.prometheus_port = 19393 + self.request_cache_size = 0 def get_fetch_list(self, infer_node_idx=-1): fetch_names = [ @@ -207,6 +208,9 @@ class Server(object): def set_prometheus_port(self, prometheus_port): self.prometheus_port = prometheus_port + def set_request_cache_size(self, request_cache_size): + self.request_cache_size = request_cache_size + def _prepare_engine(self, model_config_paths, device, use_encryption_model): self.device = device if self.model_toolkit_conf == None: @@ -615,6 +619,17 @@ class Server(object): self.max_body_size, self.enable_prometheus, self.prometheus_port) + if self.enable_prometheus: + command = command + \ + "-enable_prometheus={} " \ + "-prometheus_port {} ".format( + self.enable_prometheus, + self.prometheus_port) + if self.request_cache_size > 0: + command = command + \ + "-request_cache_size {} ".format( + self.request_cache_size + ) print("Going to Run Comand") print(command)