diff --git a/README.md b/README.md
index 3bf8ca7f9f3cb82e6440a746ebb6eddfa8a96365..f95b58d263e146a42f016706c22ce334cb6d965e 100755
--- a/README.md
+++ b/README.md
@@ -42,8 +42,9 @@ The goal of Paddle Serving is to provide high-performance, flexible and easy-to-
- AIStudio tutorial(Chinese) : [Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/1975340)
-
- Video tutorial(Chinese) : [深度学习服务化部署-以互联网应用为例](https://aistudio.baidu.com/aistudio/course/introduce/19084)
+- Edge AI solution based on Paddle Serving & Baidu Intelligent Edge(Chinese) : [基于Paddle Serving&百度智能边缘BIE的边缘AI解决方案](https://mp.weixin.qq.com/s/j0EVlQXaZ7qmoz9Fv96Yrw)
+
diff --git a/README_CN.md b/README_CN.md
index ae94a50e2cecbf9168ece65d5a040d0ca6fd5218..903d357a9a2994935d87fe0516a6d70ebb31e822 100755
--- a/README_CN.md
+++ b/README_CN.md
@@ -40,8 +40,9 @@ Relying on the PaddlePaddle deep learning framework, Paddle Serving aims to help deep learning developers
Tutorials
- AIStudio tutorial: [Paddle Serving服务化部署框架](https://www.paddlepaddle.org.cn/tutorials/projectdetail/1975340)
-
- Video tutorial: [深度学习服务化部署-以互联网应用为例](https://aistudio.baidu.com/aistudio/course/introduce/19084)
+- Edge AI solution based on Paddle Serving & Baidu Intelligent Edge(Chinese) : [基于Paddle Serving&百度智能边缘BIE的边缘AI解决方案](https://mp.weixin.qq.com/s/j0EVlQXaZ7qmoz9Fv96Yrw)
+
diff --git a/core/pdcodegen/src/pdcodegen.cpp b/core/pdcodegen/src/pdcodegen.cpp
index 1ad3fe658223b654efa642cda5e9b7de9ea6f794..be34307014e9e689d94a7f7071e133235d5ba53d 100644
--- a/core/pdcodegen/src/pdcodegen.cpp
+++ b/core/pdcodegen/src/pdcodegen.cpp
@@ -301,15 +301,33 @@ class PdsCodeGenerator : public CodeGenerator {
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "service_name=\[\" << \"$name$\" << \"\]\";\n"; // NOLINT
- inference_body += " int err_code = svr->inference(request, response, log_id);\n";
- inference_body += " if (err_code != 0) {\n";
- inference_body += " LOG(WARNING)\n";
- inference_body += " << \"(logid=\" << log_id << \") Failed call ";
- inference_body += "inferservice[$name$], name[$service$]\"\n";
- inference_body += " << \", error_code: \" << err_code;\n";
- inference_body += " cntl->SetFailed(err_code, \"InferService inference ";
- inference_body += "failed!\");\n";
- inference_body += " }\n";
+ if (service_name == "GeneralModelService") {
+ inference_body += "uint64_t key = 0;";
+ inference_body += "int err_code = 0;";
+ inference_body += "if (RequestCache::GetSingleton()->Get(*request, response, &key) != 0) {";
+ inference_body += " err_code = svr->inference(request, response, log_id);";
+ inference_body += " if (err_code != 0) {";
+ inference_body += " LOG(WARNING)";
+ inference_body += " << \"(logid=\" << log_id << \") Failed call inferservice[GeneralModelService], name[GeneralModelService]\"";
+ inference_body += " << \", error_code: \" << err_code;";
+ inference_body += " cntl->SetFailed(err_code, \"InferService inference failed!\");";
+ inference_body += " } else {";
+ inference_body += " RequestCache::GetSingleton()->Put(*request, *response, &key);";
+ inference_body += " }";
+ inference_body += "} else {";
+ inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") Get from cache\";";
+ inference_body += "}";
+ } else {
+ inference_body += " int err_code = svr->inference(request, response, log_id);\n";
+ inference_body += " if (err_code != 0) {\n";
+ inference_body += " LOG(WARNING)\n";
+ inference_body += " << \"(logid=\" << log_id << \") Failed call ";
+ inference_body += "inferservice[$name$], name[$service$]\"\n";
+ inference_body += " << \", error_code: \" << err_code;\n";
+ inference_body += " cntl->SetFailed(err_code, \"InferService inference ";
+ inference_body += "failed!\");\n";
+ inference_body += " }\n";
+ }
inference_body += " gettimeofday(&tv, NULL);\n";
inference_body += " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n";
if (service_name == "GeneralModelService") {
@@ -1085,15 +1103,33 @@ class PdsCodeGenerator : public CodeGenerator {
inference_body += "\"\]\";\n";
inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") ";
inference_body += "service_name=\[\" << \"$name$\" << \"\]\";\n"; // NOLINT
- inference_body += " int err_code = svr->inference(request, response, log_id);\n";
- inference_body += " if (err_code != 0) {\n";
- inference_body += " LOG(WARNING)\n";
- inference_body += " << \"(logid=\" << log_id << \") Failed call ";
- inference_body += "inferservice[$name$], name[$service$]\"\n";
- inference_body += " << \", error_code: \" << err_code;\n";
- inference_body += " cntl->SetFailed(err_code, \"InferService inference ";
- inference_body += "failed!\");\n";
- inference_body += " }\n";
+ if (service_name == "GeneralModelService") {
+ inference_body += "uint64_t key = 0;";
+ inference_body += "int err_code = 0;";
+ inference_body += "if (RequestCache::GetSingleton()->Get(*request, response, &key) != 0) {";
+ inference_body += " err_code = svr->inference(request, response, log_id);";
+ inference_body += " if (err_code != 0) {";
+ inference_body += " LOG(WARNING)";
+ inference_body += " << \"(logid=\" << log_id << \") Failed call inferservice[GeneralModelService], name[GeneralModelService]\"";
+ inference_body += " << \", error_code: \" << err_code;";
+ inference_body += " cntl->SetFailed(err_code, \"InferService inference failed!\");";
+ inference_body += " } else {";
+ inference_body += " RequestCache::GetSingleton()->Put(*request, *response, &key);";
+ inference_body += " }";
+ inference_body += "} else {";
+ inference_body += " LOG(INFO) << \"(logid=\" << log_id << \") Get from cache\";";
+ inference_body += "}";
+ } else {
+ inference_body += " int err_code = svr->inference(request, response, log_id);\n";
+ inference_body += " if (err_code != 0) {\n";
+ inference_body += " LOG(WARNING)\n";
+ inference_body += " << \"(logid=\" << log_id << \") Failed call ";
+ inference_body += "inferservice[$name$], name[$service$]\"\n";
+ inference_body += " << \", error_code: \" << err_code;\n";
+ inference_body += " cntl->SetFailed(err_code, \"InferService inference ";
+ inference_body += "failed!\");\n";
+ inference_body += " }\n";
+ }
inference_body += " gettimeofday(&tv, NULL);\n";
inference_body += " long end = tv.tv_sec * 1000000 + tv.tv_usec;\n";
if (service_name == "GeneralModelService") {
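For reference, once the `inference_body` string fragments above are concatenated, the generator emits the following body for `GeneralModelService`. This is a reconstruction assembled from the fragments in these two hunks, not part of the patch itself; `svr`, `cntl`, `request`, `response`, and `log_id` are declared elsewhere in the generated method:

```cpp
// Reconstructed cache-aware inference body emitted by pdcodegen for
// GeneralModelService (identifiers such as svr and cntl come from the
// surrounding generated method).
uint64_t key = 0;
int err_code = 0;
// Try to serve the response from the request cache first; Get() also
// computes the cache key so a later Put() can reuse it.
if (RequestCache::GetSingleton()->Get(*request, response, &key) != 0) {
  // Cache miss: run the real inference.
  err_code = svr->inference(request, response, log_id);
  if (err_code != 0) {
    LOG(WARNING)
        << "(logid=" << log_id
        << ") Failed call inferservice[GeneralModelService], name[GeneralModelService]"
        << ", error_code: " << err_code;
    cntl->SetFailed(err_code, "InferService inference failed!");
  } else {
    // Only successful responses are cached.
    RequestCache::GetSingleton()->Put(*request, *response, &key);
  }
} else {
  LOG(INFO) << "(logid=" << log_id << ") Get from cache";
}
```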
diff --git a/core/predictor/common/constant.cpp b/core/predictor/common/constant.cpp
index 8e7044a918abf330f6356bfa3c4bb598a384e0dc..b0acb886950face9383518cb7da227137a9c14be 100644
--- a/core/predictor/common/constant.cpp
+++ b/core/predictor/common/constant.cpp
@@ -44,8 +44,9 @@ DEFINE_bool(enable_cube, false, "enable cube");
DEFINE_string(general_model_path, "./conf", "");
DEFINE_string(general_model_file, "general_model.prototxt", "");
DEFINE_bool(enable_general_model, true, "enable general model");
-DEFINE_bool(enable_prometheus, true, "enable prometheus");
-DEFINE_int32(prometheus_port, 18010, "");
+DEFINE_bool(enable_prometheus, false, "enable prometheus");
+DEFINE_int32(prometheus_port, 19393, "");
+DEFINE_int64(request_cache_size, 0, "max request cache size in bytes, 0 to disable");
const char* START_OP_NAME = "startup_op";
} // namespace predictor
diff --git a/core/predictor/common/constant.h b/core/predictor/common/constant.h
index b74f69557bc3184566638806bcc5c7ea47b2df53..e0727ce458e04a6982692357150bfaf9c3c2c1f5 100644
--- a/core/predictor/common/constant.h
+++ b/core/predictor/common/constant.h
@@ -45,6 +45,7 @@ DECLARE_bool(enable_cube);
DECLARE_bool(enable_general_model);
DECLARE_bool(enable_prometheus);
DECLARE_int32(prometheus_port);
+DECLARE_int64(request_cache_size);
// STATIC Variables
extern const char* START_OP_NAME;
diff --git a/core/predictor/common/inner_common.h b/core/predictor/common/inner_common.h
index 703f14a596ff257cd7e00f316e265322c56b8672..9a7627aef393cac4891e58b0d029706625348209 100644
--- a/core/predictor/common/inner_common.h
+++ b/core/predictor/common/inner_common.h
@@ -61,6 +61,7 @@
#include "core/predictor/common/utils.h"
#include "core/predictor/framework/prometheus_metric.h"
+#include "core/predictor/framework/request_cache.h"
#ifdef BCLOUD
namespace brpc = baidu::rpc;
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index 0051814593aa409b2c889ec5ffa2724083c00328..5c5ef8730dd82a1ea721600b24f27a7fd1e46594 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -236,6 +236,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
}
LOG(WARNING) << "Succ load engine, path: " << conf.model_dir();
+ RequestCache::GetSingleton()->Clear();
return 0;
}
diff --git a/core/predictor/framework/request_cache.cpp b/core/predictor/framework/request_cache.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8ac9b7e448f5d1ad60691630c985df414c48b5ac
--- /dev/null
+++ b/core/predictor/framework/request_cache.cpp
@@ -0,0 +1,236 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "core/predictor/framework/request_cache.h"
+
+#include "core/predictor/common/inner_common.h"
+
+#include "core/sdk-cpp/general_model_service.pb.h"
+
+namespace baidu {
+namespace paddle_serving {
+namespace predictor {
+
+using baidu::paddle_serving::predictor::general_model::Request;
+using baidu::paddle_serving::predictor::general_model::Response;
+
+RequestCache::RequestCache(const int64_t size)
+ : cache_size_(size), used_size_(0) {
+ bstop_ = false;
+ thread_ptr_ = std::unique_ptr<std::thread>(
+ new std::thread([this]() { this->ThreadLoop(); }));
+}
+
+RequestCache::~RequestCache() {
+ bstop_ = true;
+ condition_.notify_all();
+ thread_ptr_->join();
+}
+
+RequestCache* RequestCache::GetSingleton() {
+ static RequestCache cache(FLAGS_request_cache_size);
+ return &cache;
+}
+
+int RequestCache::Hash(const Request& req, uint64_t* key) {
+ uint64_t log_id = req.log_id();
+ bool profile_server = req.profile_server();
+ Request* r = const_cast<Request*>(&req);
+ r->clear_log_id();
+ r->clear_profile_server();
+ std::string buf = req.SerializeAsString();
+ *key = std::hash<std::string>{}(buf);
+ r->set_log_id(log_id);
+ r->set_profile_server(profile_server);
+ return 0;
+}
+
+int RequestCache::Get(const Request& req, Response* res, uint64_t* key) {
+ if (!Enabled()) {
+ return -1;
+ }
+ uint64_t local_key = 0;
+ Hash(req, &local_key);
+ if (key != nullptr) {
+ *key = local_key;
+ }
+ std::lock_guard<std::recursive_mutex> lk(cache_mtx_);
+ auto iter = map_.find(local_key);
+ if (iter == map_.end()) {
+ LOG(INFO) << "key not found in cache";
+ return -1;
+ }
+ auto entry = iter->second;
+ BuildResponse(entry, res);
+ UpdateLru(local_key);
+
+ return 0;
+}
+
+int RequestCache::Put(const Request& req, const Response& res, uint64_t* key) {
+ if (!Enabled()) {
+ return -1;
+ }
+ uint64_t local_key = 0;
+ if (key != nullptr && *key != 0) {
+ local_key = *key;
+ } else {
+ Hash(req, &local_key);
+ }
+ if (key != nullptr) {
+ *key = local_key;
+ }
+
+ AddTask(local_key, res);
+ return 0;
+}
+
+int RequestCache::PutImpl(const Response& res, uint64_t key) {
+ std::lock_guard<std::recursive_mutex> lk(cache_mtx_);
+ auto iter = map_.find(key);
+ if (iter != map_.end()) {
+ LOG(WARNING) << "key[" << key << "] already exists in cache";
+ return -1;
+ }
+
+ CacheEntry entry;
+ if (BuildCacheEntry(res, &entry) != 0) {
+ LOG(WARNING) << "key[" << key << "] build cache entry failed";
+ return -1;
+ }
+ map_.insert({key, entry});
+ UpdateLru(key);
+
+ return 0;
+}
+
+int RequestCache::BuildResponse(const CacheEntry& entry,
+ predictor::general_model::Response* res) {
+ if (res == nullptr) {
+ return -1;
+ }
+ res->ParseFromString(entry.buf_);
+ res->clear_profile_time();
+ return 0;
+}
+
+int RequestCache::BuildCacheEntry(const Response& res, CacheEntry* entry) {
+ if (entry == nullptr) {
+ return -1;
+ }
+ std::lock_guard<std::recursive_mutex> lk(cache_mtx_);
+ int size = res.ByteSize();
+ if (size >= cache_size_) {
+ LOG(INFO) << "res size[" << size << "] larger than cache_size["
+ << cache_size_ << "]";
+ return -1;
+ }
+ while (size > GetFreeCacheSize()) {
+ if (RemoveOne() != 0) {
+ LOG(ERROR) << "RemoveOne failed so can not build entry";
+ return -1;
+ }
+ }
+ entry->buf_ = res.SerializeAsString();
+ used_size_ += size;
+ return 0;
+}
+
+void RequestCache::UpdateLru(uint64_t key) {
+ std::lock_guard<std::recursive_mutex> lk(cache_mtx_);
+ auto lru_iter = std::find(lru_.begin(), lru_.end(), key);
+ if (lru_iter != lru_.end()) {
+ lru_.erase(lru_iter);
+ }
+ lru_.push_front(key);
+}
+
+bool RequestCache::Enabled() { return cache_size_ > 0; }
+
+int64_t RequestCache::GetFreeCacheSize() { return cache_size_ - used_size_; }
+
+int RequestCache::RemoveOne() {
+ std::lock_guard<std::recursive_mutex> lk(cache_mtx_);
+ uint64_t lru_key = lru_.back();
+ VLOG(1) << "Remove key[" << lru_key << "] from cache";
+ auto iter = map_.find(lru_key);
+ if (iter == map_.end()) {
+ LOG(ERROR) << "Remove key[" << lru_key << "] not find in cache";
+ return -1;
+ }
+ auto entry = iter->second;
+ used_size_ -= entry.buf_.size();
+ map_.erase(iter);
+ lru_.pop_back();
+
+ return 0;
+}
+
+void RequestCache::ThreadLoop() {
+ std::queue<std::pair<uint64_t, std::shared_ptr<Response>>> exec_task_queue;
+ for (;;) {
+ {
+ std::unique_lock<std::mutex> lock(queue_mutex_);
+ condition_.wait(
+ lock, [this]() { return this->bstop_ || this->task_queue_.size(); });
+
+ if (!task_queue_.size()) {
+ if (bstop_) {
+ return;
+ }
+ continue;
+ }
+ swap(exec_task_queue, task_queue_);
+ }
+ while (!exec_task_queue.empty()) {
+ auto [key, res_ptr] = exec_task_queue.front();
+ exec_task_queue.pop();
+ PutImpl(*res_ptr, key);
+ }
+ }
+}
+
+int RequestCache::AddTask(uint64_t key, const Response& res) {
+ std::unique_lock<std::mutex> lock(queue_mutex_);
+ std::shared_ptr<Response> res_ptr = std::make_shared<Response>(res);
+ task_queue_.push(std::make_pair(key, res_ptr));
+ condition_.notify_one();
+ return 0;
+}
+
+bool RequestCache::Empty() {
+ std::lock_guard<std::recursive_mutex> lk(cache_mtx_);
+ return lru_.empty();
+}
+
+int RequestCache::Clear() {
+ {
+ std::unique_lock<std::mutex> lock(queue_mutex_);
+ std::queue<std::pair<uint64_t, std::shared_ptr<Response>>> empty;
+ swap(empty, task_queue_);
+ }
+ int count = 0;
+ {
+ std::lock_guard<std::recursive_mutex> lk(cache_mtx_);
+ count = lru_.size();
+ lru_.clear();
+ map_.clear();
+ }
+ LOG(INFO) << "Clear " << count << " key!";
+ return 0;
+}
+
+} // namespace predictor
+} // namespace paddle_serving
+} // namespace baidu
\ No newline at end of file
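One subtlety worth calling out: `Hash()` temporarily clears `log_id` and `profile_server` before serializing, so two requests that differ only in per-call metadata map to the same cache entry. A minimal, self-contained sketch of that keying idea, where `FakeRequest` is a hypothetical stand-in for the protobuf `Request`:

```cpp
#include <cstdint>
#include <functional>
#include <string>

// Hypothetical stand-in for the protobuf Request: only the payload
// feeds the key; per-call metadata such as log_id is excluded,
// mirroring what RequestCache::Hash does by clearing those fields
// before calling SerializeAsString().
struct FakeRequest {
  std::string payload;  // contributes to the cache key
  uint64_t log_id = 0;  // ignored by the cache key
};

uint64_t CacheKey(const FakeRequest& req) {
  return std::hash<std::string>{}(req.payload);
}

int main() {
  FakeRequest a{"tensor-bytes", /*log_id=*/1};
  FakeRequest b{"tensor-bytes", /*log_id=*/2};
  // Same payload, different log_id: identical keys, so after a's
  // response is Put() into the cache, b is served as a hit.
  return CacheKey(a) == CacheKey(b) ? 0 : 1;
}
```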
diff --git a/core/predictor/framework/request_cache.h b/core/predictor/framework/request_cache.h
new file mode 100644
index 0000000000000000000000000000000000000000..014775eca553a074ac904f6a779947a0ebbcb011
--- /dev/null
+++ b/core/predictor/framework/request_cache.h
@@ -0,0 +1,98 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#pragma once
+
+#include <atomic>
+#include <condition_variable>
+#include <list>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <string>
+#include <thread>
+#include <unordered_map>
+
+namespace baidu {
+namespace paddle_serving {
+namespace predictor {
+
+namespace general_model {
+class Request;
+class Response;
+} // namespace general_model
+
+struct CacheEntry {
+ explicit CacheEntry() {}
+ std::string buf_;
+};
+
+class RequestCache {
+ public:
+ explicit RequestCache(const int64_t size);
+ ~RequestCache();
+
+ static RequestCache* GetSingleton();
+
+ int Hash(const predictor::general_model::Request& req, uint64_t* key);
+
+ int Get(const predictor::general_model::Request& req,
+ predictor::general_model::Response* res,
+ uint64_t* key = nullptr);
+
+ int Put(const predictor::general_model::Request& req,
+ const predictor::general_model::Response& res,
+ uint64_t* key = nullptr);
+
+ void ThreadLoop();
+
+ bool Empty();
+
+ int Clear();
+
+ private:
+ int BuildResponse(const CacheEntry& entry,
+ predictor::general_model::Response* res);
+
+ int BuildCacheEntry(const predictor::general_model::Response& res,
+ CacheEntry* entry);
+
+ void UpdateLru(uint64_t key);
+
+ bool Enabled();
+
+ int64_t GetFreeCacheSize();
+
+ int RemoveOne();
+
+ int AddTask(uint64_t key, const predictor::general_model::Response& res);
+
+ int PutImpl(const predictor::general_model::Response& res, uint64_t key);
+
+ uint64_t cache_size_;
+ uint64_t used_size_;
+ std::unordered_map<uint64_t, CacheEntry> map_;
+ std::list<uint64_t> lru_;
+ std::recursive_mutex cache_mtx_;
+ std::atomic bstop_{false};
+ std::condition_variable condition_;
+ std::mutex queue_mutex_;
+ std::queue<
+ std::pair<uint64_t, std::shared_ptr<predictor::general_model::Response>>>
+ task_queue_;
+ std::unique_ptr<std::thread> thread_ptr_;
+};
+
+} // namespace predictor
+} // namespace paddle_serving
+} // namespace baidu
\ No newline at end of file
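Taken together, the API is meant to be used in a get-or-compute pattern, as the generated code in `pdcodegen.cpp` does. The following sketch of a caller is illustrative only, assuming the `general_model` protobuf types and an `infer()` engine call that are not defined here:

```cpp
// Sketch of the intended get-or-compute call pattern (assumes the
// general_model Request/Response protobufs and an infer() function
// that returns 0 on success).
using baidu::paddle_serving::predictor::RequestCache;
using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::Response;

int infer(const Request& req, Response* res);  // assumed engine call

void Serve(const Request& request, Response* response) {
  uint64_t key = 0;
  // On a hit, Get() fills the response; on a miss it still computes
  // the key, which Put() reuses below.
  if (RequestCache::GetSingleton()->Get(request, response, &key) == 0) {
    return;  // served from cache
  }
  if (infer(request, response) == 0) {
    // Put() only enqueues the entry; the background ThreadLoop()
    // inserts it, evicting LRU entries as needed to stay under
    // FLAGS_request_cache_size bytes.
    RequestCache::GetSingleton()->Put(request, *response, &key);
  }
}
```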
diff --git a/doc/Install_CN.md b/doc/Install_CN.md
index 8648d26d42a88859e5744badc74629b92f395da1..24dbaba054ad7638d9d50a021e0bb3e29b9ff338 100644
--- a/doc/Install_CN.md
+++ b/doc/Install_CN.md
@@ -10,7 +10,7 @@
## 1. Start the development image
-**Both the Serving image and the Paddle image are supported; choose either of the two options in sections 1.1 and 1.2.**
+**Both the Serving image and the Paddle image are supported; choose either of the two options in sections 1.1 and 1.2.** Deploying the Serving service on the Paddle docker image requires installing additional dependency libraries, so we use the Serving development image directly.
### 1.1 Serving development image (choose one: CPU/GPU)
**CPU:**
```
diff --git a/doc/Install_EN.md b/doc/Install_EN.md
index db4fbe211a50c9a4219d4d4e36c51f54c869fc0b..c687e30bd915a74fb0181a7df306adc9fce5d782 100644
--- a/doc/Install_EN.md
+++ b/doc/Install_EN.md
@@ -9,7 +9,7 @@
**Tip-2**: The GPU environments in the following examples are all cuda10.2-cudnn7. If you use Python Pipeline to deploy and need Nvidia TensorRT to optimize prediction performance, please refer to [Supported Mirroring Environment and Instructions](#4.-Supported-Docker-Images-and-Instruction) to choose other versions.
## 1. Start the Docker Container
-**Both Serving Dev Image and Paddle Dev Image are supported at the same time. You can choose 1 from the operation 2 in chapters 1.1 and 1.2.**
+**Both the Serving dev image and the Paddle dev image are supported; choose one of the two options in sections 1.1 and 1.2.** Deploying the Serving service on the Paddle docker image requires installing additional dependency libraries, so we use the Serving development image directly.
### 1.1 Serving Dev Images (CPU/GPU 2 choose 1)
**CPU:**
diff --git a/doc/Run_On_Kubernetes_CN.md b/doc/Run_On_Kubernetes_CN.md
index 951fda78dd0c04d2faa7db5b84cfa845235fbaa5..9b676d4aa18d09f8eee7f4b965898e9eb632f967 100644
--- a/doc/Run_On_Kubernetes_CN.md
+++ b/doc/Run_On_Kubernetes_CN.md
@@ -2,13 +2,13 @@
Starting with version 0.6.0, Paddle Serving supports deployment on Kubernetes clusters, with reverse-proxy and secure-gateway support. Similar to developing inside the Paddle Serving Docker image, deploying Paddle Serving models on a Kubernetes cluster requires building a lightweight runtime image and deploying it to the cluster with the kubectl tool.
-### Cluster preparation
+### 1. Cluster preparation
If you do not have a Kubernetes cluster yet, we recommend [purchasing and using a Baidu Cloud CCE cluster](https://cloud.baidu.com/doc/CCE/index.html). For a cluster from another cloud provider, or a self-installed Kubernetes cluster, please follow the corresponding tutorial.
You also need an image registry for the Kubernetes deployment, usually bound to your cloud provider; if you use a Baidu Cloud CCE cluster, see [how to use the Baidu Cloud CCR registry](https://cloud.baidu.com/doc/CCR/index.html). Docker Hub can also serve as the registry, but image pulls may be slow during deployment.
-### Environment preparation
+### 2. Environment preparation
The gateway tool KONG needs to be installed on the Kubernetes cluster.
@@ -16,20 +16,20 @@ Starting with version 0.6.0, Paddle Serving supports deployment on Kubernetes clusters, and provides
kubectl apply -f https://bit.ly/kong-ingress-dbless
```
+### Choose a Serving development image (optional)
+You can directly pick a prebuilt Serving image from the [Docker development image list](./Docker_Images_CN.md) as the first choice for Kubernetes deployment; it ships with development tools and can be used to debug and compile code.
+### Build a Serving runtime image (optional)
-### Build a Serving runtime image (optional):
+Compared with the [Docker development image list](./Docker_Images_CN.md), a development image is meant for debugging and compiling code and carries many development tools, so it is large. A runtime image is a lightweight container with a much smaller footprint that can be deployed on edge devices. If you do not need a lightweight runtime container, skip this section.
-First you need to determine the exact environment of the runtime image. Compared with the [Docker development image list](./Docker_Images_CN.md), a development image is meant for debugging and compiling code and carries many development tools, so it is large. A runtime image usually needs a reduced container footprint for more flexible deployment. If you do not need a lightweight runtime container, skip this section.
-
-The script is at `tools/generate_runtime_docker.sh`; it is used as follows
+We provide a runtime-image build script at `tools/generate_runtime_docker.sh` in the Serving repository; the following command generates the image.
```bash
-bash tools/generate_runtime_docker.sh --env cuda10.1 --python 3.6 --name serving_runtime:cuda10.1-py36
+bash tools/generate_runtime_docker.sh --env cuda10.1 --python 3.7 --image_name serving_runtime:cuda10.1-py37 --paddle 2.2.0 --serving 0.7.0
```
-This generates a runtime image with cuda10.1, python 3.6, serving 0.7.0 and paddle 2.2.0. If you have other questions, run the following command for help information.
-If you need a runtime image for an older Serving version, please check out the corresponding release branch.
+This generates a runtime image with cuda10.1, python 3.7, serving 0.7.0 and paddle 2.2.0. If you have other questions, run the following command for help information. We strongly recommend using the latest paddle and serving versions (the two are paired: paddle 2.2.x matches serving 0.7.0, and paddle 2.1.x matches serving 0.6.x), because bugs found in earlier versions are fixed only in the latest release and are not backported to historical versions.
```
bash tools/generate_runtime_docker.sh --help
@@ -40,7 +40,7 @@ bash tools/generate_runtime_docker.sh --help
- paddle-serving-server, paddle-serving-client, paddle-serving-app, paddlepaddle; the exact versions are listed in tools/runtime.dockerfile, which you can also customize in that file if needed.
- the paddle-serving-server binary executable
-In other words, once the runtime image is generated, we only need to move our code (if any) and the model into the image. The generated image is named `paddle_serving:cuda10.2-py36`
+In other words, once the runtime image is generated, we only need to move our code (if any) and the model into the image. The generated image is named `paddle_serving:cuda10.2-py37`
### Add your code and model
diff --git a/doc/images/wechat_group_1.jpeg b/doc/images/wechat_group_1.jpeg
index d907bee65143b83107393ba763e194937d72111a..9259862d82879075dd40ca8243a58f049378c887 100644
Binary files a/doc/images/wechat_group_1.jpeg and b/doc/images/wechat_group_1.jpeg differ
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index 19f93dfe1782690333c32411a7545b1641b18a0e..9e9d87a652e56a46981423821dd9d01e7b4288f5 100755
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -209,6 +209,8 @@ def serve_args():
"--enable_prometheus", default=False, action="store_true", help="Use Prometheus")
parser.add_argument(
"--prometheus_port", type=int, default=19393, help="Port of the Prometheus")
+ parser.add_argument(
+ "--request_cache_size", type=int, default=0, help="Port of the Prometheus")
return parser.parse_args()
@@ -292,6 +294,7 @@ def start_gpu_card_model(gpu_mode, port, args): # pylint: disable=doc-string-mi
server.set_max_body_size(max_body_size)
server.set_enable_prometheus(args.enable_prometheus)
server.set_prometheus_port(args.prometheus_port)
+ server.set_request_cache_size(args.request_cache_size)
if args.use_trt and device == "gpu":
server.set_trt()
diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py
index f1d0b63178a24ed9f506bf6a2fdbb67a6cc01002..e369c57d4d350207d65d048a96eb052db279bd30 100755
--- a/python/paddle_serving_server/server.py
+++ b/python/paddle_serving_server/server.py
@@ -100,6 +100,7 @@ class Server(object):
]
self.enable_prometheus = False
self.prometheus_port = 19393
+ self.request_cache_size = 0
def get_fetch_list(self, infer_node_idx=-1):
fetch_names = [
@@ -207,6 +208,9 @@ class Server(object):
def set_prometheus_port(self, prometheus_port):
self.prometheus_port = prometheus_port
+ def set_request_cache_size(self, request_cache_size):
+ self.request_cache_size = request_cache_size
+
def _prepare_engine(self, model_config_paths, device, use_encryption_model):
self.device = device
if self.model_toolkit_conf == None:
@@ -615,6 +619,17 @@ class Server(object):
self.max_body_size,
self.enable_prometheus,
self.prometheus_port)
+ if self.enable_prometheus:
+ command = command + \
+ "-enable_prometheus={} " \
+ "-prometheus_port {} ".format(
+ self.enable_prometheus,
+ self.prometheus_port)
+ if self.request_cache_size > 0:
+ command = command + \
+ "-request_cache_size {} ".format(
+ self.request_cache_size
+ )
print("Going to Run Comand")
print(command)
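The extra tokens appended here are plain gflags, parsed by the serving binary at startup. A minimal standalone sketch of that consumption side; in the real binary the `DEFINE_int64` lives in `core/predictor/common/constant.cpp` above:

```cpp
#include <gflags/gflags.h>
#include <iostream>

// Standalone illustration; the real definition lives in
// core/predictor/common/constant.cpp and is shared via DECLARE_int64
// in constant.h.
DEFINE_int64(request_cache_size, 0, "max request cache size in bytes, 0 to disable");

int main(int argc, char** argv) {
  // gflags accepts both "-request_cache_size 1024" and
  // "-request_cache_size=1024", matching the command string built
  // by server.py above.
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  std::cout << "request_cache_size=" << FLAGS_request_cache_size << std::endl;
  return 0;
}
```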