Merge branch 'develop' into add_cpp_pipeline

4b432695 · Thomas Young · GitHub · b2dcda57 · b2fb27ec · 4b432695
17 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -132,6 +132,7 @@ if (SERVER)
    #include(external/rocksdb)
    include(external/cudnn)
    include(paddlepaddle)
+    include(external/prometheus)
 endif()
 message("paddle serving source dir: " ${PADDLE_SERVING_SOURCE_DIR})

--- a/cmake/external/prometheus.cmake
+++ b/cmake/external/prometheus.cmake
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Fetches, builds and installs prometheus-cpp as an external project and
+# exposes its two static archives as imported targets:
+#   prometheus       -> libprometheus-cpp-core.a
+#   prometheus-pull  -> libprometheus-cpp-pull.a
+# The list variable `prometheus_libs` gets `prometheus-pull` appended so that
+# consumers can link with `prometheus ${prometheus_libs}`.
+INCLUDE(ExternalProject)
+
+SET(GIT_URL https://github.com)
+SET(PROMETHEUS_PREFIX_DIR    ${THIRD_PARTY_PATH}/prometheus)
+SET(PROMETHEUS_INSTALL_DIR   ${THIRD_PARTY_PATH}/install/prometheus)
+SET(PROMETHEUS_REPOSITORY    ${GIT_URL}/jupp0r/prometheus-cpp.git)
+SET(PROMETHEUS_TAG           v0.13.0)
+
+# Archives produced by the external build. They are spelled out once so the
+# imported targets and BUILD_BYPRODUCTS below always agree.
+SET(PROMETHEUS_CORE_LIBRARY  ${PROMETHEUS_INSTALL_DIR}/lib/libprometheus-cpp-core.a)
+SET(PROMETHEUS_PULL_LIBRARY  ${PROMETHEUS_INSTALL_DIR}/lib/libprometheus-cpp-pull.a)
+SET(PROMETHEUS_LIBRARIES     ${PROMETHEUS_CORE_LIBRARY} ${PROMETHEUS_PULL_LIBRARY})
+
+INCLUDE_DIRECTORIES(${PROMETHEUS_INSTALL_DIR}/include)
+
+ExternalProject_Add(
+  extern_prometheus
+  ${EXTERNAL_PROJECT_LOG_ARGS}
+  ${SHALLOW_CLONE}
+  GIT_REPOSITORY        ${PROMETHEUS_REPOSITORY}
+  GIT_TAG               ${PROMETHEUS_TAG}
+  PREFIX                ${PROMETHEUS_PREFIX_DIR}
+  UPDATE_COMMAND        ""
+  CMAKE_ARGS            -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
+                        -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
+                        -DCMAKE_C_FLAGS=${CMAKE_C_FLAGS}
+                        -DBUILD_SHARED_LIBS=OFF
+                        -DENABLE_PUSH=OFF
+                        -DENABLE_COMPRESSION=OFF
+                        -DENABLE_TESTING=OFF
+                        -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS}
+                        -DCMAKE_INSTALL_PREFIX:PATH=${PROMETHEUS_INSTALL_DIR}
+                        # Pin the library directory: GNUInstallDirs may pick
+                        # lib64 on some distributions, while the imported
+                        # locations above expect <prefix>/lib.
+                        -DCMAKE_INSTALL_LIBDIR:PATH=lib
+                        -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
+  # Declared so generators that need explicit outputs (e.g. Ninja) know which
+  # step creates the archives the imported targets point at.
+  BUILD_BYPRODUCTS      ${PROMETHEUS_LIBRARIES}
+)
+
+ADD_LIBRARY(prometheus STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET prometheus PROPERTY IMPORTED_LOCATION ${PROMETHEUS_CORE_LIBRARY})
+ADD_DEPENDENCIES(prometheus extern_prometheus)
+
+ADD_LIBRARY(prometheus-pull STATIC IMPORTED GLOBAL)
+SET_PROPERTY(TARGET prometheus-pull PROPERTY IMPORTED_LOCATION ${PROMETHEUS_PULL_LIBRARY})
+# The pull archive uses symbols from the core archive; recording that lets
+# CMake place the core archive after it on static link lines.
+SET_PROPERTY(TARGET prometheus-pull PROPERTY INTERFACE_LINK_LIBRARIES prometheus)
+# Both archives come out of the same external build, so both must wait for it.
+ADD_DEPENDENCIES(prometheus-pull extern_prometheus)
+
+LIST(APPEND prometheus_libs prometheus-pull)
--- a/core/general-server/CMakeLists.txt
+++ b/core/general-server/CMakeLists.txt
@@ -31,6 +31,7 @@ target_link_libraries(serving pdserving)
 target_link_libraries(serving cube-api)
 target_link_libraries(serving utils)
 target_link_libraries(serving utf8proc)
+target_link_libraries(serving prometheus ${prometheus_libs})
 if(WITH_ASCEND_CL AND NOT WITH_LITE)
    target_link_libraries(serving ascendcl acl_op_compiler)
 endif()

--- a/core/pdcodegen/src/pdcodegen.cpp
+++ b/core/pdcodegen/src/pdcodegen.cpp
@@ -328,6 +328,23 @@ class PdsCodeGenerator : public CodeGenerator {
          inference_body += "  LOG(INFO) << oss.str();\n";
          inference_body += "  response->add_profile_time(start);\n";
          inference_body += "  response->add_profile_time(end);\n";
+          inference_body += "  if (::baidu::paddle_serving::predictor::PrometheusMetric::Enabled()) {\n";
+          inference_body += "  if (err_code == 0) {\n";
+          inference_body += "    ::baidu::paddle_serving::predictor::PrometheusMetricManager::\n";
+          inference_body += "        GetGeneralSingleton()\n";
+          inference_body += "            ->MetricQuerySuccess()\n";
+          inference_body += "            .Increment(1);\n";
+          inference_body += "  } else {\n";
+          inference_body += "    ::baidu::paddle_serving::predictor::PrometheusMetricManager::\n";
+          inference_body += "        GetGeneralSingleton()\n";
+          inference_body += "            ->MetricQueryFailure()\n";
+          inference_body += "            .Increment(1);\n";
+          inference_body += "  }\n";
+          inference_body += "  ::baidu::paddle_serving::predictor::PrometheusMetricManager::\n";
+          inference_body += "      GetGeneralSingleton()\n";
+          inference_body += "          ->MetricQueryDuration()\n";
+          inference_body += "          .Increment(total_time * 1000);\n";
+          inference_body += "  }\n";
        } else {
          inference_body += "  // flush notice log\n";
          inference_body += "  LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - ";  // NOLINT
@@ -1095,6 +1112,23 @@ class PdsCodeGenerator : public CodeGenerator {
          inference_body += "  LOG(INFO) << oss.str();\n";
          inference_body += "  response->add_profile_time(start);\n";
          inference_body += "  response->add_profile_time(end);\n";
+          inference_body += "  if (::baidu::paddle_serving::predictor::PrometheusMetric::Enabled()) {\n";
+          inference_body += "  if (err_code == 0) {\n";
+          inference_body += "    ::baidu::paddle_serving::predictor::PrometheusMetricManager::\n";
+          inference_body += "        GetGeneralSingleton()\n";
+          inference_body += "            ->MetricQuerySuccess()\n";
+          inference_body += "            .Increment(1);\n";
+          inference_body += "  } else {\n";
+          inference_body += "    ::baidu::paddle_serving::predictor::PrometheusMetricManager::\n";
+          inference_body += "        GetGeneralSingleton()\n";
+          inference_body += "            ->MetricQueryFailure()\n";
+          inference_body += "            .Increment(1);\n";
+          inference_body += "  }\n";
+          inference_body += "  ::baidu::paddle_serving::predictor::PrometheusMetricManager::\n";
+          inference_body += "      GetGeneralSingleton()\n";
+          inference_body += "          ->MetricQueryDuration()\n";
+          inference_body += "          .Increment(total_time * 1000);\n";
+          inference_body += "  }\n";
        } else {
          inference_body += "  // flush notice log\n";
          inference_body += "  LOG(INFO) << \"(logid=\" << log_id << \") tc=\[\" << (end - ";  // NOLINT

--- a/core/predictor/CMakeLists.txt
+++ b/core/predictor/CMakeLists.txt
@@ -17,7 +17,7 @@ if (WITH_TRT)
    add_definitions(-DWITH_TRT)
 endif()
 target_link_libraries(pdserving
-        brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_inference ${paddle_depend_libs})
+        brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz paddle_inference ${paddle_depend_libs} prometheus ${prometheus_libs})
 # install
 install(TARGETS pdserving
        RUNTIME DESTINATION ${PADDLE_SERVING_INSTALL_DIR}/bin

--- a/core/predictor/common/constant.cpp
+++ b/core/predictor/common/constant.cpp
@@ -44,6 +44,8 @@ DEFINE_bool(enable_cube, false, "enable cube");
 DEFINE_string(general_model_path, "./conf", "");
 DEFINE_string(general_model_file, "general_model.prototxt", "");
 DEFINE_bool(enable_general_model, true, "enable general model");
+DEFINE_bool(enable_prometheus, true, "enable prometheus");
+DEFINE_int32(prometheus_port, 18010, "");
 const char* START_OP_NAME = "startup_op";
 }  // namespace predictor

--- a/core/predictor/common/constant.h
+++ b/core/predictor/common/constant.h
@@ -43,6 +43,8 @@ DECLARE_bool(enable_model_toolkit);
 DECLARE_string(enable_protocol_list);
 DECLARE_bool(enable_cube);
 DECLARE_bool(enable_general_model);
+DECLARE_bool(enable_prometheus);
+DECLARE_int32(prometheus_port);
 // STATIC Variables
 extern const char* START_OP_NAME;

--- a/core/predictor/common/inner_common.h
+++ b/core/predictor/common/inner_common.h
@@ -60,6 +60,8 @@
 #include "core/predictor/common/types.h"
 #include "core/predictor/common/utils.h"
+#include "core/predictor/framework/prometheus_metric.h"
 #ifdef BCLOUD
 namespace brpc = baidu::rpc;
 namespace butil = base;

--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -30,6 +30,7 @@
 #include "core/predictor/framework/factory.h"
 #include "core/predictor/framework/infer_data.h"
 #include "core/predictor/framework/memory.h"
+#include "core/predictor/framework/predictor_metric.h"
 #include "paddle_inference_api.h"  // NOLINT
 #include "experimental/float16.h"
 namespace baidu {
@@ -499,6 +500,9 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
  ~FluidInferEngine() {}
  typedef std::vector<paddle::PaddleTensor> TensorVector;
  int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    long start = tv.tv_sec * 1000000 + tv.tv_usec;
    // First of all, get the real core acording to the
    // Template parameter <EngineCore>.
    EngineCore* core = DBReloadableInferEngine<EngineCore>::get_core();
@@ -672,6 +676,17 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
      tensor_out.data = paddleBuf;
      tensorVector_out_pointer->push_back(tensor_out);
    }
+    gettimeofday(&tv, NULL);
+    long end = tv.tv_sec * 1000000 + tv.tv_usec;
+    long total_time = end - start;
+    if (PrometheusMetric::Enabled()) {
+      PrometheusMetricManager::GetGeneralSingleton()
+          ->MetricInferenceCount()
+          .Increment(1);
+      PrometheusMetricManager::GetGeneralSingleton()
+          ->MetricInferenceDuration()
+          .Increment(total_time);
+    }
    return 0;
  }

--- a/core/predictor/framework/prometheus_metric.cpp
+++ b/core/predictor/framework/prometheus_metric.cpp
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#include "core/predictor/framework/prometheus_metric.h"
+#include <exception>
+#include <map>
+#include <string>
+#include <thread>
+#include "prometheus/counter.h"
+#include "prometheus/detail/utils.h"
+#include "prometheus/exposer.h"
+#include "core/predictor/common/inner_common.h"
+namespace baidu {
+namespace paddle_serving {
+namespace predictor {
+// Creates the registry and text serializer and registers the five counter
+// families (query success / failure, inference count, query duration and
+// inference duration) on that registry. Collection starts out disabled.
+//
+// registry_ is declared before the family references in the class, so it is
+// already constructed when the families dereference it below.
+PrometheusMetric::PrometheusMetric()
+    : registry_(std::make_shared<prometheus::Registry>()),
+      serializer_(new prometheus::TextSerializer()),
+      query_success_family_(
+          prometheus::BuildCounter()
+              .Name("pd_query_request_success")
+              .Help("Number of successful query requests")
+              .Register(*registry_)),
+      query_failure_family_(
+          prometheus::BuildCounter()
+              .Name("pd_query_request_failure")
+              .Help("Number of failed query requests")
+              .Register(*registry_)),
+      inf_count_family_(prometheus::BuildCounter()
+                            .Name("pd_inference_count")
+                            .Help("Number of inferences performed")
+                            .Register(*registry_)),
+      query_duration_us_family_(
+          prometheus::BuildCounter()
+              .Name("pd_query_request_duration_us")
+              .Help("Cumulative query request duration in microseconds")
+              .Register(*registry_)),
+      inf_duration_us_family_(
+          prometheus::BuildCounter()
+              .Name("pd_inference_duration_us")
+              .Help("Cumulative inference duration in microseconds")
+              .Register(*registry_)),
+      metrics_enabled_(false)
+{
+}
+// Returns the hash prometheus-cpp computes for the given label set.
+size_t PrometheusMetric::HashLabels(
+    const std::map<std::string, std::string>& labels) {
+  const size_t digest = prometheus::detail::hash_labels(labels);
+  return digest;
+}
+// Nothing to release by hand: every owned resource sits in a smart pointer.
+PrometheusMetric::~PrometheusMetric() = default;
+// Reports the singleton's metrics_enabled_ flag.
+bool PrometheusMetric::Enabled() {
+  return GetSingleton()->metrics_enabled_;
+}
+// Switches metric collection on. On the first call this also starts the HTTP
+// exposer on 127.0.0.1:<FLAGS_prometheus_port> and attaches the shared
+// registry to it; later calls reuse the running exposer.
+//
+// The enabled flag is raised only after the exposer is up. If the exposer
+// cannot be started, the error is logged, Enabled() keeps returning false and
+// the process continues without metrics instead of dying on an uncaught
+// exception.
+//
+// NOTE(review): the exposer binds to the loopback address only, so a scraper
+// on another host cannot reach it — confirm this is intended.
+void
+PrometheusMetric::EnableMetrics()
+{
+  auto singleton = GetSingleton();
+  if (!singleton->exposer_) {
+    const std::string url =
+        "127.0.0.1:" + std::to_string(FLAGS_prometheus_port);
+    try {
+      // Publish the exposer only once it is fully set up, so a failure in
+      // either step leaves exposer_ empty and a later call can retry.
+      auto exposer = std::make_shared<prometheus::Exposer>(url);
+      exposer->RegisterCollectable(singleton->registry_);
+      singleton->exposer_ = exposer;
+    } catch (const std::exception& e) {
+      LOG(ERROR) << "Failed to start prometheus exposer on " << url << ": "
+                 << e.what() << "; metrics stay disabled";
+      return;
+    }
+  }
+  singleton->metrics_enabled_ = true;
+}
+// Hands out a shared reference to the registry owned by the singleton.
+std::shared_ptr<prometheus::Registry> PrometheusMetric::GetRegistry() {
+  return PrometheusMetric::GetSingleton()->registry_;
+}
+// Collects every metric currently in the registry and renders the result
+// with the singleton's serializer.
+const std::string PrometheusMetric::SerializedMetrics() {
+  PrometheusMetric* self = PrometheusMetric::GetSingleton();
+  const auto snapshot = self->registry_->Collect();
+  return self->serializer_->Serialize(snapshot);
+}
+// Returns the one PrometheusMetric instance, a function-local static that is
+// constructed on the first call.
+PrometheusMetric* PrometheusMetric::GetSingleton() {
+  static PrometheusMetric instance;
+  return &instance;
+}
+// Returns the shared manager labelled model="general", version="0"; the
+// device argument is -1, so no gpu label is attached.
+PrometheusMetricManager* PrometheusMetricManager::GetGeneralSingleton() {
+  static PrometheusMetricManager general_manager("general", 0, -1);
+  return &general_manager;
+}
+// Builds the label set for (model_name, model_version, device) and adds one
+// counter with those labels to each of the five metric families.
+PrometheusMetricManager::PrometheusMetricManager(
+    const std::string& model_name, const int64_t model_version,
+    const int device) {
+  std::map<std::string, std::string> label_set;
+  GetMetricLabels(&label_set, model_name, model_version, device);
+
+  // Request-level counters.
+  metric_query_success_ =
+      &PrometheusMetric::FamilyQuerySuccess().Add(label_set);
+  metric_query_failure_ =
+      &PrometheusMetric::FamilyQueryFailure().Add(label_set);
+  // Inference-level counter.
+  metric_inf_count_ =
+      &PrometheusMetric::FamilyInferenceCount().Add(label_set);
+  // Accumulated durations.
+  metric_query_duration_us_ =
+      &PrometheusMetric::FamilyQueryDuration().Add(label_set);
+  metric_inf_duration_us_ =
+      &PrometheusMetric::FamilyInferenceDuration().Add(label_set);
+}
+// Removes this manager's counters from the families they were added to.
+PrometheusMetricManager::~PrometheusMetricManager() {
+  auto& success_family = PrometheusMetric::FamilyQuerySuccess();
+  success_family.Remove(metric_query_success_);
+
+  auto& failure_family = PrometheusMetric::FamilyQueryFailure();
+  failure_family.Remove(metric_query_failure_);
+
+  auto& inference_count_family = PrometheusMetric::FamilyInferenceCount();
+  inference_count_family.Remove(metric_inf_count_);
+
+  auto& query_duration_family = PrometheusMetric::FamilyQueryDuration();
+  query_duration_family.Remove(metric_query_duration_us_);
+
+  auto& inference_duration_family = PrometheusMetric::FamilyInferenceDuration();
+  inference_duration_family.Remove(metric_inf_duration_us_);
+}
+// Fills *labels with "model" and "version" entries and, for a non-negative
+// device id, a "gpu" entry. Keys already present in *labels are left as they
+// are (emplace, like insert, does not overwrite).
+void PrometheusMetricManager::GetMetricLabels(
+    std::map<std::string, std::string>* labels, const std::string& model_name,
+    const int64_t model_version, const int device) {
+  labels->emplace(std::string("model"), model_name);
+  labels->emplace(std::string("version"), std::to_string(model_version));
+  if (device < 0) {
+    return;
+  }
+  labels->emplace(std::string("gpu"), std::to_string(device));
+}
+// Adds a counter with the given labels to `family` and returns its address.
+prometheus::Counter* PrometheusMetricManager::CreateCounterMetric(
+    prometheus::Family<prometheus::Counter>& family,
+    const std::map<std::string, std::string>& labels) {
+  prometheus::Counter& counter = family.Add(labels);
+  return &counter;
+}
+}  // namespace predictor
+}  // namespace paddle_serving
+}  // namespace baidu
--- a/core/predictor/framework/prometheus_metric.h
+++ b/core/predictor/framework/prometheus_metric.h
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+#pragma once
+#include <atomic>
+#include <mutex>
+#include <thread>
+#include "prometheus/registry.h"
+#include "prometheus/serializer.h"
+#include "prometheus/text_serializer.h"
+#include "prometheus/counter.h"
+// Forward declaration only; this header just stores a pointer to the exposer.
+namespace prometheus {
+class Exposer;
+}  // namespace prometheus
+namespace baidu {
+namespace paddle_serving {
+namespace predictor {
+// Holder of the Prometheus registry, the text serializer, the HTTP exposer
+// and the counter families. Everything public is static and forwards to a
+// single instance that GetSingleton() creates on first use.
+class PrometheusMetric {
+ public:
+  // Hash of a label set, as computed by prometheus-cpp.
+  static size_t HashLabels(const std::map<std::string, std::string>& labels);
+
+  // Whether metric collection has been switched on.
+  static bool Enabled();
+
+  // Switches metric collection on and starts the exposer if none exists yet.
+  static void EnableMetrics();
+
+  // Shared handle to the registry all families are registered on.
+  static std::shared_ptr<prometheus::Registry> GetRegistry();
+
+  // Current registry contents rendered by the serializer.
+  static const std::string SerializedMetrics();
+
+  // Accessors for the counter families; each returns a reference to the
+  // family registered in the constructor.
+  static prometheus::Family<prometheus::Counter>& FamilyQuerySuccess() {
+    return GetSingleton()->query_success_family_;
+  }
+  static prometheus::Family<prometheus::Counter>& FamilyQueryFailure() {
+    return GetSingleton()->query_failure_family_;
+  }
+  static prometheus::Family<prometheus::Counter>& FamilyInferenceCount() {
+    return GetSingleton()->inf_count_family_;
+  }
+  static prometheus::Family<prometheus::Counter>& FamilyQueryDuration() {
+    return GetSingleton()->query_duration_us_family_;
+  }
+  static prometheus::Family<prometheus::Counter>& FamilyInferenceDuration() {
+    return GetSingleton()->inf_duration_us_family_;
+  }
+
+ private:
+  PrometheusMetric();
+  virtual ~PrometheusMetric();
+  static PrometheusMetric* GetSingleton();
+
+  // Declaration order matters: registry_ has to be constructed before the
+  // family references below, which are bound by registering on it.
+  std::shared_ptr<prometheus::Registry> registry_;
+  std::unique_ptr<prometheus::Serializer> serializer_;
+  std::shared_ptr<prometheus::Exposer> exposer_;
+
+  prometheus::Family<prometheus::Counter>& query_success_family_;
+  prometheus::Family<prometheus::Counter>& query_failure_family_;
+  prometheus::Family<prometheus::Counter>& inf_count_family_;
+  prometheus::Family<prometheus::Counter>& query_duration_us_family_;
+  prometheus::Family<prometheus::Counter>& inf_duration_us_family_;
+
+  bool metrics_enabled_;
+};
+// Owns one counter per metric family for a fixed label set
+// (model, version and optionally gpu). The counters are added to the families
+// in the constructor and removed from them again in the destructor.
+//
+// The only instance reachable from outside is GetGeneralSingleton(); the
+// constructor is private.
+class PrometheusMetricManager {
+ public:
+  // Shared manager created on first use.
+  static PrometheusMetricManager* GetGeneralSingleton();
+  ~PrometheusMetricManager();
+
+  // Not copyable: a copy would hold the same counter pointers and its
+  // destructor would remove them from the families while the original still
+  // uses them.
+  PrometheusMetricManager(const PrometheusMetricManager&) = delete;
+  PrometheusMetricManager& operator=(const PrometheusMetricManager&) = delete;
+
+  // Counter of successful query requests.
+  prometheus::Counter& MetricQuerySuccess() const
+  {
+    return *metric_query_success_;
+  }
+  // Counter of failed query requests.
+  prometheus::Counter& MetricQueryFailure() const
+  {
+    return *metric_query_failure_;
+  }
+  // Counter of inferences performed.
+  prometheus::Counter& MetricInferenceCount() const
+  {
+    return *metric_inf_count_;
+  }
+  // Counter accumulating query request duration.
+  prometheus::Counter& MetricQueryDuration() const
+  {
+    return *metric_query_duration_us_;
+  }
+  // Counter accumulating inference duration.
+  prometheus::Counter& MetricInferenceDuration() const
+  {
+    return *metric_inf_duration_us_;
+  }
+ private:
+  PrometheusMetricManager(
+      const std::string& model_name, const int64_t model_version,
+      const int device);
+  // Fills *labels with the entries derived from the three identifiers.
+  static void GetMetricLabels(
+      std::map<std::string, std::string>* labels, const std::string& model_name,
+      const int64_t model_version, const int device);
+  // Adds a counter with `labels` to `family` and returns a pointer to it.
+  prometheus::Counter* CreateCounterMetric(
+      prometheus::Family<prometheus::Counter>& family,
+      const std::map<std::string, std::string>& labels);
+  // Non-owning: the counters belong to their families.
+  prometheus::Counter* metric_query_success_;
+  prometheus::Counter* metric_query_failure_;
+  prometheus::Counter* metric_inf_count_;
+  prometheus::Counter* metric_query_duration_us_;
+  prometheus::Counter* metric_inf_duration_us_;
+};
+}  // namespace predictor
+}  // namespace paddle_serving
+}  // namespace baidu
--- a/core/predictor/src/pdserving.cpp
+++ b/core/predictor/src/pdserving.cpp
@@ -37,6 +37,7 @@ using baidu::paddle_serving::predictor::ServerManager;
 using baidu::paddle_serving::predictor::WorkflowManager;
 using baidu::paddle_serving::predictor::InferServiceManager;
 using baidu::paddle_serving::predictor::Resource;
+using baidu::paddle_serving::predictor::PrometheusMetric;
 using baidu::paddle_serving::predictor::FLAGS_workflow_path;
 using baidu::paddle_serving::predictor::FLAGS_workflow_file;
 using baidu::paddle_serving::predictor::FLAGS_inferservice_path;
@@ -47,6 +48,7 @@ using baidu::paddle_serving::predictor::FLAGS_resource_path;
 using baidu::paddle_serving::predictor::FLAGS_resource_file;
 using baidu::paddle_serving::predictor::FLAGS_reload_interval_s;
 using baidu::paddle_serving::predictor::FLAGS_port;
+using baidu::paddle_serving::predictor::FLAGS_enable_prometheus;
 using baidu::paddle_serving::configure::InferServiceConf;
 using baidu::paddle_serving::configure::read_proto_conf;
@@ -216,6 +218,11 @@ int main(int argc, char** argv) {
  VLOG(2) << "Succ initialize general model";
+  // enable prometheus
+  if (FLAGS_enable_prometheus) {
+    PrometheusMetric::EnableMetrics();
+  }
 #ifndef BCLOUD
  // FATAL messages are output to stderr
  FLAGS_stderrthreshold = 3;

--- a/doc/C++_Serving/Performance_Tuning_CN.md
+++ b/doc/C++_Serving/Performance_Tuning_CN.md
@@ -26,7 +26,7 @@ Server端<mark>**线程数N**</mark>的设置需要结合三个因素来综合
 当您使用CPU进行预测时，预测阶段的计算是使用CPU完成的，此时，请参考前两者来进行设置线程数。
-当您使用GPU进行预测时，情况有些不同，此时预测阶段的计算是由GPU完成的，此时CPU资源是空闲的，而预测操作是阻塞该线程的，类似于Sleep操作，此时若您的线程数==机器core数量，将没有其他可切换的线程从而导致必然有部分core是空闲的状态。具体来说，当模型预测时间较短时（<10ms），Server端线程数不宜过多（线程数=1~10倍core数量），否则线程切换带来的开销不可忽视。当模型预测时间较长时，Server端线程数应稍大一些（线程数=4~200倍core数量）。
+当您使用GPU进行预测时，情况有些不同，此时预测阶段的计算是由GPU完成的，此时CPU资源是空闲的，而预测操作是阻塞该线程的，类似于Sleep操作，此时若您的线程数==机器core数量，将没有其他可切换的线程从而导致必然有部分core是空闲的状态。具体来说，当模型预测时间较短时（<10ms），Server端线程数不宜过多（线程数=1——10倍core数量），否则线程切换带来的开销不可忽视。当模型预测时间较长时，Server端线程数应稍大一些（线程数=4——200倍core数量）。
 # 3.异步模式
 当<mark>**大部分用户的Request请求batch数<<模型最大支持的Batch数**</mark>时，采用异步模式的收益是明显的。

--- a/paddle_inference/paddle/include/paddle_engine.h
+++ b/paddle_inference/paddle/include/paddle_engine.h
@@ -166,8 +166,8 @@ class PaddleInferenceEngine : public EngineCore {
    }
    Config config;
-    std::vector<std::string> suffixParaVector = {".pdiparams", "__params__"};
+    std::vector<std::string> suffixParaVector = {".pdiparams", "__params__", "params"};
-    std::vector<std::string> suffixModelVector = {".pdmodel", "__model__"};
+    std::vector<std::string> suffixModelVector = {".pdmodel", "__model__", "model"};
    std::string paraFileName = getFileBySuffix(model_path, suffixParaVector);
    std::string modelFileName = getFileBySuffix(model_path, suffixModelVector);
@@ -296,7 +296,7 @@ class PaddleInferenceEngine : public EngineCore {
                  << FLAGS_nnadapter_model_cache_dir;
      } else {
        // for ascend 910
-	      config.EnableNpu(gpu_id);
+        config.EnableNpu(gpu_id);
      }
    }

--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -204,6 +204,10 @@ def serve_args():
        default=False,
        action="store_true",
        help="Use gpu_multi_stream")
+    parser.add_argument(
+        "--enable_prometheus", default=False, action="store_true", help="Use Prometheus")
+    parser.add_argument(
+        "--prometheus_port", type=int, default=19393, help="Port of the Prometheus")
    return parser.parse_args()
@@ -285,6 +289,8 @@ def start_gpu_card_model(gpu_mode, port, args):  # pylint: disable=doc-string-mi
    server.set_memory_optimize(mem_optim)
    server.set_ir_optimize(ir_optim)
    server.set_max_body_size(max_body_size)
+    server.set_enable_prometheus(args.enable_prometheus)
+    server.set_prometheus_port(args.prometheus_port)
    if args.use_trt and device == "gpu":
        server.set_trt()

--- a/python/paddle_serving_server/server.py
+++ b/python/paddle_serving_server/server.py
@@ -98,6 +98,8 @@ class Server(object):
            'GeneralDistKVQuantInferOp',
            'GeneralDetectionOp',
        ]
+        self.enable_prometheus = False
+        self.prometheus_port = 19393
    def get_fetch_list(self, infer_node_idx=-1):
        fetch_names = [
@@ -199,6 +201,12 @@ class Server(object):
    def set_ascend_cl(self):
        self.use_ascend_cl = True
+    def set_enable_prometheus(self, flag=False):
+        self.enable_prometheus = flag
+    def set_prometheus_port(self, prometheus_port):
+        self.prometheus_port = prometheus_port
    def _prepare_engine(self, model_config_paths, device, use_encryption_model):
        self.device = device
        if self.model_toolkit_conf == None:
@@ -587,7 +595,9 @@ class Server(object):
                    "-workflow_path {} " \
                    "-workflow_file {} " \
                    "-bthread_concurrency {} " \
-                    "-max_body_size {} ".format(
+                    "-max_body_size {} " \
+                    "-enable_prometheus={} " \
+                    "-prometheus_port {} ".format(
                        self.bin_path,
                        self.workdir,
                        self.infer_service_fn,
@@ -602,7 +612,9 @@ class Server(object):
                        self.workdir,
                        self.workflow_fn,
                        self.num_threads,
-                        self.max_body_size)
+                        self.max_body_size,
+                        self.enable_prometheus,
+                        self.prometheus_port)
        print("Going to Run Comand")
        print(command)

--- a/tools/generate_runtime_docker.sh
+++ b/tools/generate_runtime_docker.sh
@@ -11,7 +11,7 @@ function usage
    echo "   --python              : python version, 3.6/3.7/3.8 ";
    #echo "   --serving             : serving version(0.6.0/0.6.2)";
    #echo "   --paddle              : paddle version(2.1.0/2.2.0)"
-    echo "   --image_name          : image name(default serving_runtime:env-python)"
+    echo "   --image_name          : image name(default serving_runtime:env-python)";
    echo "  -h | --help            : helper";
 }
@@ -25,9 +25,9 @@ function parse_args
      case "$1" in
          --env )               env="$2";             shift;;
          --python )            python="$2";     shift;;
-          #--serving )           serving="$2";      shift;;
+          #--serving )          serving="$2";      shift;;
-          #--paddle )            paddle="$2";      shift;;
+          #--paddle )           paddle="$2";      shift;;
-      --image_name )          image_name="$2";    shift;;
+          --image_name )        image_name="$2";    shift;;
          -h | --help )         usage;            exit;; # quit and show usage
          * )                 args+=("$1")             # if no match, add it to the positional args
      esac
@@ -41,7 +41,7 @@ function parse_args
  positional_2="${args[1]}"
  # validate required args
-  if [[ -z "${paddle}" || -z "${env}" || -z "${python}" || -z "${serving}" ]]; then
+  if [[ -z "${env}" || -z "${python}" ]]; then
      echo "Invalid arguments. paddle or env or python or serving is missing."
      usage
      exit;
@@ -57,6 +57,8 @@ function parse_args
 function run
 {
+  # Default Paddle and Serving versions substituted into the Dockerfile
+  # template. The Python version has no default: it must be given with
+  # --python and is checked for emptiness during argument validation.
+  paddle="2.2.0"
+  serving="0.7.0"
  parse_args "$@"
  echo "named arg: env: $env"
@@ -69,8 +71,6 @@ function run
  elif [ $env == "cuda11.2" ]; then
      base_image="nvidia\/cuda:11.2.0-cudnn8-runtime-ubuntu16.04"
  fi
-  python="2.2.0"
-  serving="0.7.0"
  echo "base image: $base_image"
  echo "named arg: python: $python"
  echo "named arg: serving: $serving"
@@ -78,7 +78,8 @@ function run
  echo "named arg: image_name: $image_name"
  sed -e "s/<<base_image>>/$base_image/g" -e "s/<<python_version>>/$python/g" -e "s/<<run_env>>/$env/g" -e "s/<<serving_version>>/$serving/g" -e "s/<<paddle_version>>/$paddle/g" tools/Dockerfile.runtime_template > Dockerfile.tmp
-  docker build --network=host --build-arg ftp_proxy=http://172.19.57.45:3128 --build-arg https_proxy=http://172.19.57.45:3128 --build-arg http_proxy=http://172.19.57.45:3128 --build-arg HTTP_PROXY=http://172.19.57.45:3128 --build-arg HTTPS_PROXY=http://172.19.57.45:3128 -t $image_name -f Dockerfile.tmp .
+  #docker build --network=host --build-arg ftp_proxy=http://172.19.57.45:3128 --build-arg https_proxy=http://172.19.57.45:3128 --build-arg http_proxy=http://172.19.57.45:3128 --build-arg HTTP_PROXY=http://172.19.57.45:3128 --build-arg HTTPS_PROXY=http://172.19.57.45:3128 -t $image_name -f Dockerfile.tmp .
+  docker build -t $image_name -f Dockerfile.tmp .
 }
 run "$@";