Commit fac829e5 authored by TeslaZhao

Merge branch 'develop' of https://github.com/TeslaZhao/Serving into develop

@@ -700,6 +700,8 @@ Pipeline Serving supports low-precision inference. The precision types supported by CPU, GPU and TensorRT are as follows:
 - fp16
 - int8
+
+When using int8, enable use_calib: True.
 Refer to the [simple_web_service](../../examples/Pipeline/simple_web_service) example.
 ***
...
@@ -489,4 +489,7 @@ Python Pipeline supports low-precision inference. The precision types supported by CPU, GPU and TensorRT are as follows:
 #GPU supports: "fp32"(default), "fp16(TensorRT)", "int8";
 #CPU supports: "fp32"(default), "fp16", "bf16"(mkldnn); not supported: "int8"
 precision: "fp32"
+
+#calib, enable it when using int8
+use_calib: True
 ```
@@ -495,4 +495,7 @@ Python Pipeline supports low-precision inference. The precision types supported by CPU, GPU and TensorRT are as follows:
 #GPU support: "fp32"(default), "fp16(TensorRT)", "int8";
 #CPU support: "fp32"(default), "fp16", "bf16"(mkldnn); not supported: "int8"
 precision: "fp32"
+
+#calib, enable it when using int8
+use_calib: True
 ```
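
For reference, the calibration switch can also be flipped programmatically before launching the service. The snippet below is a minimal sketch, not part of this commit: it assumes `precision` and `use_calib` sit under an op's `local_service_conf` section of a Pipeline `config.yml`, and the op name `uci` is an assumption.

```python
# Sketch: enable int8 + calibration in a Pipeline config.yml (assumed layout).
import yaml

with open("config.yml") as f:                    # hypothetical path
    conf = yaml.safe_load(f)

svc = conf["op"]["uci"]["local_service_conf"]    # op name "uci" is an assumption
svc["precision"] = "int8"
svc["use_calib"] = True                          # required when precision is int8

with open("config.yml", "w") as f:
    yaml.safe_dump(conf, f)
```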
@@ -30,18 +30,40 @@ import pytest
 inference_test_cases = ["test_fit_a_line.py::TestFitALine::test_inference"]
 cpp_test_cases = ["test_fit_a_line.py::TestFitALine::test_cpu", "test_fit_a_line.py::TestFitALine::test_gpu"]
 pipeline_test_cases = ["test_uci_pipeline.py::TestUCIPipeline::test_cpu", "test_uci_pipeline.py::TestUCIPipeline::test_gpu"]
+log_files = ["PipelineServingLogs", "log", "stderr.log", "stdout.log"]

-def run_test_cases(cases_list, case_type):
+def set_serving_log_path():
+    if 'SERVING_LOG_PATH' not in os.environ:
+        serving_log_path = os.path.expanduser(os.getcwd())
+        os.environ['SERVING_LOG_PATH'] = serving_log_path
+
+def mv_log_to_new_dir(dir_path):
+    import shutil
+    if not os.path.exists(dir_path):
+        os.mkdir(dir_path)
+    serving_log_path = os.environ['SERVING_LOG_PATH']
+    for file_name in log_files:
+        file_path = os.path.join(serving_log_path, file_name)
+        if os.path.exists(file_path):
+            shutil.move(file_path, dir_path)
+
+def run_test_cases(cases_list, case_type, is_open_std):
     old_stdout, old_stderr = sys.stdout, sys.stderr
     real_path = os.path.dirname(os.path.realpath(__file__))
     for case in cases_list:
-        sys.stdout = open('/dev/null', 'w')
-        sys.stderr = open('/dev/null', 'w')
+        if is_open_std is False:
+            sys.stdout = open('/dev/null', 'w')
+            sys.stderr = open('/dev/null', 'w')
         args_str = "--disable-warnings " + str(real_path) + "/" + case
         args = args_str.split(" ")
         res = pytest.main(args)
         sys.stdout, sys.stderr = old_stdout, old_stderr
         case_name = case.split('_')[-1]
+        serving_log_path = os.environ['SERVING_LOG_PATH']
+        dir_name = str(case_type) + '_' + case.split(':')[-1]
+        new_dir_path = os.path.join(serving_log_path, dir_name)
+        mv_log_to_new_dir(new_dir_path)
         if res == 0:
             print("{} {} environment running success".format(case_type, case_name))
         elif res == 1:
@@ -49,15 +71,27 @@ def run_test_cases(cases_list, case_type):
             print("{} {} environment running failure. Please refer to https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html to configure environment".format(case_type, case_name))
             os._exit(0)
         else:
-            print("{} {} environment running failure, if you need this environment, please refer to https://github.com/PaddlePaddle/Serving/blob/HEAD/doc/Compile_CN.md to configure environment".format(case_type, case_name))
+            print("{} {} environment running failure. If you need this environment, please refer to https://github.com/PaddlePaddle/Serving/blob/develop/doc/Install_CN.md".format(case_type, case_name))

-def unset_proxy(key):
-    os.unsetenv(key)
+def unset_env(key):
+    os.environ.pop(key, None)

-def check_env():
+def check_env(mode):
+    set_serving_log_path()
     if 'https_proxy' in os.environ or 'http_proxy' in os.environ:
-        unset_proxy("https_proxy")
-        unset_proxy("http_proxy")
-    run_test_cases(inference_test_cases, "PaddlePaddle")
-    run_test_cases(cpp_test_cases, "C++")
-    run_test_cases(pipeline_test_cases, "Pipeline")
+        unset_env("https_proxy")
+        unset_env("http_proxy")
+    if 'GREP_OPTIONS' in os.environ:
+        unset_env("GREP_OPTIONS")
+    is_open_std = False
+    if mode == "debug":
+        is_open_std = True
+    if mode == "all" or mode == "inference" or mode == "debug":
+        run_test_cases(inference_test_cases, "PaddlePaddle", is_open_std)
+    if mode == "all" or mode == "cpp" or mode == "debug":
+        run_test_cases(cpp_test_cases, "C++", is_open_std)
+    if mode == "all" or mode == "pipeline" or mode == "debug":
+        run_test_cases(pipeline_test_cases, "Pipeline", is_open_std)
+
+if __name__ == '__main__':
+    check_env("debug")
@@ -20,12 +20,14 @@ class TestFitALine(object):
         serving_util.check_model_data_exist()
         self.get_truth_val_by_inference(self)
         self.serving_util = serving_util
+        self.serving_util.release('service')
+        kill_process(9494)

     def teardown_method(self):
         print_log(["stderr.log", "stdout.log",
-                   "log/serving.ERROR", "PipelineServingLogs/pipeline.log"], iden="after predict")
+                   "log/serving.ERROR", "PipelineServingLogs/pipeline.log"])
         kill_process(9494)
-        self.serving_util.release()
+        self.serving_util.release('service')

     def get_truth_val_by_inference(self):
         try:
@@ -58,11 +60,9 @@ class TestFitALine(object):
             output_data = output_handle.copy_to_cpu()
             output_data_dict[output_data_name] = output_data
         # convert to the same format of Serving output
-        print(output_data_dict)
         output_data_dict["price"] = output_data_dict["fc_0.tmp_1"]
         del output_data_dict["fc_0.tmp_1"]
         self.truth_val = output_data_dict
-        print(self.truth_val, self.truth_val["price"].shape)

     def predict_brpc(self, batch_size=1):
         data = np.array(
@@ -74,7 +74,6 @@ class TestFitALine(object):
         fetch_list = client.get_fetch_names()
         fetch_map = client.predict(
             feed={"x": data}, fetch=fetch_list, batch=True)
-        print(fetch_map)
         return fetch_map

     def predict_http(self, batch_size=1):
@@ -87,12 +86,12 @@ class TestFitALine(object):
         fetch_list = client.get_fetch_names()
         fetch_map = client.predict(
             feed={"x": data}, fetch=fetch_list, batch=True)
-        print(fetch_map)
         output_dict = self.serving_util.parse_http_result(fetch_map)
         return output_dict

     def test_inference(self):
-        assert self.truth_val['price'].size != 0
+        self.serving_util.start_server_by_shell(cmd="", sleep=1)
+        assert self.truth_val['price'].size != 0, "The result of inference is empty"

     def test_cpu(self):
@@ -103,7 +102,7 @@ class TestFitALine(object):
         )

         # 2.resource check
-        assert count_process_num_on_port(9494) == 1
+        assert count_process_num_on_port(9494) == 1, "Error occurred when Paddle Server started"

         # 4.predict by brpc
         # batch_size 1
@@ -123,7 +122,7 @@ class TestFitALine(object):
         )

         # 2.resource check
-        assert count_process_num_on_port(9494) == 1
+        assert count_process_num_on_port(9494) == 1, "Error occurred when Paddle Server started"

         # 4.predict by brpc
         # batch_size 1
...
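
For context, `predict_brpc` above follows the standard `paddle_serving_client` flow; a standalone sketch of the equivalent call is below. The client config path and the 13-feature input are assumptions based on the fit-a-line example; the port and the feed/fetch handling come from the test itself.

```python
import numpy as np
from paddle_serving_client import Client   # same client API the test exercises

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")  # hypothetical path
client.connect(["127.0.0.1:9494"])              # port used by TestFitALine above
data = np.random.rand(1, 13).astype("float32")  # fit-a-line takes 13 features per sample
fetch_map = client.predict(feed={"x": data}, fetch=client.get_fetch_names(), batch=True)
print(fetch_map["price"])
```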
@@ -21,12 +21,14 @@ class TestUCIPipeline(object):
         serving_util.check_model_data_exist()
         self.get_truth_val_by_inference(self)
         self.serving_util = serving_util
+        self.serving_util.release('web_service')

     def teardown_method(self):
         print_log(["stderr.log", "stdout.log",
-                   "log/serving.ERROR", "PipelineServingLogs/pipeline.log"], iden="after predict")
+                   "PipelineServingLogs/pipeline.log"])
         kill_process(9998)
+        kill_process(18082)
-        self.serving_util.release()
+        self.serving_util.release('web_service')

     def get_truth_val_by_inference(self):
         try:
@@ -62,7 +64,6 @@ class TestUCIPipeline(object):
         output_data_dict["prob"] = output_data_dict["fc_0.tmp_1"]
         del output_data_dict["fc_0.tmp_1"]
         self.truth_val = output_data_dict
-        print(self.truth_val, self.truth_val["prob"].shape)

     def predict_pipeline_rpc(self, batch_size=1):
         # 1.prepare feed_data
@@ -74,10 +75,8 @@ class TestUCIPipeline(object):

         # 3.predict for fetch_map
         ret = client.predict(feed_dict=feed_dict)
-        print(ret)

         # 4.convert dict to numpy
         result = {"prob": np.array(eval(ret.value[0]))}
-        print(result)
         return result

     def predict_pipeline_http(self, batch_size=1):
@@ -91,7 +90,6 @@ class TestUCIPipeline(object):
         # 2.predict for fetch_map
         url = "http://127.0.0.1:18082/uci/prediction"
         r = requests.post(url=url, data=json.dumps(feed_dict))
-        print(r.json())
         # 3.convert dict to numpy array
         result = {"prob": np.array(eval(r.json()["value"][0]))}
         return result
@@ -104,11 +102,8 @@ class TestUCIPipeline(object):
         )

         # 2.resource check
-        assert count_process_num_on_port(9998) == 1  # gRPC Server
-        assert count_process_num_on_port(18082) == 1  # gRPC gateway
-
-        # 3.keywords check
-        check_keywords_in_server_log("MKLDNN is enabled", filename="stderr.log")
+        assert count_process_num_on_port(9998) == 1, "Error occurred when Paddle Server started"  # gRPC Server
+        assert count_process_num_on_port(18082) == 1, "Error occurred when Paddle Server started"  # gRPC gateway

         # 4.predict by rpc
         result = self.predict_pipeline_rpc(batch_size=1)
@@ -130,8 +125,8 @@ class TestUCIPipeline(object):
         )

         # 2.resource check
-        assert count_process_num_on_port(9998) == 1  # gRPC Server
-        assert count_process_num_on_port(18082) == 1  # gRPC gateway
+        assert count_process_num_on_port(9998) == 1, "Error occurred when Paddle Server started"  # gRPC Server
+        assert count_process_num_on_port(18082) == 1, "Error occurred when Paddle Server started"  # gRPC gateway

         # 3.predict by rpc
         result = self.predict_pipeline_rpc(batch_size=1)
...
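
The RPC path in this test relies on a `PipelineClient` created in a collapsed part of the file; a minimal sketch of that call path is below. The import path and the comma-separated-string feed format are assumptions; the port and the result post-processing come from the test above.

```python
import numpy as np
from paddle_serving_server.pipeline import PipelineClient  # import path is an assumption

client = PipelineClient()
client.connect(['127.0.0.1:9998'])       # gRPC port checked by the test
# The UCI example feeds the 13 features as one comma-separated string (assumed format).
feed_dict = {"x": ", ".join(str(v) for v in np.random.rand(13))}
ret = client.predict(feed_dict=feed_dict)
prob = np.array(eval(ret.value[0]))      # same post-processing as predict_pipeline_rpc
```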
 import os
-import pynvml
 import argparse
 import base64
 import subprocess
 import numpy as np
+import sys

 class ServingTest(object):
     def __init__(self, data_path: str, example_path: str, model_dir: str, client_dir: str):
@@ -13,16 +13,17 @@ class ServingTest(object):
         DATA_PATH: dataset root directory
         py_version: Python version, python3.6~3.8
         """
+        self.serving_log_path = os.environ['SERVING_LOG_PATH']
         code_path = os.path.dirname(os.path.realpath(__file__))
         self.data_path = f"{code_path}/{data_path}/"
         self.example_path = f"{code_path}/{example_path}/"
-        self.py_version = os.environ.get("PYTHON_EXECUTABLE")
+        self.py_version = sys.executable
+        if 'PYTHON_EXECUTABLE' in os.environ:
+            self.py_version = os.environ.get("PYTHON_EXECUTABLE")
         self.model_dir = model_dir
         self.client_config = f"{client_dir}/serving_client_conf.prototxt"

         os.chdir(self.example_path)
-        print("======================cur path======================")
-        print(os.getcwd())
         self.check_model_data_exist()

     def check_model_data_exist(self):
@@ -37,6 +38,9 @@ class ServingTest(object):
                 os.system(f"ln -s {abs_path} {file}")

     def start_server_by_shell(self, cmd: str, sleep: int = 5, err="stderr.log", out="stdout.log", wait=False):
+        err = os.path.join(self.serving_log_path, err)
+        out = os.path.join(self.serving_log_path, out)
         self.err = open(err, "w")
         self.out = open(out, "w")
         p = subprocess.Popen(cmd, shell=True, stdout=self.out, stderr=self.err)
@@ -44,7 +48,6 @@ class ServingTest(object):
         if wait:
             p.wait()
-        print_log([err, out])

     @staticmethod
     def check_result(result_data: dict, truth_data: dict, batch_size=1, delta=1e-3):
@@ -87,20 +90,9 @@ def kill_process(port, sleep_time=0):
     # wait for the occupied port to be released
     os.system(f"sleep {sleep_time}")

-def check_gpu_memory(gpu_id):
-    pynvml.nvmlInit()
-    handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
-    mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
-    mem_used = mem_info.used / 1024 ** 2
-    print(f"GPU-{gpu_id} memory used:", mem_used)
-    return mem_used > 100
-
 def count_process_num_on_port(port):
     command = "netstat -nlp | grep :" + str(port) + " | wc -l"
     count = eval(os.popen(command).read())
-    print(f"port-{port} processes num:", count)
     return count
@@ -140,17 +132,15 @@ def diff_compare(array1, array2):
 def print_log(file_list, iden=""):
+    serving_log_path = os.environ['SERVING_LOG_PATH']
     for file in file_list:
-        print(f"======================{file} {iden}=====================")
-        if os.path.exists(file):
-            with open(file, "r") as f:
+        print(f"======================{file}=====================")
+        file_path = os.path.join(serving_log_path, file)
+        if os.path.exists(file_path):
+            with open(file_path, "r") as f:
                 print(f.read())
-            if file.startswith("log") or file.startswith("PipelineServingLogs"):
-                os.remove(file)
         else:
-            print(f"{file} not exist")
+            pass
         print("======================================================")

 def parse_prototxt(file):
     with open(file, "r") as f:
...
@@ -35,6 +35,7 @@ from paddle_serving_server.env import CONF_HOME
 import signal
 from paddle_serving_server.util import *
 from paddle_serving_server.env_check.run import check_env
+import cmd

 # web_service.py is still used by Pipeline.
@@ -474,6 +475,47 @@ def stop_serving(command: str, port: int=None):
             os.remove(filepath)
         return True

+class Check_Env_Shell(cmd.Cmd):
+    intro = "Welcome to the check env shell. Type help to list commands.\n"
+
+    # ----- basic commands -----
+    def do_help(self, arg):
+        print("\nCommand list\t\tDescription\n"
+              "check_all\t\tCheck Environment of Paddle Inference, Pipeline Serving, C++ Serving. "
+              "If it fails, use the debug command to debug\n"
+              "check_pipeline\t\tCheck Environment of Pipeline Serving. "
+              "If it fails, use the debug command to debug\n"
+              "check_cpp\t\tCheck Environment of C++ Serving. "
+              "If it fails, use the debug command to debug\n"
+              "check_inference\t\tCheck Environment of Paddle Inference. "
+              "If it fails, use the debug command to debug\n"
+              "debug\t\t\tWhen a check fails, open the pytest log to debug\n"
+              "exit\t\t\tExit Check Env Shell\n")
+
+    def do_check_all(self, arg):
+        "Check Environment of Paddle Inference, Pipeline Serving, C++ Serving"
+        check_env("all")
+
+    def do_check_pipeline(self, arg):
+        "Check Environment of Pipeline Serving"
+        check_env("pipeline")
+
+    def do_check_cpp(self, arg):
+        "Check Environment of C++ Serving"
+        check_env("cpp")
+
+    def do_check_inference(self, arg):
+        "Check Environment of Paddle Inference"
+        check_env("inference")
+
+    def do_debug(self, arg):
+        "Open pytest log to debug"
+        check_env("debug")
+
+    def do_exit(self, arg):
+        "Exit Check Env Shell"
+        print('Check Environment Shell Exit')
+        os._exit(0)
+        return True
+
 if __name__ == "__main__":
     # args.device is not used at all.
@@ -491,8 +533,7 @@ if __name__ == "__main__":
         else:
             os._exit(-1)
     elif args.server == "check":
-        check_env()
-        os._exit(0)
+        Check_Env_Shell().cmdloop()
     for single_model_config in args.model:
         if os.path.isdir(single_model_config):
             pass
...
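
With this change, the `check` sub-command now drops into the interactive shell instead of running every check unconditionally (the usual entry point is `python3 -m paddle_serving_server.serve check`; the exact invocation is an assumption here). The shell can also be scripted through the standard `cmd` API, for example:

```python
# One-shot, non-interactive use of the shell class defined above.
shell = Check_Env_Shell()
shell.onecmd("check_pipeline")   # equivalent to typing "check_pipeline" at the prompt
```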
@@ -24,8 +24,11 @@ class SectionLevelFilter(object):
     def filter(self, logRecord):
         return logRecord.levelno in self._levels

 log_dir = "PipelineServingLogs"
+if 'SERVING_LOG_PATH' in os.environ:
+    serving_log_path = os.environ['SERVING_LOG_PATH']
+    log_dir = os.path.join(serving_log_path, log_dir)
 if not os.path.exists(log_dir):
     os.makedirs(log_dir)
...
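
Taken together, `SERVING_LOG_PATH` is the single knob these changes introduce: run.py defaults it to the working directory, the test utilities write stdout/stderr logs under it, and the Pipeline logger now places `PipelineServingLogs` there as well. A minimal sketch of overriding it before launching anything (the directory is illustrative):

```python
import os

# Route all env-check and Pipeline logs to one place before starting servers or tests.
os.environ["SERVING_LOG_PATH"] = "/tmp/serving_env_check_logs"   # hypothetical directory
os.makedirs(os.environ["SERVING_LOG_PATH"], exist_ok=True)
```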