Commit fac829e5 authored by TeslaZhao

Merge branch 'develop' of https://github.com/TeslaZhao/Serving into develop

@@ -700,6 +700,8 @@ Pipeline Serving supports low-precision inference. The precision types supported by CPU, GPU and TensorRT are as follows:
 - fp16
 - int8
+
+When using int8, enable use_calib: True.
 Refer to the [simple_web_service](../../examples/Pipeline/simple_web_service) example.
 ***
...
@@ -489,4 +489,7 @@ Python Pipeline supports low-precision inference. The precision types supported by CPU, GPU and TensorRT are as follows:
 #GPU supports: "fp32"(default), "fp16(TensorRT)", "int8";
 #CPU supports: "fp32"(default), "fp16", "bf16"(mkldnn); not supported: "int8"
 precision: "fp32"
+
+#calib, enable it when using int8
+use_calib: True
 ```
@@ -495,4 +495,7 @@ Python Pipeline supports low-precision inference. The precision types supported by CPU, GPU and TensorRT are as follows:
 #GPU support: "fp32"(default), "fp16(TensorRT)", "int8";
 #CPU support: "fp32"(default), "fp16", "bf16"(mkldnn); not supported: "int8"
 precision: "fp32"
+
+#calib, enable it when using int8
+use_calib: True
 ```
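
For reference, the calibration switch can also be flipped programmatically before launching the service. The snippet below is a minimal sketch, not part of this commit: it assumes `precision` and `use_calib` sit under an op's `local_service_conf` section of a Pipeline `config.yml`, and the op name `uci` is an assumption.

```python
# Sketch: enable int8 + calibration in a Pipeline config.yml (assumed layout).
import yaml

with open("config.yml") as f:                    # hypothetical path
    conf = yaml.safe_load(f)

svc = conf["op"]["uci"]["local_service_conf"]    # op name "uci" is an assumption
svc["precision"] = "int8"
svc["use_calib"] = True                          # required when precision is int8

with open("config.yml", "w") as f:
    yaml.safe_dump(conf, f)
```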
@@ -30,18 +30,40 @@ import pytest
 inference_test_cases = ["test_fit_a_line.py::TestFitALine::test_inference"]
 cpp_test_cases = ["test_fit_a_line.py::TestFitALine::test_cpu", "test_fit_a_line.py::TestFitALine::test_gpu"]
 pipeline_test_cases = ["test_uci_pipeline.py::TestUCIPipeline::test_cpu", "test_uci_pipeline.py::TestUCIPipeline::test_gpu"]
+log_files = ["PipelineServingLogs", "log", "stderr.log", "stdout.log"]

-def run_test_cases(cases_list, case_type):
+def set_serving_log_path():
+    if 'SERVING_LOG_PATH' not in os.environ:
+        serving_log_path = os.path.expanduser(os.getcwd())
+        os.environ['SERVING_LOG_PATH'] = serving_log_path
+
+def mv_log_to_new_dir(dir_path):
+    import shutil
+    if not os.path.exists(dir_path):
+        os.mkdir(dir_path)
+    serving_log_path = os.environ['SERVING_LOG_PATH']
+    for file_name in log_files:
+        file_path = os.path.join(serving_log_path, file_name)
+        if os.path.exists(file_path):
+            shutil.move(file_path, dir_path)
+
+def run_test_cases(cases_list, case_type, is_open_std):
     old_stdout, old_stderr = sys.stdout, sys.stderr
     real_path = os.path.dirname(os.path.realpath(__file__))
     for case in cases_list:
-        sys.stdout = open('/dev/null', 'w')
-        sys.stderr = open('/dev/null', 'w')
+        if is_open_std is False:
+            sys.stdout = open('/dev/null', 'w')
+            sys.stderr = open('/dev/null', 'w')
         args_str = "--disable-warnings " + str(real_path) + "/" + case
         args = args_str.split(" ")
         res = pytest.main(args)
         sys.stdout, sys.stderr = old_stdout, old_stderr
         case_name = case.split('_')[-1]
+        serving_log_path = os.environ['SERVING_LOG_PATH']
+        dir_name = str(case_type) + '_' + case.split(':')[-1]
+        new_dir_path = os.path.join(serving_log_path, dir_name)
+        mv_log_to_new_dir(new_dir_path)
         if res == 0:
             print("{} {} environment running success".format(case_type, case_name))
         elif res == 1:
@@ -49,15 +71,27 @@ def run_test_cases(cases_list, case_type):
             print("{} {} environment running failure. Please refer to https://www.paddlepaddle.org.cn/install/quick?docurl=/documentation/docs/zh/install/pip/linux-pip.html to configure environment".format(case_type, case_name))
             os._exit(0)
         else:
-            print("{} {} environment running failure, if you need this environment, please refer to https://github.com/PaddlePaddle/Serving/blob/HEAD/doc/Compile_CN.md to configure environment".format(case_type, case_name))
+            print("{} {} environment running failure. If you need this environment, please refer to https://github.com/PaddlePaddle/Serving/blob/develop/doc/Install_CN.md".format(case_type, case_name))

-def unset_proxy(key):
-    os.unsetenv(key)
+def unset_env(key):
+    os.environ.pop(key, None)

-def check_env():
+def check_env(mode):
+    set_serving_log_path()
     if 'https_proxy' in os.environ or 'http_proxy' in os.environ:
-        unset_proxy("https_proxy")
-        unset_proxy("http_proxy")
-    run_test_cases(inference_test_cases, "PaddlePaddle")
-    run_test_cases(cpp_test_cases, "C++")
-    run_test_cases(pipeline_test_cases, "Pipeline")
+        unset_env("https_proxy")
+        unset_env("http_proxy")
+    if 'GREP_OPTIONS' in os.environ:
+        unset_env("GREP_OPTIONS")
+    is_open_std = False
+    if mode == "debug":
+        is_open_std = True
+    if mode == "all" or mode == "inference" or mode == "debug":
+        run_test_cases(inference_test_cases, "PaddlePaddle", is_open_std)
+    if mode == "all" or mode == "cpp" or mode == "debug":
+        run_test_cases(cpp_test_cases, "C++", is_open_std)
+    if mode == "all" or mode == "pipeline" or mode == "debug":
+        run_test_cases(pipeline_test_cases, "Pipeline", is_open_std)
+
+if __name__ == '__main__':
+    check_env("debug")
@@ -20,12 +20,14 @@ class TestFitALine(object):
         serving_util.check_model_data_exist()
         self.get_truth_val_by_inference(self)
         self.serving_util = serving_util
+        self.serving_util.release('service')
+        kill_process(9494)

     def teardown_method(self):
         print_log(["stderr.log", "stdout.log",
-                   "log/serving.ERROR", "PipelineServingLogs/pipeline.log"], iden="after predict")
+                   "log/serving.ERROR", "PipelineServingLogs/pipeline.log"])
         kill_process(9494)
-        self.serving_util.release()
+        self.serving_util.release('service')

     def get_truth_val_by_inference(self):
         try:
@@ -58,11 +60,9 @@ class TestFitALine(object):
             output_data = output_handle.copy_to_cpu()
             output_data_dict[output_data_name] = output_data
         # convert to the same format of Serving output
-        print(output_data_dict)
         output_data_dict["price"] = output_data_dict["fc_0.tmp_1"]
         del output_data_dict["fc_0.tmp_1"]
         self.truth_val = output_data_dict
-        print(self.truth_val, self.truth_val["price"].shape)

     def predict_brpc(self, batch_size=1):
         data = np.array(
@@ -74,7 +74,6 @@ class TestFitALine(object):
         fetch_list = client.get_fetch_names()
         fetch_map = client.predict(
             feed={"x": data}, fetch=fetch_list, batch=True)
-        print(fetch_map)
         return fetch_map

     def predict_http(self, batch_size=1):
@@ -87,12 +86,12 @@ class TestFitALine(object):
         fetch_list = client.get_fetch_names()
         fetch_map = client.predict(
             feed={"x": data}, fetch=fetch_list, batch=True)
-        print(fetch_map)
         output_dict = self.serving_util.parse_http_result(fetch_map)
         return output_dict

     def test_inference(self):
-        assert self.truth_val['price'].size != 0
+        self.serving_util.start_server_by_shell(cmd="", sleep=1)
+        assert self.truth_val['price'].size != 0, "The result of inference is empty"

     def test_cpu(self):
@@ -103,7 +102,7 @@ class TestFitALine(object):
         )

         # 2.resource check
-        assert count_process_num_on_port(9494) == 1
+        assert count_process_num_on_port(9494) == 1, "Error occurred when Paddle Server started"

         # 4.predict by brpc
         # batch_size 1
@@ -123,7 +122,7 @@ class TestFitALine(object):
         )

         # 2.resource check
-        assert count_process_num_on_port(9494) == 1
+        assert count_process_num_on_port(9494) == 1, "Error occurred when Paddle Server started"

         # 4.predict by brpc
         # batch_size 1
...
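
For context, `predict_brpc` above follows the standard `paddle_serving_client` flow; a standalone sketch of the equivalent call is below. The client config path and the 13-feature input are assumptions based on the fit-a-line example; the port and the feed/fetch handling come from the test itself.

```python
import numpy as np
from paddle_serving_client import Client   # same client API the test exercises

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")  # hypothetical path
client.connect(["127.0.0.1:9494"])              # port used by TestFitALine above
data = np.random.rand(1, 13).astype("float32")  # fit-a-line takes 13 features per sample
fetch_map = client.predict(feed={"x": data}, fetch=client.get_fetch_names(), batch=True)
print(fetch_map["price"])
```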
@@ -21,12 +21,14 @@ class TestUCIPipeline(object):
         serving_util.check_model_data_exist()
         self.get_truth_val_by_inference(self)
         self.serving_util = serving_util
+        self.serving_util.release('web_service')

     def teardown_method(self):
         print_log(["stderr.log", "stdout.log",
-                   "log/serving.ERROR", "PipelineServingLogs/pipeline.log"], iden="after predict")
+                   "PipelineServingLogs/pipeline.log"])
         kill_process(9998)
+        kill_process(18082)
-        self.serving_util.release()
+        self.serving_util.release('web_service')

     def get_truth_val_by_inference(self):
         try:
@@ -62,7 +64,6 @@ class TestUCIPipeline(object):
         output_data_dict["prob"] = output_data_dict["fc_0.tmp_1"]
         del output_data_dict["fc_0.tmp_1"]
         self.truth_val = output_data_dict
-        print(self.truth_val, self.truth_val["prob"].shape)

     def predict_pipeline_rpc(self, batch_size=1):
         # 1.prepare feed_data
@@ -74,10 +75,8 @@ class TestUCIPipeline(object):

         # 3.predict for fetch_map
         ret = client.predict(feed_dict=feed_dict)
-        print(ret)

         # 4.convert dict to numpy
         result = {"prob": np.array(eval(ret.value[0]))}
-        print(result)
         return result

     def predict_pipeline_http(self, batch_size=1):
@@ -91,7 +90,6 @@ class TestUCIPipeline(object):
         # 2.predict for fetch_map
         url = "http://127.0.0.1:18082/uci/prediction"
         r = requests.post(url=url, data=json.dumps(feed_dict))
-        print(r.json())
         # 3.convert dict to numpy array
         result = {"prob": np.array(eval(r.json()["value"][0]))}
         return result
@@ -104,11 +102,8 @@ class TestUCIPipeline(object):
         )

         # 2.resource check
-        assert count_process_num_on_port(9998) == 1  # gRPC Server
-        assert count_process_num_on_port(18082) == 1  # gRPC gateway
-
-        # 3.keywords check
-        check_keywords_in_server_log("MKLDNN is enabled", filename="stderr.log")
+        assert count_process_num_on_port(9998) == 1, "Error occurred when Paddle Server started"  # gRPC Server
+        assert count_process_num_on_port(18082) == 1, "Error occurred when Paddle Server started"  # gRPC gateway

         # 4.predict by rpc
         result = self.predict_pipeline_rpc(batch_size=1)
@@ -130,8 +125,8 @@ class TestUCIPipeline(object):
         )

         # 2.resource check
-        assert count_process_num_on_port(9998) == 1  # gRPC Server
-        assert count_process_num_on_port(18082) == 1  # gRPC gateway
+        assert count_process_num_on_port(9998) == 1, "Error occurred when Paddle Server started"  # gRPC Server
+        assert count_process_num_on_port(18082) == 1, "Error occurred when Paddle Server started"  # gRPC gateway

         # 3.predict by rpc
         result = self.predict_pipeline_rpc(batch_size=1)
...
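
The RPC path in this test relies on a `PipelineClient` created in a collapsed part of the file; a minimal sketch of that call path is below. The import path and the comma-separated-string feed format are assumptions; the port and the result post-processing come from the test above.

```python
import numpy as np
from paddle_serving_server.pipeline import PipelineClient  # import path is an assumption

client = PipelineClient()
client.connect(['127.0.0.1:9998'])       # gRPC port checked by the test
# The UCI example feeds the 13 features as one comma-separated string (assumed format).
feed_dict = {"x": ", ".join(str(v) for v in np.random.rand(13))}
ret = client.predict(feed_dict=feed_dict)
prob = np.array(eval(ret.value[0]))      # same post-processing as predict_pipeline_rpc
```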
 import os
-import pynvml
 import argparse
 import base64
 import subprocess
 import numpy as np
+import sys

 class ServingTest(object):
     def __init__(self, data_path: str, example_path: str, model_dir: str, client_dir: str):
@@ -13,16 +13,17 @@ class ServingTest(object):
         DATA_PATH: dataset root directory
         py_version: Python version, python3.6~3.8
         """
+        self.serving_log_path = os.environ['SERVING_LOG_PATH']
         code_path = os.path.dirname(os.path.realpath(__file__))
         self.data_path = f"{code_path}/{data_path}/"
         self.example_path = f"{code_path}/{example_path}/"
-        self.py_version = os.environ.get("PYTHON_EXECUTABLE")
+        self.py_version = sys.executable
+        if 'PYTHON_EXECUTABLE' in os.environ:
+            self.py_version = os.environ.get("PYTHON_EXECUTABLE")
         self.model_dir = model_dir
         self.client_config = f"{client_dir}/serving_client_conf.prototxt"

         os.chdir(self.example_path)
-        print("======================cur path======================")
-        print(os.getcwd())
         self.check_model_data_exist()

     def check_model_data_exist(self):
@@ -37,6 +38,9 @@ class ServingTest(object):
                 os.system(f"ln -s {abs_path} {file}")

     def start_server_by_shell(self, cmd: str, sleep: int = 5, err="stderr.log", out="stdout.log", wait=False):
+        err = os.path.join(self.serving_log_path, err)
+        out = os.path.join(self.serving_log_path, out)
         self.err = open(err, "w")
         self.out = open(out, "w")
         p = subprocess.Popen(cmd, shell=True, stdout=self.out, stderr=self.err)
@@ -44,7 +48,6 @@ class ServingTest(object):
         if wait:
             p.wait()
-        print_log([err, out])

     @staticmethod
     def check_result(result_data: dict, truth_data: dict, batch_size=1, delta=1e-3):
@@ -87,20 +90,9 @@ def kill_process(port, sleep_time=0):
     # wait for the occupied port to be released
     os.system(f"sleep {sleep_time}")

-def check_gpu_memory(gpu_id):
-    pynvml.nvmlInit()
-    handle = pynvml.nvmlDeviceGetHandleByIndex(gpu_id)
-    mem_info = pynvml.nvmlDeviceGetMemoryInfo(handle)
-    mem_used = mem_info.used / 1024 ** 2
-    print(f"GPU-{gpu_id} memory used:", mem_used)
-    return mem_used > 100
-
 def count_process_num_on_port(port):
     command = "netstat -nlp | grep :" + str(port) + " | wc -l"
     count = eval(os.popen(command).read())
-    print(f"port-{port} processes num:", count)
     return count
@@ -140,17 +132,15 @@ def diff_compare(array1, array2):
 def print_log(file_list, iden=""):
+    serving_log_path = os.environ['SERVING_LOG_PATH']
     for file in file_list:
-        print(f"======================{file} {iden}=====================")
-        if os.path.exists(file):
-            with open(file, "r") as f:
+        print(f"======================{file}=====================")
+        file_path = os.path.join(serving_log_path, file)
+        if os.path.exists(file_path):
+            with open(file_path, "r") as f:
                 print(f.read())
-            if file.startswith("log") or file.startswith("PipelineServingLogs"):
-                os.remove(file)
         else:
-            print(f"{file} not exist")
+            pass
         print("======================================================")

 def parse_prototxt(file):
     with open(file, "r") as f:
...
@@ -35,6 +35,7 @@ from paddle_serving_server.env import CONF_HOME
 import signal
 from paddle_serving_server.util import *
 from paddle_serving_server.env_check.run import check_env
+import cmd

 # web_service.py is still used by Pipeline.
@@ -474,6 +475,47 @@ def stop_serving(command: str, port: int=None):
             os.remove(filepath)
         return True

+class Check_Env_Shell(cmd.Cmd):
+    intro = "Welcome to the check env shell. Type help to list commands.\n"
+
+    # ----- basic commands -----
+    def do_help(self, arg):
+        print("\nCommand list\t\tDescription\n"
+              "check_all\t\tCheck Environment of Paddle Inference, Pipeline Serving, C++ Serving. "
+              "If it fails, use the debug command to debug\n"
+              "check_pipeline\t\tCheck Environment of Pipeline Serving. "
+              "If it fails, use the debug command to debug\n"
+              "check_cpp\t\tCheck Environment of C++ Serving. "
+              "If it fails, use the debug command to debug\n"
+              "check_inference\t\tCheck Environment of Paddle Inference. "
+              "If it fails, use the debug command to debug\n"
+              "debug\t\t\tWhen a check fails, open the pytest log to debug\n"
+              "exit\t\t\tExit Check Env Shell\n")
+
+    def do_check_all(self, arg):
+        "Check Environment of Paddle Inference, Pipeline Serving, C++ Serving"
+        check_env("all")
+
+    def do_check_pipeline(self, arg):
+        "Check Environment of Pipeline Serving"
+        check_env("pipeline")
+
+    def do_check_cpp(self, arg):
+        "Check Environment of C++ Serving"
+        check_env("cpp")
+
+    def do_check_inference(self, arg):
+        "Check Environment of Paddle Inference"
+        check_env("inference")
+
+    def do_debug(self, arg):
+        "Open pytest log to debug"
+        check_env("debug")
+
+    def do_exit(self, arg):
+        "Exit Check Env Shell"
+        print('Check Environment Shell Exit')
+        os._exit(0)
+        return True
+
 if __name__ == "__main__":
     # args.device is not used at all.
@@ -491,8 +533,7 @@ if __name__ == "__main__":
         else:
             os._exit(-1)
     elif args.server == "check":
-        check_env()
-        os._exit(0)
+        Check_Env_Shell().cmdloop()
     for single_model_config in args.model:
         if os.path.isdir(single_model_config):
             pass
...
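
With this change, the `check` sub-command now drops into the interactive shell instead of running every check unconditionally (the usual entry point is `python3 -m paddle_serving_server.serve check`; the exact invocation is an assumption here). The shell can also be scripted through the standard `cmd` API, for example:

```python
# One-shot, non-interactive use of the shell class defined above.
shell = Check_Env_Shell()
shell.onecmd("check_pipeline")   # equivalent to typing "check_pipeline" at the prompt
```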
@@ -24,8 +24,11 @@ class SectionLevelFilter(object):
     def filter(self, logRecord):
         return logRecord.levelno in self._levels

 log_dir = "PipelineServingLogs"
+if 'SERVING_LOG_PATH' in os.environ:
+    serving_log_path = os.environ['SERVING_LOG_PATH']
+    log_dir = os.path.join(serving_log_path, log_dir)
 if not os.path.exists(log_dir):
     os.makedirs(log_dir)
...
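
Taken together, `SERVING_LOG_PATH` is the single knob these changes introduce: run.py defaults it to the working directory, the test utilities write stdout/stderr logs under it, and the Pipeline logger now places `PipelineServingLogs` there as well. A minimal sketch of overriding it before launching anything (the directory is illustrative):

```python
import os

# Route all env-check and Pipeline logs to one place before starting servers or tests.
os.environ["SERVING_LOG_PATH"] = "/tmp/serving_env_check_logs"   # hypothetical directory
os.makedirs(os.environ["SERVING_LOG_PATH"], exist_ok=True)
```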