diff --git a/doc/INFERENCE_TO_SERVING.md b/doc/INFERENCE_TO_SERVING.md
index e10ee976fb455c8cc49a0d5fa44ed4cc1f300ba9..719aa63c0a9b408d6bff628e7be4f35dfb49c5c8 100644
--- a/doc/INFERENCE_TO_SERVING.md
+++ b/doc/INFERENCE_TO_SERVING.md
@@ -24,13 +24,13 @@ inference_model_dir = "your_inference_model"
 serving_client_dir = "serving_client_dir"
 serving_server_dir = "serving_server_dir"
 feed_var_names, fetch_var_names = inference_model_to_serving(
-        inference_model_dir, serving_client_dir, serving_server_dir)
+        inference_model_dir, serving_server_dir, serving_client_dir)
 ```
 if your model file and params file are both standalone, please use the following api.
 ```
 feed_var_names, fetch_var_names = inference_model_to_serving(
-        inference_model_dir, serving_client_dir, serving_server_dir,
+        inference_model_dir, serving_server_dir, serving_client_dir,
         model_filename="model", params_filename="params")
 ```
diff --git a/doc/INFERENCE_TO_SERVING_CN.md b/doc/INFERENCE_TO_SERVING_CN.md
index e7e909ac04be3b1a0885b3390d99a153dfbd170e..5d783f25a3f367baa94d471e50f227d9e6f733d1 100644
--- a/doc/INFERENCE_TO_SERVING_CN.md
+++ b/doc/INFERENCE_TO_SERVING_CN.md
@@ -23,11 +23,11 @@ inference_model_dir = "your_inference_model"
 serving_client_dir = "serving_client_dir"
 serving_server_dir = "serving_server_dir"
 feed_var_names, fetch_var_names = inference_model_to_serving(
-        inference_model_dir, serving_client_dir, serving_server_dir)
+        inference_model_dir, serving_server_dir, serving_client_dir)
 ```
 如果模型中有模型描述文件`model_filename` 和 模型参数文件`params_filename`,那么请用
 ```
 feed_var_names, fetch_var_names = inference_model_to_serving(
-        inference_model_dir, serving_client_dir, serving_server_dir,
+        inference_model_dir, serving_server_dir, serving_client_dir,
         model_filename="model", params_filename="params")
 ```
diff --git a/java/examples/pom.xml b/java/examples/pom.xml
index b6c8bc424f5d528d74a4a45828fd9b5e7e5d008e..745e8d4f0f3d47e488f99bd7fe73ed6a9f887373 100644
--- a/java/examples/pom.xml
+++ b/java/examples/pom.xml
@@ -75,7 +75,7 @@
     <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
-     <version>4.11</version>
+     <version>4.13.1</version>
      <scope>test</scope>
     </dependency>
diff --git a/python/examples/bert/bert_client.py b/python/examples/bert/bert_client.py
index b378f9f791bce4abfe79b068c1875d9b66f1791c..4111589b3ddfde980e415fbac1a5f38f4abafada 100644
--- a/python/examples/bert/bert_client.py
+++ b/python/examples/bert/bert_client.py
@@ -33,5 +33,5 @@ for line in sys.stdin:
     for key in feed_dict.keys():
         feed_dict[key] = np.array(feed_dict[key]).reshape((128, 1))
     #print(feed_dict)
-    result = client.predict(feed=feed_dict, fetch=fetch, batch=True)
+    result = client.predict(feed=feed_dict, fetch=fetch, batch=False)
     print(result)
diff --git a/python/examples/bert/bert_web_service.py b/python/examples/bert/bert_web_service.py
index e1260dd1c2942fc806f6fd6b2199feb9467a8c2b..7cd34fb99e0ecebbf2f6bec47e9c9d163ac3a44c 100644
--- a/python/examples/bert/bert_web_service.py
+++ b/python/examples/bert/bert_web_service.py
@@ -29,7 +29,7 @@ class BertService(WebService):
 
     def preprocess(self, feed=[], fetch=[]):
         feed_res = []
-        is_batch = True
+        is_batch = False
         for ins in feed:
             feed_dict = self.reader.process(ins["words"].encode("utf-8"))
             for key in feed_dict.keys():
diff --git a/python/examples/faster_rcnn_model/benchmark.py b/python/examples/faster_rcnn_model/benchmark.py
new file mode 100755
index 0000000000000000000000000000000000000000..1930312341c0dac55e43b36c946c6e174a472b65
--- /dev/null
+++ b/python/examples/faster_rcnn_model/benchmark.py
@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+from __future__ import unicode_literals, absolute_import
+import os
+import sys
+import time
+import json
+import requests
+from paddle_serving_client import Client
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args, show_latency
+from paddle_serving_app.reader import ChineseBertReader
+
+from paddle_serving_app.reader import *
+import numpy as np
+
+
+
+args = benchmark_args()
+
+
+def single_func(idx, resource):
+    img = "./000000570688.jpg"
+    profile_flags = False
+    latency_flags = False
+    if os.getenv("FLAGS_profile_client"):
+        profile_flags = True
+    if os.getenv("FLAGS_serving_latency"):
+        latency_flags = True
+        latency_list = []
+
+    if args.request == "rpc":
+        preprocess = Sequential([
+            File2Image(), BGR2RGB(), Div(255.0),
+            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
+            Resize(640, 640), Transpose((2, 0, 1))
+        ])
+
+        postprocess = RCNNPostprocess("label_list.txt", "output")
+        client = Client()
+
+        client.load_client_config(args.model)
+        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
+
+        start = time.time()
+        for i in range(turns):
+            if args.batch_size >= 1:
+                l_start = time.time()
+                feed_batch = []
+                b_start = time.time()
+                im = preprocess(img)
+                for bi in range(args.batch_size):
+                    print("1111batch")
+                    print(bi)
+                    feed_batch.append({"image": im,
+                                       "im_info": np.array(list(im.shape[1:]) + [1.0]),
+                                       "im_shape": np.array(list(im.shape[1:]) + [1.0])})
+                # im = preprocess(img)
+                b_end = time.time()
+
+                if profile_flags:
+                    sys.stderr.write(
+                        "PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}\n".format(
+                            os.getpid(),
+                            int(round(b_start * 1000000)),
+                            int(round(b_end * 1000000))))
+                #result = client.predict(feed=feed_batch, fetch=fetch)
+                fetch_map = client.predict(
+                    feed=feed_batch,
+                    fetch=["multiclass_nms"])
+                fetch_map["image"] = img
+                postprocess(fetch_map)
+
+                l_end = time.time()
+                if latency_flags:
+                    latency_list.append(l_end * 1000 - l_start * 1000)
+            else:
+                print("unsupport batch size {}".format(args.batch_size))
+    else:
+        raise ValueError("not implemented {} request".format(args.request))
+    end = time.time()
+    if latency_flags:
+        return [[end - start], latency_list]
+    else:
+        return [[end - start]]
+
+
+if __name__ == '__main__':
+    multi_thread_runner = MultiThreadRunner()
+    endpoint_list = [
+        "127.0.0.1:7777"
+    ]
+    turns = 10
+    start = time.time()
+    result = multi_thread_runner.run(
+        single_func, args.thread, {"endpoint": endpoint_list, "turns": turns})
+    end = time.time()
+    total_cost = end - start
+
+    avg_cost = 0
+    for i in range(args.thread):
+        avg_cost += result[0][i]
+    avg_cost = avg_cost / args.thread
+
+    print("total cost: {}s".format(total_cost))
+    print("each thread cost: {}s. ".format(avg_cost))
+    print("qps: {}samples/s".format(args.batch_size * args.thread * turns /
+                                    total_cost))
+    if os.getenv("FLAGS_serving_latency"):
+        show_latency(result[1])
diff --git a/python/examples/faster_rcnn_model/benchmark.sh b/python/examples/faster_rcnn_model/benchmark.sh
new file mode 100755
index 0000000000000000000000000000000000000000..5706fd03c7a0e266bcac18b0544c64f327cbbe9b
--- /dev/null
+++ b/python/examples/faster_rcnn_model/benchmark.sh
@@ -0,0 +1,52 @@
+rm profile_log*
+export CUDA_VISIBLE_DEVICES=0
+export FLAGS_profile_server=1
+export FLAGS_profile_client=1
+export FLAGS_serving_latency=1
+
+gpu_id=0
+#save cpu and gpu utilization log
+if [ -d utilization ];then
+    rm -rf utilization
+else
+    mkdir utilization
+fi
+#start server
+$PYTHONROOT/bin/python3 -m paddle_serving_server_gpu.serve --model $1 --port 7777 --thread 4 --gpu_ids 0 --ir_optim > elog 2>&1 &
+sleep 5
+
+#warm up
+$PYTHONROOT/bin/python3 benchmark.py --thread 4 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
+for thread_num in 1 4 8 16
+do
+for batch_size in 1
+do
+    job_bt=`date '+%Y%m%d%H%M%S'`
+    nvidia-smi --id=0 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
+    nvidia-smi --id=0 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+    gpu_memory_pid=$!
+    $PYTHONROOT/bin/python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+    kill ${gpu_memory_pid}
+    kill `ps -ef|grep used_memory|awk '{print $2}'`
+    echo "model_name:" $1
+    echo "thread_num:" $thread_num
+    echo "batch_size:" $batch_size
+    echo "=================Done===================="
+    echo "model_name:$1" >> profile_log_$1
+    echo "batch_size:$batch_size" >> profile_log_$1
+    $PYTHONROOT/bin/python3 cpu_utilization.py >> profile_log_$1
+    job_et=`date '+%Y%m%d%H%M%S'`
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$1
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$1
+    rm -rf gpu_use.log gpu_utilization.log
+    $PYTHONROOT/bin/python3 ../util/show_profile.py profile $thread_num >> profile_log_$1
+    tail -n 8 profile >> profile_log_$1
+    echo "" >> profile_log_$1
+done
+done
+
+#Divided log
+awk 'BEGIN{RS="\n\n"}{i++}{print > "bert_log_"i}' profile_log_$1
+mkdir bert_log && mv bert_log_* bert_log
+ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9
diff --git a/python/examples/imdb/text_classify_service.py b/python/examples/imdb/text_classify_service.py
index 1d292194f963466d3e53859dc9e4c6da1789ea20..ca1e26002baf0284f282add235706080f7902c33 100755
--- a/python/examples/imdb/text_classify_service.py
+++ b/python/examples/imdb/text_classify_service.py
@@ -29,13 +29,14 @@ class IMDBService(WebService):
     def preprocess(self, feed={}, fetch=[]):
         feed_batch = []
         words_lod = [0]
+        is_batch = True
         for ins in feed:
             words = self.dataset.get_words_only(ins["words"])
             words = np.array(words).reshape(len(words), 1)
             words_lod.append(words_lod[-1] + len(words))
             feed_batch.append(words)
         feed = {"words": np.concatenate(feed_batch), "words.lod": words_lod}
-        return feed, fetch
+        return feed, fetch, is_batch
 
 
 imdb_service = IMDBService(name="imdb")
diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py
index 2b5d8a64962c5fea8c93bde3c8b9a95c1ffd97e3..3a314dc5f6690146d472484b5520624074c14ce6 100644
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -23,13 +23,13 @@ import paddle_serving_server as paddle_serving_server
 from .version import serving_server_version
 from contextlib import closing
 import collections
-import fcntl
-
 import shutil
 import numpy as np
 import grpc
 from .proto import multi_lang_general_model_service_pb2
 import sys
+if sys.platform.startswith('win') is False:
+    import fcntl
 sys.path.append(
     os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
 from .proto import multi_lang_general_model_service_pb2_grpc
diff --git a/python/setup.py.app.in b/python/setup.py.app.in
index 1a06b0d352c1da4cdd09f74cb900853d4016afa8..8480ed8471e60c7e7eb8f14bf11a1cc2d23204cf 100644
--- a/python/setup.py.app.in
+++ b/python/setup.py.app.in
@@ -32,8 +32,8 @@ if '${PACK}' == 'ON':
 
 
 REQUIRED_PACKAGES = [
-    'six >= 1.10.0', 'sentencepiece', 'opencv-python<=4.2.0.32', 'pillow',
-    'shapely<=1.6.1', 'pyclipper'
+    'six >= 1.10.0', 'sentencepiece<=0.1.92', 'opencv-python<=4.2.0.32', 'pillow',
+    'pyclipper'
 ]
 
 packages=['paddle_serving_app',
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a1eb26e5cec23a8f76a50be48608f8a4532c6993
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+sphinx==2.1.0
+mistune
+sphinx_rtd_theme
+paddlepaddle>=1.8.4
+shapely<=1.6.1
diff --git a/doc/requirements.txt b/requirements_win.txt
similarity index 58%
rename from doc/requirements.txt
rename to requirements_win.txt
index 1560ebc5f9d74fbae773ac5bc45c5b42b044287a..a202642af70fec0c57642cc53b8ead82a5a7c7f1 100644
--- a/doc/requirements.txt
+++ b/requirements_win.txt
@@ -1,4 +1,5 @@
 sphinx==2.1.0
 mistune
 sphinx_rtd_theme
-paddlepaddle>=1.6
+paddlepaddle>=1.8.4
+shapely
diff --git a/tools/Dockerfile.cuda10.1-cudnn7-trt6.devel b/tools/Dockerfile.cuda10.1-cudnn7-trt6.devel
new file mode 100644
index 0000000000000000000000000000000000000000..c6e1c1e050505e631493efe21732a98abd1bd52e
--- /dev/null
+++ b/tools/Dockerfile.cuda10.1-cudnn7-trt6.devel
@@ -0,0 +1,60 @@
+FROM nvidia/cuda:10.1-cudnn7-devel-centos7
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" \
+    && yum -y install wget >/dev/null \
+    && yum -y install gcc gcc-c++ make glibc-static which \
+    && yum -y install git openssl-devel curl-devel bzip2-devel python-devel \
+    && yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false \
+    && yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
+    && yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" && \
+    wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-*
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" && \
+    wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
+    && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
+    && mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \
+    && echo 'export PATH=/usr/local/cmake3.2.0/bin:$PATH' >> /root/.bashrc \
+    && rm cmake-3.2.0-Linux-x86_64.tar.gz
+
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" && \
+    wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
+    && tar xzf go1.14.linux-amd64.tar.gz \
+    && mv go /usr/local/go \
+    && echo 'export GOROOT=/usr/local/go' >> /root/.bashrc \
+    && echo 'export PATH=/usr/local/go/bin:$PATH' >> /root/.bashrc \
+    && rm go1.14.linux-amd64.tar.gz
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" && \
+    yum -y install python-devel sqlite-devel \
+    && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
+    && python get-pip.py >/dev/null \
+    && rm get-pip.py
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" && \
+    yum install -y python3 python3-devel \
+    && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
+    && yum clean all
+
+RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
+    && echo "export LANG=en_US.utf8" >> /root/.bashrc \
+    && echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
+
+RUN wget https://paddle-serving.bj.bcebos.com/tools/TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
+    && tar -xzf TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
+    && mv TensorRT-6.0.1.5 /usr/local/ \
+    && rm TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
+    && echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/TensorRT-6.0.1.5/lib/' >> /root/.bashrc
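
Note on the `preprocess` changes in the bert and imdb examples above: `preprocess` now returns three values, `feed, fetch, is_batch`, instead of two, with `is_batch` indicating whether the returned feed is already batched. Below is a minimal sketch of that convention, not part of this patch; the service name `DemoService` and the feed variable `"x"` are hypothetical, and equal-length inputs are assumed.

```python
# Sketch only: illustrates the (feed, fetch, is_batch) return convention
# that the bert/imdb web services in this patch follow.
# DemoService and the "x" feed variable are hypothetical.
import numpy as np
from paddle_serving_server.web_service import WebService


class DemoService(WebService):
    def preprocess(self, feed=[], fetch=[]):
        # Stack each instance's input into one batched ndarray (as the imdb
        # example does) and report that the feed is already batched.
        # Assumes every instance's "x" has the same length.
        is_batch = True
        batch = [np.array(ins["x"]).reshape(1, -1) for ins in feed]
        feed = {"x": np.concatenate(batch, axis=0)}
        return feed, fetch, is_batch
```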