diff --git a/doc/INFERENCE_TO_SERVING.md b/doc/INFERENCE_TO_SERVING.md
index e10ee976fb455c8cc49a0d5fa44ed4cc1f300ba9..719aa63c0a9b408d6bff628e7be4f35dfb49c5c8 100644
--- a/doc/INFERENCE_TO_SERVING.md
+++ b/doc/INFERENCE_TO_SERVING.md
@@ -24,13 +24,13 @@ inference_model_dir = "your_inference_model"
serving_client_dir = "serving_client_dir"
serving_server_dir = "serving_server_dir"
feed_var_names, fetch_var_names = inference_model_to_serving(
- inference_model_dir, serving_client_dir, serving_server_dir)
+ inference_model_dir, serving_server_dir, serving_client_dir)
```
If your model file and params file are each saved as a standalone file, please use the following API.
```
feed_var_names, fetch_var_names = inference_model_to_serving(
- inference_model_dir, serving_client_dir, serving_server_dir,
+ inference_model_dir, serving_server_dir, serving_client_dir,
model_filename="model", params_filename="params")
```
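+
+To avoid mixing up the positional arguments, the same call can also be written with keyword arguments. This is a minimal sketch that assumes the parameter names `dirname`, `serving_server` and `serving_client`; please check the signature of `inference_model_to_serving` in your installed version.
+```
+feed_var_names, fetch_var_names = inference_model_to_serving(
+ dirname=inference_model_dir,
+ serving_server=serving_server_dir,
+ serving_client=serving_client_dir)
+```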
diff --git a/doc/INFERENCE_TO_SERVING_CN.md b/doc/INFERENCE_TO_SERVING_CN.md
index e7e909ac04be3b1a0885b3390d99a153dfbd170e..5d783f25a3f367baa94d471e50f227d9e6f733d1 100644
--- a/doc/INFERENCE_TO_SERVING_CN.md
+++ b/doc/INFERENCE_TO_SERVING_CN.md
@@ -23,11 +23,11 @@ inference_model_dir = "your_inference_model"
serving_client_dir = "serving_client_dir"
serving_server_dir = "serving_server_dir"
feed_var_names, fetch_var_names = inference_model_to_serving(
- inference_model_dir, serving_client_dir, serving_server_dir)
+ inference_model_dir, serving_server_dir, serving_client_dir)
```
If the model has a model description file `model_filename` and a model parameters file `params_filename`, please use
```
feed_var_names, fetch_var_names = inference_model_to_serving(
- inference_model_dir, serving_client_dir, serving_server_dir,
+ inference_model_dir, serving_server_dir, serving_client_dir,
model_filename="model", params_filename="params")
```
diff --git a/java/examples/pom.xml b/java/examples/pom.xml
index b6c8bc424f5d528d74a4a45828fd9b5e7e5d008e..745e8d4f0f3d47e488f99bd7fe73ed6a9f887373 100644
--- a/java/examples/pom.xml
+++ b/java/examples/pom.xml
@@ -75,7 +75,7 @@
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>4.11</version>
+ <version>4.13.1</version>
<scope>test</scope>
diff --git a/python/examples/bert/bert_client.py b/python/examples/bert/bert_client.py
index b378f9f791bce4abfe79b068c1875d9b66f1791c..4111589b3ddfde980e415fbac1a5f38f4abafada 100644
--- a/python/examples/bert/bert_client.py
+++ b/python/examples/bert/bert_client.py
@@ -33,5 +33,5 @@ for line in sys.stdin:
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape((128, 1))
#print(feed_dict)
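+ # batch=False: each feed_dict holds a single sample with no leading batch dimension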
- result = client.predict(feed=feed_dict, fetch=fetch, batch=True)
+ result = client.predict(feed=feed_dict, fetch=fetch, batch=False)
print(result)
diff --git a/python/examples/bert/bert_web_service.py b/python/examples/bert/bert_web_service.py
index e1260dd1c2942fc806f6fd6b2199feb9467a8c2b..7cd34fb99e0ecebbf2f6bec47e9c9d163ac3a44c 100644
--- a/python/examples/bert/bert_web_service.py
+++ b/python/examples/bert/bert_web_service.py
@@ -29,7 +29,7 @@ class BertService(WebService):
def preprocess(self, feed=[], fetch=[]):
feed_res = []
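+ # is_batch=False: each feed dict built below holds a single sample without a batch dimension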
- is_batch = True
+ is_batch = False
for ins in feed:
feed_dict = self.reader.process(ins["words"].encode("utf-8"))
for key in feed_dict.keys():
diff --git a/python/examples/faster_rcnn_model/benchmark.py b/python/examples/faster_rcnn_model/benchmark.py
new file mode 100755
index 0000000000000000000000000000000000000000..1930312341c0dac55e43b36c946c6e174a472b65
--- /dev/null
+++ b/python/examples/faster_rcnn_model/benchmark.py
@@ -0,0 +1,125 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+
+from __future__ import unicode_literals, absolute_import
+import os
+import sys
+import time
+from paddle_serving_client import Client
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args, show_latency
+from paddle_serving_app.reader import *
+import numpy as np
+
+
+
+args = benchmark_args()
+
+
+def single_func(idx, resource):
+ img = "./000000570688.jpg"
+ profile_flags = False
+ latency_flags = False
+ if os.getenv("FLAGS_profile_client"):
+ profile_flags = True
+ if os.getenv("FLAGS_serving_latency"):
+ latency_flags = True
+ latency_list = []
+
+ if args.request == "rpc":
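+ # image preprocessing: read file, BGR->RGB, scale to [0,1], normalize, resize to 640x640, HWC->CHW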
+ preprocess = Sequential([
+ File2Image(), BGR2RGB(), Div(255.0),
+ Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
+ Resize(640, 640), Transpose((2, 0, 1))
+ ])
+
+ postprocess = RCNNPostprocess("label_list.txt", "output")
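+ # one RPC client per thread; endpoints are assigned round-robin by thread index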
+ client = Client()
+
+ client.load_client_config(args.model)
+ client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
+
+ turns = resource["turns"]
+ start = time.time()
+ for i in range(turns):
+ if args.batch_size >= 1:
+ l_start = time.time()
+ feed_batch = []
+ b_start = time.time()
+ im = preprocess(img)
+ for bi in range(args.batch_size):
+ # im_info / im_shape carry [height, width, 1.0] of the preprocessed image
+ feed_batch.append({"image": im,
+ "im_info": np.array(list(im.shape[1:]) + [1.0]),
+ "im_shape": np.array(list(im.shape[1:]) + [1.0])})
+ b_end = time.time()
+
+ if profile_flags:
+ sys.stderr.write(
+ "PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}\n".format(
+ os.getpid(),
+ int(round(b_start * 1000000)),
+ int(round(b_end * 1000000))))
+ fetch_map = client.predict(
+ feed=feed_batch,
+ fetch=["multiclass_nms"])
+ fetch_map["image"] = img
+ postprocess(fetch_map)
+
+ l_end = time.time()
+ if latency_flags:
+ latency_list.append(l_end * 1000 - l_start * 1000)
+ else:
+ print("unsupport batch size {}".format(args.batch_size))
+ else:
+ raise ValueError("not implemented {} request".format(args.request))
+ end = time.time()
+ if latency_flags:
+ return [[end - start], latency_list]
+ else:
+ return [[end - start]]
+
+
+if __name__ == '__main__':
+ multi_thread_runner = MultiThreadRunner()
+ endpoint_list = [
+ "127.0.0.1:7777"
+ ]
+ turns = 10
+ start = time.time()
+ result = multi_thread_runner.run(
+ single_func, args.thread, {"endpoint": endpoint_list, "turns": turns})
+ end = time.time()
+ total_cost = end - start
+
+ avg_cost = 0
+ for i in range(args.thread):
+ avg_cost += result[0][i]
+ avg_cost = avg_cost / args.thread
+
+ print("total cost: {}s".format(total_cost))
+ print("each thread cost: {}s. ".format(avg_cost))
+ print("qps: {}samples/s".format(args.batch_size * args.thread * turns /
+ total_cost))
+ if os.getenv("FLAGS_serving_latency"):
+ show_latency(result[1])
diff --git a/python/examples/faster_rcnn_model/benchmark.sh b/python/examples/faster_rcnn_model/benchmark.sh
new file mode 100755
index 0000000000000000000000000000000000000000..5706fd03c7a0e266bcac18b0544c64f327cbbe9b
--- /dev/null
+++ b/python/examples/faster_rcnn_model/benchmark.sh
@@ -0,0 +1,52 @@
+rm profile_log*
+export CUDA_VISIBLE_DEVICES=0
+export FLAGS_profile_server=1
+export FLAGS_profile_client=1
+export FLAGS_serving_latency=1
+
+gpu_id=0
+#save cpu and gpu utilization log
+if [ -d utilization ];then
+ rm -rf utilization
+else
+ mkdir utilization
+fi
+#start server
+$PYTHONROOT/bin/python3 -m paddle_serving_server_gpu.serve --model $1 --port 7777 --thread 4 --gpu_ids 0 --ir_optim > elog 2>&1 &
+sleep 5
+
+#warm up
+$PYTHONROOT/bin/python3 benchmark.py --thread 4 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
+for thread_num in 1 4 8 16
+do
+for batch_size in 1
+do
+ job_bt=`date '+%Y%m%d%H%M%S'`
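+ # sample GPU memory and utilization every 100 ms in the background while the benchmark runs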
+ nvidia-smi --id=0 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
+ nvidia-smi --id=0 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+ gpu_memory_pid=$!
+ $PYTHONROOT/bin/python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+ kill ${gpu_memory_pid}
+ kill `ps -ef|grep used_memory|awk '{print $2}'`
+ echo "model_name:" $1
+ echo "thread_num:" $thread_num
+ echo "batch_size:" $batch_size
+ echo "=================Done===================="
+ echo "model_name:$1" >> profile_log_$1
+ echo "batch_size:$batch_size" >> profile_log_$1
+ $PYTHONROOT/bin/python3 cpu_utilization.py >> profile_log_$1
+ job_et=`date '+%Y%m%d%H%M%S'`
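+ # keep only the peak GPU memory and utilization observed during this run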
+ awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$1
+ awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$1
+ rm -rf gpu_use.log gpu_utilization.log
+ $PYTHONROOT/bin/python3 ../util/show_profile.py profile $thread_num >> profile_log_$1
+ tail -n 8 profile >> profile_log_$1
+ echo "" >> profile_log_$1
+done
+done
+
+#split the aggregated profile log into one file per run
+awk 'BEGIN{RS="\n\n"}{i++}{print > "rcnn_log_"i}' profile_log_$1
+mkdir rcnn_log && mv rcnn_log_* rcnn_log
+ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9
diff --git a/python/examples/imdb/text_classify_service.py b/python/examples/imdb/text_classify_service.py
index 1d292194f963466d3e53859dc9e4c6da1789ea20..ca1e26002baf0284f282add235706080f7902c33 100755
--- a/python/examples/imdb/text_classify_service.py
+++ b/python/examples/imdb/text_classify_service.py
@@ -29,13 +29,14 @@ class IMDBService(WebService):
def preprocess(self, feed={}, fetch=[]):
feed_batch = []
words_lod = [0]
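+ # is_batch=True: the per-sample word tensors are concatenated into a single LoD batch below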
+ is_batch = True
for ins in feed:
words = self.dataset.get_words_only(ins["words"])
words = np.array(words).reshape(len(words), 1)
words_lod.append(words_lod[-1] + len(words))
feed_batch.append(words)
feed = {"words": np.concatenate(feed_batch), "words.lod": words_lod}
- return feed, fetch
+ return feed, fetch, is_batch
imdb_service = IMDBService(name="imdb")
diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py
index 2b5d8a64962c5fea8c93bde3c8b9a95c1ffd97e3..3a314dc5f6690146d472484b5520624074c14ce6 100644
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -23,13 +23,13 @@ import paddle_serving_server as paddle_serving_server
from .version import serving_server_version
from contextlib import closing
import collections
-import fcntl
-
import shutil
import numpy as np
import grpc
from .proto import multi_lang_general_model_service_pb2
import sys
+# fcntl is POSIX-only; skip the import on Windows
+if not sys.platform.startswith('win'):
+ import fcntl
sys.path.append(
os.path.join(os.path.abspath(os.path.dirname(__file__)), 'proto'))
from .proto import multi_lang_general_model_service_pb2_grpc
diff --git a/python/setup.py.app.in b/python/setup.py.app.in
index 1a06b0d352c1da4cdd09f74cb900853d4016afa8..8480ed8471e60c7e7eb8f14bf11a1cc2d23204cf 100644
--- a/python/setup.py.app.in
+++ b/python/setup.py.app.in
@@ -32,8 +32,8 @@ if '${PACK}' == 'ON':
REQUIRED_PACKAGES = [
- 'six >= 1.10.0', 'sentencepiece', 'opencv-python<=4.2.0.32', 'pillow',
- 'shapely<=1.6.1', 'pyclipper'
+ 'six >= 1.10.0', 'sentencepiece<=0.1.92', 'opencv-python<=4.2.0.32', 'pillow',
+ 'pyclipper'
]
packages=['paddle_serving_app',
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..a1eb26e5cec23a8f76a50be48608f8a4532c6993
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+sphinx==2.1.0
+mistune
+sphinx_rtd_theme
+paddlepaddle>=1.8.4
+shapely<=1.6.1
diff --git a/doc/requirements.txt b/requirements_win.txt
similarity index 58%
rename from doc/requirements.txt
rename to requirements_win.txt
index 1560ebc5f9d74fbae773ac5bc45c5b42b044287a..a202642af70fec0c57642cc53b8ead82a5a7c7f1 100644
--- a/doc/requirements.txt
+++ b/requirements_win.txt
@@ -1,4 +1,5 @@
sphinx==2.1.0
mistune
sphinx_rtd_theme
-paddlepaddle>=1.6
+paddlepaddle>=1.8.4
+shapely
diff --git a/tools/Dockerfile.cuda10.1-cudnn7-trt6.devel b/tools/Dockerfile.cuda10.1-cudnn7-trt6.devel
new file mode 100644
index 0000000000000000000000000000000000000000..c6e1c1e050505e631493efe21732a98abd1bd52e
--- /dev/null
+++ b/tools/Dockerfile.cuda10.1-cudnn7-trt6.devel
@@ -0,0 +1,60 @@
+FROM nvidia/cuda:10.1-cudnn7-devel-centos7
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+ && export https_proxy="http://172.19.56.199:3128" \
+ && yum -y install wget >/dev/null \
+ && yum -y install gcc gcc-c++ make glibc-static which \
+ && yum -y install git openssl-devel curl-devel bzip2-devel python-devel \
+ && yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false \
+ && yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
+ && yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+ && export https_proxy="http://172.19.56.199:3128" && \
+ wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+ tar zxf protobuf-all-3.11.2.tar.gz && \
+ cd protobuf-3.11.2 && \
+ ./configure && make -j4 && make install && \
+ make clean && \
+ cd .. && rm -rf protobuf-*
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+ && export https_proxy="http://172.19.56.199:3128" && \
+ wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
+ && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
+ && mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \
+ && echo 'export PATH=/usr/local/cmake3.2.0/bin:$PATH' >> /root/.bashrc \
+ && rm cmake-3.2.0-Linux-x86_64.tar.gz
+
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+ && export https_proxy="http://172.19.56.199:3128" && \
+ wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
+ && tar xzf go1.14.linux-amd64.tar.gz \
+ && mv go /usr/local/go \
+ && echo 'export GOROOT=/usr/local/go' >> /root/.bashrc \
+ && echo 'export PATH=/usr/local/go/bin:$PATH' >> /root/.bashrc \
+ && rm go1.14.linux-amd64.tar.gz
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+ && export https_proxy="http://172.19.56.199:3128" && \
+ yum -y install python-devel sqlite-devel \
+ && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
+ && python get-pip.py >/dev/null \
+ && rm get-pip.py
+
+RUN export http_proxy="http://172.19.56.199:3128" \
+ && export https_proxy="http://172.19.56.199:3128" && \
+ yum install -y python3 python3-devel \
+ && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
+ && yum clean all
+
+RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
+ && echo "export LANG=en_US.utf8" >> /root/.bashrc \
+ && echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
+
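+# Install TensorRT 6.0.1.5 (CUDA 10.1 / cuDNN 7.6 build) and expose its libraries via LD_LIBRARY_PATH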
+RUN wget https://paddle-serving.bj.bcebos.com/tools/TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
+ && tar -xzf TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
+ && mv TensorRT-6.0.1.5 /usr/local/ \
+ && rm TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
+ && echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/TensorRT-6.0.1.5/lib/' >> /root/.bashrc