Merge pull request #910 from wangjiawei04/v0.4.0

V0.4.0

Merge pull request #910 from wangjiawei04/v0.4.0
V0.4.0
1af347ab · Jiawei Wang · GitHub · ae8f75c6 · 104788fc · 1af347ab
9 changed files
--- a/doc/INFERENCE_TO_SERVING.md
+++ b/doc/INFERENCE_TO_SERVING.md
@@ -24,13 +24,13 @@ inference_model_dir = "your_inference_model"
 serving_client_dir = "serving_client_dir"
 serving_server_dir = "serving_server_dir"
 feed_var_names, fetch_var_names = inference_model_to_serving(
-		inference_model_dir, serving_client_dir, serving_server_dir)
+		inference_model_dir, serving_server_dir, serving_client_dir)
 ```
 if your model file and params file are both standalone, please use the following api.
 ```
 feed_var_names, fetch_var_names = inference_model_to_serving(
-		inference_model_dir, serving_client_dir, serving_server_dir,
+		inference_model_dir, serving_server_dir, serving_client_dir,
 		model_filename="model", params_filename="params")
 ```
--- a/doc/INFERENCE_TO_SERVING_CN.md
+++ b/doc/INFERENCE_TO_SERVING_CN.md
@@ -23,11 +23,11 @@ inference_model_dir = "your_inference_model"
 serving_client_dir = "serving_client_dir"
 serving_server_dir = "serving_server_dir"
 feed_var_names, fetch_var_names = inference_model_to_serving(
-		inference_model_dir, serving_client_dir, serving_server_dir)
+		inference_model_dir, serving_server_dir, serving_client_dir)
 ```
 如果模型中有模型描述文件`model_filename` 和 模型参数文件`params_filename`，那么请用
 ```
 feed_var_names, fetch_var_names = inference_model_to_serving(
-		inference_model_dir, serving_client_dir, serving_server_dir,
+		inference_model_dir, serving_server_dir, serving_client_dir,
 		 model_filename="model", params_filename="params")
 ```
--- a/java/examples/pom.xml
+++ b/java/examples/pom.xml
@@ -75,7 +75,7 @@
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
-            <version>4.11</version>
+            <version>4.13.1</version>
            <scope>test</scope>
        </dependency>
        <dependency>

--- a/python/examples/faster_rcnn_model/benchmark.py
+++ b/python/examples/faster_rcnn_model/benchmark.py
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=doc-string-missing
+from __future__ import unicode_literals, absolute_import
+import os
+import sys
+import time
+import json
+import requests
+from paddle_serving_client import Client
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args, show_latency
+from paddle_serving_app.reader import ChineseBertReader
+from paddle_serving_app.reader import *
+import numpy as np
+args = benchmark_args()
+def single_func(idx, resource):
+    img="./000000570688.jpg"
+    profile_flags = False
+    latency_flags = False
+    if os.getenv("FLAGS_profile_client"):
+        profile_flags = True
+    if os.getenv("FLAGS_serving_latency"):
+        latency_flags = True
+        latency_list = []
+    if args.request == "rpc":
+        preprocess = Sequential([
+            File2Image(), BGR2RGB(), Div(255.0),
+            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
+            Resize(640, 640), Transpose((2, 0, 1))
+        ])
+        postprocess = RCNNPostprocess("label_list.txt", "output")
+        client = Client()
+        client.load_client_config(args.model)
+        client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
+        start = time.time()
+        for i in range(turns):
+            if args.batch_size >= 1:
+                l_start = time.time()
+                feed_batch = []
+                b_start = time.time()
+                im = preprocess(img)
+                for bi in range(args.batch_size):
+                    print("1111batch")
+                    print(bi)
+                    feed_batch.append({"image": im,
+                        "im_info": np.array(list(im.shape[1:]) + [1.0]),
+                        "im_shape": np.array(list(im.shape[1:]) + [1.0])})
+               # im = preprocess(img)
+                b_end = time.time()
+                if profile_flags:
+                    sys.stderr.write(
+                        "PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}\n".format(
+                            os.getpid(),
+                            int(round(b_start * 1000000)),
+                            int(round(b_end * 1000000))))
+                #result = client.predict(feed=feed_batch, fetch=fetch)
+                fetch_map = client.predict(
+                    feed=feed_batch,
+                    fetch=["multiclass_nms"])
+                fetch_map["image"] = img
+                postprocess(fetch_map)
+                l_end = time.time()
+                if latency_flags:
+                    latency_list.append(l_end * 1000 - l_start * 1000)
+            else:
+                print("unsupport batch size {}".format(args.batch_size))
+    else:
+        raise ValueError("not implemented {} request".format(args.request))
+    end = time.time()
+    if latency_flags:
+        return [[end - start], latency_list]
+    else:
+        return [[end - start]]
+if __name__ == '__main__':
+    multi_thread_runner = MultiThreadRunner()
+    endpoint_list = [
+        "127.0.0.1:7777"
+    ]
+    turns = 10
+    start = time.time()
+    result = multi_thread_runner.run(
+        single_func, args.thread, {"endpoint": endpoint_list,"turns": turns})
+    end = time.time()
+    total_cost = end - start
+    avg_cost = 0
+    for i in range(args.thread):
+        avg_cost += result[0][i]
+    avg_cost = avg_cost / args.thread
+    print("total cost: {}s".format(total_cost))
+    print("each thread cost: {}s. ".format(avg_cost))
+    print("qps: {}samples/s".format(args.batch_size * args.thread * turns /
+                                    total_cost))
+    if os.getenv("FLAGS_serving_latency"):
+        show_latency(result[1])
--- a/python/examples/faster_rcnn_model/benchmark.sh
+++ b/python/examples/faster_rcnn_model/benchmark.sh
+rm profile_log*
+export CUDA_VISIBLE_DEVICES=0
+export FLAGS_profile_server=1
+export FLAGS_profile_client=1
+export FLAGS_serving_latency=1
+gpu_id=0
+#save cpu and gpu utilization log
+if [ -d utilization ];then
+    rm -rf utilization
+else
+    mkdir utilization
+fi
+#start server
+$PYTHONROOT/bin/python3 -m paddle_serving_server_gpu.serve --model $1 --port 7777 --thread 4 --gpu_ids 0  --ir_optim >  elog  2>&1 &
+sleep 5
+#warm up
+$PYTHONROOT/bin/python3 benchmark.py --thread 4 --batch_size 1 --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+echo -e "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
+for thread_num in 1 4 8 16
+do
+for batch_size in 1
+do
+    job_bt=`date '+%Y%m%d%H%M%S'`
+    nvidia-smi --id=0 --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
+    nvidia-smi --id=0 --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
+    gpu_memory_pid=$!
+    $PYTHONROOT/bin/python3 benchmark.py --thread $thread_num --batch_size $batch_size --model $2/serving_client_conf.prototxt --request rpc > profile 2>&1
+    kill ${gpu_memory_pid}
+    kill `ps -ef|grep used_memory|awk '{print $2}'`
+    echo "model_name:" $1
+    echo "thread_num:" $thread_num
+    echo "batch_size:" $batch_size
+    echo "=================Done===================="
+    echo "model_name:$1" >> profile_log_$1
+    echo "batch_size:$batch_size" >> profile_log_$1
+    $PYTHONROOT/bin/python3 cpu_utilization.py >> profile_log_$1
+    job_et=`date '+%Y%m%d%H%M%S'`
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "MAX_GPU_MEMORY:", max}' gpu_use.log >> profile_log_$1
+    awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTILIZATION:", max}' gpu_utilization.log >> profile_log_$1
+    rm -rf gpu_use.log gpu_utilization.log
+    $PYTHONROOT/bin/python3 ../util/show_profile.py profile $thread_num >> profile_log_$1
+    tail -n 8 profile >> profile_log_$1
+    echo "" >> profile_log_$1
+done
+done
+#Divided log
+awk 'BEGIN{RS="\n\n"}{i++}{print > "bert_log_"i}' profile_log_$1
+mkdir bert_log && mv bert_log_* bert_log
+ps -ef|grep 'serving'|grep -v grep|cut -c 9-15 | xargs kill -9
--- a/python/setup.py.client.in
+++ b/python/setup.py.client.in
@@ -43,8 +43,8 @@ if '${PACK}' == 'ON':
    copy_lib()
 REQUIRED_PACKAGES = [
-    'six >= 1.10.0', 'protobuf >= 3.11.0', 'numpy >= 1.12', 'grpcio >= 1.28.1',
+    'six >= 1.10.0', 'protobuf >= 3.11.0', 'numpy >= 1.12', 'grpcio <= 1.33.2',
-    'grpcio-tools >= 1.28.1'
+    'grpcio-tools <= 1.33.2'
 ]

--- a/python/setup.py.server.in
+++ b/python/setup.py.server.in
@@ -28,7 +28,7 @@ max_version, mid_version, min_version = util.python_version()
 util.gen_pipeline_code("paddle_serving_server")
 REQUIRED_PACKAGES = [
-    'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
+    'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio <= 1.33.2', 'grpcio-tools <= 1.33.2',
    'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app', 'func_timeout', 'pyyaml'
 ]

--- a/python/setup.py.server_gpu.in
+++ b/python/setup.py.server_gpu.in
@@ -30,7 +30,7 @@ max_version, mid_version, min_version = util.python_version()
 util.gen_pipeline_code("paddle_serving_server_gpu")
 REQUIRED_PACKAGES = [
-    'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
+    'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio <= 1.33.2', 'grpcio-tools <= 1.33.2',
    'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app', 'func_timeout', 'pyyaml'
 ]

--- a/tools/Dockerfile.cuda10.1-cudnn7-trt6.devel
+++ b/tools/Dockerfile.cuda10.1-cudnn7-trt6.devel
+# Development image: CUDA 10.1 + cuDNN 7 on CentOS 7, with TensorRT 6 added below.
+FROM nvidia/cuda:10.1-cudnn7-devel-centos7
+# Install wget, the C/C++ toolchain, git, development headers and pinned
+# libSM/libXrender/libXext packages.
+# NOTE(review): the proxy address is hard-coded and exported inside each RUN
+# that downloads something; presumably it is only reachable from the original
+# build network — confirm before building elsewhere.
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" \
+    && yum -y install wget >/dev/null \
+    && yum -y install gcc gcc-c++ make glibc-static which  \
+    && yum -y install git openssl-devel curl-devel bzip2-devel python-devel \
+    && yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false \
+    && yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
+    && yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false 
+# Build and install protobuf 3.11.2 from source, then delete the sources.
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" && \
+    wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-*
+# Install the prebuilt CMake 3.2.0 under /usr/local and add it to PATH via /root/.bashrc.
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" && \
+    wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
+    && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
+    && mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \
+    && echo 'export PATH=/usr/local/cmake3.2.0/bin:$PATH' >> /root/.bashrc \
+    && rm cmake-3.2.0-Linux-x86_64.tar.gz 
+# Install Go 1.14 under /usr/local/go; GOROOT and PATH are set via /root/.bashrc.
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" && \
+    wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
+    && tar xzf go1.14.linux-amd64.tar.gz \
+    && mv go /usr/local/go \
+    && echo 'export GOROOT=/usr/local/go' >> /root/.bashrc \
+    && echo 'export PATH=/usr/local/go/bin:$PATH' >> /root/.bashrc \
+    && rm go1.14.linux-amd64.tar.gz 
+# Install Python/SQLite development headers and bootstrap pip for the `python` interpreter.
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" && \
+    yum -y install python-devel sqlite-devel  \
+    && curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
+    && python get-pip.py >/dev/null \
+    && rm get-pip.py 
+# Install Python 3, EPEL, patchelf and the libXext/libSM/libXrender packages, then clear the yum cache.
+RUN export http_proxy="http://172.19.56.199:3128" \
+    && export https_proxy="http://172.19.56.199:3128" && \
+    yum install -y python3 python3-devel \
+    && yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
+    && yum clean all 
+# Generate the en_US.UTF-8 locale and make it the default for root's shell.
+RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
+    && echo "export LANG=en_US.utf8" >> /root/.bashrc \
+    && echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
+# Download TensorRT 6.0.1.5, unpack it into /usr/local and append its lib
+# directory to LD_LIBRARY_PATH via /root/.bashrc (this step does not export the proxy).
+RUN wget https://paddle-serving.bj.bcebos.com/tools/TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
+    && tar -xzf TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
+    && mv TensorRT-6.0.1.5 /usr/local/ \
+    && rm TensorRT-6.0.1.5.CentOS-7.6.x86_64-gnu.cuda-10.1.cudnn7.6.tar.gz \
+    && echo 'export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/TensorRT-6.0.1.5/lib/' >> /root/.bashrc