Commit fcb2d534 authored by TeslaZhao

Merge branch 'PaddlePaddle:develop' into develop

# Faster RCNN model on Pipeline Paddle Serving
### Get The Faster RCNN Model
```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/faster_rcnn_r50_fpn_1x_coco.tar
```
### Start the service
```
tar xf faster_rcnn_r50_fpn_1x_coco.tar
python web_service.py
```
### Perform prediction
```
python pipeline_http_client.py
```
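The pipeline service expects a JSON body whose `key`/`value` lists carry base64-encoded images. A minimal request sketch, mirroring the `pipeline_http_client.py` script included later in this commit (port, op name and test image are taken from that script):

```
import base64
import json

import requests

# Endpoint exposed by web_service.py: http_port 18082, op name "faster_rcnn"
url = "http://127.0.0.1:18082/faster_rcnn/prediction"

# Base64-encode the test image and wrap it in the pipeline key/value format
with open("000000570688.jpg", "rb") as f:
    image = base64.b64encode(f.read()).decode("utf8")

data = {"key": ["image"], "value": [image]}
resp = requests.post(url=url, data=json.dumps(data))
print(resp.json())
```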
import sys
import os
import yaml
import requests
import time
import json
import cv2
import base64
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
def parse_benchmark(filein, fileout):
with open(filein, "r") as fin:
res = yaml.load(fin, Loader=yaml.SafeLoader)
del_list = []
for key in res["DAG"].keys():
if "call" in key:
del_list.append(key)
for key in del_list:
del res["DAG"][key]
with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id):
fin = open("config.yml", "r")
config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
config["dag"]["tracer"] = {"interval_s": 30}
if device == "gpu":
config["op"]["faster_rcnn"]["local_service_conf"]["device_type"] = 1
config["op"]["faster_rcnn"]["local_service_conf"]["devices"] = gpu_id
with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size):
print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/faster_rcnn/prediction"
with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
start = time.time()
while True:
data = {"key": [], "value": []}
for j in range(batch_size):
data["key"].append("image_" + str(j))
data["value"].append(image)
r = requests.post(url=url, data=json.dumps(data))
end = time.time()
if end - start > 70:
print("70s end")
break
return [[end - start]]
def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http , thread, batch_size)
def run_rpc(thread, batch_size):
pass
def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__":
if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3])
device = sys.argv[4]
gpu_id = sys.argv[5]
gen_yml(device, gpu_id)
elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3])
batch_size = int(sys.argv[4])
if mode == "http":
multithread_http(thread, batch_size)
elif mode == "rpc":
multithread_rpc(thread, batch_size)
elif sys.argv[1] == "dump":
filein = sys.argv[2]
fileout = sys.argv[3]
parse_benchmark(filein, fileout)
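Note that `run_http` above loops for roughly 70 seconds but only returns the elapsed wall time per thread; throughput is taken from the pipeline tracer output that the `dump` step processes. A minimal sketch of a variant that also counts completed requests and derives requests per second (a hypothetical extension, not part of this script):

```
import json
import time

import requests


def run_http_with_qps(idx, batch_size, image, url, duration_s=70):
    """Hypothetical variant of run_http that also reports request throughput."""
    count = 0
    start = time.time()
    while True:
        data = {"key": [], "value": []}
        for j in range(batch_size):
            data["key"].append("image_" + str(j))
            data["value"].append(image)
        requests.post(url=url, data=json.dumps(data))
        count += 1
        elapsed = time.time() - start
        if elapsed > duration_s:
            break
    # Each request carries batch_size images; QPS here means requests/second.
    return [[elapsed, count, count / elapsed]]
```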
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="faster_rcnn_r50_fpn_1x_coco"
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----FasterRCNN thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
ps -ef | grep nvidia-smi | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.2"
cpu: "Xeon 6148"
gpu: "P4"
xpu: "None"
api: ""
owner: "wangjiawei04"
model_name: "faster_rcnn"
model_type: "static"
model_source: "paddledetection"
model_url: ""
batch_size: 1
num_of_samples: 1000
input_shape: "3, 480, 640"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: true
cpu_math_library_num_threads: ""
dag:
  is_thread_op: false
  tracer:
    interval_s: 30
http_port: 18082
op:
  faster_rcnn:
    local_service_conf:
      client_type: local_predictor
      concurrency: 2
      device_type: 1
      devices: '2'
      fetch_list:
      - save_infer_model/scale_0.tmp_1
      model_config: serving_server/
rpc_port: 9998
worker_num: 20
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# from paddle_serving_server.pipeline import PipelineClient
import numpy as np
import requests
import json
import cv2
import base64
import os
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
url = "http://127.0.0.1:18082/faster_rcnn/prediction"
with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
for i in range(1):
data = {"key": ["image"], "value": [image]}
r = requests.post(url=url, data=json.dumps(data))
print(r.json())
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server.web_service import WebService, Op
import logging
import numpy as np
import sys
import cv2
from paddle_serving_app.reader import *
import base64
class FasterRCNNOp(Op):
def init_op(self):
self.img_preprocess = Sequential([
BGR2RGB(), Div(255.0),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
Resize((640, 640)), Transpose((2, 0, 1))
])
self.img_postprocess = RCNNPostprocess("label_list.txt", "output")
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
imgs = []
#print("keys", input_dict.keys())
for key in input_dict.keys():
data = base64.b64decode(input_dict[key].encode('utf8'))
data = np.frombuffer(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
im = self.img_preprocess(im)
imgs.append({
"image": im[np.newaxis,:],
"im_shape": np.array(list(im.shape[1:])).reshape(-1)[np.newaxis,:],
"scale_factor": np.array([1.0, 1.0]).reshape(-1)[np.newaxis,:],
})
feed_dict = {
"image": np.concatenate([x["image"] for x in imgs], axis=0),
"im_shape": np.concatenate([x["im_shape"] for x in imgs], axis=0),
"scale_factor": np.concatenate([x["scale_factor"] for x in imgs], axis=0)
}
#for key in feed_dict.keys():
# print(key, feed_dict[key].shape)
return feed_dict, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id):
#print(fetch_dict)
res_dict = {"bbox_result": str(self.img_postprocess(fetch_dict, visualize=False))}
return res_dict, None, ""
class FasterRCNNService(WebService):
def get_pipeline_response(self, read_op):
faster_rcnn_op = FasterRCNNOp(name="faster_rcnn", input_ops=[read_op])
return faster_rcnn_op
fasterrcnn_service = FasterRCNNService(name="faster_rcnn")
fasterrcnn_service.prepare_pipeline_config("config2.yml")
fasterrcnn_service.run_service()
# PPYOLO model on Pipeline Paddle Serving
([简体中文](./README_CN.md)|English)
### Get Model
```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/ppyolo_mbv3_large_coco.tar
```
### Start the service
```
tar xf ppyolo_mbv3_large_coco.tar
python web_service.py
```
### Perform prediction
```
python pipeline_http_client.py
```
import sys
import os
import yaml
import requests
import time
import json
import cv2
import base64
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
def parse_benchmark(filein, fileout):
with open(filein, "r") as fin:
res = yaml.load(fin, Loader=yaml.SafeLoader)
del_list = []
for key in res["DAG"].keys():
if "call" in key:
del_list.append(key)
for key in del_list:
del res["DAG"][key]
with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id):
fin = open("config.yml", "r")
config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
config["dag"]["tracer"] = {"interval_s": 30}
if device == "gpu":
config["op"]["ppyolo_mbv3"]["local_service_conf"]["device_type"] = 1
config["op"]["ppyolo_mbv3"]["local_service_conf"]["devices"] = gpu_id
with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size):
print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/ppyolo_mbv3/prediction"
with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
start = time.time()
while True:
data = {"key": [], "value": []}
for j in range(batch_size):
data["key"].append("image_" + str(j))
data["value"].append(image)
r = requests.post(url=url, data=json.dumps(data))
end = time.time()
if end - start > 70:
print("70s end")
break
return [[end - start]]
def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http , thread, batch_size)
def run_rpc(thread, batch_size):
pass
def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__":
if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3])
device = sys.argv[4]
gpu_id = sys.argv[5]
gen_yml(device, gpu_id)
elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3])
batch_size = int(sys.argv[4])
if mode == "http":
multithread_http(thread, batch_size)
elif mode == "rpc":
multithread_rpc(thread, batch_size)
elif sys.argv[1] == "dump":
filein = sys.argv[2]
fileout = sys.argv[3]
parse_benchmark(filein, fileout)
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="ppyolo_mbv3_large"
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
rm -rf profile_log_$modelname
for thread_num in 1
do
for batch_size in 1
do
echo "#----PPyolo thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.2"
cpu: "Xeon 6148"
gpu: "P4"
xpu: "None"
api: ""
owner: "wangjiawei04"
model_name: "ppyolo"
model_type: "static"
model_source: "paddledetection"
model_url: ""
batch_size: 1
num_of_samples: 1000
input_shape: "3, 480, 640"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: true
cpu_math_library_num_threads: ""
dag:
  is_thread_op: false
  tracer:
    interval_s: 30
http_port: 18082
op:
  ppyolo_mbv3:
    local_service_conf:
      client_type: local_predictor
      concurrency: 10
      device_type: 1
      devices: '2'
      fetch_list:
      - save_infer_model/scale_0.tmp_1
      model_config: serving_server/
rpc_port: 9998
worker_num: 20
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# from paddle_serving_server.pipeline import PipelineClient
import numpy as np
import requests
import json
import cv2
import base64
import os
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
url = "http://127.0.0.1:18082/ppyolo_mbv3/prediction"
with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
for i in range(1):
data = {"key": ["image"], "value": [image]}
r = requests.post(url=url, data=json.dumps(data))
print(r.json())
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server.web_service import WebService, Op
import logging
import numpy as np
import sys
import cv2
from paddle_serving_app.reader import *
import base64
class PPYoloMbvOp(Op):
def init_op(self):
self.img_preprocess = Sequential([
BGR2RGB(), Div(255.0),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
Resize((320, 320)), Transpose((2, 0, 1))
])
self.img_postprocess = RCNNPostprocess("label_list.txt", "output")
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
imgs = []
#print("keys", input_dict.keys())
for key in input_dict.keys():
data = base64.b64decode(input_dict[key].encode('utf8'))
data = np.frombuffer(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
im = self.img_preprocess(im)
imgs.append({
"image": im[np.newaxis,:],
"im_shape": np.array(list(im.shape[1:])).reshape(-1)[np.newaxis,:],
"scale_factor": np.array([1.0, 1.0]).reshape(-1)[np.newaxis,:],
})
feed_dict = {
"image": np.concatenate([x["image"] for x in imgs], axis=0),
"im_shape": np.concatenate([x["im_shape"] for x in imgs], axis=0),
"scale_factor": np.concatenate([x["scale_factor"] for x in imgs], axis=0)
}
for key in feed_dict.keys():
print(key, feed_dict[key].shape)
return feed_dict, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id):
#print(fetch_dict)
res_dict = {"bbox_result": str(self.img_postprocess(fetch_dict, visualize=False))}
return res_dict, None, ""
class PPYoloMbv(WebService):
def get_pipeline_response(self, read_op):
ppyolo_mbv3_op = PPYoloMbvOp(name="ppyolo_mbv3", input_ops=[read_op])
return ppyolo_mbv3_op
ppyolo_mbv3_service = PPYoloMbv(name="ppyolo_mbv3")
ppyolo_mbv3_service.prepare_pipeline_config("config2.yml")
ppyolo_mbv3_service.run_service()
# YOLOv3 model on Pipeline Paddle Serving
([简体中文](./README_CN.md)|English)
### Get Model
```
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/yolov3_darknet53_270e_coco.tar
```
### Start the service
```
tar xf yolov3_darknet53_270e_coco.tar
python web_service.py
```
### Perform prediction
```
python pipeline_http_client.py
```
import sys
import os
import yaml
import requests
import time
import json
import cv2
import base64
try:
from paddle_serving_server_gpu.pipeline import PipelineClient
except ImportError:
from paddle_serving_server.pipeline import PipelineClient
import numpy as np
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args, show_latency
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
def parse_benchmark(filein, fileout):
with open(filein, "r") as fin:
res = yaml.load(fin, Loader=yaml.SafeLoader)
del_list = []
for key in res["DAG"].keys():
if "call" in key:
del_list.append(key)
for key in del_list:
del res["DAG"][key]
with open(fileout, "w") as fout:
yaml.dump(res, fout, default_flow_style=False)
def gen_yml(device, gpu_id):
fin = open("config.yml", "r")
config = yaml.load(fin, Loader=yaml.SafeLoader)
fin.close()
config["dag"]["tracer"] = {"interval_s": 30}
if device == "gpu":
config["op"]["faster_rcnn"]["local_service_conf"]["device_type"] = 1
config["op"]["faster_rcnn"]["local_service_conf"]["devices"] = gpu_id
with open("config2.yml", "w") as fout:
yaml.dump(config, fout, default_flow_style=False)
def run_http(idx, batch_size):
print("start thread ({})".format(idx))
url = "http://127.0.0.1:18082/yolov3/prediction"
with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
start = time.time()
while True:
data = {"key": [], "value": []}
for j in range(batch_size):
data["key"].append("image_" + str(j))
data["value"].append(image)
r = requests.post(url=url, data=json.dumps(data))
end = time.time()
if end - start > 70:
print("70s end")
break
return [[end - start]]
def multithread_http(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_http , thread, batch_size)
def run_rpc(thread, batch_size):
pass
def multithread_rpc(thread, batch_size):
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(run_rpc, thread, batch_size)
if __name__ == "__main__":
if sys.argv[1] == "yaml":
mode = sys.argv[2] # brpc/ local predictor
thread = int(sys.argv[3])
device = sys.argv[4]
gpu_id = sys.argv[5]
gen_yml(device, gpu_id)
elif sys.argv[1] == "run":
mode = sys.argv[2] # http/ rpc
thread = int(sys.argv[3])
batch_size = int(sys.argv[4])
if mode == "http":
multithread_http(thread, batch_size)
elif mode == "rpc":
multithread_rpc(thread, batch_size)
elif sys.argv[1] == "dump":
filein = sys.argv[2]
fileout = sys.argv[3]
parse_benchmark(filein, fileout)
export FLAGS_profile_pipeline=1
alias python3="python3.7"
modelname="yolov3_darknet53_270e_coco"
gpu_id="0"
benchmark_config_filename="benchmark_config.yaml"
# HTTP
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
sleep 3
python3 benchmark.py yaml local_predictor 1 gpu $gpu_id
rm -rf profile_log_$modelname
for thread_num in 1 8 16
do
for batch_size in 1
do
echo "#----Yolov3 thread num: $thread_num batch size: $batch_size mode:http ----" >>profile_log_$modelname
rm -rf PipelineServingLogs
rm -rf cpu_utilization.py
python3 web_service.py >web.log 2>&1 &
sleep 3
nvidia-smi --id=${gpu_id} --query-compute-apps=used_memory --format=csv -lms 100 > gpu_use.log 2>&1 &
nvidia-smi --id=${gpu_id} --query-gpu=utilization.gpu --format=csv -lms 100 > gpu_utilization.log 2>&1 &
echo "import psutil\ncpu_utilization=psutil.cpu_percent(1,False)\nprint('CPU_UTILIZATION:', cpu_utilization)\n" > cpu_utilization.py
python3 benchmark.py run http $thread_num $batch_size
python3 cpu_utilization.py >>profile_log_$modelname
python3 -m paddle_serving_server_gpu.profiler >>profile_log_$modelname
ps -ef | grep web_service | awk '{print $2}' | xargs kill -9
python3 benchmark.py dump benchmark.log benchmark.tmp
mv benchmark.tmp benchmark.log
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_MEM:", max}' gpu_use.log >> profile_log_$modelname
awk 'BEGIN {max = 0} {if(NR>1){if ($1 > max) max=$1}} END {print "GPU_UTIL:", max}' gpu_utilization.log >> profile_log_$modelname
cat benchmark.log >> profile_log_$modelname
python3 -m paddle_serving_server_gpu.parse_profile --benchmark_cfg $benchmark_config_filename --benchmark_log profile_log_$modelname
#rm -rf gpu_use.log gpu_utilization.log
done
done
cuda_version: "10.1"
cudnn_version: "7.6"
trt_version: "6.0"
python_version: "3.7"
gcc_version: "8.2"
paddle_version: "2.0.2"
cpu: "Xeon 6148"
gpu: "P4"
xpu: "None"
api: ""
owner: "wangjiawei04"
model_name: "yolov3"
model_type: "static"
model_source: "paddledetection"
model_url: ""
batch_size: 1
num_of_samples: 1000
input_shape: "3, 480, 640"
runtime_device: "gpu"
ir_optim: true
enable_memory_optim: true
enable_tensorrt: false
precision: "fp32"
enable_mkldnn: true
cpu_math_library_num_threads: ""
dag:
  is_thread_op: false
  tracer:
    interval_s: 30
http_port: 18082
op:
  yolov3:
    local_service_conf:
      client_type: local_predictor
      concurrency: 10
      device_type: 1
      devices: '2'
      fetch_list:
      - save_infer_model/scale_0.tmp_1
      model_config: serving_server/
rpc_port: 9998
worker_num: 20
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
dining table
toilet
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# from paddle_serving_server.pipeline import PipelineClient
import numpy as np
import requests
import json
import cv2
import base64
import os
def cv2_to_base64(image):
return base64.b64encode(image).decode('utf8')
url = "http://127.0.0.1:18082/yolov3/prediction"
with open(os.path.join(".", "000000570688.jpg"), 'rb') as file:
image_data1 = file.read()
image = cv2_to_base64(image_data1)
for i in range(1):
data = {"key": ["image"], "value": [image]}
r = requests.post(url=url, data=json.dumps(data))
print(r.json())
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_server.web_service import WebService, Op
import logging
import numpy as np
import sys
import cv2
from paddle_serving_app.reader import *
import base64
class Yolov3Op(Op):
def init_op(self):
self.img_preprocess = Sequential([
BGR2RGB(), Div(255.0),
Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], False),
Resize((640, 640)), Transpose((2, 0, 1))
])
self.img_postprocess = RCNNPostprocess("label_list.txt", "output")
def preprocess(self, input_dicts, data_id, log_id):
(_, input_dict), = input_dicts.items()
imgs = []
#print("keys", input_dict.keys())
for key in input_dict.keys():
data = base64.b64decode(input_dict[key].encode('utf8'))
data = np.frombuffer(data, np.uint8)
im = cv2.imdecode(data, cv2.IMREAD_COLOR)
im = self.img_preprocess(im)
imgs.append({
"image": im[np.newaxis,:],
"im_shape": np.array(list(im.shape[1:])).reshape(-1)[np.newaxis,:],
"scale_factor": np.array([1.0, 1.0]).reshape(-1)[np.newaxis,:],
})
feed_dict = {
"image": np.concatenate([x["image"] for x in imgs], axis=0),
"im_shape": np.concatenate([x["im_shape"] for x in imgs], axis=0),
"scale_factor": np.concatenate([x["scale_factor"] for x in imgs], axis=0)
}
#for key in feed_dict.keys():
# print(key, feed_dict[key].shape)
return feed_dict, False, None, ""
def postprocess(self, input_dicts, fetch_dict, log_id):
#print(fetch_dict)
res_dict = {"bbox_result": str(self.img_postprocess(fetch_dict, visualize=False))}
return res_dict, None, ""
class Yolov3Service(WebService):
def get_pipeline_response(self, read_op):
yolov3_op = Yolov3Op(name="yolov3", input_ops=[read_op])
return yolov3_op
yolov3_service = Yolov3Service(name="yolov3")
yolov3_service.prepare_pipeline_config("config2.yml")
yolov3_service.run_service()
@@ -415,7 +415,7 @@ class RCNNPostprocess(object):
out_path = os.path.join(self.output_dir, image_path)
image.save(out_path, quality=95)
def __call__(self, image_with_bbox):
def __call__(self, image_with_bbox, visualize=True):
fetch_name = ""
for key in image_with_bbox:
if key == "image":
@@ -427,6 +427,8 @@ class RCNNPostprocess(object):
self.clsid2catid)
if os.path.isdir(self.output_dir) is False:
os.mkdir(self.output_dir)
if visualize is False:
return bbox_result
self.visualize(image_with_bbox["image"], bbox_result, self.catid2name,
len(self.label_list))
if os.path.isdir(self.output_dir) is False:
@@ -434,6 +436,7 @@ class RCNNPostprocess(object):
bbox_file = os.path.join(self.output_dir, 'bbox.json')
with open(bbox_file, 'w') as f:
json.dump(bbox_result, f, indent=4)
return bbox_result
def __repr__(self):
return self.__class__.__name__ + "label_file: {1}, output_dir: {2}".format(
......
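This change lets callers get the raw detection results back without drawing boxes or writing files; the pipeline `web_service.py` ops above depend on it. A minimal usage sketch (constructor arguments taken from the examples; `fetch_dict` stands for the inference output handed to the op's `postprocess`):

```
from paddle_serving_app.reader import RCNNPostprocess

img_postprocess = RCNNPostprocess("label_list.txt", "output")


def to_bbox_dict(fetch_dict):
    # With visualize=False the post-processor skips saving the annotated
    # image and bbox.json and simply returns the bbox list, which the
    # pipeline ops wrap into their HTTP response.
    return {"bbox_result": str(img_postprocess(fetch_dict, visualize=False))}
```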
@@ -30,6 +30,10 @@ WORKDIR /home
COPY tools/dockerfiles/build_scripts /build_scripts
RUN bash /build_scripts/install_whl.sh <<serving_version>> <<paddle_version>> <<run_env>> <<python_version>> && rm -rf /build_scripts
WORKDIR /home
COPY tools/dockerfiles/build_scripts /build_scripts
RUN bash /build_scripts/soft_link.sh <<run_env>>
# install tensorrt
WORKDIR /home
COPY tools/dockerfiles/build_scripts /build_scripts
......
@@ -42,7 +42,7 @@ if [[ $SERVING_VERSION == "0.5.0" ]]; then
serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-102-${SERVING_VERSION}.tar.gz"
elif [[ "$RUN_ENV" == "cuda11" ]];then
server_release="paddle-serving-server-gpu==$SERVING_VERSION.post11"
serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-11-${SERVING_VERSION}.tar.gz"
serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-cuda11-${SERVING_VERSION}.tar.gz"
fi
client_release="paddle-serving-client==$SERVING_VERSION"
app_release="paddle-serving-app==0.3.1"
@@ -58,7 +58,7 @@ elif [[ $SERVING_VERSION == "0.6.0" ]]; then
serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-102-$SERVING_VERSION.tar.gz"
elif [[ "$RUN_ENV" == "cuda11" ]];then
server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post11-py3-none-any.whl"
serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-11-$SERVING_VERSION.tar.gz"
serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-cuda11-$SERVING_VERSION.tar.gz"
fi
client_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-$SERVING_VERSION-cp$CPYTHON-none-any.whl"
app_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-$SERVING_VERSION-py3-none-any.whl"
@@ -99,10 +99,10 @@ elif [[ "$RUN_ENV" == "cuda11" ]];then
python$PYTHON_VERSION -m pip install paddlepaddle-gpu==${PADDLE_VERSION}
cd /usr/local/
wget $serving_bin
tar xf serving-gpu-11-${SERVING_VERSION}.tar.gz
mv $PWD/serving-gpu-11-${SERVING_VERSION} $PWD/serving_bin
tar xf serving-gpu-cuda11-${SERVING_VERSION}.tar.gz
mv $PWD/serving-gpu-cuda11-${SERVING_VERSION} $PWD/serving_bin
echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc
rm -rf serving-gpu-11-${SERVING_VERSION}.tar.gz
rm -rf serving-gpu-cuda11-${SERVING_VERSION}.tar.gz
cd -
fi
......
RUN_ENV=$1
if [[ "$RUN_ENV" == "cuda10.1" ]];then
ln -sf /usr/local/cuda-10.1/targets/x86_64-linux/lib/libcudart.so.10.1 /usr/lib/libcudart.so && \
ln -sf /usr/local/cuda-10.1/targets/x86_64-linux/lib/libcusolver.so.10 /usr/lib/libcusolver.so && \
ln -sf /usr/lib/x86_64-linux-gnu/libcuda.so /usr/lib/libcuda.so && \
ln -sf /usr/lib/x86_64-linux-gnu/libcublas.so.10 /usr/lib/libcublas.so && \
ln -sf /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so
elif [[ "$RUN_ENV" == "cuda10.1" ]];then
ln -sf /usr/local/cuda-10.2/targets/x86_64-linux/lib/libcudart.so.10.2 /usr/lib/libcudart.so && \
ln -sf /usr/local/cuda-10.2/targets/x86_64-linux/lib/libcusolver.so.10 /usr/lib/libcusolver.so && \
ln -sf /usr/lib/x86_64-linux-gnu/libcuda.so /usr/lib/libcuda.so && \
ln -sf /usr/lib/x86_64-linux-gnu/libcublas.so.10 /usr/lib/libcublas.so && \
ln -sf /usr/lib/x86_64-linux-gnu/libcudnn.so.8 /usr/lib/libcudnn.so
elif [[ "$RUN_ENV" == "cuda10.1" ]];then
ln -sf /usr/local/cuda-11.0/targets/x86_64-linux/lib/libcudart.so.11.0 /usr/lib/libcudart.so && \
ln -sf /usr/local/cuda-11.0/targets/x86_64-linux/lib/libcusolver.so.10 /usr/lib/libcusolver.so && \
ln -sf /usr/lib/x86_64-linux-gnu/libcuda.so /usr/lib/libcuda.so && \
ln -sf /usr/local/cuda-11.0/targets/x86_64-linux/lib/libcublas.so.11 /usr/lib/libcublas.so && \
ln -sf /usr/lib/x86_64-linux-gnu/libcudnn.so.8 /usr/lib/libcudnn.so
fi
@@ -39,7 +39,8 @@ go get -u google.golang.org/grpc@v1.33.0
build_whl_list=(build_cpu_server build_gpu_server build_client build_app)
rpc_model_list=(grpc_fit_a_line grpc_yolov4 pipeline_imagenet bert_rpc_gpu bert_rpc_cpu ResNet50_rpc \
lac_rpc cnn_rpc bow_rpc lstm_rpc fit_a_line_rpc deeplabv3_rpc mobilenet_rpc unet_rpc resnetv2_rpc \
criteo_ctr_rpc_cpu criteo_ctr_rpc_gpu ocr_rpc yolov4_rpc_gpu faster_rcnn_hrnetv2p_w18_1x_encrypt)
criteo_ctr_rpc_cpu criteo_ctr_rpc_gpu ocr_rpc yolov4_rpc_gpu faster_rcnn_hrnetv2p_w18_1x_encrypt \
low_precision_resnet50_int8)
http_model_list=(fit_a_line_http lac_http cnn_http bow_http lstm_http ResNet50_http bert_http\
pipeline_ocr_cpu_http)
@@ -148,7 +149,7 @@ function before_hook() {
setproxy
unsetproxy
cd ${build_path}/python
python3.6 -m pip install --upgrade pip==20.0.1
python3.6 -m pip install --upgrade pip
python3.6 -m pip install requests
python3.6 -m pip install -r requirements.txt -i https://mirror.baidu.com/pypi/simple
python3.6 -m pip install numpy==1.16.4
@@ -260,6 +261,22 @@ function build_app() {
python3.6 -m pip install ${build_path}/build/python/dist/*
}
function low_precision_resnet50_int8 () {
dir=${log_dir}rpc_model/low_precision/resnet50/
cd ${build_path}/python/examples/low_precision/resnet50/
check_dir ${dir}
wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz
tar zxvf ResNet50_quant.tar.gz
python3.6 -m paddle_serving_client.convert --dirname ResNet50_quant
echo -e "${GREEN_COLOR}low_precision_resnet50_int8_GPU_RPC server started${RES}" | tee -a ${log_dir}server_total.txt
python3.6 -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_trt --precision int8 > ${dir}server_log.txt 2>&1 &
check_result server 10
echo -e "${GREEN_COLOR}low_precision_resnet50_int8_GPU_RPC client started${RES}" | tee -a ${log_dir}client_total.txt
python3.6 resnet50_client.py > ${dir}client_log.txt 2>&1
check_result client "low_precision_resnet50_int8_GPU_RPC server test completed"
kill_server_process
}
function faster_rcnn_hrnetv2p_w18_1x_encrypt() {
dir=${log_dir}rpc_model/faster_rcnn_hrnetv2p_w18_1x/
cd ${build_path}/python/examples/detection/faster_rcnn_hrnetv2p_w18_1x
......