提交 573983e3 编写于 作者: B barrierye

Merge branch 'develop' of https://github.com/PaddlePaddle/Serving into...

Merge branch 'develop' of https://github.com/PaddlePaddle/Serving into fix-not-thread-safe-in-client
......@@ -264,8 +264,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa
### About Efficiency
- [How to profile Paddle Serving latency?](python/examples/util)
- [How to optimize performance?(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [Deploy multi-services on one GPU(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
- [How to optimize performance?(Chinese)](doc/PERFORMANCE_OPTIM_CN.md)
- [Deploy multi-services on one GPU(Chinese)](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU Benchmarks(Chinese)](doc/BENCHMARKING.md)
- [GPU Benchmarks(Chinese)](doc/GPU_BENCHMARKING.md)
......
......@@ -270,8 +270,8 @@ curl -H "Content-Type:application/json" -X POST -d '{"feed":[{"url": "https://pa
### 关于Paddle Serving性能
- [如何测试Paddle Serving性能?](python/examples/util/)
- [如何优化性能?](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [在一张GPU上启动多个预测服务](doc/PERFORMANCE_OPTIM_CN.md)
- [如何优化性能?](doc/PERFORMANCE_OPTIM_CN.md)
- [在一张GPU上启动多个预测服务](doc/MULTI_SERVICE_ON_ONE_GPU_CN.md)
- [CPU版Benchmarks](doc/BENCHMARKING.md)
- [GPU版Benchmarks](doc/GPU_BENCHMARKING.md)
......
......@@ -131,7 +131,7 @@ int GeneralReaderOp::inference() {
lod_tensor.dtype = paddle::PaddleDType::FLOAT32;
}
if (req->insts(0).tensor_array(i).shape(0) == -1) {
if (model_config->_is_lod_feed[i]) {
lod_tensor.lod.resize(1);
lod_tensor.lod[0].push_back(0);
VLOG(2) << "var[" << i << "] is lod_tensor";
......@@ -153,6 +153,7 @@ int GeneralReaderOp::inference() {
// specify the memory needed for output tensor_vector
for (int i = 0; i < var_num; ++i) {
if (out->at(i).lod.size() == 1) {
int tensor_size = 0;
for (int j = 0; j < batch_size; ++j) {
const Tensor &tensor = req->insts(j).tensor_array(i);
int data_len = 0;
......@@ -162,15 +163,28 @@ int GeneralReaderOp::inference() {
data_len = tensor.float_data_size();
}
VLOG(2) << "tensor size for var[" << i << "]: " << data_len;
tensor_size += data_len;
int cur_len = out->at(i).lod[0].back();
VLOG(2) << "current len: " << cur_len;
out->at(i).lod[0].push_back(cur_len + data_len);
VLOG(2) << "new len: " << cur_len + data_len;
int sample_len = 0;
if (tensor.shape_size() == 1) {
sample_len = data_len;
} else {
sample_len = tensor.shape(0);
}
out->at(i).lod[0].push_back(cur_len + sample_len);
VLOG(2) << "new len: " << cur_len + sample_len;
}
out->at(i).data.Resize(tensor_size * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back()};
for (int j = 1; j < req->insts(0).tensor_array(i).shape_size(); ++j) {
out->at(i).shape.push_back(req->insts(0).tensor_array(i).shape(j));
}
if (out->at(i).shape.size() == 1) {
out->at(i).shape.push_back(1);
}
out->at(i).data.Resize(out->at(i).lod[0].back() * elem_size[i]);
out->at(i).shape = {out->at(i).lod[0].back(), 1};
VLOG(2) << "var[" << i
<< "] is lod_tensor and len=" << out->at(i).lod[0].back();
} else {
......
......@@ -15,8 +15,10 @@
#include "core/general-server/op/general_response_op.h"
#include <algorithm>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <utility>
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
......@@ -86,37 +88,51 @@ int GeneralResponseOp::inference() {
// To get the order of model return values
output->set_engine_name(pre_name);
FetchInst *fetch_inst = output->add_insts();
std::map<std::string, int> fetch_index_map;
for (int i = 0; i < in->size(); ++i) {
VLOG(2) << "index " << i << " var " << in->at(i).name;
fetch_index_map.insert(std::pair<std::string, int>(in->at(i).name, i));
}
for (auto &idx : fetch_index) {
Tensor *tensor = fetch_inst->add_tensor_array();
tensor->set_elem_type(1);
int true_idx = fetch_index_map[model_config->_fetch_name[idx]];
if (model_config->_is_lod_fetch[idx]) {
VLOG(2) << "out[" << idx << "] is lod_tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
<< " is lod_tensor";
for (int k = 0; k < in->at(true_idx).shape.size(); ++k) {
VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
tensor->add_shape(in->at(true_idx).shape[k]);
}
} else {
VLOG(2) << "out[" << idx << "] is tensor";
for (int k = 0; k < in->at(idx).shape.size(); ++k) {
VLOG(2) << "shape[" << k << "]: " << in->at(idx).shape[k];
tensor->add_shape(in->at(idx).shape[k]);
VLOG(2) << "out[" << idx << "] " << model_config->_fetch_name[idx]
<< " is tensor";
for (int k = 0; k < in->at(true_idx).shape.size(); ++k) {
VLOG(2) << "shape[" << k << "]: " << in->at(true_idx).shape[k];
tensor->add_shape(in->at(true_idx).shape[k]);
}
}
}
int var_idx = 0;
for (auto &idx : fetch_index) {
int true_idx = fetch_index_map[model_config->_fetch_name[idx]];
int cap = 1;
for (int j = 0; j < in->at(idx).shape.size(); ++j) {
cap *= in->at(idx).shape[j];
for (int j = 0; j < in->at(true_idx).shape.size(); ++j) {
cap *= in->at(true_idx).shape[j];
}
if (in->at(idx).dtype == paddle::PaddleDType::INT64) {
int64_t *data_ptr = static_cast<int64_t *>(in->at(idx).data.data());
if (in->at(true_idx).dtype == paddle::PaddleDType::INT64) {
VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx]
<< "].";
int64_t *data_ptr =
static_cast<int64_t *>(in->at(true_idx).data.data());
if (model_config->_is_lod_fetch[idx]) {
FetchInst *fetch_p = output->mutable_insts(0);
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
for (int j = 0; j < in->at(true_idx).lod[0].size(); ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_lod(
in->at(idx).lod[0][j]);
in->at(true_idx).lod[0][j]);
}
for (int j = 0; j < cap; ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]);
......@@ -127,14 +143,17 @@ int GeneralResponseOp::inference() {
fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[j]);
}
}
VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready";
var_idx++;
} else if (in->at(idx).dtype == paddle::PaddleDType::FLOAT32) {
float *data_ptr = static_cast<float *>(in->at(idx).data.data());
} else if (in->at(true_idx).dtype == paddle::PaddleDType::FLOAT32) {
VLOG(2) << "Prepare float var [" << model_config->_fetch_name[idx]
<< "].";
float *data_ptr = static_cast<float *>(in->at(true_idx).data.data());
if (model_config->_is_lod_fetch[idx]) {
FetchInst *fetch_p = output->mutable_insts(0);
for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
for (int j = 0; j < in->at(true_idx).lod[0].size(); ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_lod(
in->at(idx).lod[0][j]);
in->at(true_idx).lod[0][j]);
}
for (int j = 0; j < cap; ++j) {
fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]);
......@@ -145,6 +164,7 @@ int GeneralResponseOp::inference() {
fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[j]);
}
}
VLOG(2) << "fetch var [" << model_config->_fetch_name[idx] << "] ready";
var_idx++;
}
}
......
......@@ -13,10 +13,10 @@ import paddlehub as hub
model_name = "bert_chinese_L-12_H-768_A-12"
module = hub.Module(model_name)
inputs, outputs, program = module.context(trainable=True, max_seq_len=20)
feed_keys = ["input_ids", "position_ids", "segment_ids", "input_mask", "pooled_output", "sequence_output"]
feed_keys = ["input_ids", "position_ids", "segment_ids", "input_mask"]
fetch_keys = ["pooled_output", "sequence_output"]
feed_dict = dict(zip(feed_keys, [inputs[x] for x in feed_keys]))
fetch_dict = dict(zip(fetch_keys, [outputs[x]] for x in fetch_keys))
fetch_dict = dict(zip(fetch_keys, [outputs[x] for x in fetch_keys]))
import paddle_serving_client.io as serving_io
serving_io.save_model("bert_seq20_model", "bert_seq20_client", feed_dict, fetch_dict, program)
......
......@@ -10,8 +10,9 @@ serving_io.save_model("imdb_model", "imdb_client_conf",
{"words": data}, {"prediction": prediction},
fluid.default_main_program())
```
`imdb_model` is the server side model with serving configurations. `imdb_client_conf` is the client rpc configurations. Serving has a
dictionary for `Feed` and `Fetch` variables for client to assign. In the example, `{"words": data}` is the feed dict that specify the input of saved inference model. `{"prediction": prediction}` is the fetch dic that specify the output of saved inference model. An alias name can be defined for feed and fetch variables. An example of how to use alias name
`imdb_model` is the server side model with serving configurations. `imdb_client_conf` is the client rpc configurations.
Serving has a dictionary for `Feed` and `Fetch` variables for client to assign. In the example, `{"words": data}` is the feed dict that specify the input of saved inference model. `{"prediction": prediction}` is the fetch dic that specify the output of saved inference model. An alias name can be defined for feed and fetch variables. An example of how to use alias name
is as follows:
``` python
from paddle_serving_client import Client
......@@ -35,10 +36,14 @@ for line in sys.stdin:
If you have saved model files using Paddle's `save_inference_model` API, you can use Paddle Serving's` inference_model_to_serving` API to convert it into a model file that can be used for Paddle Serving.
```
import paddle_serving_client.io as serving_io
serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None )
```
dirname (str) - Path of saved model files. Program file and parameter files are saved in this directory.
model_filename (str, optional) - The name of file to load the inference program. If it is None, the default filename __model__ will be used. Default: None.
paras_filename (str, optional) - The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.
serving_server (str, optional) - The path of model files and configuration files for server. Default: "serving_server".
serving_client (str, optional) - The path of configuration files for client. Default: "serving_client".
model_filename (str, optional) - The name of file to load the inference program. If it is None, the default filename `__model__` will be used. Default: None.
paras_filename (str, optional) - The name of file to load all parameters. It is only used for the case that all parameters were saved in a single binary file. If parameters were saved in separate files, set it as None. Default: None.
......@@ -11,7 +11,9 @@ serving_io.save_model("imdb_model", "imdb_client_conf",
{"words": data}, {"prediction": prediction},
fluid.default_main_program())
```
imdb_model是具有服务配置的服务器端模型。 imdb_client_conf是客户端rpc配置。 Serving有一个 提供给用户存放Feed和Fetch变量信息的字典。 在示例中,`{words”:data}` 是用于指定已保存推理模型输入的提要字典。`{"prediction":projection}`是指定保存的推理模型输出的字典。可以为feed和fetch变量定义一个别名。 如何使用别名的例子 示例如下:
imdb_model是具有服务配置的服务器端模型。 imdb_client_conf是客户端rpc配置。
Serving有一个提供给用户存放Feed和Fetch变量信息的字典。 在示例中,`{"words":data}` 是用于指定已保存推理模型输入的提要字典。`{"prediction":projection}`是指定保存的推理模型输出的字典。可以为feed和fetch变量定义一个别名。 如何使用别名的例子 示例如下:
``` python
from paddle_serving_client import Client
......@@ -35,10 +37,14 @@ for line in sys.stdin:
如果已使用Paddle 的`save_inference_model`接口保存出预测要使用的模型,则可以通过Paddle Serving的`inference_model_to_serving`接口转换成可用于Paddle Serving的模型文件。
```
import paddle_serving_client.io as serving_io
serving_io.inference_model_to_serving(dirname, model_filename=None, params_filename=None, serving_server="serving_server", serving_client="serving_client")
serving_io.inference_model_to_serving(dirname, serving_server="serving_server", serving_client="serving_client", model_filename=None, params_filename=None)
```
dirname (str) – 需要转换的模型文件存储路径,Program结构文件和参数文件均保存在此目录。
model_filename (str,可选) – 存储需要转换的模型Inference Program结构的文件名称。如果设置为None,则使用 __model__ 作为默认的文件名。默认值为None。
serving_server (str, 可选) - 转换后的模型文件和配置文件的存储路径。默认值为serving_server。
serving_client (str, 可选) - 转换后的客户端配置文件存储路径。默认值为serving_client。
model_filename (str,可选) – 存储需要转换的模型Inference Program结构的文件名称。如果设置为None,则使用 `__model__` 作为默认的文件名。默认值为None。
params_filename (str,可选) – 存储需要转换的模型所有参数的文件名称。当且仅当所有模型参数被保存在一个单独的二进制文件中,它才需要被指定。如果模型参数是存储在各自分离的文件中,设置它的值为None。默认值为None。
serving_server (str, 可选) - 转换后的模型文件和配置文件的存储路径。默认值为"serving_server"。
serving_client (str, 可选) - 转换后的客户端配置文件存储路径。默认值为"serving_client"。
......@@ -23,10 +23,10 @@ class BertService(WebService):
def load(self):
self.reader = BertReader(vocab_file="vocab.txt", max_seq_len=128)
def preprocess(self, feed={}, fetch=[]):
feed_res = [{
"words": self.reader.process(ins["words"].encode("utf-8"))
} for ins in feed]
def preprocess(self, feed=[], fetch=[]):
feed_res = [
self.reader.process(ins["words"].encode("utf-8")) for ins in feed
]
return feed_res, fetch
......
......@@ -44,6 +44,6 @@ python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696
client send inference request
```
python image_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
```
*the port of server side in this example is 9696
......@@ -44,6 +44,6 @@ python -m paddle_serving_server_gpu.serve --model ResNet50_vd_model --port 9696
client端进行预测
```
python image_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
python resnet50_rpc_client.py ResNet50_vd_client_config/serving_client_conf.prototxt
```
*server端示例中服务端口为9696端口
......@@ -39,8 +39,8 @@ def single_func(idx, resource):
client.connect([resource["endpoint"][idx % len(resource["endpoint"])]])
start = time.time()
for i in range(1000):
img = reader.process_image(img_list[i]).reshape(-1)
for i in range(100):
img = reader.process_image(img_list[i])
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
end = time.time()
return [[end - start]]
......@@ -49,7 +49,7 @@ def single_func(idx, resource):
if __name__ == "__main__":
multi_thread_runner = MultiThreadRunner()
endpoint_list = ["127.0.0.1:9393"]
endpoint_list = ["127.0.0.1:9292"]
#card_num = 4
#for i in range(args.thread):
# endpoint_list.append("127.0.0.1:{}".format(9295 + i % card_num))
......
......@@ -24,6 +24,7 @@ from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
import requests
import json
import base64
from image_reader import ImageReader
args = benchmark_args()
......@@ -36,6 +37,10 @@ def single_func(idx, resource):
img_list = []
for i in range(1000):
img_list.append(open("./image_data/n01440764/" + file_list[i]).read())
profile_flags = False
if "FLAGS_profile_client" in os.environ and os.environ[
"FLAGS_profile_client"]:
profile_flags = True
if args.request == "rpc":
reader = ImageReader()
fetch = ["score"]
......@@ -46,23 +51,43 @@ def single_func(idx, resource):
for i in range(1000):
if args.batch_size >= 1:
feed_batch = []
i_start = time.time()
for bi in range(args.batch_size):
img = reader.process_image(img_list[i])
img = img.reshape(-1)
feed_batch.append({"image": img})
i_end = time.time()
if profile_flags:
print("PROFILE\tpid:{}\timage_pre_0:{} image_pre_1:{}".
format(os.getpid(),
int(round(i_start * 1000000)),
int(round(i_end * 1000000))))
result = client.predict(feed=feed_batch, fetch=fetch)
else:
print("unsupport batch size {}".format(args.batch_size))
elif args.request == "http":
raise ("no batch predict for http")
py_version = 2
server = "http://" + resource["endpoint"][idx % len(resource[
"endpoint"])] + "/image/prediction"
start = time.time()
for i in range(1000):
if py_version == 2:
image = base64.b64encode(
open("./image_data/n01440764/" + file_list[i]).read())
else:
image = base64.b64encode(open(image_path, "rb").read()).decode(
"utf-8")
req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]})
r = requests.post(
server, data=req, headers={"Content-Type": "application/json"})
end = time.time()
return [[end - start]]
if __name__ == '__main__':
multi_thread_runner = MultiThreadRunner()
endpoint_list = ["127.0.0.1:9393"]
endpoint_list = ["127.0.0.1:9292"]
#endpoint_list = endpoint_list + endpoint_list + endpoint_list
result = multi_thread_runner.run(single_func, args.thread,
{"endpoint": endpoint_list})
......
......@@ -16,7 +16,7 @@
import sys
import time
import requests
from imdb_reader import IMDBDataset
from paddle_serving_app import IMDBDataset
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
......@@ -37,26 +37,39 @@ def single_func(idx, resource):
client.load_client_config(args.model)
client.connect([args.endpoint])
for i in range(1000):
if args.batch_size == 1:
word_ids, label = imdb_dataset.get_words_and_label(line)
fetch_map = client.predict(
feed={"words": word_ids}, fetch=["prediction"])
if args.batch_size >= 1:
feed_batch = []
for bi in range(args.batch_size):
word_ids, label = imdb_dataset.get_words_and_label(dataset[
bi])
feed_batch.append({"words": word_ids})
result = client.predict(feed=feed_batch, fetch=["prediction"])
if result is None:
raise ("predict failed.")
else:
print("unsupport batch size {}".format(args.batch_size))
elif args.request == "http":
for fn in filelist:
fin = open(fn)
for line in fin:
word_ids, label = imdb_dataset.get_words_and_label(line)
r = requests.post(
"http://{}/imdb/prediction".format(args.endpoint),
data={"words": word_ids,
"fetch": ["prediction"]})
if args.batch_size >= 1:
feed_batch = []
for bi in range(args.batch_size):
feed_batch.append({"words": dataset[bi]})
r = requests.post(
"http://{}/imdb/prediction".format(args.endpoint),
json={"feed": feed_batch,
"fetch": ["prediction"]})
if r.status_code != 200:
print('HTTP status code -ne 200')
raise ("predict failed.")
else:
print("unsupport batch size {}".format(args.batch_size))
end = time.time()
return [[end - start]]
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {})
print(result)
avg_cost = 0
for cost in result[0]:
avg_cost += cost
print("total cost {} s of each thread".format(avg_cost / args.thread))
rm profile_log
for thread_num in 1 2 4 8 16
do
$PYTHONROOT/bin/python benchmark.py --thread $thread_num --model imdbo_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
for batch_size in 1 2 4 8 16 32 64 128 256 512
do
$PYTHONROOT/bin/python benchmark.py --thread $thread_num --batch_size $batch_size --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "========================================"
echo "batch size : $batch_size" >> profile_log
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
done
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import time
import requests
from imdb_reader import IMDBDataset
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
args = benchmark_args()
def single_func(idx, resource):
imdb_dataset = IMDBDataset()
imdb_dataset.load_resource("./imdb.vocab")
dataset = []
with open("./test_data/part-0") as fin:
for line in fin:
dataset.append(line.strip())
start = time.time()
if args.request == "rpc":
client = Client()
client.load_client_config(args.model)
client.connect([args.endpoint])
for i in range(1000):
if args.batch_size >= 1:
feed_batch = []
for bi in range(args.batch_size):
word_ids, label = imdb_dataset.get_words_and_label(dataset[
bi])
feed_batch.append({"words": word_ids})
result = client.predict(feed=feed_batch, fetch=["prediction"])
if result is None:
raise ("predict failed.")
else:
print("unsupport batch size {}".format(args.batch_size))
elif args.request == "http":
if args.batch_size >= 1:
feed_batch = []
for bi in range(args.batch_size):
feed_batch.append({"words": dataset[bi]})
r = requests.post(
"http://{}/imdb/prediction".format(args.endpoint),
json={"feed": feed_batch,
"fetch": ["prediction"]})
if r.status_code != 200:
print('HTTP status code -ne 200')
raise ("predict failed.")
else:
print("unsupport batch size {}".format(args.batch_size))
end = time.time()
return [[end - start]]
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {})
avg_cost = 0
for cost in result[0]:
avg_cost += cost
print("total cost {} s of each thread".format(avg_cost / args.thread))
rm profile_log
for thread_num in 1 2 4 8 16
do
for batch_size in 1 2 4 8 16 32 64 128 256 512
do
$PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1
echo "========================================"
echo "batch size : $batch_size" >> profile_log
$PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log
tail -n 1 profile >> profile_log
done
done
......@@ -13,7 +13,7 @@
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
from imdb_reader import IMDBDataset
from paddle_serving_app import IMDBDataset
import sys
client = Client()
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
import sys
import subprocess
from multiprocessing import Pool
import time
def batch_predict(batch_size=4):
client = Client()
client.load_client_config(conf_file)
client.connect(["127.0.0.1:9292"])
fetch = ["acc", "cost", "prediction"]
feed_batch = []
for line in sys.stdin:
group = line.strip().split()
words = [int(x) for x in group[1:int(group[0])]]
label = [int(group[-1])]
feed = {"words": words, "label": label}
feed_batch.append(feed)
if len(feed_batch) == batch_size:
fetch_batch = client.batch_predict(
feed_batch=feed_batch, fetch=fetch)
for i in range(batch_size):
print("{} {}".format(fetch_batch[i]["prediction"][1],
feed_batch[i]["label"][0]))
feed_batch = []
if len(feed_batch) > 0:
fetch_batch = client.batch_predict(feed_batch=feed_batch, fetch=fetch)
for i in range(len(feed_batch)):
print("{} {}".format(fetch_batch[i]["prediction"][1], feed_batch[i][
"label"][0]))
if __name__ == '__main__':
conf_file = sys.argv[1]
batch_size = int(sys.argv[2])
batch_predict(batch_size)
......@@ -14,7 +14,7 @@
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from imdb_reader import IMDBDataset
from paddle_serving_app import IMDBDataset
import sys
......
([简体中文](./README_CN.md)|English)
paddle_serving_app is a tool component of the Paddle Serving framework, and includes functions such as pre-training model download and data pre-processing methods.
It is convenient for users to quickly test and deploy model examples, analyze the performance of prediction services, and debug model prediction services.
## Install
```shell
pip install paddle_serving_app
```
## Get model list
```shell
python -m paddle_serving_app.package --model_list
```
## Download pre-training model
```shell
python -m paddle_serving_app.package --get_model senta_bilstm
```
11 pre-trained models are built into paddle_serving_app, covering 6 kinds of prediction tasks.
The model files can be directly used for deployment, and the `--tutorial` argument can be added to obtain the deployment method.
| Prediction task | Model name |
| ------------ | ------------------------------------------------ |
| SentimentAnalysis | 'senta_bilstm', 'senta_bow', 'senta_cnn' |
| SemanticRepresentation | 'ernie_base' |
| ChineseWordSegmentation | 'lac' |
| ObjectDetection | 'faster_rcnn', 'yolov3' |
| ImageSegmentation | 'unet', 'deeplabv3' |
| ImageClassification | 'resnet_v2_50_imagenet', 'mobilenet_v2_imagenet' |
## Data preprocess API
paddle_serving_app provides a variety of data preprocessing methods for prediction tasks in the field of CV and NLP.
- class ChineseBertReader
Preprocessing for Chinese semantic representation task.
- `__init__(vocab_file, max_seq_len=20)`
- vocab_file(st ):Path of dictionary file.
- max_seq_len(in ,optional):The length of sample after processing. The excess part will be truncated, and the insufficient part will be padding 0. Default 20.
- `process(line)`
- line(st ):Text input.
[example](../examples/bert/bert_client.py)
- class LACReader
Preprocessing for Chinese word segmentation task.
- `__init__(dict_floder)`
- dict_floder(st )Path of dictionary file.
- `process(sent)`
- sent(st ):Text input.
- `parse_result`
- words(st ):Original text input.
- crf_decode(np.array):CRF code predicted by model.
[example](../examples/bert/lac_web_service.py)
- class SentaReader
- `__init__(vocab_path)`
- vocab_path(st ):Path of dictionary file.
- `process(cols)`
- cols(st ):Word segmentation result.
[example](../examples/senta/senta_web_service.py)
- The image preprocessing method is more flexible than the above method, and can be combined by the following multiple classes,[example](../examples/imagenet/image_rpc_client.py)
- class Sequentia
- `__init__(transforms)`
- transforms(list):List of image preprocessing classes
- `__call__(img)`
- img:The input of image preprocessing. The data type is is related to the first preprocessing method in transforms.
- class File2Image
- `__call__(img_path)`
- img_path(str):Path of image file.
- class URL2Image
- `__call__(img_url)`
- img_url(str):url of image file.
- class Normalize
- `__init__(mean,std)`
- mean(float):Mean
- std(float):Variance
- `__call__(img)`
- img(np.array):Image data in (C,H,W) channels.
- class CenterCrop
- `__init__(size)`
- size(list/int):
- `__call__(img)`
- img(np.array):Image data.
- class Resize
- `__init__(size, max_size=2147483647, interpolation=None)`
- size(list/int):The expected image size, when the input is a list type, it needs to contain the expected length and width. When the input is int type, the short side will be set to the length of size, and the long side will be scaled proportionally.
- `__call__(img)`
- img(numpy array):Image data.
## Timeline tools
The Timeline tool can be used to visualize the start and end time of various stages such as the preparation data of the prediction service, client wait and server op.
This tool is convenient to analyze the proportion of time occupancy in the prediction service. On this basis, prediction services can be optimized in a targeted manner.
### How to use
1. Before making predictions on the client side, turn on the timeline function of each stage in the Paddle Serving framework by environment variables. It will print timeline information in log.
```shell
export FLAGS_profile_client=1 # Turn on timeline function of client
export FLAGS_profile_server=1 # Turn on timeline function of server
```
2. Perform predictions and redirect client-side logs to files, for example, named as profile.
3. Export the information in the log file into a trace file.
```shell
python -m paddle_serving_app.trace --profile_file profile --trace_file trace
```
4. Open the `chrome: // tracing /` URL using Chrome browser.
Load the trace file generated in the previous step through the load button, you can
Visualize the time information of each stage of the forecast service.
As shown in next figure, the figure shows the timeline of GPU prediction service using [bert example](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert).
The server side starts service with 4 GPU cards, the client side starts 4 processes to request, and the batch size is 1.
In the figure, bert_pre represents the data pre-processing stage of the client, and client_infer represents the stage where the client completes the sending of the prediction request to the receiving result.
The process in the figure represents the process number of the client, and the second line of each process shows the timeline of each op of the server.
![timeline](../../doc/timeline-example.png)
## Debug tools
The inference op of Paddle Serving is implemented based on Paddle inference lib.
Before deploying the prediction service, you may need to check the input and output of the prediction service or check the resource consumption.
Therefore, a local prediction tool is built into the paddle_serving_app, which is used in the same way as sending a request to the server through the client.
Taking [fit_a_line prediction service](../examples/fit_a_line) as an example, the following code can be used to run local prediction.
```python
from paddle_serving_app import Debugger
import numpy as np
debugger = Debugger()
debugger.load_model_config("./uci_housing_model", gpu=False)
data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
-0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
fetch_map = debugger.predict(feed={"x":data}, fetch = ["price"])
```
(简体中文|[English](./README.md))
paddle_serving_app是Paddle Serving框架的工具组件,包含了预训练模型下载、数据预处理方法等功能。方便用户快速体验和部署模型示例、分析预测服务性能、调试模型预测服务等。
## 安装
```shell
pip install paddle_serving_app
```
## 获取模型列表
```shell
python -m paddle_serving_app.package --model_list
```
## 下载预训练模型
```shell
python -m paddle_serving_app.package --get_model senta_bilstm
```
paddle_serving_app中内置了11中预训练模型,涵盖了6种预测任务。获取到的模型文件可以直接用于部署,添加`--tutorial`参数可以获取对应的部署方式。
| 预测服务类型 | 模型名称 |
| ------------ | ------------------------------------------------ |
| 中文情感分析 | 'senta_bilstm', 'senta_bow', 'senta_cnn' |
| 语义理解 | 'ernie_base' |
| 中文分词 | 'lac' |
| 图像检测 | 'faster_rcnn', 'yolov3' |
| 图像分割 | 'unet', 'deeplabv3' |
| 图像分类 | 'resnet_v2_50_imagenet', 'mobilenet_v2_imagenet' |
## 数据预处理API
paddle_serving_app针对CV和NLP领域的模型任务,提供了多种常见的数据预处理方法。
- class ChineseBertReader
中文语义理解模型预处理
- `__init__(vocab_file, max_seq_len=20)`
- vocab_file(str):词典文件路径。
- max_seq_len(int,可选):处理后的样本长度,超出的部分会截断,不足的部分会padding 0。默认值20。
- `process(line)`
- line(str):输入文本
[参考示例](../examples/bert/bert_client.py)
- class LACReader 中文分词预处理
- `__init__(dict_floder)`
- dict_floder(str)词典文件目录
- `process(sent)`
- sent(str):输入文本
- `parse_result`
- words(str):原始文本
- crf_decode(np.array):模型预测结果中的CRF编码
[参考示例](../examples/lac/lac_web_service.py)
- class SentaReader
- `__init__(vocab_path)`
- vocab_path(str):词典文件目录
- `process(cols)`
- cols(str):分词后的文本
[参考示例](../examples/senta/senta_web_service.py)
- 图像的预处理方法相比于上述的方法更加灵活多变,可以通过以下的多个类进行组合,[参考示例](../examples/imagenet/image_rpc_client.py)
- class Sequentia
- `__init__(transforms)`
- transforms(list):图像预处理方法类的列表
- `__call__(img)`
- img:图像处理的输入,具体类型与transforms中的第一个预处理方法有关
- class File2Image
- `__call__(img_path)`
- img_path(str):图像文件路径
- class URL2Image
- `__call__(img_url)`
- img_url(str):图像url
- class Normalize
- `__init__(mean,std)`
- mean(float):均值
- std(float):方差
- `__call__(img)`
- img(np.array):(C,H,W)排列的图像数据
- class CenterCrop
- `__init__(size)`
- size(list/int):预期的裁剪后的大小,list类型时需要包含预期的长和宽,int类型时会返回边长为size的正方形图片
- `__call__(img)`
- img(np.array):输入图像
- class Resize
- `__init__(size, max_size=2147483647, interpolation=None)`
- size(list/int):预期的图像大小,list类型时需要包含预期的长和宽,int类型时,短边会设置为size的长度,长边按比例缩放
- `__call__(img)`
- img(numpy array):输入图像
## Timeline 工具
通过Timeline工具可以将预测服务的准备数据、client等待、server端op等各阶段起止时间可视化,方便分析预测服务中的时间占用比重,在此基础上有针对性地优化预测服务。
### 使用方式
1. client端在进行预测之前,通过环境变量打开Paddle Serving框架中的各阶段日志打点功能
```shell
export FLAGS_profile_client=1 #开启client端各阶段时间打点
export FLAGS_profile_server=1 #开启server端各阶段时间打点
```
2. 执行预测,并将client端的日志重定向到文件中,例如profile文件。
3. 将日志文件中的信息导出成为trace文件
```shell
python -m paddle_serving_app.trace --profile_file profile --trace_file trace
```
4. 使用chrome浏览器,打开`chrome://tracing/`网址,通过load按钮加载上一步产生的trace文件,即可将预测服务的各阶段时间信息可视化。
效果如下图,图中展示了使用[bert示例](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/bert)的GPU预测服务,server端开启4卡预测,client端启动4进程,batch size为1时的各阶段timeline。
其中bert_pre代表client端的数据预处理阶段,client_infer代表client完成预测请求的发送到接收结果的阶段,图中的process代表的是client的进程号,每个进程的第二行展示的是server各个op的timeline。
![timeline](../../doc/timeline-example.png)
## Debug工具
Paddle Serving框架的server预测op使用了Paddle 的预测框架,在部署预测服务之前可能需要对预测服务的输入输出进行检验或者查看资源占用等。因此在paddle_serving_app中内置了本地预测工具,使用方式与通过client向服务端发送请求一致。
[fit_a_line预测服务](../examples/fit_a_line)为例,使用以下代码即可执行本地预测。
```python
from paddle_serving_app import Debugger
import numpy as np
debugger = Debugger()
debugger.load_model_config("./uci_housing_model", gpu=False)
data = [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
-0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332]
fetch_map = debugger.predict(feed={"x":data}, fetch = ["price"])
```
......@@ -15,5 +15,6 @@ from .reader.chinese_bert_reader import ChineseBertReader
from .reader.image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, CenterCrop, Resize, PadStride
from .reader.lac_reader import LACReader
from .reader.senta_reader import SentaReader
from .reader.imdb_reader import IMDBDataset
from .models import ServingModels
from .local_predict import Debugger
......@@ -25,7 +25,9 @@ class ServingModels(object):
self.model_dict["SemanticRepresentation"] = ["ernie_base"]
self.model_dict["ChineseWordSegmentation"] = ["lac"]
self.model_dict["ObjectDetection"] = ["faster_rcnn", "yolov3"]
self.model_dict["ImageSegmentation"] = ["unet", "deeplabv3"]
self.model_dict["ImageSegmentation"] = [
"unet", "deeplabv3", "mobilenet_cityspaces"
]
self.model_dict["ImageClassification"] = [
"resnet_v2_50_imagenet", "mobilenet_v2_imagenet"
]
......
......@@ -72,7 +72,7 @@ if __name__ == "__main__":
Usage:
Download a package for serving directly
Example:
python -m paddle_serving_app.models --get senta_bilstm
python -m paddle_serving_app.models --get_model senta_bilstm
python -m paddle_serving_app.models --list_model
""")
pass
......@@ -13,14 +13,19 @@
# limitations under the License.
import cv2
import os
import urllib
import numpy as np
import base64
import sys
from . import functional as F
from PIL import Image, ImageDraw
import json
_cv2_interpolation_to_str = {cv2.INTER_LINEAR: "cv2.INTER_LINEAR", None: "None"}
py_version = sys.version_info[0]
if py_version == 2:
import urllib
else:
import urllib.request as urllib
def generate_colormap(num_classes):
......@@ -393,7 +398,7 @@ class Normalize(object):
class Lambda(object):
"""Apply a user-defined lambda as a transform.
Very shame to just copy from
Very shame to just copy from
https://github.com/pytorch/vision/blob/master/torchvision/transforms/transforms.py#L301
Args:
......
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import os
import paddle
import re
import paddle.fluid.incubate.data_generator as dg
py_version = sys.version_info[0]
class IMDBDataset(dg.MultiSlotDataGenerator):
def load_resource(self, dictfile):
self._vocab = {}
wid = 0
if py_version == 2:
with open(dictfile) as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
else:
with open(dictfile, encoding="utf-8") as f:
for line in f:
self._vocab[line.strip()] = wid
wid += 1
self._unk_id = len(self._vocab)
self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
def get_words_only(self, line):
sent = line.lower().replace("<br />", " ").strip()
words = [x for x in self._pattern.split(sent) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas
def get_words_and_label(self, line):
send = '|'.join(line.split('|')[:-1]).lower().replace("<br />",
" ").strip()
label = [int(line.split('|')[-1])]
words = [x for x in self._pattern.split(send) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
]
return feas, label
def infer_reader(self, infer_filelist, batch, buf_size):
def local_iter():
for fname in infer_filelist:
with open(fname, "r") as fin:
for line in fin:
feas, label = self.get_words_and_label(line)
yield feas, label
import paddle
batch_iter = paddle.batch(
paddle.reader.shuffle(
local_iter, buf_size=buf_size),
batch_size=batch)
return batch_iter
def generate_sample(self, line):
def memory_iter():
for i in range(1000):
yield self.return_value
def data_iter():
feas, label = self.get_words_and_label(line)
yield ("words", feas), ("label", label)
return data_iter
if __name__ == "__main__":
imdb = IMDBDataset()
imdb.load_resource("imdb.vocab")
imdb.run_from_stdin()
# -*- coding: utf-8 -*-
"""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
import json
import sys
import argparse
def parse_args():
parser = argparse.ArgumentParser("Convert profile log to trace")
parser.add_argument(
"--profile_file",
type=str,
default="",
required=True,
help="Profile log")
parser.add_argument(
"--trace_file", type=str, default="trace", help="Trace file")
return parser.parse_args()
def prase(pid_str, time_str, counter):
pid = pid_str.split(":")[1]
event_list = time_str.split(" ")
trace_list = []
for event in event_list:
name, ts = event.split(":")
name_list = name.split("_")
ph = "B" if (name_list[-1] == "0") else "E"
if len(name_list) == 2:
name = name_list[0]
else:
name = name_list[0] + "_" + name_list[1]
event_dict = {}
event_dict["name"] = name
event_dict["tid"] = 0
event_dict["pid"] = pid
event_dict["ts"] = ts
event_dict["ph"] = ph
trace_list.append(event_dict)
return trace_list
if __name__ == "__main__":
args = parse_args()
profile_file = args.profile_file
trace_file = args.trace_file
all_list = []
counter = 0
with open(profile_file) as f:
for line in f.readlines():
line = line.strip().split("\t")
if line[0] == "PROFILE":
trace_list = prase(line[1], line[2], counter)
counter += 1
for trace in trace_list:
all_list.append(trace)
trace = json.dumps(all_list, indent=2, separators=(',', ':'))
with open(trace_file, "w") as f:
f.write(trace)
......@@ -201,7 +201,12 @@ class Client(object):
def shape_check(self, feed, key):
if key in self.lod_tensor_set:
return
if len(feed[key]) != self.feed_tensor_len[key]:
if isinstance(feed[key],
list) and len(feed[key]) != self.feed_tensor_len[key]:
raise SystemExit("The shape of feed tensor {} not match.".format(
key))
if type(feed[key]).__module__ == np.__name__ and np.size(feed[
key]) != self.feed_tensor_len[key]:
raise SystemExit("The shape of feed tensor {} not match.".format(
key))
......@@ -252,23 +257,16 @@ class Client(object):
for key in feed_i:
if key not in self.feed_names_:
raise ValueError("Wrong feed name: {}.".format(key))
if not isinstance(feed_i[key], np.ndarray):
self.shape_check(feed_i, key)
#if not isinstance(feed_i[key], np.ndarray):
self.shape_check(feed_i, key)
if self.feed_types_[key] == int_type:
if i == 0:
int_feed_names.append(key)
if isinstance(feed_i[key], np.ndarray):
if key in self.lod_tensor_set:
raise ValueError(
"LodTensor var can not be ndarray type.")
int_shape.append(list(feed_i[key].shape))
else:
int_shape.append(self.feed_shapes_[key])
if isinstance(feed_i[key], np.ndarray):
if key in self.lod_tensor_set:
raise ValueError(
"LodTensor var can not be ndarray type.")
#int_slot.append(np.reshape(feed_i[key], (-1)).tolist())
int_slot.append(feed_i[key])
self.has_numpy_input = True
else:
......@@ -278,17 +276,10 @@ class Client(object):
if i == 0:
float_feed_names.append(key)
if isinstance(feed_i[key], np.ndarray):
if key in self.lod_tensor_set:
raise ValueError(
"LodTensor var can not be ndarray type.")
float_shape.append(list(feed_i[key].shape))
else:
float_shape.append(self.feed_shapes_[key])
if isinstance(feed_i[key], np.ndarray):
if key in self.lod_tensor_set:
raise ValueError(
"LodTensor var can not be ndarray type.")
#float_slot.append(np.reshape(feed_i[key], (-1)).tolist())
float_slot.append(feed_i[key])
self.has_numpy_input = True
else:
......
......@@ -104,10 +104,10 @@ def save_model(server_model_folder,
def inference_model_to_serving(dirname,
model_filename=None,
params_filename=None,
serving_server="serving_server",
serving_client="serving_client"):
serving_client="serving_client",
model_filename=None,
params_filename=None):
place = fluid.CPUPlace()
exe = fluid.Executor(place)
inference_program, feed_target_names, fetch_targets = \
......
......@@ -274,7 +274,8 @@ class Server(object):
self.model_config_paths[node.name] = path
print("You have specified multiple model paths, please ensure "
"that the input and output of multiple models are the same.")
workflow_oi_config_path = self.model_config_paths.items()[0][1]
workflow_oi_config_path = list(self.model_config_paths.items())[0][
1]
else:
raise Exception("The type of model_config_paths must be str or "
"dict({op: model_path}), not {}.".format(
......
......@@ -320,7 +320,8 @@ class Server(object):
self.model_config_paths[node.name] = path
print("You have specified multiple model paths, please ensure "
"that the input and output of multiple models are the same.")
workflow_oi_config_path = self.model_config_paths.items()[0][1]
workflow_oi_config_path = list(self.model_config_paths.items())[0][
1]
else:
raise Exception("The type of model_config_paths must be str or "
"dict({op: model_path}), not {}.".format(
......
......@@ -43,5 +43,5 @@ RUN yum -y install wget && \
source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \
pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
yum -y install epel-release && yum -y install patchelf && \
yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all
......@@ -43,5 +43,5 @@ RUN yum -y install wget && \
source /root/.bashrc && \
cd .. && rm -rf Python-3.6.8* && \
pip3 install google protobuf setuptools wheel flask numpy==1.16.4 && \
yum -y install epel-release && yum -y install patchelf && \
yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
yum clean all
......@@ -20,5 +20,5 @@ RUN yum -y install wget >/dev/null \
&& rm get-pip.py \
&& yum install -y python3 python3-devel \
&& pip3 install google protobuf setuptools wheel flask \
&& yum -y install epel-release && yum -y install patchelf \
&& yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
&& yum clean all
......@@ -21,5 +21,5 @@ RUN yum -y install wget >/dev/null \
&& rm get-pip.py \
&& yum install -y python3 python3-devel \
&& pip3 install google protobuf setuptools wheel flask \
&& yum -y install epel-release && yum -y install patchelf \
&& yum -y install epel-release && yum -y install patchelf libXext libSM libXrender\
&& yum clean all
......@@ -15,6 +15,6 @@
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
import re
with open("setup.cfg", "w") as f:
line = "[bdist_wheel]\npython-tag={0}{1}\nplat-name=linux_x86_64".format(
line = "[bdist_wheel]\npython-tag={0}{1}\nplat-name=manylinux1_x86_64".format(
get_abbr_impl(), get_impl_ver())
f.write(line)
......@@ -343,7 +343,7 @@ function python_test_imdb() {
sleep 5
check_cmd "head test_data/part-0 | python test_client.py imdb_cnn_client_conf/serving_client_conf.prototxt imdb.vocab"
# test batch predict
check_cmd "python benchmark_batch.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc --endpoint 127.0.0.1:9292"
check_cmd "python benchmark.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request rpc --endpoint 127.0.0.1:9292"
echo "imdb CPU RPC inference pass"
kill_server_process
rm -rf work_dir1
......@@ -359,7 +359,7 @@ function python_test_imdb() {
exit 1
fi
# test batch predict
check_cmd "python benchmark_batch.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request http --endpoint 127.0.0.1:9292"
check_cmd "python benchmark.py --thread 4 --batch_size 8 --model imdb_bow_client_conf/serving_client_conf.prototxt --request http --endpoint 127.0.0.1:9292"
setproxy # recover proxy state
kill_server_process
ps -ef | grep "text_classify_service.py" | grep -v grep | awk '{print $2}' | xargs kill
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册