diff --git a/README.md b/README.md
index ad330602a330d029aebc12b2b2c0e5a3651b812e..ba0540ddcf807cbdb5b761736339d113bcc6b15a 100644
--- a/README.md
+++ b/README.md
@@ -30,17 +30,18 @@ pip install paddle-serving-server
## Quick Start Example
-### download trained model
+### Boston House Price Prediction model
``` shell
wget --no-check-certificate https://paddle-serving.bj.bcebos.com/uci_housing.tar.gz
tar -xzf uci_housing.tar.gz
```
+
Paddle Serving provides HTTP and RPC based service for users to access
### HTTP service
``` shell
-python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci
+python -m paddle_serving_server.web_serve --model uci_housing_model --thread 10 --port 9292 --name uci
```
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9292/uci/prediction
@@ -51,9 +52,9 @@ curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.25
``` shell
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292
```
-python client api
``` python
+# A user can access the RPC service through the paddle_serving_client API
from paddle_serving_client import Client
client = Client()
@@ -66,9 +67,23 @@ print(fetch_map)
```
+## Models waiting for you to deploy
+
+
+
+
+| Model Name | Resnet50 |
+|:--------------------: |:----------------------------------: |
+| Package URL | To be released |
+| Description | Get the representation of an image |
+| Training Data Source | Imagenet |
+
+
+
+
## Document
-[Design Doc(Chinese)](doc/DESIGN.md)
+[How to save a servable model?](doc/SAVE.md)
[How to config Serving native operators on server side?](doc/SERVER_DAG.md)
@@ -80,9 +95,17 @@ print(fetch_map)
[FAQ(Chinese)](doc/FAQ.md)
+[Design Doc(Chinese)](doc/DESIGN.md)
+
## Join Community
To connect with other users and contributors, welcome to join our [Slack channel](https://paddleserving.slack.com/archives/CUBPKHKMJ)
## Contribution
If you want to contribute code to Paddle Serving, please reference [Contribution Guidelines](doc/CONTRIBUTE.md)
+
+### Feedback
+For any feedback or to report a bug, please open a [GitHub Issue](https://github.com/PaddlePaddle/Serving/issues).
+
+## License
+[Apache 2.0 License](https://github.com/PaddlePaddle/Serving/blob/develop/LICENSE)
diff --git a/core/general-client/include/general_model.h b/core/general-client/include/general_model.h
index 1bac4375b2995605d604b9f8ef7e863b6a646788..0e042d1880acd15151d3476d6061fb759f92e11b 100644
--- a/core/general-client/include/general_model.h
+++ b/core/general-client/include/general_model.h
@@ -45,12 +45,12 @@ class PredictorRes {
~PredictorRes() {}
public:
- const std::vector> & get_int64_by_name(
- const std::string & name) {
+ const std::vector>& get_int64_by_name(
+ const std::string& name) {
return _int64_map[name];
}
- const std::vector> & get_float_by_name(
- const std::string & name) {
+ const std::vector>& get_float_by_name(
+ const std::string& name) {
return _float_map[name];
}
@@ -71,7 +71,7 @@ class PredictorClient {
void set_predictor_conf(const std::string& conf_path,
const std::string& conf_file);
- int create_predictor_by_desc(const std::string & sdk_desc);
+ int create_predictor_by_desc(const std::string& sdk_desc);
int create_predictor();
int destroy_predictor();
@@ -81,7 +81,8 @@ class PredictorClient {
const std::vector>& int_feed,
const std::vector& int_feed_name,
const std::vector& fetch_name,
- PredictorRes & predict_res); // NOLINT
+ PredictorRes& predict_res, // NOLINT
+ const int& pid);
std::vector> predict(
const std::vector>& float_feed,
diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp
index 1dcf261f96769f9c33ddcbe792eda9a6057c8502..9fafe0abc59ce543cfc1783003296d68102d0c8d 100644
--- a/core/general-client/src/general_model.cpp
+++ b/core/general-client/src/general_model.cpp
@@ -132,13 +132,13 @@ int PredictorClient::create_predictor() {
_api.thrd_initialize();
}
-int PredictorClient::predict(
- const std::vector>& float_feed,
- const std::vector& float_feed_name,
- const std::vector>& int_feed,
- const std::vector& int_feed_name,
- const std::vector& fetch_name,
- PredictorRes & predict_res) { // NOLINT
+int PredictorClient::predict(const std::vector> &float_feed,
+ const std::vector &float_feed_name,
+ const std::vector> &int_feed,
+ const std::vector &int_feed_name,
+ const std::vector &fetch_name,
+ PredictorRes &predict_res,
+ const int &pid) { // NOLINT
predict_res._int64_map.clear();
predict_res._float_map.clear();
Timer timeline;
@@ -218,6 +218,7 @@ int PredictorClient::predict(
VLOG(2) << "fetch name: " << name;
if (_fetch_name_to_type[name] == 0) {
int len = res.insts(0).tensor_array(idx).int64_data_size();
+ VLOG(2) << "fetch tensor : " << name << " type: int64 len : " << len;
predict_res._int64_map[name].resize(1);
predict_res._int64_map[name][0].resize(len);
for (int i = 0; i < len; ++i) {
@@ -226,6 +227,7 @@ int PredictorClient::predict(
}
} else if (_fetch_name_to_type[name] == 1) {
int len = res.insts(0).tensor_array(idx).float_data_size();
+ VLOG(2) << "fetch tensor : " << name << " type: float32 len : " << len;
predict_res._float_map[name].resize(1);
predict_res._float_map[name][0].resize(len);
for (int i = 0; i < len; ++i) {
@@ -240,11 +242,12 @@ int PredictorClient::predict(
if (FLAGS_profile_client) {
std::ostringstream oss;
oss << "PROFILE\t"
+ << "pid:" << pid << "\t"
<< "prepro_0:" << preprocess_start << " "
<< "prepro_1:" << preprocess_end << " "
<< "client_infer_0:" << client_infer_start << " "
<< "client_infer_1:" << client_infer_end << " ";
-
+
if (FLAGS_profile_server) {
int op_num = res.profile_time_size() / 2;
for (int i = 0; i < op_num; ++i) {
@@ -252,10 +255,10 @@ int PredictorClient::predict(
oss << "op" << i << "_1:" << res.profile_time(i * 2 + 1) << " ";
}
}
-
+
oss << "postpro_0:" << postprocess_start << " ";
oss << "postpro_1:" << postprocess_end;
-
+
fprintf(stderr, "%s\n", oss.str().c_str());
}
return 0;
@@ -342,7 +345,7 @@ std::vector>> PredictorClient::batch_predict(
}
VLOG(2) << "batch [" << bi << "] "
- << "itn feed value prepared";
+ << "int feed value prepared";
}
int64_t preprocess_end = timeline.TimeStampUS();
diff --git a/core/general-client/src/pybind_general_model.cpp b/core/general-client/src/pybind_general_model.cpp
index 9056a7af298238cf6ffb365ccc9d40f1035bf187..287fa2dcb55344a6e71bf8e76171de5f94e89de5 100644
--- a/core/general-client/src/pybind_general_model.cpp
+++ b/core/general-client/src/pybind_general_model.cpp
@@ -31,13 +31,15 @@ PYBIND11_MODULE(serving_client, m) {
py::class_(m, "PredictorRes", py::buffer_protocol())
.def(py::init())
.def("get_int64_by_name",
- [](PredictorRes &self, std::string & name) {
+ [](PredictorRes &self, std::string &name) {
return self.get_int64_by_name(name);
- }, py::return_value_policy::reference)
+ },
+ py::return_value_policy::reference)
.def("get_float_by_name",
- [](PredictorRes &self, std::string & name) {
+ [](PredictorRes &self, std::string &name) {
return self.get_float_by_name(name);
- }, py::return_value_policy::reference);
+ },
+ py::return_value_policy::reference);
py::class_(m, "PredictorClient", py::buffer_protocol())
.def(py::init())
@@ -56,26 +58,29 @@ PYBIND11_MODULE(serving_client, m) {
self.set_predictor_conf(conf_path, conf_file);
})
.def("create_predictor_by_desc",
- [](PredictorClient &self, const std::string & sdk_desc) {
- self.create_predictor_by_desc(sdk_desc); })
+ [](PredictorClient &self, const std::string &sdk_desc) {
+ self.create_predictor_by_desc(sdk_desc);
+ })
.def("create_predictor",
[](PredictorClient &self) { self.create_predictor(); })
.def("destroy_predictor",
[](PredictorClient &self) { self.destroy_predictor(); })
.def("predict",
[](PredictorClient &self,
- const std::vector> &float_feed,
- const std::vector &float_feed_name,
- const std::vector> &int_feed,
- const std::vector &int_feed_name,
- const std::vector &fetch_name,
- PredictorRes & predict_res) {
+ const std::vector> &float_feed,
+ const std::vector &float_feed_name,
+ const std::vector> &int_feed,
+ const std::vector &int_feed_name,
+ const std::vector &fetch_name,
+ PredictorRes &predict_res,
+ const int &pid) {
return self.predict(float_feed,
float_feed_name,
int_feed,
int_feed_name,
fetch_name,
- predict_res);
+ predict_res,
+ pid);
})
.def("batch_predict",
[](PredictorClient &self,
diff --git a/doc/SAVE.md b/doc/SAVE.md
new file mode 100644
index 0000000000000000000000000000000000000000..d2cb82980017e900b6c5a8d0d939abd96281bc1c
--- /dev/null
+++ b/doc/SAVE.md
@@ -0,0 +1,28 @@
+## How to save a servable model of Paddle Serving?
+- Currently, Paddle Serving provides a save_model interface for users to access; the interface is similar to `save_inference_model` of Paddle.
+``` python
+import paddle_serving_client.io as serving_io
+serving_io.save_model("imdb_model", "imdb_client_conf",
+ {"words": data}, {"prediction": prediction},
+ fluid.default_main_program())
+```
+`imdb_model` is the server side model with serving configurations. `imdb_client_conf` is the client rpc configuration. Serving has a
+dictionary of `Feed` and `Fetch` variables for the client to assign. An alias name can be defined for each variable. An example of how to use alias names
+is as follows:
+``` python
+from paddle_serving_client import Client
+import sys
+
+client = Client()
+client.load_client_config(sys.argv[1])
+client.connect(["127.0.0.1:9393"])
+
+for line in sys.stdin:
+ group = line.strip().split()
+ words = [int(x) for x in group[1:int(group[0]) + 1]]
+ label = [int(group[-1])]
+ feed = {"words": words, "label": label}
+ fetch = ["acc", "cost", "prediction"]
+ fetch_map = client.predict(feed=feed, fetch=fetch)
+ print("{} {}".format(fetch_map["prediction"][1], label[0]))
+```
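+
+To host the saved `imdb_model` over the rpc service, the server side can be assembled with the `paddle_serving_server` operator APIs. The following is a minimal sketch, assuming a CPU server on port 9393 (the endpoint the client above connects to); the operator sequence mirrors the example servers under `python/examples`:
+
+``` python
+from paddle_serving_server import OpMaker
+from paddle_serving_server import OpSeqMaker
+from paddle_serving_server import Server
+
+# build the standard general_reader -> general_infer -> general_response sequence
+op_maker = OpMaker()
+read_op = op_maker.create('general_reader')
+general_infer_op = op_maker.create('general_infer')
+general_response_op = op_maker.create('general_response')
+
+op_seq_maker = OpSeqMaker()
+op_seq_maker.add_op(read_op)
+op_seq_maker.add_op(general_infer_op)
+op_seq_maker.add_op(general_response_op)
+
+server = Server()
+server.set_op_sequence(op_seq_maker.get_op_sequence())
+server.set_num_threads(4)
+
+# load the saved servable model and start serving on port 9393
+server.load_model_config("imdb_model")
+server.prepare_server(workdir="work_dir1", port=9393, device="cpu")
+server.run_server()
+```
+The same directory can also be hosted with the one-line launcher shown in the top-level README, e.g. `python -m paddle_serving_server.serve --model imdb_model --port 9393`.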
diff --git a/doc/bert-benchmark-batch-size-1.png b/doc/bert-benchmark-batch-size-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..73cccde83ab6f7163eb2280f99655f5378ebe261
Binary files /dev/null and b/doc/bert-benchmark-batch-size-1.png differ
diff --git a/doc/imdb-benchmark-server-16.png b/doc/imdb-benchmark-server-16.png
new file mode 100644
index 0000000000000000000000000000000000000000..9e39d257ad4e6e487c4d3d1c86230304f97e738e
Binary files /dev/null and b/doc/imdb-benchmark-server-16.png differ
diff --git a/python/examples/bert/README.md b/python/examples/bert/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..b92bf69ed8e1c6f5b9c732d387ff1173abcd215f
--- /dev/null
+++ b/python/examples/bert/README.md
@@ -0,0 +1,61 @@
+## Semantic Understanding Prediction Service
+
+This example uses a BERT model for semantic understanding prediction: input text is encoded into a vector representation that can be used for further analysis and prediction.
+
+### Get the model
+
+The example uses the [Chinese BERT model](https://www.paddlepaddle.org.cn/hubdetail?name=bert_chinese_L-12_H-768_A-12&en_category=SemanticModel) from [Paddlehub](https://github.com/PaddlePaddle/PaddleHub).
+Run
+```
+python prepare_model.py
+```
+to generate the server-side configuration and model files (stored in the serving_server_model folder)
+and the client-side configuration files (stored in the serving_client_conf folder).
+
+### Start the prediction service
+Run
+```
+python bert_server.py serving_server_model 9292 #start the CPU prediction service
+```
+or
+```
+python bert_gpu_server.py serving_server_model 9292 0 #start the GPU prediction service on GPU 0
+```
+
+### Run prediction
+
+Run
+```
+sh get_data.sh
+```
+to download the Chinese sample data.
+
+Run
+```
+head data-c.txt | python bert_client.py
+```
+to run prediction on the first ten samples of the data and print their vector representations to standard output.
+
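+The same prediction can also be issued programmatically. Below is a minimal sketch, assuming the service started above listens on 127.0.0.1:9292 and that prepare_model.py has generated serving_client_conf; it mirrors the test() helper in bert_client.py, and the Chinese sample sentence is only a placeholder:
+
+``` python
+from bert_client import BertService
+
+# build the BERT preprocessing + client wrapper used by this example
+bc = BertService(
+    model_name='bert_chinese_L-12_H-768_A-12',
+    max_seq_len=20,
+    show_ids=False,
+    do_lower_case=True)
+# load the client configuration generated by prepare_model.py and connect
+bc.load_client('./serving_client_conf/serving_client_conf.prototxt',
+               ["127.0.0.1:9292"])
+# fetch the sentence embedding of a single piece of text
+result = bc.run_general([["今天天气不错"]], ["pooled_output"])
+print(result)
+```
+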
+### Benchmark
+
+Model: bert_chinese_L-12_H-768_A-12
+
+Device: GPU V100 * 1
+
+Environment: CUDA 9.2, cudnn 7.1.4
+
+In the test, the 10,000 samples in the example data are replicated to 100,000 samples; each client thread sends 1/(number of threads) of the samples, with batch size 1 and max_seq_len 20. Times are in seconds.
+
+With 4 client threads, the prediction speed reaches 432 samples per second.
+Since a single GPU can only compute serially, adding client threads merely reduces the GPU idle time, so increasing the thread count beyond 4 does not further improve prediction speed.
+
+| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
+| ------------------ | ------ | ------------ | ----- | ------ | ---- | ------- | ------ |
+| 1 | 3.05 | 290.54 | 0.37 | 239.15 | 6.43 | 0.71 | 365.63 |
+| 4 | 0.85 | 213.66 | 0.091 | 200.39 | 1.62 | 0.2 | 231.45 |
+| 8 | 0.42 | 223.12 | 0.043 | 110.99 | 0.8 | 0.098 | 232.05 |
+| 12 | 0.32 | 225.26 | 0.029 | 73.87 | 0.53 | 0.078 | 231.45 |
+| 16 | 0.23 | 227.26 | 0.022 | 55.61 | 0.4 | 0.056 | 231.9 |
+
+The total time cost varies with the number of client threads as follows:
+![bert benchmark](../../../doc/bert-benchmark-batch-size-1.png)
diff --git a/python/examples/bert/benchmark.py b/python/examples/bert/benchmark.py
index 774956a324f1e7c45df0e246af4d580c570822d2..9e8af21cc622986e0cf8233a827341030900ff6b 100644
--- a/python/examples/bert/benchmark.py
+++ b/python/examples/bert/benchmark.py
@@ -17,7 +17,7 @@ from paddle_serving_client import Client
from paddle_serving_client.metric import auc
from paddle_serving_client.utils import MultiThreadRunner
import time
-from test_bert_client import BertService
+from bert_client import BertService
def predict(thr_id, resource):
@@ -55,7 +55,7 @@ if __name__ == '__main__':
thread_num = sys.argv[3]
resource = {}
resource["conf_file"] = conf_file
- resource["server_endpoint"] = ["127.0.0.1:9293"]
+ resource["server_endpoint"] = ["127.0.0.1:9292"]
resource["filelist"] = [data_file]
resource["thread_num"] = int(thread_num)
diff --git a/python/examples/bert/benchmark_batch.py b/python/examples/bert/benchmark_batch.py
index 8cf4b3a082d3472bc2130a0ed52184e75f165eb9..d8d310132c256e141d00ec66d4eaaa68cd61497c 100644
--- a/python/examples/bert/benchmark_batch.py
+++ b/python/examples/bert/benchmark_batch.py
@@ -17,7 +17,7 @@ from paddle_serving_client import Client
from paddle_serving_client.metric import auc
from paddle_serving_client.utils import MultiThreadRunner
import time
-from test_bert_client import BertService
+from bert_client import BertService
def predict(thr_id, resource, batch_size):
diff --git a/python/examples/bert/test_bert_client.py b/python/examples/bert/bert_client.py
similarity index 73%
rename from python/examples/bert/test_bert_client.py
rename to python/examples/bert/bert_client.py
index 47b5bf6c290f3ddb9341e0462eab7d197dfc4180..7ac7f4fd8676e853547aa05b99222820c764b5ab 100644
--- a/python/examples/bert/test_bert_client.py
+++ b/python/examples/bert/bert_client.py
@@ -1,9 +1,11 @@
# coding:utf-8
+import os
import sys
import numpy as np
import paddlehub as hub
import ujson
import random
+import time
from paddlehub.common.logger import logger
import socket
from paddle_serving_client import Client
@@ -20,29 +22,23 @@ if is_py3:
class BertService():
def __init__(self,
- profile=False,
max_seq_len=128,
model_name="bert_uncased_L-12_H-768_A-12",
show_ids=False,
do_lower_case=True,
process_id=0,
- retry=3,
- load_balance='round_robin'):
+ retry=3):
self.process_id = process_id
self.reader_flag = False
self.batch_size = 0
self.max_seq_len = max_seq_len
- self.profile = profile
self.model_name = model_name
self.show_ids = show_ids
self.do_lower_case = do_lower_case
- self.con_list = []
- self.con_index = 0
- self.load_balance = load_balance
- self.server_list = []
- self.serving_list = []
- self.feed_var_names = ''
self.retry = retry
+ self.pid = os.getpid()
+ self.profile = True if ("FLAGS_profile_client" in os.environ and
+ os.environ["FLAGS_profile_client"]) else False
module = hub.Module(name=self.model_name)
inputs, outputs, program = module.context(
@@ -51,7 +47,6 @@ class BertService():
position_ids = inputs["position_ids"]
segment_ids = inputs["segment_ids"]
input_mask = inputs["input_mask"]
- self.feed_var_names = input_ids.name + ';' + position_ids.name + ';' + segment_ids.name + ';' + input_mask.name
self.reader = hub.reader.ClassifyReader(
vocab_path=module.get_vocab_path(),
dataset=None,
@@ -69,6 +64,7 @@ class BertService():
data_generator = self.reader.data_generator(
batch_size=self.batch_size, phase='predict', data=text)
result = []
+ prepro_start = time.time()
for run_step, batch in enumerate(data_generator(), start=1):
token_list = batch[0][0].reshape(-1).tolist()
pos_list = batch[0][1].reshape(-1).tolist()
@@ -81,6 +77,12 @@ class BertService():
"segment_ids": sent_list,
"input_mask": mask_list
}
+ prepro_end = time.time()
+ if self.profile:
+ print("PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}".format(
+ self.pid,
+ int(round(prepro_start * 1000000)),
+ int(round(prepro_end * 1000000))))
fetch_map = self.client.predict(feed=feed, fetch=fetch)
return fetch_map
@@ -90,6 +92,7 @@ class BertService():
data_generator = self.reader.data_generator(
batch_size=self.batch_size, phase='predict', data=text)
result = []
+ prepro_start = time.time()
for run_step, batch in enumerate(data_generator(), start=1):
token_list = batch[0][0].reshape(-1).tolist()
pos_list = batch[0][1].reshape(-1).tolist()
@@ -108,33 +111,46 @@ class BertService():
mask_list[si * self.max_seq_len:(si + 1) * self.max_seq_len]
}
feed_batch.append(feed)
+ prepro_end = time.time()
+ if self.profile:
+ print("PROFILE\tpid:{}\tbert_pre_0:{} bert_pre_1:{}".format(
+ self.pid,
+ int(round(prepro_start * 1000000)),
+ int(round(prepro_end * 1000000))))
fetch_map_batch = self.client.batch_predict(
feed_batch=feed_batch, fetch=fetch)
return fetch_map_batch
def test():
-
bc = BertService(
- model_name='bert_uncased_L-12_H-768_A-12',
+ model_name='bert_chinese_L-12_H-768_A-12',
max_seq_len=20,
show_ids=False,
do_lower_case=True)
- server_addr = ["127.0.0.1:9293"]
+ server_addr = ["127.0.0.1:9292"]
config_file = './serving_client_conf/serving_client_conf.prototxt'
fetch = ["pooled_output"]
bc.load_client(config_file, server_addr)
- batch_size = 4
+ batch_size = 1
batch = []
for line in sys.stdin:
- if len(batch) < batch_size:
- batch.append([line.strip()])
+ if batch_size == 1:
+ result = bc.run_general([[line.strip()]], fetch)
+ print(result)
else:
- result = bc.run_batch_general(batch, fetch)
- batch = []
- for r in result:
- for e in r["pooled_output"]:
- print(e)
+ if len(batch) < batch_size:
+ batch.append([line.strip()])
+ else:
+ result = bc.run_batch_general(batch, fetch)
+ batch = []
+ for r in result:
+ print(r)
+ if len(batch) > 0:
+ result = bc.run_batch_general(batch, fetch)
+ batch = []
+ for r in result:
+ print(r)
if __name__ == '__main__':
diff --git a/python/examples/bert/test_gpu_server.py b/python/examples/bert/bert_gpu_server.py
similarity index 96%
rename from python/examples/bert/test_gpu_server.py
rename to python/examples/bert/bert_gpu_server.py
index 54459a12e88646555bd37b33441a3b50e2b0e62d..3fd64c345405e7ea031345f0694cce84029af385 100644
--- a/python/examples/bert/test_gpu_server.py
+++ b/python/examples/bert/bert_gpu_server.py
@@ -36,5 +36,7 @@ server.set_gpuid(1)
server.load_model_config(sys.argv[1])
port = int(sys.argv[2])
+gpuid = sys.argv[3]
+server.set_gpuid(gpuid)
server.prepare_server(workdir="work_dir1", port=port, device="gpu")
server.run_server()
diff --git a/python/examples/imdb/test_server.py b/python/examples/bert/bert_server.py
similarity index 100%
rename from python/examples/imdb/test_server.py
rename to python/examples/bert/bert_server.py
diff --git a/python/examples/bert/get_data.sh b/python/examples/bert/get_data.sh
new file mode 100644
index 0000000000000000000000000000000000000000..37174e725e22d4ae1ea000723a9e8f1a026b017d
--- /dev/null
+++ b/python/examples/bert/get_data.sh
@@ -0,0 +1 @@
+wget https://paddle-serving.bj.bcebos.com/bert_example/data-c.txt --no-check-certificate
diff --git a/python/examples/bert/test_server.py b/python/examples/bert/test_server.py
deleted file mode 100644
index 52b74b4622cfa3add6ad41678339924e3f9c3b0c..0000000000000000000000000000000000000000
--- a/python/examples/bert/test_server.py
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-from paddle_serving_server import OpMaker
-from paddle_serving_server import OpSeqMaker
-from paddle_serving_server import Server
-
-op_maker = OpMaker()
-read_op = op_maker.create('general_reader')
-general_infer_op = op_maker.create('general_infer')
-general_response_op = op_maker.create('general_response')
-
-op_seq_maker = OpSeqMaker()
-op_seq_maker.add_op(read_op)
-op_seq_maker.add_op(general_infer_op)
-op_seq_maker.add_op(general_response_op)
-
-server = Server()
-server.set_op_sequence(op_seq_maker.get_op_sequence())
-server.set_num_threads(4)
-server.set_local_bin(
- "~/github/Serving/build_server/core/general-server/serving")
-
-server.load_model_config(sys.argv[1])
-port = int(sys.argv[2])
-server.prepare_server(workdir="work_dir1", port=port, device="cpu")
-server.run_server()
diff --git a/python/examples/fit_a_line/benchmark.py b/python/examples/fit_a_line/benchmark.py
new file mode 100644
index 0000000000000000000000000000000000000000..b68d37e6e1e9166e01730427324c8743bd8dc08e
--- /dev/null
+++ b/python/examples/fit_a_line/benchmark.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paddle_serving_client import Client
+from paddle_serving_client.utils import MultiThreadRunner
+from paddle_serving_client.utils import benchmark_args
+import time
+import paddle
+import sys
+import requests
+
+args = benchmark_args()
+
+def single_func(idx, resource):
+ if args.request == "rpc":
+ client = Client()
+ client.load_client_config(args.model)
+ client.connect([args.endpoint])
+ train_reader = paddle.batch(paddle.reader.shuffle(
+ paddle.dataset.uci_housing.train(), buf_size=500), batch_size=1)
+ start = time.time()
+ for data in train_reader():
+ fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
+ end = time.time()
+ return [[end - start]]
+ elif args.request == "http":
+ train_reader = paddle.batch(paddle.reader.shuffle(
+ paddle.dataset.uci_housing.train(), buf_size=500), batch_size=1)
+ start = time.time()
+ for data in train_reader():
+            r = requests.post(
+                'http://{}/uci/prediction'.format(args.endpoint),
+                json={"x": data[0][0].tolist(), "fetch": ["price"]})
+ end = time.time()
+ return [[end - start]]
+
+multi_thread_runner = MultiThreadRunner()
+result = multi_thread_runner.run(single_func, args.thread, {})
+print(result)
diff --git a/python/examples/imdb/README.md b/python/examples/imdb/README.md
index a4225026f800797e462b6186567b3b677ad401dc..f54414d35a9f1642aa641be60848e8995a773a17 100644
--- a/python/examples/imdb/README.md
+++ b/python/examples/imdb/README.md
@@ -19,48 +19,12 @@ cat test.data | python test_client_batch.py inference.conf 4 > result
Device: Intel(R) Xeon(R) Gold 6271 CPU @ 2.60GHz * 48
-模型 :IMDB-CNN
-
-测试中,client共发送2500条测试样本,图中数据为单个线程的耗时,时间单位为秒
-
-server thread num :4
-
-| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
-| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
-| 1 | 0.99 | 27.39 | 0.085 | 19.92 | 0.046 | 0.032 | 29.84 |
-| 4 | 0.22 | 7.66 | 0.021 | 4.93 | 0.011 | 0.0082 | 8.28 |
-| 8 | 0.1 | 6.66 | 0.01 | 2.42 | 0.0038 | 0.0046 | 6.95 |
-| 12 | 0.074 | 6.87 | 0.0069 | 1.61 | 0.0059 | 0.0032 | 7.07 |
-| 16 | 0.056 | 7.01 | 0.0053 | 1.23 | 0.0029 | 0.0026 | 7.17 |
-| 20 | 0.045 | 7.02 | 0.0042 | 0.97 | 0.0023 | 0.002 | 7.15 |
-| 24 | 0.039 | 7.012 | 0.0034 | 0.8 | 0.0019 | 0.0016 | 7.12 |
-
-server thread num : 8
-
-| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
-| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
-| 1 | 1.02 | 28.9 | 0.096 | 20.64 | 0.047 | 0.036 | 31.51 |
-| 4 | 0.22 | 7.83 | 0.021 | 5.08 | 0.012 | 0.01 | 8.45 |
-| 8 | 0.11 | 4.44 | 0.01 | 2.5 | 0.0059 | 0.0051 | 4.73 |
-| 12 | 0.074 | 4.11 | 0.0069 | 1.65 | 0.0039 | 0.0029 | 4.31 |
-| 16 | 0.057 | 4.2 | 0.0052 | 1.24 | 0.0029 | 0.0024 | 4.35 |
-| 20 | 0.046 | 4.05 | 0.0043 | 1.01 | 0.0024 | 0.0021 | 4.18 |
-| 24 | 0.038 | 4.02 | 0.0034 | 0.81 | 0.0019 | 0.0015 | 4.13 |
-
-server thread num : 12
-
-| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
-| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
-| 1 | 1.02 | 29.47 | 0.098 | 20.95 | 0.048 | 0.038 | 31.96 |
-| 4 | 0.21 | 7.36 | 0.022 | 5.01 | 0.011 | 0.0081 | 7.95 |
-| 8 | 0.11 | 4.52 | 0.011 | 2.58 | 0.0061 | 0.0051 | 4.83 |
-| 12 | 0.072 | 3.25 | 0.0076 | 1.72 | 0.0042 | 0.0038 | 3.45 |
-| 16 | 0.059 | 3.93 | 0.0055 | 1.26 | 0.0029 | 0.0023 | 4.1 |
-| 20 | 0.047 | 3.79 | 0.0044 | 1.01 | 0.0024 | 0.0021 | 3.92 |
-| 24 | 0.041 | 3.76 | 0.0036 | 0.83 | 0.0019 | 0.0017 | 3.87 |
+Model: [CNN](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/imdb/nets.py)
server thread num : 16
+In the test, the client sends 25,000 test samples in total; the numbers in the table are the time cost of a single thread, in seconds. Multi-threaded clients predict noticeably faster than a single-threaded one: with 16 client threads the prediction speed is 8.7 times that of a single thread.
+
| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
| 1 | 1.09 | 28.79 | 0.094 | 20.59 | 0.047 | 0.034 | 31.41 |
@@ -71,26 +35,6 @@ server thread num : 16
| 20 | 0.049 | 3.77 | 0.0047 | 1.03 | 0.0025 | 0.0022 | 3.91 |
| 24 | 0.041 | 3.86 | 0.0039 | 0.85 | 0.002 | 0.0017 | 3.98 |
-server thread num : 20
-
-| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
-| ------------------ | ------ | ------------ | ------ | ----- | ------ | ------- | ----- |
-| 1 | 1.03 | 28.42 | 0.085 | 20.47 | 0.046 | 0.037 | 30.98 |
-| 4 | 0.22 | 7.94 | 0.022 | 5.33 | 0.012 | 0.011 | 8.53 |
-| 8 | 0.11 | 4.54 | 0.01 | 2.58 | 0.006 | 0.0046 | 4.84 |
-| 12 | 0.079 | 4.54 | 0.0076 | 1.78 | 0.0042 | 0.0039 | 4.76 |
-| 16 | 0.059 | 3.41 | 0.0057 | 1.33 | 0.0032 | 0.0027 | 3.58 |
-| 20 | 0.051 | 4.33 | 0.0047 | 1.06 | 0.0025 | 0.0023 | 4.48 |
-| 24 | 0.043 | 4.51 | 0.004 | 0.88 | 0.0021 | 0.0018 | 4.63 |
-
-server thread num :24
+The total prediction time cost varies as follows:
-| client thread num | prepro | client infer | op0 | op1 | op2 | postpro | total |
-| ------------------ | ------ | ------------ | ------ | ---- | ------ | ------- | ----- |
-| 1 | 0.93 | 29.28 | 0.099 | 20.5 | 0.048 | 0.028 | 31.61 |
-| 4 | 0.22 | 7.72 | 0.023 | 4.98 | 0.011 | 0.0095 | 8.33 |
-| 8 | 0.11 | 4.77 | 0.012 | 2.65 | 0.0062 | 0.0049 | 5.09 |
-| 12 | 0.081 | 4.22 | 0.0078 | 1.77 | 0.0042 | 0.0033 | 4.44 |
-| 16 | 0.062 | 4.21 | 0.0061 | 1.34 | 0.0032 | 0.0026 | 4.39 |
-| 20 | 0.5 | 3.58 | 0.005 | 1.07 | 0.0026 | 0.0023 | 3.72 |
-| 24 | 0.043 | 4.27 | 0.0042 | 0.89 | 0.0022 | 0.0018 | 4.4 |
+![total cost](../../../doc/imdb-benchmark-server-16.png)
diff --git a/python/examples/imdb/benchmark.py b/python/examples/imdb/benchmark.py
index 1bec0057adebc59a0b4029766f4c22e227b28c1c..1254ed21fd8ff30acdb9e8192b26b7918da315bc 100644
--- a/python/examples/imdb/benchmark.py
+++ b/python/examples/imdb/benchmark.py
@@ -13,55 +13,45 @@
# limitations under the License.
import sys
+import time
+import requests
+from imdb_reader import IMDBDataset
from paddle_serving_client import Client
-from paddle_serving_client.metric import auc
from paddle_serving_client.utils import MultiThreadRunner
-import time
+from paddle_serving_client.utils import benchmark_args
+args = benchmark_args()
-def predict(thr_id, resource):
- client = Client()
- client.load_client_config(resource["conf_file"])
- client.connect(resource["server_endpoint"])
- thread_num = resource["thread_num"]
- file_list = resource["filelist"]
- line_id = 0
- prob = []
- label_list = []
- dataset = []
- for fn in file_list:
- fin = open(fn)
- for line in fin:
- if line_id % thread_num == thr_id - 1:
- group = line.strip().split()
- words = [int(x) for x in group[1:int(group[0])]]
- label = [int(group[-1])]
- feed = {"words": words, "label": label}
- dataset.append(feed)
- line_id += 1
- fin.close()
-
+def single_func(idx, resource):
+ imdb_dataset = IMDBDataset()
+ imdb_dataset.load_resource(args.vocab)
+ filelist_fn = args.filelist
+ filelist = []
start = time.time()
- fetch = ["acc", "cost", "prediction"]
- for inst in dataset:
- fetch_map = client.predict(feed=inst, fetch=fetch)
- prob.append(fetch_map["prediction"][1])
- label_list.append(label[0])
+ with open(filelist_fn) as fin:
+ for line in fin:
+ filelist.append(line.strip())
+ filelist = filelist[idx::args.thread]
+ if args.request == "rpc":
+ client = Client()
+ client.load_client_config(args.model)
+ client.connect([args.endpoint])
+ for fn in filelist:
+ fin = open(fn)
+ for line in fin:
+ word_ids, label = imdb_dataset.get_words_and_label(line)
+ fetch_map = client.predict(feed={"words": word_ids},
+ fetch=["prediction"])
+ elif args.request == "http":
+ for fn in filelist:
+ fin = open(fn)
+ for line in fin:
+ word_ids, label = imdb_dataset.get_words_and_label(line)
+ r = requests.post("http://{}/imdb/prediction".format(args.endpoint),
+ data={"words": word_ids})
end = time.time()
- client.release()
- return [prob, label_list, [end - start]]
-
-
-if __name__ == '__main__':
- conf_file = sys.argv[1]
- data_file = sys.argv[2]
- resource = {}
- resource["conf_file"] = conf_file
- resource["server_endpoint"] = ["127.0.0.1:9293"]
- resource["filelist"] = [data_file]
- resource["thread_num"] = int(sys.argv[3])
-
- thread_runner = MultiThreadRunner()
- result = thread_runner.run(predict, int(sys.argv[3]), resource)
+ return [[end - start]]
- print("total time {} s".format(sum(result[-1]) / len(result[-1])))
+multi_thread_runner = MultiThreadRunner()
+result = multi_thread_runner.run(single_func, args.thread, {})
+print(result)
diff --git a/python/examples/imdb/get_data.sh b/python/examples/imdb/get_data.sh
index 87aaa7a6c7913f63cccc16e7666e23fa9392616c..81d8d5d3b018f133c41e211d1501cf3cd9a3d8a4 100644
--- a/python/examples/imdb/get_data.sh
+++ b/python/examples/imdb/get_data.sh
@@ -1,4 +1,4 @@
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz
tar -zxvf text_classification_data.tar.gz
-#wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imdb-demo%2Fimdb.tar.gz
-#tar -xzf imdb-demo%2Fimdb.tar.gz
+tar -zxvf imdb_model.tar.gz
diff --git a/python/examples/imdb/imdb_reader.py b/python/examples/imdb/imdb_reader.py
index def7ce2197bfd24bc4f17f97e5e4a1aa541bcabc..cad28ab2cdcc2983bb74bc721ec837b0b6e5fe88 100644
--- a/python/examples/imdb/imdb_reader.py
+++ b/python/examples/imdb/imdb_reader.py
@@ -30,6 +30,14 @@ class IMDBDataset(dg.MultiSlotDataGenerator):
self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
+ def get_words_only(self, line):
+        sent = line.lower().replace("<br />", " ").strip()
+ words = [x for x in self._pattern.split(sent) if x and x != " "]
+ feas = [
+ self._vocab[x] if x in self._vocab else self._unk_id for x in words
+ ]
+ return feas
+
def get_words_and_label(self, line):
        send = '|'.join(line.split('|')[:-1]).lower().replace("<br />",
                                                              " ").strip()
diff --git a/python/examples/imdb/imdb_web_service_demo.sh b/python/examples/imdb/imdb_web_service_demo.sh
index e0db60a5e065b079d63cc1f67ab1cdc8d63a9e84..0b69a3532b7e8a924c5b19eb7e483e63226ee945 100644
--- a/python/examples/imdb/imdb_web_service_demo.sh
+++ b/python/examples/imdb/imdb_web_service_demo.sh
@@ -1,4 +1,4 @@
-wget https://paddle-serving.bj.bcebos.com/imdb-demo%2Fimdb_service.tar.gz
+wget https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_service.tar.gz
tar -xzf imdb_service.tar.gz
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
tar -zxvf text_classification_data.tar.gz
diff --git a/python/examples/imdb/local_train.py b/python/examples/imdb/local_train.py
index b4cad7f465d840c08949da9cf968f131dfc1f93f..53692298a2b4caefb92f0a686bd020feaf3fa168 100644
--- a/python/examples/imdb/local_train.py
+++ b/python/examples/imdb/local_train.py
@@ -49,8 +49,9 @@ if __name__ == "__main__":
dataset.set_batch_size(128)
dataset.set_filelist(filelist)
dataset.set_thread(10)
- from nets import bow_net
- avg_cost, acc, prediction = bow_net(data, label, dict_dim)
+ from nets import lstm_net
+ model_name = "imdb_lstm"
+ avg_cost, acc, prediction = lstm_net(data, label, dict_dim)
optimizer = fluid.optimizer.SGD(learning_rate=0.01)
optimizer.minimize(avg_cost)
@@ -65,6 +66,7 @@ if __name__ == "__main__":
program=fluid.default_main_program(), dataset=dataset, debug=False)
logger.info("TRAIN --> pass: {}".format(i))
if i == 5:
- serving_io.save_model("imdb_model", "imdb_client_conf",
+ serving_io.save_model("{}_model".format(model_name),
+ "{}_client_conf".format(model_name),
{"words": data}, {"prediction": prediction},
fluid.default_main_program())
diff --git a/python/examples/imdb/test_client.py b/python/examples/imdb/test_client.py
index 935fec52a4b36007da511eb9db48259e3bb181f0..bb0b9790669173abf2761a5d900dce15d91a5d71 100644
--- a/python/examples/imdb/test_client.py
+++ b/python/examples/imdb/test_client.py
@@ -1,15 +1,33 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from paddle_serving_client import Client
+from imdb_reader import IMDBDataset
import sys
client = Client()
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9393"])
+# You can feed any English sentence or dataset here.
+# This example reuses the imdb reader from training; you
+# can easily define your own data preprocessing instead.
+imdb_dataset = IMDBDataset()
+imdb_dataset.load_resource(sys.argv[2])
+
for line in sys.stdin:
- group = line.strip().split()
- words = [int(x) for x in group[1:int(group[0]) + 1]]
- label = [int(group[-1])]
- feed = {"words": words, "label": label}
+ word_ids, label = imdb_dataset.get_words_and_label(line)
+ feed = {"words": word_ids, "label": label}
fetch = ["acc", "cost", "prediction"]
fetch_map = client.predict(feed=feed, fetch=fetch)
print("{} {}".format(fetch_map["prediction"][1], label[0]))
diff --git a/python/examples/imdb/test_client_multithread.py b/python/examples/imdb/test_client_multithread.py
deleted file mode 100644
index 62b2250f21de28e3c6137a5aebe43fd1027b72c6..0000000000000000000000000000000000000000
--- a/python/examples/imdb/test_client_multithread.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle_serving_client import Client
-import sys
-import subprocess
-from multiprocessing import Pool
-import time
-
-
-def predict(p_id, p_size, data_list):
- client = Client()
- client.load_client_config(conf_file)
- client.connect(["127.0.0.1:8010"])
- result = []
- for line in data_list:
- group = line.strip().split()
- words = [int(x) for x in group[1:int(group[0])]]
- label = [int(group[-1])]
- feed = {"words": words, "label": label}
- fetch = ["acc", "cost", "prediction"]
- fetch_map = client.predict(feed=feed, fetch=fetch)
- #print("{} {}".format(fetch_map["prediction"][1], label[0]))
- result.append([fetch_map["prediction"][1], label[0]])
- return result
-
-
-def predict_multi_thread(p_num):
- data_list = []
- with open(data_file) as f:
- for line in f.readlines():
- data_list.append(line)
- start = time.time()
- p = Pool(p_num)
- p_size = len(data_list) / p_num
- result_list = []
- for i in range(p_num):
- result_list.append(
- p.apply_async(predict,
- [i, p_size, data_list[i * p_size:(i + 1) * p_size]]))
- p.close()
- p.join()
- for i in range(p_num):
- result = result_list[i].get()
- for j in result:
- print("{} {}".format(j[0], j[1]))
- cost = time.time() - start
- print("{} threads cost {}".format(p_num, cost))
-
-
-if __name__ == '__main__':
- conf_file = sys.argv[1]
- data_file = sys.argv[2]
- p_num = int(sys.argv[3])
- predict_multi_thread(p_num)
diff --git a/python/examples/imdb/test_gpu_server.py b/python/examples/imdb/test_gpu_server.py
deleted file mode 100644
index d8b313eedb58cbbb9c5a2045e889cb1372b9d276..0000000000000000000000000000000000000000
--- a/python/examples/imdb/test_gpu_server.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-from paddle_serving_server_gpu import OpMaker
-from paddle_serving_server_gpu import OpSeqMaker
-from paddle_serving_server_gpu import Server
-
-op_maker = OpMaker()
-read_op = op_maker.create('general_reader')
-general_infer_op = op_maker.create('general_infer')
-
-op_seq_maker = OpSeqMaker()
-op_seq_maker.add_op(read_op)
-op_seq_maker.add_op(general_infer_op)
-
-server = Server()
-server.set_op_sequence(op_seq_maker.get_op_sequence())
-server.set_num_threads(12)
-server.load_model_config(sys.argv[1])
-port = int(sys.argv[2])
-server.prepare_server(workdir="work_dir1", port=port, device="gpu")
-server.run_server()
diff --git a/python/examples/imdb/text_classify_service.py b/python/examples/imdb/text_classify_service.py
index 0576d2901803386ad4e0172a1442fd62e3d17b42..8a6836f0646c5d2bbeeb672c28f16486f6bdd8c2 100755
--- a/python/examples/imdb/text_classify_service.py
+++ b/python/examples/imdb/text_classify_service.py
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-#!flask/bin/python
from paddle_serving_server.web_service import WebService
from imdb_reader import IMDBDataset
import sys
@@ -27,7 +26,7 @@ class IMDBService(WebService):
if "words" not in feed:
exit(-1)
res_feed = {}
- res_feed["words"] = self.dataset.get_words_and_label(feed["words"])[0]
+        res_feed["words"] = self.dataset.get_words_only(feed["words"])
return res_feed, fetch
imdb_service = IMDBService(name="imdb")
diff --git a/python/examples/util/README.md b/python/examples/util/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..94dbd5639221273912f8b47c512d860f91015803
--- /dev/null
+++ b/python/examples/util/README.md
@@ -0,0 +1,23 @@
+## Using the Timeline tool
+
+The serving framework has built-in timing instrumentation for every stage of the prediction service; it is switched on through environment variables.
+```
+export FLAGS_profile_client=1 #enable per-stage timing on the client side
+export FLAGS_profile_server=1 #enable per-stage timing on the server side
+```
+Once enabled, the client prints the corresponding log lines to standard output during prediction.
+
+To show the time cost of each stage more intuitively, scripts are provided to further analyze and process the log file.
+
+First save the client output to a file, named profile in this example.
+```
+python show_profile.py profile ${thread_num}
+```
+This script computes the time cost of each stage, averages it over the number of threads, and prints the result to standard output.
+
+```
+python timeline_trace.py profile trace
+```
+This script converts the timing information in the log into json format and saves it to the trace file, which can then be visualized with the tracing feature of the Chrome browser.
+
+To do so, open the Chrome browser, enter chrome://tracing/ in the address bar, click the load button on the tracing page, and open the saved trace file to visualize the timing of each stage of the prediction service.
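+
+A client-side profile line written to standard output has the layout below; the field names follow the client logging code in core/general-client/src/general_model.cpp, timestamps are in microseconds, and the sample values are made up for illustration. The snippet is a minimal sketch of how such a line decomposes into (event, timestamp) pairs:
+
+``` python
+# hypothetical PROFILE line; the layout follows the client-side logging code
+line = ("PROFILE\tpid:1234\t"
+        "prepro_0:1583740000000000 prepro_1:1583740000000300 "
+        "client_infer_0:1583740000000400 client_infer_1:1583740000003000 "
+        "postpro_0:1583740000003100 postpro_1:1583740000003200")
+
+tag, pid_str, time_str = line.split("\t")  # "PROFILE", "pid:...", event list
+pid = int(pid_str.split(":")[1])           # process id recorded by the client
+for event in time_str.split(" "):
+    name, ts = event.split(":")
+    print(pid, name, int(ts))              # e.g. 1234 prepro_0 1583740000000000
+```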
diff --git a/python/examples/util/timeline_trace.py b/python/examples/util/timeline_trace.py
index e8bffedfbde3d0ff00d398172303da6d91d04b61..144c21cb4458cf8f73fa9e198617b735970897bd 100644
--- a/python/examples/util/timeline_trace.py
+++ b/python/examples/util/timeline_trace.py
@@ -5,8 +5,9 @@ import sys
profile_file = sys.argv[1]
-def prase(line, counter):
- event_list = line.split(" ")
+def prase(pid_str, time_str, counter):
+ pid = pid_str.split(":")[1]
+ event_list = time_str.split(" ")
trace_list = []
for event in event_list:
name, ts = event.split(":")
@@ -19,7 +20,7 @@ def prase(line, counter):
event_dict = {}
event_dict["name"] = name
event_dict["tid"] = 0
- event_dict["pid"] = 0
+ event_dict["pid"] = pid
event_dict["ts"] = ts
event_dict["ph"] = ph
@@ -36,7 +37,7 @@ if __name__ == "__main__":
for line in f.readlines():
line = line.strip().split("\t")
if line[0] == "PROFILE":
- trace_list = prase(line[1], counter)
+ trace_list = prase(line[1], line[2], counter)
counter += 1
for trace in trace_list:
all_list.append(trace)
diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py
index f86dab617cc59a400bc915b2d497f112335f3bab..bea80f84bc9d29cabe4f31af612c694980b71d09 100644
--- a/python/paddle_serving_client/__init__.py
+++ b/python/paddle_serving_client/__init__.py
@@ -74,10 +74,11 @@ class Client(object):
self.fetch_names_ = []
self.client_handle_ = None
self.result_handle_ = None
- self.feed_shapes_ = []
+ self.feed_shapes_ = {}
self.feed_types_ = {}
self.feed_names_to_idx_ = {}
self.rpath()
+ self.pid = os.getpid()
def rpath(self):
lib_path = os.path.dirname(paddle_serving_client.__file__)
@@ -85,7 +86,6 @@ class Client(object):
lib_path = os.path.join(lib_path, 'lib')
os.popen('patchelf --set-rpath {} {}'.format(lib_path, client_path))
-
def load_client_config(self, path):
from .serving_client import PredictorClient
from .serving_client import PredictorRes
@@ -106,13 +106,23 @@ class Client(object):
0]] + ["--tryfromenv=" + ",".join(read_env_flags)])
self.feed_names_ = [var.alias_name for var in model_conf.feed_var]
self.fetch_names_ = [var.alias_name for var in model_conf.fetch_var]
- self.feed_shapes_ = [var.shape for var in model_conf.feed_var]
self.feed_names_to_idx_ = {}
self.fetch_names_to_type_ = {}
self.fetch_names_to_idx_ = {}
+ self.lod_tensor_set = set()
+ self.feed_tensor_len = {}
for i, var in enumerate(model_conf.feed_var):
self.feed_names_to_idx_[var.alias_name] = i
self.feed_types_[var.alias_name] = var.feed_type
+ self.feed_shapes_[var.alias_name] = var.shape
+
+ if var.is_lod_tensor:
+ self.lod_tensor_set.add(var.alias_name)
+ else:
+ counter = 1
+ for dim in self.feed_shapes_[var.alias_name]:
+ counter *= dim
+ self.feed_tensor_len[var.alias_name] = counter
for i, var in enumerate(model_conf.fetch_var):
self.fetch_names_to_idx_[var.alias_name] = i
@@ -128,9 +138,8 @@ class Client(object):
predictor_sdk.set_server_endpoints(endpoints)
sdk_desc = predictor_sdk.gen_desc()
print(sdk_desc)
- self.client_handle_.create_predictor_by_desc(
- sdk_desc.SerializeToString())
-
+ self.client_handle_.create_predictor_by_desc(sdk_desc.SerializeToString(
+ ))
def get_feed_names(self):
return self.feed_names_
@@ -138,13 +147,23 @@ class Client(object):
def get_fetch_names(self):
return self.fetch_names_
+ def shape_check(self, feed, key):
+ seq_shape = 1
+ if key in self.lod_tensor_set:
+ return
+ if len(feed[key]) != self.feed_tensor_len[key]:
+            raise SystemExit(
+                "The shape of feed tensor {} does not match.".format(key))
+
def predict(self, feed={}, fetch=[]):
int_slot = []
float_slot = []
int_feed_names = []
float_feed_names = []
fetch_names = []
+
for key in feed:
+ self.shape_check(feed, key)
if key not in self.feed_names_:
continue
if self.feed_types_[key] == int_type:
@@ -158,16 +177,18 @@ class Client(object):
if key in self.fetch_names_:
fetch_names.append(key)
- ret = self.client_handle_.predict(
- float_slot, float_feed_names, int_slot,
- int_feed_names, fetch_names, self.result_handle_)
+ ret = self.client_handle_.predict(float_slot, float_feed_names,
+ int_slot, int_feed_names, fetch_names,
+ self.result_handle_, self.pid)
result_map = {}
for i, name in enumerate(fetch_names):
if self.fetch_names_to_type_[name] == int_type:
- result_map[name] = self.result_handle_.get_int64_by_name(name)[0]
+ result_map[name] = self.result_handle_.get_int64_by_name(name)[
+ 0]
elif self.fetch_names_to_type_[name] == float_type:
- result_map[name] = self.result_handle_.get_float_by_name(name)[0]
+ result_map[name] = self.result_handle_.get_float_by_name(name)[
+ 0]
return result_map
diff --git a/python/paddle_serving_client/utils/__init__.py b/python/paddle_serving_client/utils/__init__.py
index 67884cf89d6fc7c8ba20f966cbb0a3a32f098855..62642314fa8f7f0ece778e34ff78fe3b2f039dfa 100644
--- a/python/paddle_serving_client/utils/__init__.py
+++ b/python/paddle_serving_client/utils/__init__.py
@@ -11,16 +11,28 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-
+import os
import sys
import subprocess
+import argparse
from multiprocessing import Pool
+def benchmark_args():
+ parser = argparse.ArgumentParser("benchmark")
+    parser.add_argument("--thread", type=int, default=10, help="concurrency")
+ parser.add_argument("--model", type=str, default="", help="model for evaluation")
+ parser.add_argument("--endpoint", type=str, default="127.0.0.1:9292", help="endpoint of server")
+ parser.add_argument("--request", type=str, default="rpc", help="mode of service")
+ return parser.parse_args()
+
+
class MultiThreadRunner(object):
def __init__(self):
pass
def run(self, thread_func, thread_num, global_resource):
+ os.environ["http_proxy"] = ""
+ os.environ["https_proxy"] = ""
p = Pool(thread_num)
result_list = []
for i in range(thread_num):
diff --git a/python/paddle_serving_client/version.py b/python/paddle_serving_client/version.py
index 3513e4ce98ac9cd6655495d3bbaacb6943782837..ca18ebdf448561961e0f83d6786180482ff5a72a 100644
--- a/python/paddle_serving_client/version.py
+++ b/python/paddle_serving_client/version.py
@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
-serving_client_version = "0.1.1"
-serving_server_version = "0.1.0"
-module_proto_version = "0.1.0"
+serving_client_version = "0.1.2"
+serving_server_version = "0.1.2"
+module_proto_version = "0.1.2"
diff --git a/python/paddle_serving_server/version.py b/python/paddle_serving_server/version.py
index d67c284b10a57ec866bafcb4a1f28a2529d5073d..ca18ebdf448561961e0f83d6786180482ff5a72a 100644
--- a/python/paddle_serving_server/version.py
+++ b/python/paddle_serving_server/version.py
@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
-serving_client_version = "0.1.0"
-serving_server_version = "0.1.0"
-module_proto_version = "0.1.0"
+serving_client_version = "0.1.2"
+serving_server_version = "0.1.2"
+module_proto_version = "0.1.2"
diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a88797b285c0b168e52e54755da3b7ea5bad434
--- /dev/null
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -0,0 +1,77 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Usage:
+ Host a trained paddle model with one line command
+ Example:
+        python -m paddle_serving_server_gpu.serve --model ./serving_server_model --port 9292
+"""
+import argparse
+
+
+def parse_args():
+ parser = argparse.ArgumentParser("serve")
+ parser.add_argument(
+ "--thread", type=int, default=10, help="Concurrency of server")
+ parser.add_argument(
+ "--model", type=str, default="", help="Model for serving")
+ parser.add_argument(
+        "--port", type=int, default=9292, help="Port of the server")
+ parser.add_argument(
+ "--workdir",
+ type=str,
+ default="workdir",
+ help="Working dir of current service")
+ parser.add_argument(
+ "--device", type=str, default="gpu", help="Type of device")
+ parser.add_argument("--gpuid", type=int, default=0, help="Index of GPU")
+ return parser.parse_args()
+
+
+def start_standard_model():
+ args = parse_args()
+ thread_num = args.thread
+ model = args.model
+ port = args.port
+ workdir = args.workdir
+ device = args.device
+ gpuid = args.gpuid
+
+ if model == "":
+ print("You must specify your serving model")
+ exit(-1)
+
+ import paddle_serving_server_gpu as serving
+ op_maker = serving.OpMaker()
+ read_op = op_maker.create('general_reader')
+ general_infer_op = op_maker.create('general_infer')
+ general_response_op = op_maker.create('general_response')
+
+ op_seq_maker = serving.OpSeqMaker()
+ op_seq_maker.add_op(read_op)
+ op_seq_maker.add_op(general_infer_op)
+ op_seq_maker.add_op(general_response_op)
+
+ server = serving.Server()
+ server.set_op_sequence(op_seq_maker.get_op_sequence())
+ server.set_num_threads(thread_num)
+
+ server.load_model_config(model)
+ server.prepare_server(workdir=workdir, port=port, device=device)
+ server.set_gpuid(gpuid)
+ server.run_server()
+
+
+if __name__ == "__main__":
+ start_standard_model()
diff --git a/python/paddle_serving_server_gpu/version.py b/python/paddle_serving_server_gpu/version.py
index d67c284b10a57ec866bafcb4a1f28a2529d5073d..ca18ebdf448561961e0f83d6786180482ff5a72a 100644
--- a/python/paddle_serving_server_gpu/version.py
+++ b/python/paddle_serving_server_gpu/version.py
@@ -12,6 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
""" Paddle Serving Client version string """
-serving_client_version = "0.1.0"
-serving_server_version = "0.1.0"
-module_proto_version = "0.1.0"
+serving_client_version = "0.1.2"
+serving_server_version = "0.1.2"
+module_proto_version = "0.1.2"
diff --git a/python/paddle_serving_server_gpu/web_serve.py b/python/paddle_serving_server_gpu/web_serve.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7b44034797a8de75ca8dc5d97f7dc93c9671954
--- /dev/null
+++ b/python/paddle_serving_server_gpu/web_serve.py
@@ -0,0 +1,51 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""
+Usage:
+ Host a trained paddle model with one line command
+ Example:
+        python -m paddle_serving_server_gpu.web_serve --model ./serving_server_model --port 9292
+"""
+import argparse
+from multiprocessing import Pool, Process
+from .web_service import WebService
+
+
+def parse_args():
+ parser = argparse.ArgumentParser("web_serve")
+ parser.add_argument(
+ "--thread", type=int, default=10, help="Concurrency of server")
+ parser.add_argument(
+ "--model", type=str, default="", help="Model for serving")
+ parser.add_argument(
+        "--port", type=int, default=9292, help="Port of the server")
+ parser.add_argument(
+ "--workdir",
+ type=str,
+ default="workdir",
+ help="Working dir of current service")
+ parser.add_argument(
+ "--device", type=str, default="cpu", help="Type of device")
+ parser.add_argument(
+ "--name", type=str, default="default", help="Default service name")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ service = WebService(name=args.name)
+ service.load_model_config(args.model)
+ service.prepare_server(
+ workdir=args.workdir, port=args.port, device=args.device)
+ service.run_server()
diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py
new file mode 100755
index 0000000000000000000000000000000000000000..3f129a45853b02711f96953b0b902015d2f2d3e8
--- /dev/null
+++ b/python/paddle_serving_server_gpu/web_service.py
@@ -0,0 +1,94 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#!flask/bin/python
+from flask import Flask, request, abort
+from multiprocessing import Pool, Process
+from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server
+from paddle_serving_client import Client
+
+
+class WebService(object):
+ def __init__(self, name="default_service"):
+ self.name = name
+
+ def load_model_config(self, model_config):
+ self.model_config = model_config
+
+ def _launch_rpc_service(self):
+ op_maker = OpMaker()
+ read_op = op_maker.create('general_reader')
+ general_infer_op = op_maker.create('general_infer')
+ general_response_op = op_maker.create('general_response')
+ op_seq_maker = OpSeqMaker()
+ op_seq_maker.add_op(read_op)
+ op_seq_maker.add_op(general_infer_op)
+ op_seq_maker.add_op(general_response_op)
+ server = Server()
+ server.set_op_sequence(op_seq_maker.get_op_sequence())
+ server.set_num_threads(16)
+        server.set_gpuid(self.gpuid)
+ server.load_model_config(self.model_config)
+ server.prepare_server(
+ workdir=self.workdir, port=self.port + 1, device=self.device)
+ server.run_server()
+
+ def prepare_server(self, workdir="", port=9393, device="gpu", gpuid=0):
+ self.workdir = workdir
+ self.port = port
+ self.device = device
+ self.gpuid = gpuid
+
+ def _launch_web_service(self):
+ app_instance = Flask(__name__)
+ client_service = Client()
+ client_service.load_client_config(
+ "{}/serving_server_conf.prototxt".format(self.model_config))
+ client_service.connect(["127.0.0.1:{}".format(self.port + 1)])
+ service_name = "/" + self.name + "/prediction"
+
+ @app_instance.route(service_name, methods=['POST'])
+ def get_prediction():
+ if not request.json:
+ abort(400)
+ if "fetch" not in request.json:
+ abort(400)
+ feed, fetch = self.preprocess(request.json, request.json["fetch"])
+ fetch_map = client_service.predict(feed=feed, fetch=fetch)
+ fetch_map = self.postprocess(
+ feed=request.json, fetch=fetch, fetch_map=fetch_map)
+ return fetch_map
+
+ app_instance.run(host="127.0.0.1",
+ port=self.port,
+ threaded=False,
+ processes=1)
+
+ def run_server(self):
+ import socket
+ localIP = socket.gethostbyname(socket.gethostname())
+ print("web service address:")
+ print("http://{}:{}/{}/prediction".format(localIP, self.port,
+ self.name))
+ p_rpc = Process(target=self._launch_rpc_service)
+ p_web = Process(target=self._launch_web_service)
+ p_rpc.start()
+ p_web.start()
+ p_web.join()
+ p_rpc.join()
+
+ def preprocess(self, feed={}, fetch=[]):
+ return feed, fetch
+
+ def postprocess(self, feed={}, fetch=[], fetch_map={}):
+ return fetch_map