diff --git a/python/examples/imdb/benchmark.py b/python/examples/imdb/benchmark.py
index 1bec0057adebc59a0b4029766f4c22e227b28c1c..1254ed21fd8ff30acdb9e8192b26b7918da315bc 100644
--- a/python/examples/imdb/benchmark.py
+++ b/python/examples/imdb/benchmark.py
@@ -13,55 +13,45 @@
# limitations under the License.
import sys
+import time
+import requests
+from imdb_reader import IMDBDataset
from paddle_serving_client import Client
-from paddle_serving_client.metric import auc
from paddle_serving_client.utils import MultiThreadRunner
-import time
+from paddle_serving_client.utils import benchmark_args
+args = benchmark_args()
-def predict(thr_id, resource):
- client = Client()
- client.load_client_config(resource["conf_file"])
- client.connect(resource["server_endpoint"])
- thread_num = resource["thread_num"]
- file_list = resource["filelist"]
- line_id = 0
- prob = []
- label_list = []
- dataset = []
- for fn in file_list:
- fin = open(fn)
- for line in fin:
- if line_id % thread_num == thr_id - 1:
- group = line.strip().split()
- words = [int(x) for x in group[1:int(group[0])]]
- label = [int(group[-1])]
- feed = {"words": words, "label": label}
- dataset.append(feed)
- line_id += 1
- fin.close()
-
+def single_func(idx, resource):
+ imdb_dataset = IMDBDataset()
+ imdb_dataset.load_resource(args.vocab)
+ filelist_fn = args.filelist
+ filelist = []
start = time.time()
- fetch = ["acc", "cost", "prediction"]
- for inst in dataset:
- fetch_map = client.predict(feed=inst, fetch=fetch)
- prob.append(fetch_map["prediction"][1])
- label_list.append(label[0])
+ with open(filelist_fn) as fin:
+ for line in fin:
+ filelist.append(line.strip())
+ filelist = filelist[idx::args.thread]
+ if args.request == "rpc":
+ client = Client()
+ client.load_client_config(args.model)
+ client.connect([args.endpoint])
+ for fn in filelist:
+ fin = open(fn)
+ for line in fin:
+ word_ids, label = imdb_dataset.get_words_and_label(line)
+ fetch_map = client.predict(feed={"words": word_ids},
+ fetch=["prediction"])
+ elif args.request == "http":
+ for fn in filelist:
+ fin = open(fn)
+ for line in fin:
+ word_ids, label = imdb_dataset.get_words_and_label(line)
+ r = requests.post("http://{}/imdb/prediction".format(args.endpoint),
+ data={"words": word_ids})
end = time.time()
- client.release()
- return [prob, label_list, [end - start]]
-
-
-if __name__ == '__main__':
- conf_file = sys.argv[1]
- data_file = sys.argv[2]
- resource = {}
- resource["conf_file"] = conf_file
- resource["server_endpoint"] = ["127.0.0.1:9293"]
- resource["filelist"] = [data_file]
- resource["thread_num"] = int(sys.argv[3])
-
- thread_runner = MultiThreadRunner()
- result = thread_runner.run(predict, int(sys.argv[3]), resource)
+ return [[end - start]]
- print("total time {} s".format(sum(result[-1]) / len(result[-1])))
+multi_thread_runner = MultiThreadRunner()
+result = multi_thread_runner.run(single_func, args.thread, {})
+print(result)
diff --git a/python/examples/imdb/get_data.sh b/python/examples/imdb/get_data.sh
index 87aaa7a6c7913f63cccc16e7666e23fa9392616c..81d8d5d3b018f133c41e211d1501cf3cd9a3d8a4 100644
--- a/python/examples/imdb/get_data.sh
+++ b/python/examples/imdb/get_data.sh
@@ -1,4 +1,4 @@
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz
tar -zxvf text_classification_data.tar.gz
-#wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imdb-demo%2Fimdb.tar.gz
-#tar -xzf imdb-demo%2Fimdb.tar.gz
+tar -zxvf imdb_model.tar.gz
diff --git a/python/examples/imdb/imdb_reader.py b/python/examples/imdb/imdb_reader.py
index def7ce2197bfd24bc4f17f97e5e4a1aa541bcabc..cad28ab2cdcc2983bb74bc721ec837b0b6e5fe88 100644
--- a/python/examples/imdb/imdb_reader.py
+++ b/python/examples/imdb/imdb_reader.py
@@ -30,6 +30,14 @@ class IMDBDataset(dg.MultiSlotDataGenerator):
self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
+ def get_words_only(self, line):
+ sent = line.lower().replace("<br />", " ").strip()
+ words = [x for x in self._pattern.split(sent) if x and x != " "]
+ feas = [
+ self._vocab[x] if x in self._vocab else self._unk_id for x in words
+ ]
+ return feas
+
def get_words_and_label(self, line):
send = '|'.join(line.split('|')[:-1]).lower().replace("<br />", " ").strip()
diff --git a/python/examples/imdb/imdb_web_service_demo.sh b/python/examples/imdb/imdb_web_service_demo.sh
index e0db60a5e065b079d63cc1f67ab1cdc8d63a9e84..0b69a3532b7e8a924c5b19eb7e483e63226ee945 100644
--- a/python/examples/imdb/imdb_web_service_demo.sh
+++ b/python/examples/imdb/imdb_web_service_demo.sh
@@ -1,4 +1,4 @@
-wget https://paddle-serving.bj.bcebos.com/imdb-demo%2Fimdb_service.tar.gz
+wget https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_service.tar.gz
tar -xzf imdb_service.tar.gz
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
tar -zxvf text_classification_data.tar.gz
diff --git a/python/examples/imdb/local_train.py b/python/examples/imdb/local_train.py
index b4cad7f465d840c08949da9cf968f131dfc1f93f..53692298a2b4caefb92f0a686bd020feaf3fa168 100644
--- a/python/examples/imdb/local_train.py
+++ b/python/examples/imdb/local_train.py
@@ -49,8 +49,9 @@ if __name__ == "__main__":
dataset.set_batch_size(128)
dataset.set_filelist(filelist)
dataset.set_thread(10)
- from nets import bow_net
- avg_cost, acc, prediction = bow_net(data, label, dict_dim)
+ from nets import lstm_net
+ model_name = "imdb_lstm"
+ avg_cost, acc, prediction = lstm_net(data, label, dict_dim)
optimizer = fluid.optimizer.SGD(learning_rate=0.01)
optimizer.minimize(avg_cost)
@@ -65,6 +66,7 @@ if __name__ == "__main__":
program=fluid.default_main_program(), dataset=dataset, debug=False)
logger.info("TRAIN --> pass: {}".format(i))
if i == 5:
- serving_io.save_model("imdb_model", "imdb_client_conf",
+ serving_io.save_model("{}_model".format(model_name),
+ "{}_client_conf".format(model_name),
{"words": data}, {"prediction": prediction},
fluid.default_main_program())
diff --git a/python/examples/imdb/test_client.py b/python/examples/imdb/test_client.py
index 935fec52a4b36007da511eb9db48259e3bb181f0..bb0b9790669173abf2761a5d900dce15d91a5d71 100644
--- a/python/examples/imdb/test_client.py
+++ b/python/examples/imdb/test_client.py
@@ -1,15 +1,33 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
from paddle_serving_client import Client
+from imdb_reader import IMDBDataset
import sys
client = Client()
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9393"])
+# you can define any english sentence or dataset here
+# This example reuses imdb reader in training, you
+# can define your own data preprocessing easily.
+imdb_dataset = IMDBDataset()
+imdb_dataset.load_resource(sys.argv[2])
+
for line in sys.stdin:
- group = line.strip().split()
- words = [int(x) for x in group[1:int(group[0]) + 1]]
- label = [int(group[-1])]
- feed = {"words": words, "label": label}
+ word_ids, label = imdb_dataset.get_words_and_label(line)
+ feed = {"words": word_ids, "label": label}
fetch = ["acc", "cost", "prediction"]
fetch_map = client.predict(feed=feed, fetch=fetch)
print("{} {}".format(fetch_map["prediction"][1], label[0]))
diff --git a/python/examples/imdb/test_client_multithread.py b/python/examples/imdb/test_client_multithread.py
deleted file mode 100644
index 62b2250f21de28e3c6137a5aebe43fd1027b72c6..0000000000000000000000000000000000000000
--- a/python/examples/imdb/test_client_multithread.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from paddle_serving_client import Client
-import sys
-import subprocess
-from multiprocessing import Pool
-import time
-
-
-def predict(p_id, p_size, data_list):
- client = Client()
- client.load_client_config(conf_file)
- client.connect(["127.0.0.1:8010"])
- result = []
- for line in data_list:
- group = line.strip().split()
- words = [int(x) for x in group[1:int(group[0])]]
- label = [int(group[-1])]
- feed = {"words": words, "label": label}
- fetch = ["acc", "cost", "prediction"]
- fetch_map = client.predict(feed=feed, fetch=fetch)
- #print("{} {}".format(fetch_map["prediction"][1], label[0]))
- result.append([fetch_map["prediction"][1], label[0]])
- return result
-
-
-def predict_multi_thread(p_num):
- data_list = []
- with open(data_file) as f:
- for line in f.readlines():
- data_list.append(line)
- start = time.time()
- p = Pool(p_num)
- p_size = len(data_list) / p_num
- result_list = []
- for i in range(p_num):
- result_list.append(
- p.apply_async(predict,
- [i, p_size, data_list[i * p_size:(i + 1) * p_size]]))
- p.close()
- p.join()
- for i in range(p_num):
- result = result_list[i].get()
- for j in result:
- print("{} {}".format(j[0], j[1]))
- cost = time.time() - start
- print("{} threads cost {}".format(p_num, cost))
-
-
-if __name__ == '__main__':
- conf_file = sys.argv[1]
- data_file = sys.argv[2]
- p_num = int(sys.argv[3])
- predict_multi_thread(p_num)
diff --git a/python/examples/imdb/test_gpu_server.py b/python/examples/imdb/test_gpu_server.py
deleted file mode 100644
index d8b313eedb58cbbb9c5a2045e889cb1372b9d276..0000000000000000000000000000000000000000
--- a/python/examples/imdb/test_gpu_server.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-from paddle_serving_server_gpu import OpMaker
-from paddle_serving_server_gpu import OpSeqMaker
-from paddle_serving_server_gpu import Server
-
-op_maker = OpMaker()
-read_op = op_maker.create('general_reader')
-general_infer_op = op_maker.create('general_infer')
-
-op_seq_maker = OpSeqMaker()
-op_seq_maker.add_op(read_op)
-op_seq_maker.add_op(general_infer_op)
-
-server = Server()
-server.set_op_sequence(op_seq_maker.get_op_sequence())
-server.set_num_threads(12)
-server.load_model_config(sys.argv[1])
-port = int(sys.argv[2])
-server.prepare_server(workdir="work_dir1", port=port, device="gpu")
-server.run_server()
diff --git a/python/examples/imdb/test_server.py b/python/examples/imdb/test_server.py
deleted file mode 100644
index 35d38be0cac50b899b58085c7f103f32537859c4..0000000000000000000000000000000000000000
--- a/python/examples/imdb/test_server.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-from paddle_serving_server import OpMaker
-from paddle_serving_server import OpSeqMaker
-from paddle_serving_server import Server
-
-op_maker = OpMaker()
-read_op = op_maker.create('general_reader')
-general_infer_op = op_maker.create('general_infer')
-general_response_op = op_maker.create('general_response')
-
-op_seq_maker = OpSeqMaker()
-op_seq_maker.add_op(read_op)
-op_seq_maker.add_op(general_infer_op)
-op_seq_maker.add_op(general_response_op)
-
-server = Server()
-server.set_op_sequence(op_seq_maker.get_op_sequence())
-server.set_num_threads(4)
-
-server.load_model_config(sys.argv[1])
-port = int(sys.argv[2])
-server.prepare_server(workdir="work_dir1", port=port, device="cpu")
-server.run_server()
diff --git a/python/examples/imdb/text_classify_service.py b/python/examples/imdb/text_classify_service.py
index 0576d2901803386ad4e0172a1442fd62e3d17b42..8a6836f0646c5d2bbeeb672c28f16486f6bdd8c2 100755
--- a/python/examples/imdb/text_classify_service.py
+++ b/python/examples/imdb/text_classify_service.py
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
-#!flask/bin/python
from paddle_serving_server.web_service import WebService
from imdb_reader import IMDBDataset
import sys
@@ -27,7 +26,7 @@ class IMDBService(WebService):
if "words" not in feed:
exit(-1)
res_feed = {}
- res_feed["words"] = self.dataset.get_words_and_label(feed["words"])[0]
+ res_feed["words"] = self.dataset.get_words_only(feed["words"])[0]
return res_feed, fetch
imdb_service = IMDBService(name="imdb")