From c3b0b21367365e65a630d949a6c9887b01dc3cfe Mon Sep 17 00:00:00 2001 From: MRXLT Date: Fri, 13 Mar 2020 09:52:04 +0800 Subject: [PATCH] refine imdb demo --- python/examples/bert/benchmark.py | 3 +- python/examples/imdb/README.md | 30 ++++++++++++++----- python/examples/imdb/benchmark.py | 28 ++++++++++------- python/examples/imdb/local_train.py | 4 +-- python/examples/imdb/test_client.py | 4 +-- python/examples/imdb/text_classify_service.py | 5 ++-- 6 files changed, 47 insertions(+), 27 deletions(-) diff --git a/python/examples/bert/benchmark.py b/python/examples/bert/benchmark.py index 6bdeb76f..70954d27 100644 --- a/python/examples/bert/benchmark.py +++ b/python/examples/bert/benchmark.py @@ -58,7 +58,6 @@ def single_func(idx, resource): else: print("unsupport batch size {}".format(args.batch_size)) - end = time.time() elif args.request == "http": start = time.time() header = {"Content-Type": "application/json"} @@ -69,7 +68,7 @@ def single_func(idx, resource): idx % len(resource["endpoint"])]), data=json.dumps(dict_data), headers=header) - end = time.time() + end = time.time() return [[end - start]] diff --git a/python/examples/imdb/README.md b/python/examples/imdb/README.md index f54414d3..c7de4a83 100644 --- a/python/examples/imdb/README.md +++ b/python/examples/imdb/README.md @@ -1,18 +1,32 @@ -### 使用方法 +## IMDB评论情绪预测服务 -假设数据文件为test.data,配置文件为inference.conf -单进程client +### 获取模型文件和样例数据 + +``` +sh get_data.sh +``` +脚本会下载和解压出cnn、lstm和bow三种模型的配置文件以及test_data和train_data。 + +### 启动RPC预测服务 + ``` -cat test.data | python test_client.py inference.conf > result +python -m paddle_serving_server.serve --model imdb_bow_model/ --port 9292 ``` -多进程client,若进程数为4 +### 执行预测 ``` -python test_client_multithread.py inference.conf test.data 4 > result +head test_data/part-0 | python test_client.py imdb_lstm_client_conf/serving_client_conf.prototxt imdb.vocab ``` -batch clienit,若batch size为4 +预测test_data/part-0的前十个样例。 + +### 启动HTTP预测服务 +``` +python text_classify_service.py 
imdb_cnn_model/ workdir/ 9292 imdb.vocab +``` +### 执行预测 + ``` -cat test.data | python test_client_batch.py inference.conf 4 > result +curl -H "Content-Type:application/json" -X POST -d '{"words": "i am very sad | 0", "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction ``` ### Benchmark diff --git a/python/examples/imdb/benchmark.py b/python/examples/imdb/benchmark.py index 05459257..5d79f830 100644 --- a/python/examples/imdb/benchmark.py +++ b/python/examples/imdb/benchmark.py @@ -26,24 +26,30 @@ args = benchmark_args() def single_func(idx, resource): imdb_dataset = IMDBDataset() - imdb_dataset.load_resource(args.vocab) - filelist_fn = args.filelist - filelist = [] - start = time.time() - with open(filelist_fn) as fin: + imdb_dataset.load_resource("./imdb.vocab") + dataset = [] + with open("./test_data/part-0") as fin: for line in fin: - filelist.append(line.strip()) - filelist = filelist[idx::args.thread] + dataset.append(line.strip()) + start = time.time() if args.request == "rpc": client = Client() client.load_client_config(args.model) client.connect([args.endpoint]) - for fn in filelist: - fin = open(fn) - for line in fin: - word_ids, label = imdb_dataset.get_words_and_label(line) + for i in range(1000): + word_ids, label = imdb_dataset.get_words_and_label(line) + if args.batch_size == 1: fetch_map = client.predict( feed={"words": word_ids}, fetch=["prediction"]) + elif args.batch_size > 1: + feed_batch = [] + for bi in range(args.batch_size): + feed_batch.append({"words": word_ids}) + result = client.batch_predict( + feed_batch=feed_batch, fetch=["prediction"]) + else: + print("unsupport batch size {}".format(args.batch_size)) + elif args.request == "http": for fn in filelist: fin = open(fn) diff --git a/python/examples/imdb/local_train.py b/python/examples/imdb/local_train.py index b5b46073..3cb08af1 100644 --- a/python/examples/imdb/local_train.py +++ b/python/examples/imdb/local_train.py @@ -35,6 +35,8 @@ def load_vocab(filename): if __name__ == 
"__main__": + from nets import lstm_net + model_name = "imdb_lstm" vocab = load_vocab('imdb.vocab') dict_dim = len(vocab) @@ -50,8 +52,6 @@ if __name__ == "__main__": dataset.set_batch_size(128) dataset.set_filelist(filelist) dataset.set_thread(10) - from nets import lstm_net - model_name = "imdb_lstm" avg_cost, acc, prediction = lstm_net(data, label, dict_dim) optimizer = fluid.optimizer.SGD(learning_rate=0.01) optimizer.minimize(avg_cost) diff --git a/python/examples/imdb/test_client.py b/python/examples/imdb/test_client.py index a938de19..548a40e4 100644 --- a/python/examples/imdb/test_client.py +++ b/python/examples/imdb/test_client.py @@ -18,7 +18,7 @@ import sys client = Client() client.load_client_config(sys.argv[1]) -client.connect(["127.0.0.1:9393"]) +client.connect(["127.0.0.1:9292"]) # you can define any english sentence or dataset here # This example reuses imdb reader in training, you @@ -28,7 +28,7 @@ imdb_dataset.load_resource(sys.argv[2]) for line in sys.stdin: word_ids, label = imdb_dataset.get_words_and_label(line) - feed = {"words": word_ids, "label": label} + feed = {"words": word_ids} fetch = ["acc", "cost", "prediction"] fetch_map = client.predict(feed=feed, fetch=fetch) print("{} {}".format(fetch_map["prediction"][1], label[0])) diff --git a/python/examples/imdb/text_classify_service.py b/python/examples/imdb/text_classify_service.py index 33399360..bbf63bb0 100755 --- a/python/examples/imdb/text_classify_service.py +++ b/python/examples/imdb/text_classify_service.py @@ -35,6 +35,7 @@ class IMDBService(WebService): imdb_service = IMDBService(name="imdb") imdb_service.load_model_config(sys.argv[1]) -imdb_service.prepare_server(workdir=sys.argv[2], port=9393, device="cpu") -imdb_service.prepare_dict({"dict_file_path": sys.argv[3]}) +imdb_service.prepare_server( + workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu") +imdb_service.prepare_dict({"dict_file_path": sys.argv[4]}) imdb_service.run_server() -- GitLab