From c3b0b21367365e65a630d949a6c9887b01dc3cfe Mon Sep 17 00:00:00 2001
From: MRXLT <xlt2024@gmail.com>
Date: Fri, 13 Mar 2020 09:52:04 +0800
Subject: [PATCH] refine imdb demo

---
 python/examples/bert/benchmark.py             |  3 +-
 python/examples/imdb/README.md                | 30 ++++++++++++++-----
 python/examples/imdb/benchmark.py             | 28 ++++++++++-------
 python/examples/imdb/local_train.py           |  4 +--
 python/examples/imdb/test_client.py           |  4 +--
 python/examples/imdb/text_classify_service.py |  5 ++--
 6 files changed, 47 insertions(+), 27 deletions(-)

diff --git a/python/examples/bert/benchmark.py b/python/examples/bert/benchmark.py
index 6bdeb76f..70954d27 100644
--- a/python/examples/bert/benchmark.py
+++ b/python/examples/bert/benchmark.py
@@ -58,7 +58,6 @@ def single_func(idx, resource):
             else:
                 print("unsupport batch size {}".format(args.batch_size))
 
-        end = time.time()
     elif args.request == "http":
         start = time.time()
         header = {"Content-Type": "application/json"}
@@ -69,7 +68,7 @@ def single_func(idx, resource):
                     idx % len(resource["endpoint"])]),
                 data=json.dumps(dict_data),
                 headers=header)
-        end = time.time()
+    end = time.time()
     return [[end - start]]
 
 
diff --git a/python/examples/imdb/README.md b/python/examples/imdb/README.md
index f54414d3..c7de4a83 100644
--- a/python/examples/imdb/README.md
+++ b/python/examples/imdb/README.md
@@ -1,18 +1,32 @@
-### 使用方法
+## IMDB评论情绪预测服务
 
-假设数据文件为test.data，配置文件为inference.conf
 
-单进程client
+### 获取模型文件和样例数据
+
+```
+sh get_data.sh
+```
+脚本会下载和解压出cnn、lstm和bow三种模型的配置文件以及test_data和train_data。
+
+### 启动RPC预测服务
+
 ```
-cat test.data | python test_client.py inference.conf > result
+python -m paddle_serving_server.serve --model imdb_bow_model/ --port 9292
 ```
-多进程client，若进程数为4
+### 执行预测
 ```
-python test_client_multithread.py inference.conf test.data 4 > result
+head test_data/part-0 | python test_client.py imdb_lstm_client_conf/serving_client_conf.prototxt imdb.vocab
 ```
-batch clienit，若batch size为4
+预测test_data/part-0的前十个样例。
+
+### 启动HTTP预测服务
+```
+python text_classify_service.py imdb_cnn_model/ workdir/ 9292 imdb.vocab
+```
+### 执行预测
+
 ```
-cat test.data | python test_client_batch.py inference.conf 4 > result
+curl -H "Content-Type:application/json" -X POST -d '{"words": "i am very sad | 0", "fetch":["prediction"]}' http://127.0.0.1:9292/imdb/prediction
 ```
 
 ### Benchmark
diff --git a/python/examples/imdb/benchmark.py b/python/examples/imdb/benchmark.py
index 05459257..5d79f830 100644
--- a/python/examples/imdb/benchmark.py
+++ b/python/examples/imdb/benchmark.py
@@ -26,24 +26,30 @@ args = benchmark_args()
 
 def single_func(idx, resource):
     imdb_dataset = IMDBDataset()
-    imdb_dataset.load_resource(args.vocab)
-    filelist_fn = args.filelist
-    filelist = []
-    start = time.time()
-    with open(filelist_fn) as fin:
+    imdb_dataset.load_resource("./imdb.vocab")
+    dataset = []
+    with open("./test_data/part-0") as fin:
         for line in fin:
-            filelist.append(line.strip())
-    filelist = filelist[idx::args.thread]
+            dataset.append(line.strip())
+    start = time.time()
     if args.request == "rpc":
         client = Client()
         client.load_client_config(args.model)
         client.connect([args.endpoint])
-        for fn in filelist:
-            fin = open(fn)
-            for line in fin:
-                word_ids, label = imdb_dataset.get_words_and_label(line)
+        for i in range(1000):
+            word_ids, label = imdb_dataset.get_words_and_label(line)
+            if args.batch_size == 1:
                 fetch_map = client.predict(
                     feed={"words": word_ids}, fetch=["prediction"])
+            elif args.batch_size > 1:
+                feed_batch = []
+                for bi in range(args.batch_size):
+                    feed_batch.append({"words": word_ids})
+                result = client.batch_predict(
+                    feed_batch=feed_batch, fetch=["prediction"])
+            else:
+                print("unsupport batch size {}".format(args.batch_size))
+
     elif args.request == "http":
         for fn in filelist:
             fin = open(fn)
diff --git a/python/examples/imdb/local_train.py b/python/examples/imdb/local_train.py
index b5b46073..3cb08af1 100644
--- a/python/examples/imdb/local_train.py
+++ b/python/examples/imdb/local_train.py
@@ -35,6 +35,8 @@ def load_vocab(filename):
 
 
 if __name__ == "__main__":
+    from nets import lstm_net
+    model_name = "imdb_lstm"
     vocab = load_vocab('imdb.vocab')
     dict_dim = len(vocab)
 
@@ -50,8 +52,6 @@ if __name__ == "__main__":
     dataset.set_batch_size(128)
     dataset.set_filelist(filelist)
     dataset.set_thread(10)
-    from nets import lstm_net
-    model_name = "imdb_lstm"
     avg_cost, acc, prediction = lstm_net(data, label, dict_dim)
     optimizer = fluid.optimizer.SGD(learning_rate=0.01)
     optimizer.minimize(avg_cost)
diff --git a/python/examples/imdb/test_client.py b/python/examples/imdb/test_client.py
index a938de19..548a40e4 100644
--- a/python/examples/imdb/test_client.py
+++ b/python/examples/imdb/test_client.py
@@ -18,7 +18,7 @@ import sys
 
 client = Client()
 client.load_client_config(sys.argv[1])
-client.connect(["127.0.0.1:9393"])
+client.connect(["127.0.0.1:9292"])
 
 # you can define any english sentence or dataset here
 # This example reuses imdb reader in training, you
@@ -28,7 +28,7 @@ imdb_dataset.load_resource(sys.argv[2])
 
 for line in sys.stdin:
     word_ids, label = imdb_dataset.get_words_and_label(line)
-    feed = {"words": word_ids, "label": label}
+    feed = {"words": word_ids}
     fetch = ["acc", "cost", "prediction"]
     fetch_map = client.predict(feed=feed, fetch=fetch)
     print("{} {}".format(fetch_map["prediction"][1], label[0]))
diff --git a/python/examples/imdb/text_classify_service.py b/python/examples/imdb/text_classify_service.py
index 33399360..bbf63bb0 100755
--- a/python/examples/imdb/text_classify_service.py
+++ b/python/examples/imdb/text_classify_service.py
@@ -35,6 +35,7 @@ class IMDBService(WebService):
 
 imdb_service = IMDBService(name="imdb")
 imdb_service.load_model_config(sys.argv[1])
-imdb_service.prepare_server(workdir=sys.argv[2], port=9393, device="cpu")
-imdb_service.prepare_dict({"dict_file_path": sys.argv[3]})
+imdb_service.prepare_server(
+    workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu")
+imdb_service.prepare_dict({"dict_file_path": sys.argv[4]})
 imdb_service.run_server()
-- 
GitLab