From 5c4985a088bd3411a67f56e459bcaafa79e5199f Mon Sep 17 00:00:00 2001
From: guru4elephant
Date: Fri, 6 Mar 2020 14:45:12 +0800
Subject: [PATCH] refine imdb benchmark scripts and text_classify_service

---
Reviewer notes (free text in this position is not part of any hunk):
* imdb_reader.py: the "<br />" literal passed to replace() is written out
  explicitly in get_words_only() and in the get_words_and_label() context;
  in the collapsed copy of this patch it appeared as a raw line break
  inside the string literal.
* text_classify_service.py: get_words_only() returns the id list itself
  (it ends with "return feas"), so preprocess() must not index the result
  with [0]; doing so would feed only the first word id.
* Leading whitespace and blank context lines were reconstructed from the
  hunk headers; verify against the target tree before applying.

 python/examples/imdb/get_data.sh              |  4 +-
 python/examples/imdb/imdb_reader.py           |  8 ++++
 python/examples/imdb/imdb_web_service_demo.sh |  2 +-
 python/examples/imdb/local_train.py           |  8 ++--
 python/examples/imdb/test_gpu_server.py       | 35 -----------------
 python/examples/imdb/test_server.py           | 38 -------------------
 python/examples/imdb/text_classify_service.py |  3 +-
 7 files changed, 17 insertions(+), 81 deletions(-)
 delete mode 100644 python/examples/imdb/test_gpu_server.py
 delete mode 100644 python/examples/imdb/test_server.py

diff --git a/python/examples/imdb/get_data.sh b/python/examples/imdb/get_data.sh
index 87aaa7a6..81d8d5d3 100644
--- a/python/examples/imdb/get_data.sh
+++ b/python/examples/imdb/get_data.sh
@@ -1,4 +1,4 @@
 wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_model.tar.gz
 tar -zxvf text_classification_data.tar.gz
-#wget --no-check-certificate https://paddle-serving.bj.bcebos.com/imdb-demo%2Fimdb.tar.gz
-#tar -xzf imdb-demo%2Fimdb.tar.gz
+tar -zxvf imdb_model.tar.gz
diff --git a/python/examples/imdb/imdb_reader.py b/python/examples/imdb/imdb_reader.py
index def7ce21..cad28ab2 100644
--- a/python/examples/imdb/imdb_reader.py
+++ b/python/examples/imdb/imdb_reader.py
@@ -30,6 +30,14 @@ class IMDBDataset(dg.MultiSlotDataGenerator):
         self._pattern = re.compile(r'(;|,|\.|\?|!|\s|\(|\))')
         self.return_value = ("words", [1, 2, 3, 4, 5, 6]), ("label", [0])
 
+    def get_words_only(self, line):
+        sent = line.lower().replace("<br />", " ").strip()
+        words = [x for x in self._pattern.split(sent) if x and x != " "]
+        feas = [
+            self._vocab[x] if x in self._vocab else self._unk_id for x in words
+        ]
+        return feas
+
     def get_words_and_label(self, line):
         send = '|'.join(line.split('|')[:-1]).lower().replace("<br />",
                                                               " ").strip()
diff --git a/python/examples/imdb/imdb_web_service_demo.sh b/python/examples/imdb/imdb_web_service_demo.sh
index e0db60a5..0b69a353 100644
--- a/python/examples/imdb/imdb_web_service_demo.sh
+++ b/python/examples/imdb/imdb_web_service_demo.sh
@@ -1,4 +1,4 @@
-wget https://paddle-serving.bj.bcebos.com/imdb-demo%2Fimdb_service.tar.gz
+wget https://paddle-serving.bj.bcebos.com/imdb-demo/imdb_service.tar.gz
 tar -xzf imdb_service.tar.gz
 wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
 tar -zxvf text_classification_data.tar.gz
diff --git a/python/examples/imdb/local_train.py b/python/examples/imdb/local_train.py
index b4cad7f4..53692298 100644
--- a/python/examples/imdb/local_train.py
+++ b/python/examples/imdb/local_train.py
@@ -49,8 +49,9 @@ if __name__ == "__main__":
     dataset.set_batch_size(128)
     dataset.set_filelist(filelist)
     dataset.set_thread(10)
-    from nets import bow_net
-    avg_cost, acc, prediction = bow_net(data, label, dict_dim)
+    from nets import lstm_net
+    model_name = "imdb_lstm"
+    avg_cost, acc, prediction = lstm_net(data, label, dict_dim)
     optimizer = fluid.optimizer.SGD(learning_rate=0.01)
     optimizer.minimize(avg_cost)
 
@@ -65,6 +66,7 @@ if __name__ == "__main__":
             program=fluid.default_main_program(), dataset=dataset, debug=False)
         logger.info("TRAIN --> pass: {}".format(i))
         if i == 5:
-            serving_io.save_model("imdb_model", "imdb_client_conf",
+            serving_io.save_model("{}_model".format(model_name),
+                                  "{}_client_conf".format(model_name),
                                   {"words": data}, {"prediction": prediction},
                                   fluid.default_main_program())
diff --git a/python/examples/imdb/test_gpu_server.py b/python/examples/imdb/test_gpu_server.py
deleted file mode 100644
index d8b313ee..00000000
--- a/python/examples/imdb/test_gpu_server.py
+++ /dev/null
@@ -1,35 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-from paddle_serving_server_gpu import OpMaker
-from paddle_serving_server_gpu import OpSeqMaker
-from paddle_serving_server_gpu import Server
-
-op_maker = OpMaker()
-read_op = op_maker.create('general_reader')
-general_infer_op = op_maker.create('general_infer')
-
-op_seq_maker = OpSeqMaker()
-op_seq_maker.add_op(read_op)
-op_seq_maker.add_op(general_infer_op)
-
-server = Server()
-server.set_op_sequence(op_seq_maker.get_op_sequence())
-server.set_num_threads(12)
-server.load_model_config(sys.argv[1])
-port = int(sys.argv[2])
-server.prepare_server(workdir="work_dir1", port=port, device="gpu")
-server.run_server()
diff --git a/python/examples/imdb/test_server.py b/python/examples/imdb/test_server.py
deleted file mode 100644
index 35d38be0..00000000
--- a/python/examples/imdb/test_server.py
+++ /dev/null
@@ -1,38 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import sys
-from paddle_serving_server import OpMaker
-from paddle_serving_server import OpSeqMaker
-from paddle_serving_server import Server
-
-op_maker = OpMaker()
-read_op = op_maker.create('general_reader')
-general_infer_op = op_maker.create('general_infer')
-general_response_op = op_maker.create('general_response')
-
-op_seq_maker = OpSeqMaker()
-op_seq_maker.add_op(read_op)
-op_seq_maker.add_op(general_infer_op)
-op_seq_maker.add_op(general_response_op)
-
-server = Server()
-server.set_op_sequence(op_seq_maker.get_op_sequence())
-server.set_num_threads(4)
-
-server.load_model_config(sys.argv[1])
-port = int(sys.argv[2])
-server.prepare_server(workdir="work_dir1", port=port, device="cpu")
-server.run_server()
diff --git a/python/examples/imdb/text_classify_service.py b/python/examples/imdb/text_classify_service.py
index 0576d290..8a6836f0 100755
--- a/python/examples/imdb/text_classify_service.py
+++ b/python/examples/imdb/text_classify_service.py
@@ -11,7 +11,6 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#!flask/bin/python
 from paddle_serving_server.web_service import WebService
 from imdb_reader import IMDBDataset
 import sys
@@ -27,7 +26,7 @@ class IMDBService(WebService):
         if "words" not in feed:
             exit(-1)
         res_feed = {}
-        res_feed["words"] = self.dataset.get_words_and_label(feed["words"])[0]
+        res_feed["words"] = self.dataset.get_words_only(feed["words"])
         return res_feed, fetch
 
 imdb_service = IMDBService(name="imdb")
-- 
GitLab