提交 266b8eeb 编写于 作者: P peterzhang2029

refine notation

上级 e89af968
...@@ -76,7 +76,7 @@ python train.py ...@@ -76,7 +76,7 @@ python train.py
### 预测 ### 预测
训练结束后模型将存储在指定目录当中(默认models目录),在终端执行: 训练结束后模型将存储在指定目录当中(默认models目录),在终端执行:
```bash ```bash
python infer.py python infer.py --model_path 'models/params_pass_00000.tar.gz'
``` ```
默认情况下,预测脚本将加载训练一个pass的模型对 `imdb的测试集` 进行测试。 默认情况下,预测脚本将加载训练一个pass的模型对 `imdb的测试集` 进行测试。
...@@ -139,20 +139,21 @@ def train_reader(data_dir, word_dict): ...@@ -139,20 +139,21 @@ def train_reader(data_dir, word_dict):
`train.py`训练脚本中包含以下参数: `train.py`训练脚本中包含以下参数:
``` ```
Options: Options:
--train_data_dir TEXT path of training dataset (default: None). if this --train_data_dir TEXT The path of training dataset (default: None). If this
parameter is not set, imdb dataset will be used. parameter is not set, imdb dataset will be used.
--test_data_dir TEXT path of testing dataset (default: None). if this --test_data_dir TEXT The path of testing dataset (default: None). If this
parameter is not set, imdb dataset will be used. parameter is not set, imdb dataset will be used.
--word_dict_path TEXT path of word dictionary (default: None).if this --word_dict_path TEXT The path of word dictionary (default: None). If this
parameter is not set, imdb dataset will be used.if parameter is not set, imdb dataset will be used. If
this parameter is set, but the file does not exist, this parameter is set, but the file does not exist,
word dictionary will be built from the training data word dictionary will be built from the training data
automatically. automatically.
--class_num INTEGER class number (default: 2). --class_num INTEGER The class number (default: 2).
--batch_size INTEGER the number of training examples in one batch --batch_size INTEGER The number of training examples in one batch
(default: 32). (default: 32).
--num_passes INTEGER number of passes to train (default: 10). --num_passes INTEGER The number of passes to train (default: 10).
--model_save_dir TEXT path to save the trained models (default: 'models'). --model_save_dir TEXT The path to save the trained models (default:
'models').
--help Show this message and exit. --help Show this message and exit.
``` ```
...@@ -170,20 +171,20 @@ python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_da ...@@ -170,20 +171,20 @@ python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_da
``` ```
Options: Options:
--data_path TEXT path of data for inference (default: None). if this --data_path TEXT The path of data for inference (default: None). If
parameter is not set, imdb test dataset will be used. this parameter is not set, imdb test dataset will be
--model_path TEXT path of saved model. (default: used.
'models/params_pass_00000.tar.gz') --model_path TEXT The path of saved model. [required]
--word_dict_path TEXT path of word dictionary (default: None).if this --word_dict_path TEXT The path of word dictionary (default: None). If this
parameter is not set, imdb dataset will be used. parameter is not set, imdb dataset will be used.
--class_num INTEGER class number (default: 2). --class_num INTEGER The class number (default: 2).
--batch_size INTEGER the number of examples in one batch (default: 32). --batch_size INTEGER The number of examples in one batch (default: 32).
--help Show this message and exit. --help Show this message and exit.
``` ```
2.以`data`目录下的示例数据为例,在终端执行: 2.以`data`目录下的示例数据为例,在终端执行:
```bash ```bash
python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' --model_path 'models/params_pass_00000.tar.gz'
``` ```
即可对样例数据进行预测。 即可对样例数据进行预测。
...@@ -118,7 +118,7 @@ python train.py ...@@ -118,7 +118,7 @@ python train.py
### 预测 ### 预测
训练结束后模型将存储在指定目录当中(默认models目录),在终端执行: 训练结束后模型将存储在指定目录当中(默认models目录),在终端执行:
```bash ```bash
python infer.py python infer.py --model_path 'models/params_pass_00000.tar.gz'
``` ```
默认情况下,预测脚本将加载训练一个pass的模型对 `imdb的测试集` 进行测试。 默认情况下,预测脚本将加载训练一个pass的模型对 `imdb的测试集` 进行测试。
...@@ -181,20 +181,21 @@ def train_reader(data_dir, word_dict): ...@@ -181,20 +181,21 @@ def train_reader(data_dir, word_dict):
`train.py`训练脚本中包含以下参数: `train.py`训练脚本中包含以下参数:
``` ```
Options: Options:
--train_data_dir TEXT path of training dataset (default: None). if this --train_data_dir TEXT The path of training dataset (default: None). If this
parameter is not set, imdb dataset will be used. parameter is not set, imdb dataset will be used.
--test_data_dir TEXT path of testing dataset (default: None). if this --test_data_dir TEXT The path of testing dataset (default: None). If this
parameter is not set, imdb dataset will be used. parameter is not set, imdb dataset will be used.
--word_dict_path TEXT path of word dictionary (default: None).if this --word_dict_path TEXT The path of word dictionary (default: None). If this
parameter is not set, imdb dataset will be used.if parameter is not set, imdb dataset will be used. If
this parameter is set, but the file does not exist, this parameter is set, but the file does not exist,
word dictionary will be built from the training data word dictionary will be built from the training data
automatically. automatically.
--class_num INTEGER class number (default: 2). --class_num INTEGER The class number (default: 2).
--batch_size INTEGER the number of training examples in one batch --batch_size INTEGER The number of training examples in one batch
(default: 32). (default: 32).
--num_passes INTEGER number of passes to train (default: 10). --num_passes INTEGER The number of passes to train (default: 10).
--model_save_dir TEXT path to save the trained models (default: 'models'). --model_save_dir TEXT The path to save the trained models (default:
'models').
--help Show this message and exit. --help Show this message and exit.
``` ```
...@@ -212,20 +213,20 @@ python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_da ...@@ -212,20 +213,20 @@ python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_da
``` ```
Options: Options:
--data_path TEXT path of data for inference (default: None). if this --data_path TEXT The path of data for inference (default: None). If
parameter is not set, imdb test dataset will be used. this parameter is not set, imdb test dataset will be
--model_path TEXT path of saved model. (default: used.
'models/params_pass_00000.tar.gz') --model_path TEXT The path of saved model. [required]
--word_dict_path TEXT path of word dictionary (default: None).if this --word_dict_path TEXT The path of word dictionary (default: None). If this
parameter is not set, imdb dataset will be used. parameter is not set, imdb dataset will be used.
--class_num INTEGER class number (default: 2). --class_num INTEGER The class number (default: 2).
--batch_size INTEGER the number of examples in one batch (default: 32). --batch_size INTEGER The number of examples in one batch (default: 32).
--help Show this message and exit. --help Show this message and exit.
``` ```
2.以`data`目录下的示例数据为例,在终端执行: 2.以`data`目录下的示例数据为例,在终端执行:
```bash ```bash
python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' --model_path 'models/params_pass_00000.tar.gz'
``` ```
即可对样例数据进行预测。 即可对样例数据进行预测。
......
...@@ -14,28 +14,24 @@ from utils import logger, load_dict ...@@ -14,28 +14,24 @@ from utils import logger, load_dict
@click.option( @click.option(
"--data_path", "--data_path",
default=None, default=None,
help=("path of data for inference (default: None). " help=("The path of data for inference (default: None). "
"if this parameter is not set, " "If this parameter is not set, "
"imdb test dataset will be used.")) "imdb test dataset will be used."))
@click.option( @click.option(
"--model_path", "--model_path", type=str, required=True, help="The path of saved model.")
type=str,
default='models/params_pass_00000.tar.gz',
help=("path of saved model. "
"(default: 'models/params_pass_00000.tar.gz')"))
@click.option( @click.option(
"--word_dict_path", "--word_dict_path",
type=str, type=str,
default=None, default=None,
help=("path of word dictionary (default: None)." help=("The path of word dictionary (default: None). "
"if this parameter is not set, imdb dataset will be used.")) "If this parameter is not set, imdb dataset will be used."))
@click.option( @click.option(
"--class_num", type=int, default=2, help="class number (default: 2).") "--class_num", type=int, default=2, help="The class number (default: 2).")
@click.option( @click.option(
"--batch_size", "--batch_size",
type=int, type=int,
default=32, default=32,
help="the number of examples in one batch (default: 32).") help="The number of examples in one batch (default: 32).")
def infer(data_path, model_path, word_dict_path, batch_size, class_num): def infer(data_path, model_path, word_dict_path, batch_size, class_num):
def _infer_a_batch(inferer, test_batch, ids_2_word): def _infer_a_batch(inferer, test_batch, ids_2_word):
probs = inferer.infer(input=test_batch, field=["value"]) probs = inferer.infer(input=test_batch, field=["value"])
...@@ -49,8 +45,8 @@ def infer(data_path, model_path, word_dict_path, batch_size, class_num): ...@@ -49,8 +45,8 @@ def infer(data_path, model_path, word_dict_path, batch_size, class_num):
" ".join(["{:0.4f}".format(p) " ".join(["{:0.4f}".format(p)
for p in prob]), word_text)) for p in prob]), word_text))
assert os.path.exists(model_path), "the trained model does not exist." assert os.path.exists(model_path), "The trained model does not exist."
logger.info("begin to predict...") logger.info("Begin to predict...")
use_default_data = (data_path is None) use_default_data = (data_path is None)
if use_default_data: if use_default_data:
...@@ -61,7 +57,7 @@ def infer(data_path, model_path, word_dict_path, batch_size, class_num): ...@@ -61,7 +57,7 @@ def infer(data_path, model_path, word_dict_path, batch_size, class_num):
class_num = 2 class_num = 2
else: else:
assert os.path.exists( assert os.path.exists(
word_dict_path), "the word dictionary file does not exist" word_dict_path), "The word dictionary file does not exist"
word_dict = load_dict(word_dict_path) word_dict = load_dict(word_dict_path)
word_reverse_dict = dict((value, key) word_reverse_dict = dict((value, key)
......
...@@ -7,8 +7,6 @@ def cnn_cov_group(group_input, hidden_size): ...@@ -7,8 +7,6 @@ def cnn_cov_group(group_input, hidden_size):
conv4 = paddle.networks.sequence_conv_pool( conv4 = paddle.networks.sequence_conv_pool(
input=group_input, context_len=4, hidden_size=hidden_size) input=group_input, context_len=4, hidden_size=hidden_size)
#output_group = paddle.layer.concat(input=[conv3, conv4])
output_group = paddle.layer.fc( output_group = paddle.layer.fc(
input=[conv3, conv4], input=[conv3, conv4],
size=hidden_size, size=hidden_size,
......
...@@ -14,42 +14,42 @@ from utils import build_dict, load_dict, logger ...@@ -14,42 +14,42 @@ from utils import build_dict, load_dict, logger
@click.option( @click.option(
"--train_data_dir", "--train_data_dir",
default=None, default=None,
help=("path of training dataset (default: None). " help=("The path of training dataset (default: None). "
"if this parameter is not set, " "If this parameter is not set, "
"imdb dataset will be used.")) "imdb dataset will be used."))
@click.option( @click.option(
"--test_data_dir", "--test_data_dir",
default=None, default=None,
help=("path of testing dataset (default: None). " help=("The path of testing dataset (default: None). "
"if this parameter is not set, " "If this parameter is not set, "
"imdb dataset will be used.")) "imdb dataset will be used."))
@click.option( @click.option(
"--word_dict_path", "--word_dict_path",
type=str, type=str,
default=None, default=None,
help=("path of word dictionary (default: None)." help=("The path of word dictionary (default: None). "
"if this parameter is not set, imdb dataset will be used." "If this parameter is not set, imdb dataset will be used. "
"if this parameter is set, but the file does not exist, " "If this parameter is set, but the file does not exist, "
"word dictionay will be built from " "word dictionay will be built from "
"the training data automatically.")) "the training data automatically."))
@click.option( @click.option(
"--class_num", type=int, default=2, help="class number (default: 2).") "--class_num", type=int, default=2, help="The class number (default: 2).")
@click.option( @click.option(
"--batch_size", "--batch_size",
type=int, type=int,
default=32, default=32,
help=("the number of training examples in one batch " help=("The number of training examples in one batch "
"(default: 32).")) "(default: 32)."))
@click.option( @click.option(
"--num_passes", "--num_passes",
type=int, type=int,
default=10, default=10,
help="number of passes to train (default: 10).") help="The number of passes to train (default: 10).")
@click.option( @click.option(
"--model_save_dir", "--model_save_dir",
type=str, type=str,
default="models", default="models",
help="path to save the trained models (default: 'models').") help="The path to save the trained models (default: 'models').")
def train(train_data_dir, test_data_dir, word_dict_path, class_num, def train(train_data_dir, test_data_dir, word_dict_path, class_num,
model_save_dir, batch_size, num_passes): model_save_dir, batch_size, num_passes):
""" """
...@@ -70,7 +70,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, ...@@ -70,7 +70,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
:type num_pass: int :type num_pass: int
""" """
if train_data_dir is not None: if train_data_dir is not None:
assert word_dict_path, ("the parameter train_data_dir, word_dict_path " assert word_dict_path, ("The parameter train_data_dir, word_dict_path "
"should be set at the same time.") "should be set at the same time.")
if not os.path.exists(model_save_dir): if not os.path.exists(model_save_dir):
...@@ -81,7 +81,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, ...@@ -81,7 +81,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
if use_default_data: if use_default_data:
logger.info(("No training data are porivided, " logger.info(("No training data are porivided, "
"use imdb to train the model.")) "use imdb to train the model."))
logger.info("please wait to build the word dictionary ...") logger.info("Please wait to build the word dictionary ...")
word_dict = reader.imdb_word_dict() word_dict = reader.imdb_word_dict()
...@@ -94,7 +94,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, ...@@ -94,7 +94,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
class_num = 2 class_num = 2
else: else:
if word_dict_path is None or not os.path.exists(word_dict_path): if word_dict_path is None or not os.path.exists(word_dict_path):
logger.info(("word dictionary is not given, the dictionary " logger.info(("Word dictionary is not given, the dictionary "
"is automatically built from the training data.")) "is automatically built from the training data."))
# build the word dictionary to map the original string-typed # build the word dictionary to map the original string-typed
...@@ -107,7 +107,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, ...@@ -107,7 +107,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
word_dict = load_dict(word_dict_path) word_dict = load_dict(word_dict_path)
class_num = class_num class_num = class_num
logger.info("class number is : %d." % class_num) logger.info("Class number is : %d." % class_num)
train_reader = paddle.batch( train_reader = paddle.batch(
paddle.reader.shuffle( paddle.reader.shuffle(
...@@ -129,7 +129,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num, ...@@ -129,7 +129,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
emb_size = 28 emb_size = 28
hidden_size = 128 hidden_size = 128
logger.info("length of word dictionary is : %d." % (dict_dim)) logger.info("Length of word dictionary is : %d." % (dict_dim))
paddle.init(use_gpu=True, trainer_count=4) paddle.init(use_gpu=True, trainer_count=4)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册