Commit 266b8eeb authored by P peterzhang2029

refine notation

Parent e89af968
......@@ -76,7 +76,7 @@ python train.py
### Inference
After training, the model is saved in the specified directory (the `models` directory by default). Run the following in a terminal:
```bash
python infer.py
python infer.py --model_path 'models/params_pass_00000.tar.gz'
```
By default, the inference script loads the model trained for one pass and evaluates it on the `imdb` test set.
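For reference, the snippet below is a minimal sketch of what loading such a saved archive involves with the PaddlePaddle v2 API; it only loads the parameters produced by `train.py` and leaves building the network topology and running predictions to `infer.py`.

```python
import gzip

import paddle.v2 as paddle

# Minimal sketch: initialize PaddlePaddle and load the parameters that
# train.py saved after the first pass. Building the network topology and
# running predictions is handled by infer.py itself.
paddle.init(use_gpu=False, trainer_count=1)
with gzip.open('models/params_pass_00000.tar.gz') as f:
    parameters = paddle.parameters.Parameters.from_tar(f)
```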
......@@ -139,20 +139,21 @@ def train_reader(data_dir, word_dict):
The `train.py` training script accepts the following options:
```
Options:
--train_data_dir TEXT path of training dataset (default: None). if this
--train_data_dir TEXT The path of training dataset (default: None). If this
parameter is not set, imdb dataset will be used.
--test_data_dir TEXT path of testing dataset (default: None). if this
--test_data_dir TEXT The path of testing dataset (default: None). If this
parameter is not set, imdb dataset will be used.
--word_dict_path TEXT path of word dictionary (default: None).if this
parameter is not set, imdb dataset will be used.if
--word_dict_path TEXT The path of word dictionary (default: None). If this
parameter is not set, imdb dataset will be used. If
this parameter is set, but the file does not exist,
word dictionary will be built from the training data
automatically.
--class_num INTEGER class number (default: 2).
--batch_size INTEGER the number of training examples in one batch
--class_num INTEGER The class number (default: 2).
--batch_size INTEGER The number of training examples in one batch
(default: 32).
--num_passes INTEGER number of passes to train (default: 10).
--model_save_dir TEXT path to save the trained models (default: 'models').
--num_passes INTEGER The number of passes to train (default: 10).
--model_save_dir TEXT The path to save the trained models (default:
'models').
--help Show this message and exit.
```
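The `--word_dict_path` behaviour described above (the dictionary is built from the training data when the given file does not exist) is handled by the repository's `build_dict` helper; the sketch below is only a simplified, hypothetical illustration of that idea using `collections.Counter`, not the actual implementation.

```python
import collections


def build_word_dict(corpus_path, dict_path, cutoff=1):
    """Simplified illustration: count whitespace-separated tokens in the
    training corpus and write one word per line, most frequent first."""
    counter = collections.Counter()
    with open(corpus_path) as corpus:
        for line in corpus:
            counter.update(line.strip().split())
    with open(dict_path, 'w') as dict_file:
        for word, freq in counter.most_common():
            if freq >= cutoff:
                dict_file.write(word + '\n')
```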
......@@ -170,20 +171,20 @@ python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_da
```
Options:
--data_path TEXT path of data for inference (default: None). if this
parameter is not set, imdb test dataset will be used.
--model_path TEXT path of saved model. (default:
'models/params_pass_00000.tar.gz')
--word_dict_path TEXT path of word dictionary (default: None).if this
--data_path TEXT The path of data for inference (default: None). If
this parameter is not set, imdb test dataset will be
used.
--model_path TEXT The path of saved model. [required]
--word_dict_path TEXT The path of word dictionary (default: None). If this
parameter is not set, imdb dataset will be used.
--class_num INTEGER class number (default: 2).
--batch_size INTEGER the number of examples in one batch (default: 32).
--class_num INTEGER The class number (default: 2).
--batch_size INTEGER The number of examples in one batch (default: 32).
--help Show this message and exit.
```
2. Taking the sample data in the `data` directory as an example, run the following in a terminal:
```bash
python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt'
python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' --model_path 'models/params_pass_00000.tar.gz'
```
This will run inference on the sample data.
......@@ -118,7 +118,7 @@ python train.py
### Inference
After training, the model is saved in the specified directory (the `models` directory by default). Run the following in a terminal:
```bash
python infer.py
python infer.py --model_path 'models/params_pass_00000.tar.gz'
```
By default, the inference script loads the model trained for one pass and evaluates it on the `imdb` test set.
......@@ -181,20 +181,21 @@ def train_reader(data_dir, word_dict):
The `train.py` training script accepts the following options:
```
Options:
--train_data_dir TEXT path of training dataset (default: None). if this
--train_data_dir TEXT The path of training dataset (default: None). If this
parameter is not set, imdb dataset will be used.
--test_data_dir TEXT path of testing dataset (default: None). if this
--test_data_dir TEXT The path of testing dataset (default: None). If this
parameter is not set, imdb dataset will be used.
--word_dict_path TEXT path of word dictionary (default: None).if this
parameter is not set, imdb dataset will be used.if
--word_dict_path TEXT The path of word dictionary (default: None). If this
parameter is not set, imdb dataset will be used. If
this parameter is set, but the file does not exist,
word dictionary will be built from the training data
automatically.
--class_num INTEGER class number (default: 2).
--batch_size INTEGER the number of training examples in one batch
--class_num INTEGER The class number (default: 2).
--batch_size INTEGER The number of training examples in one batch
(default: 32).
--num_passes INTEGER number of passes to train (default: 10).
--model_save_dir TEXT path to save the trained models (default: 'models').
--num_passes INTEGER The number of passes to train (default: 10).
--model_save_dir TEXT The path to save the trained models (default:
'models').
--help Show this message and exit.
```
......@@ -212,20 +213,20 @@ python train.py --train_data_dir 'data/train_data' --test_data_dir 'data/test_da
```
Options:
--data_path TEXT path of data for inference (default: None). if this
parameter is not set, imdb test dataset will be used.
--model_path TEXT path of saved model. (default:
'models/params_pass_00000.tar.gz')
--word_dict_path TEXT path of word dictionary (default: None).if this
--data_path TEXT The path of data for inference (default: None). If
this parameter is not set, imdb test dataset will be
used.
--model_path TEXT The path of saved model. [required]
--word_dict_path TEXT The path of word dictionary (default: None). If this
parameter is not set, imdb dataset will be used.
--class_num INTEGER class number (default: 2).
--batch_size INTEGER the number of examples in one batch (default: 32).
--class_num INTEGER The class number (default: 2).
--batch_size INTEGER The number of examples in one batch (default: 32).
--help Show this message and exit.
```
2. Taking the sample data in the `data` directory as an example, run the following in a terminal:
```bash
python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt'
python infer.py --data_path 'data/infer.txt' --word_dict_path 'dict.txt' --model_path 'models/params_pass_00000.tar.gz'
```
This will run inference on the sample data.
......
......@@ -14,28 +14,24 @@ from utils import logger, load_dict
@click.option(
"--data_path",
default=None,
help=("path of data for inference (default: None). "
"if this parameter is not set, "
help=("The path of data for inference (default: None). "
"If this parameter is not set, "
"imdb test dataset will be used."))
@click.option(
"--model_path",
type=str,
default='models/params_pass_00000.tar.gz',
help=("path of saved model. "
"(default: 'models/params_pass_00000.tar.gz')"))
"--model_path", type=str, required=True, help="The path of saved model.")
@click.option(
"--word_dict_path",
type=str,
default=None,
help=("path of word dictionary (default: None)."
"if this parameter is not set, imdb dataset will be used."))
help=("The path of word dictionary (default: None). "
"If this parameter is not set, imdb dataset will be used."))
@click.option(
"--class_num", type=int, default=2, help="class number (default: 2).")
"--class_num", type=int, default=2, help="The class number (default: 2).")
@click.option(
"--batch_size",
type=int,
default=32,
help="the number of examples in one batch (default: 32).")
help="The number of examples in one batch (default: 32).")
def infer(data_path, model_path, word_dict_path, batch_size, class_num):
def _infer_a_batch(inferer, test_batch, ids_2_word):
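# field=["value"] asks the PaddlePaddle v2 Inference engine for the raw
# output-layer values, i.e. the per-class probabilities of each example
# in the batch.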
probs = inferer.infer(input=test_batch, field=["value"])
......@@ -49,8 +45,8 @@ def infer(data_path, model_path, word_dict_path, batch_size, class_num):
" ".join(["{:0.4f}".format(p)
for p in prob]), word_text))
assert os.path.exists(model_path), "the trained model does not exist."
logger.info("begin to predict...")
assert os.path.exists(model_path), "The trained model does not exist."
logger.info("Begin to predict...")
use_default_data = (data_path is None)
if use_default_data:
......@@ -61,7 +57,7 @@ def infer(data_path, model_path, word_dict_path, batch_size, class_num):
class_num = 2
else:
assert os.path.exists(
word_dict_path), "the word dictionary file does not exist"
word_dict_path), "The word dictionary file does not exist"
word_dict = load_dict(word_dict_path)
word_reverse_dict = dict((value, key)
......
......@@ -7,8 +7,6 @@ def cnn_cov_group(group_input, hidden_size):
conv4 = paddle.networks.sequence_conv_pool(
input=group_input, context_len=4, hidden_size=hidden_size)
#output_group = paddle.layer.concat(input=[conv3, conv4])
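# paddle.layer.fc accepts a list of inputs, so feeding conv3 and conv4 to the
# fully connected layer below is equivalent to concatenating them first and
# projecting the result; the explicit concat above is therefore unnecessary.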
output_group = paddle.layer.fc(
input=[conv3, conv4],
size=hidden_size,
......
......@@ -14,42 +14,42 @@ from utils import build_dict, load_dict, logger
@click.option(
"--train_data_dir",
default=None,
help=("path of training dataset (default: None). "
"if this parameter is not set, "
help=("The path of training dataset (default: None). "
"If this parameter is not set, "
"imdb dataset will be used."))
@click.option(
"--test_data_dir",
default=None,
help=("path of testing dataset (default: None). "
"if this parameter is not set, "
help=("The path of testing dataset (default: None). "
"If this parameter is not set, "
"imdb dataset will be used."))
@click.option(
"--word_dict_path",
type=str,
default=None,
help=("path of word dictionary (default: None)."
"if this parameter is not set, imdb dataset will be used."
"if this parameter is set, but the file does not exist, "
help=("The path of word dictionary (default: None). "
"If this parameter is not set, imdb dataset will be used. "
"If this parameter is set, but the file does not exist, "
"word dictionay will be built from "
"the training data automatically."))
@click.option(
"--class_num", type=int, default=2, help="class number (default: 2).")
"--class_num", type=int, default=2, help="The class number (default: 2).")
@click.option(
"--batch_size",
type=int,
default=32,
help=("the number of training examples in one batch "
help=("The number of training examples in one batch "
"(default: 32)."))
@click.option(
"--num_passes",
type=int,
default=10,
help="number of passes to train (default: 10).")
help="The number of passes to train (default: 10).")
@click.option(
"--model_save_dir",
type=str,
default="models",
help="path to save the trained models (default: 'models').")
help="The path to save the trained models (default: 'models').")
def train(train_data_dir, test_data_dir, word_dict_path, class_num,
model_save_dir, batch_size, num_passes):
"""
......@@ -70,7 +70,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
:type num_pass: int
"""
if train_data_dir is not None:
assert word_dict_path, ("the parameter train_data_dir, word_dict_path "
assert word_dict_path, ("The parameter train_data_dir, word_dict_path "
"should be set at the same time.")
if not os.path.exists(model_save_dir):
......@@ -81,7 +81,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
if use_default_data:
logger.info(("No training data are porivided, "
"use imdb to train the model."))
logger.info("please wait to build the word dictionary ...")
logger.info("Please wait to build the word dictionary ...")
word_dict = reader.imdb_word_dict()
......@@ -94,7 +94,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
class_num = 2
else:
if word_dict_path is None or not os.path.exists(word_dict_path):
logger.info(("word dictionary is not given, the dictionary "
logger.info(("Word dictionary is not given, the dictionary "
"is automatically built from the training data."))
# build the word dictionary to map the original string-typed
......@@ -107,7 +107,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
word_dict = load_dict(word_dict_path)
class_num = class_num
logger.info("class number is : %d." % class_num)
logger.info("Class number is : %d." % class_num)
train_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -129,7 +129,7 @@ def train(train_data_dir, test_data_dir, word_dict_path, class_num,
emb_size = 28
hidden_size = 128
logger.info("length of word dictionary is : %d." % (dict_dim))
logger.info("Length of word dictionary is : %d." % (dict_dim))
paddle.init(use_gpu=True, trainer_count=4)
......